可以循环检测:
ok=""
s1=instr(1,all,"<")
s2=instr(s1+1,all,">")
while s1>0 and s2<len(all)
ok=ok &mid(all,s1+1,s2-s1-1)
s1=instr(s2+1,all,"<")
s2=instr(s1+1,all,">")
wend
应该这样就可以搞定所有标签了。
用这个函数吧Function RemoveHTML( strText )
Dim TAGLIST
TAGLIST = "!--!DOCTYPEAACRONYMADDRESSAPPLETAREABBASEBASEFONT" &_
"BGSOUNDBIGBLOCKQUOTEBODYBRBUTTONCAPTIONCENTERCITECODE" &_
"COLCOLGROUPCOMMENTDDDELDFNDIRDIVDLDTEMEMBEDFIELDSET" &_
"FONTFORMFRAMEFRAMESETHEADH1H2H3H4H5H6HRHTMLIIFRAMEIMG" &_
"INPUTINSISINDEXKBDLABELLAYERLAGENDLILINKLISTINGMAPMARQUEE" &_
"MENUMETANOBRNOFRAMESNOSCRIPTOBJECTOLOPTIONPPARAMPLAINTEXT" &_
"PREQSSAMPSCRIPTSELECTSMALLSPANSTRIKESTRONGSTYLESUBSUP" &_
"TABLETBODYTDTEXTAREATFOOTTHTHEADTITLETRTTUULVARWBRXMP"
Const BLOCKTAGLIST = "APPLETEMBEDFRAMESETHEADNOFRAMESNOSCRIPTOBJECTSCRIPTSTYLE"
Dim nPos1
Dim nPos2
Dim nPos3
Dim strResult
Dim strTagName
Dim bRemove
Dim bSearchForBlock
nPos1 = InStr(strText, "<")
Do While nPos1 >0
nPos2 = InStr(nPos1 + 1, strText, ">")
If nPos2 >0 Then
strTagName = Mid(strText, nPos1 + 1, nPos2 - nPos1 - 1)
strTagName = Replace(Replace(strTagName, vbCr, " "), vbLf, " ")
nPos3 = InStr(strTagName, " ")
If nPos3 >0 Then
strTagName = Left(strTagName, nPos3 - 1)
End If
If Left(strTagName, 1) = "/" Then
strTagName = Mid(strTagName, 2)
bSearchForBlock = False
Else
bSearchForBlock = True
End If
If InStr(1, TAGLIST, "" &strTagName &"", vbTextCompare) >0 Then
bRemove = True
If bSearchForBlock Then
If InStr(1, BLOCKTAGLIST, "" &strTagName &"", vbTextCompare) >0 Then
nPos2 = Len(strText)
nPos3 = InStr(nPos1 + 1, strText, "</" &strTagName, vbTextCompare)
If nPos3 >0 Then
nPos3 = InStr(nPos3 + 1, strText, ">")
End If
If nPos3 >0 Then
nPos2 = nPos3
End If
End If
End If
Else
bRemove = False
End If
If bRemove Then
strResult = strResult &Left(strText, nPos1 - 1)
strText = Mid(strText, nPos2 + 1)
Else
strResult = strResult &Left(strText, nPos1)
strText = Mid(strText, nPos1 + 1)
End If
Else
strResult = strResult &strText
strText = ""
End If
nPos1 = InStr(strText, "<")
Loop
strResult = strResult &strText
RemoveHTML = strResult
End Function
不需要移除的标签可以从TAGLIST中删除
工程>引用>Microsoft VBScript Regular Expressions 5.5'新建窗体,添加command1
Private Sub Command1_Click()
Dim mystr As String, re As RegExp
mystr = "<a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_0')" &"href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%b7%b6%a1%a4%b5%cf%c8%fb%b6%fb&searchby=2&page=1" &"""" &" target='_blank'>范·迪塞尔</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_1')" &"""" &" href=" &"""" &"http://movie.gougou.com/Sections/movies?" &_
"search=%b1%a3%c2%de%a1%a4%ce%d6%bf%cb&searchby=2&page=1 " &"""" &"target='_blank'>保罗·沃克</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_2')" &"""" &" href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%c3%d7%d0%aa%b6%fb%a1%a4%c2%de%b5%c2%c0%ef%b8%f1%d7%c8&searchby=2&page=1" &"""" &" target='_blank'>米歇尔" &_
"·罗德里格兹</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_3')" &"""" &" href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%c7%c7%b5%a4%c4%c8%a1%a4%b2%bc%c2%b3%cb%b9%cc%d8&searchby=2&page=1" &"""" &" target='_blank'>乔丹娜·布鲁斯特</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_4')" &"""" &_
"href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%41%6c%6f%6e%73%6f&searchby=2&page=1" &"""" &" target='_blank'>Alonso</a>"
Set re = New RegExp
re.IgnoreCase = True
re.Global = True
re.Pattern = "<a.*?onmousedown.+?blank.+?>|</a>"
MsgBox re.Replace(mystr, "")
Set re = Nothing
End Sub
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)