vb高手请进,不要复制,急急急急,vb去除html标签,200分。谢谢了。

vb高手请进,不要复制,急急急急,vb去除html标签,200分。谢谢了。,第1张

没有具体程序,假设变量all代表HTML代码字符串。

可以循环检测:

ok=""

s1=instr(1,all,"<")

s2=instr(s1+1,all,">")

while s1>0 and s2<len(all)

ok=ok &mid(all,s1+1,s2-s1-1)

s1=instr(s2+1,all,"<")

s2=instr(s1+1,all,">")

wend

应该这样就可以搞定所有标签了。

用这个函数吧

Function RemoveHTML( strText )

Dim TAGLIST

TAGLIST = "!--!DOCTYPEAACRONYMADDRESSAPPLETAREABBASEBASEFONT" &_

"BGSOUNDBIGBLOCKQUOTEBODYBRBUTTONCAPTIONCENTERCITECODE" &_

"COLCOLGROUPCOMMENTDDDELDFNDIRDIVDLDTEMEMBEDFIELDSET" &_

"FONTFORMFRAMEFRAMESETHEADH1H2H3H4H5H6HRHTMLIIFRAMEIMG" &_

"INPUTINSISINDEXKBDLABELLAYERLAGENDLILINKLISTINGMAPMARQUEE" &_

"MENUMETANOBRNOFRAMESNOSCRIPTOBJECTOLOPTIONPPARAMPLAINTEXT" &_

"PREQSSAMPSCRIPTSELECTSMALLSPANSTRIKESTRONGSTYLESUBSUP" &_

"TABLETBODYTDTEXTAREATFOOTTHTHEADTITLETRTTUULVARWBRXMP"

Const BLOCKTAGLIST = "APPLETEMBEDFRAMESETHEADNOFRAMESNOSCRIPTOBJECTSCRIPTSTYLE"

Dim nPos1

Dim nPos2

Dim nPos3

Dim strResult

Dim strTagName

Dim bRemove

Dim bSearchForBlock

nPos1 = InStr(strText, "<")

Do While nPos1 >0

nPos2 = InStr(nPos1 + 1, strText, ">")

If nPos2 >0 Then

strTagName = Mid(strText, nPos1 + 1, nPos2 - nPos1 - 1)

strTagName = Replace(Replace(strTagName, vbCr, " "), vbLf, " ")

nPos3 = InStr(strTagName, " ")

If nPos3 >0 Then

strTagName = Left(strTagName, nPos3 - 1)

End If

If Left(strTagName, 1) = "/" Then

strTagName = Mid(strTagName, 2)

bSearchForBlock = False

Else

bSearchForBlock = True

End If

If InStr(1, TAGLIST, "" &strTagName &"", vbTextCompare) >0 Then

bRemove = True

If bSearchForBlock Then

If InStr(1, BLOCKTAGLIST, "" &strTagName &"", vbTextCompare) >0 Then

nPos2 = Len(strText)

nPos3 = InStr(nPos1 + 1, strText, "</" &strTagName, vbTextCompare)

If nPos3 >0 Then

nPos3 = InStr(nPos3 + 1, strText, ">")

End If

If nPos3 >0 Then

nPos2 = nPos3

End If

End If

End If

Else

bRemove = False

End If

If bRemove Then

strResult = strResult &Left(strText, nPos1 - 1)

strText = Mid(strText, nPos2 + 1)

Else

strResult = strResult &Left(strText, nPos1)

strText = Mid(strText, nPos1 + 1)

End If

Else

strResult = strResult &strText

strText = ""

End If

nPos1 = InStr(strText, "<")

Loop

strResult = strResult &strText

RemoveHTML = strResult

End Function

不需要移除的标签可以从TAGLIST中删除

工程>引用>Microsoft VBScript Regular Expressions 5.5

'新建窗体,添加command1

Private Sub Command1_Click()

Dim mystr As String, re As RegExp

mystr = "<a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_0')" &"href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%b7%b6%a1%a4%b5%cf%c8%fb%b6%fb&searchby=2&page=1" &"""" &" target='_blank'>范·迪塞尔</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_1')" &"""" &" href=" &"""" &"http://movie.gougou.com/Sections/movies?" &_

"search=%b1%a3%c2%de%a1%a4%ce%d6%bf%cb&searchby=2&page=1 " &"""" &"target='_blank'>保罗·沃克</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_2')" &"""" &" href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%c3%d7%d0%aa%b6%fb%a1%a4%c2%de%b5%c2%c0%ef%b8%f1%d7%c8&searchby=2&page=1" &"""" &" target='_blank'>米歇尔" &_

"·罗德里格兹</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_3')" &"""" &" href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%c7%c7%b5%a4%c4%c8%a1%a4%b2%bc%c2%b3%cb%b9%cc%d8&searchby=2&page=1" &"""" &" target='_blank'>乔丹娜·布鲁斯特</a><a onmousedown=" &"""" &"return tongji(this.innerHTML,this.href,'artist_4')" &"""" &_

"href=" &"""" &"http://movie.gougou.com/Sections/movies?search=%41%6c%6f%6e%73%6f&searchby=2&page=1" &"""" &" target='_blank'>Alonso</a>"

Set re = New RegExp

re.IgnoreCase = True

re.Global = True

re.Pattern = "<a.*?onmousedown.+?blank.+?>|</a>"

MsgBox re.Replace(mystr, "")

Set re = Nothing

End Sub


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/6285612.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-03-19
下一篇 2023-03-19

发表评论

登录后才能评论

评论列表(0条)

保存