{
    // Prints every non-empty text node of the sample markup except the text
    // that directly follows an opening <a ...> tag (i.e. the link text).
    //
    // Sample markup. Inside a verbatim string a literal double quote is written
    // as "", so an empty attribute value (src="") needs four quote characters.
    string s = @"<Body>
<div>这里是要取出的文本A <img src=""""/>这里是要取出的文本B <a href="""">超链接里的文本不取出 </a>这里是要取出的文本C </div>
</body>";

    // Group 1: tag name (closing tags keep their leading '/').
    // Group 2: the text between that tag's '>' and the next '<'.
    // The trailing '<' is consumed by each match, so the following match starts
    // directly at the next tag name; that is why the pattern has no leading '<'.
    Regex regex = new Regex("(/?\\w+)[^>]*>([^<]*)<", RegexOptions.IgnoreCase);

    foreach (Match m in regex.Matches(s))
    {
        string tagName = m.Groups[1].Value;
        string text = m.Groups[2].Value.Trim();

        // Text after "</a>" has tag name "/a", so it is still printed; only the
        // text right after an opening <a> tag is skipped.
        bool isAnchorText = string.Equals(tagName, "a", StringComparison.OrdinalIgnoreCase);
        if (!isAnchorText && text.Length > 0)
        {
            Console.WriteLine(text);
        }
    }
}
结果:
这里是要取出的文本A
这里是要取出的文本B
这里是要取出的文本C
// Extracts the <h2> heading text and the href of a following <a> tag from a
// small multi-line HTML fragment.
var html = @"<div class='1'><h2>啦啦啦</h2>
<ul>
<li><a href=""http://"" target=""_blank"">问题</a>";

// "title" captures the heading text; "url" captures the href value.
// '.' does not match '\n', so line breaks between the heading and the link are
// crossed explicitly with (\r?\n.*)*. The '\r' is optional so the pattern works
// whether the text uses CRLF or LF-only line endings.
var pattern = @"<h2>(?<title>.*?)</h2>.*(\r?\n.*)*<a\shref=""(?<url>.*?)""";
var match = System.Text.RegularExpressions.Regex.Match(html, pattern);
if (match.Success)
{
    var title = match.Groups["title"].Value;
    var url = match.Groups["url"].Value;
}
// Builds an edit link by combining the page prefix found in a page="..."
// attribute with the numeric id found in an onclick='editit(N)' handler.
string html = "这是你的html";

// Group 1: the digits passed to editit(...).
Match m1 = Regex.Match(html, @"onclick='editit\((\d+)\)'");

// Group 1: the page prefix up to and including "adminid=".
// The '.' before "asp" is escaped so it matches a literal dot only.
Match m2 = Regex.Match(html, "page\\s*=\\s*\"(adminadminedit\\.asp\\?adminid=)");

// Print only when both parts were found.
if (m1.Success && m2.Success)
{
    Console.WriteLine(m2.Groups[1].Value + m1.Groups[1].Value);
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)