{
html = SystemTextRegularExpressionsRegexReplace(html, @"<script[^>]>[^<]</script>", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
return html;
}
public static string DelHTML(string Htmlstring)//将HTML去除
{
Htmlstring = HtmlstringReplace(">=", "")Replace("<=", "");//stiven [2008429]
//删除脚本
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"<script[^>]>[^<]</script>", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
//删除HTML
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"<([^>])>", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"([\r\n])[\s]+", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"-->", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"<!--", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
//Htmlstring =SystemTextRegularExpressions RegexReplace(Htmlstring,@"<A></A>","");
//Htmlstring =SystemTextRegularExpressions RegexReplace(Htmlstring,@"<[a-zA-Z]=\[a-zA-Z]\[a-zA-Z]+=\d&\w=%[a-zA-Z]|[A-Z0-9]","");
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(quot|#34);", "\"", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(amp|#38);", "&", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(lt|#60);", "<", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(gt|#62);", ">", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(nbsp|#160);", " ", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(iexcl|#161);", "\xa1", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(cent|#162);", "\xa2", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(pound|#163);", "\xa3", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"&(copy|#169);", "\xa9", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
Htmlstring = SystemTextRegularExpressionsRegexReplace(Htmlstring, @"(\d+);", "", SystemTextRegularExpressionsRegexOptionsIgnoreCase);
HtmlstringReplace("<", "");
HtmlstringReplace(">", "");
HtmlstringReplace("\r\n", "");
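// Htmlstring => for example, given an HTML file whose source is: <html><head><title></title></head><body><table></table></body></html> — after the spaces and line breaks are filtered out, the result should be just a single line, as follows: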
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)