怎么去除数据库中提取出来的html标签,换行形式变为<br>???

怎么去除数据库中提取出来的html标签,换行形式变为<br>???,第1张

/// <summary>
/// Removes every script element (opening tag, body and closing tag) from an HTML fragment.
/// </summary>
/// <param name="html">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The input with all script elements removed; an empty string when the input is null or empty.
/// </returns>
public static string DelScripts(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    // Lazy ".*?" together with Singleline lets the body span several lines and contain
    // '<' characters (e.g. "if (a < b)"), which a "[^<]*" body would fail to match.
    return System.Text.RegularExpressions.Regex.Replace(
        html,
        @"<script[^>]*>.*?</script>",
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.Singleline);
}
/// <summary>
/// Strips HTML markup from a string and returns the remaining plain text.
/// </summary>
/// <remarks>
/// Steps, in order: drop the "&gt;=" / "&lt;=" operator pairs, remove script elements,
/// remove all remaining tags, collapse line breaks followed by whitespace, remove stray
/// comment delimiters, decode a small set of named/numeric entities, delete any other
/// numeric entity, and finally remove leftover angle brackets and CRLF pairs.
/// </remarks>
/// <param name="Htmlstring">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The text with markup removed; an empty string when the input is null or empty.
/// </returns>
public static string DelHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    const System.Text.RegularExpressions.RegexOptions ignoreCase =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Remove comparison operators up front so their angle brackets are not
    // mistaken for tag delimiters by the patterns below. -- stiven [2008429]
    Htmlstring = Htmlstring.Replace(">=", "").Replace("<=", "");

    // Remove script elements. Singleline lets the lazy body match span line breaks.
    Htmlstring = System.Text.RegularExpressions.Regex.Replace(
        Htmlstring,
        @"<script[^>]*>.*?</script>",
        "",
        ignoreCase | System.Text.RegularExpressions.RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<([^>]*)>", "", ignoreCase);

    // Remove a line break together with the whitespace run that follows it.
    Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", ignoreCase);

    // Remove comment delimiters left behind by unbalanced comments.
    Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"-->", "", ignoreCase);
    Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<!--", "", ignoreCase);

    // Entity pattern -> replacement text, applied in this order.
    string[,] entities =
    {
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
    };
    for (int i = 0; i < entities.GetLength(0); i++)
    {
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(
            Htmlstring, entities[i, 0], entities[i, 1], ignoreCase);
    }

    // Delete any other numeric character reference. The "&#" prefix is required;
    // without it ordinary text such as "12;" would be deleted too.
    Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&#(\d+);", "", ignoreCase);

    // Strings are immutable: the result of Replace must be assigned back,
    // otherwise these three steps have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    return Htmlstring;
}
//Htmlstring => 就像这样:假设 HTML 文件的源代码如下:<html><head><title></title></head><body><table></table></body></html>,经过空格、换行过滤后,得到的结果应该只有一行,如下:


欢迎分享,转载请注明来源:内存溢出

原文地址: https://outofmemory.cn/yw/13341949.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-07-18
下一篇 2023-07-18

发表评论

登录后才能评论

评论列表(0条)

保存