/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// The final encoding step dereferences <c>HttpContext.Current</c>, so non-empty input
/// must be processed inside an active ASP.NET request.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with scripts, tags, comments and common entities removed or decoded,
/// then HTML-encoded; an empty string when the input is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' cross line breaks so that
    // multi-line script bodies are removed together with their tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove HTML tags, line breaks followed by whitespace, and comment remnants.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode common entities. The terminating ';' is part of each pattern so that
    // "&amp;" becomes "&" rather than "&;", and "&#345;" is not mistaken for "&#34".
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Drop any remaining numeric character references. Matching only "&#NNN;"
    // keeps ordinary digits in the text intact.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // string.Replace returns a new string, so the result has to be assigned back.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
写一个静态方法
/// <summary>
/// Removes HTML tags from a string by deleting every <c>&lt;...&gt;</c> span.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML tags.</param>
/// <returns>
/// The text with all tag spans removed; an empty string when the input is null or empty.
/// </returns>
public static string ParseTags(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}
#endregion
#region Extract image URL from text
/// <summary>
/// Returns the <c>src</c> value of the first <c>&lt;img&gt;</c> tag found in the text.
/// </summary>
/// <param name="HTMLStr">Text that may contain an <c>&lt;img&gt;</c> tag.</param>
/// <returns>
/// The URL exactly as written in the markup (original casing, surrounding quotes removed),
/// or an empty string when the input is null or empty or no <c>&lt;img&gt;</c> with a
/// <c>src</c> attribute is found.
/// </returns>
public static string GetImgUrl(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // Match case-insensitively instead of lowercasing the input, so the returned
    // URL keeps its original casing. The three alternatives cover double-quoted,
    // single-quoted and unquoted attribute values; the quotes are never captured.
    Match match = Regex.Match(
        HTMLStr,
        @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase);

    return match.Success ? match.Groups["url"].Value : string.Empty;
}
#endregion
在标签上加 runat="server"，例如 <img ID="xx" src=" " width="200" height="100" runat="server" />，然后在后台用 this.xx.Src = "src"; 试一下。
没有几种。原理就是删掉正则匹配到的 HTML 标签。
至于删除时用 Replace 还是 Remove，我觉得没那么重要。
html标签的正则,网上抄的不知道对不对:
"<(.[^>]*)>"
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)