텍스트만 남기는 HTML 코드 필터링

5559 단어 htmlWeb
    using System; 
  using System.Web; 
  using System.Text.RegularExpressions;
 /// <summary>
 /// Reduces an HTML fragment to plain text and HTML-encodes the result.
 /// </summary>
 /// <remarks>
 /// Script blocks and tags are removed, a fixed set of entities is decoded, every other
 /// numeric entity is dropped, and any angle bracket that is still present afterwards
 /// (including ones produced by decoding <c>&amp;lt;</c> / <c>&amp;gt;</c>) is deleted.
 /// </remarks>
 /// <param name="Htmlstring">HTML markup to clean; may be null or empty.</param>
 /// <returns>The encoded, trimmed text, or an empty string for null or empty input.</returns>
 public static string NoHTML(string Htmlstring)
 {
     if (string.IsNullOrEmpty(Htmlstring))
     {
         return string.Empty;
     }

     // Remove script blocks together with their content. Singleline is required so that
     // ".*?" also spans line breaks; without it multi-line scripts leak into the output.
     Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

     // Remove every remaining tag.
     Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

     // Remove a line break together with the whitespace that follows it.
     Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

     // Remove leftovers of comments that the tag pattern could not match as a whole.
     Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

     // Decode the supported named / numeric entities.
     Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
     Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

     // Any other numeric entity is dropped.
     Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

     // Strings are immutable: the result of Replace has to be assigned back,
     // otherwise these three calls have no effect.
     Htmlstring = Htmlstring.Replace("<", "");
     Htmlstring = Htmlstring.Replace(">", "");
     Htmlstring = Htmlstring.Replace("\r\n", "");

     // HttpUtility.HtmlEncode does not need an active request, unlike HttpContext.Current.Server.
     return HttpUtility.HtmlEncode(Htmlstring).Trim();
 }

 
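/// Removing HTML tags from a string in C#
  /// <summary>
  /// Removes HTML tags from a string.
  /// </summary>
  /// <param name="strHtml">String that contains HTML markup.</param>
  /// <returns>The text with the markup removed.</returns>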
  using System; 
  using System.Text.RegularExpressions; 
  /// <summary>
  /// Console demo for <see cref="StripHTML"/>.
  /// </summary>
  public class StripHTMLTest
  {
      // { pattern, replacement } pairs, applied from top to bottom with IgnoreCase.
      // Keeping both in one table prevents the two lists from drifting apart.
      // The order matters: "-->" is first turned into a line break so that the
      // following "<!--.*\n" rule can remove the whole comment up to that break.
      private static readonly string[,] Rules =
      {
          // Script blocks including their content; (?s) lets ".*?" span line breaks.
          { @"(?s)<script[^>]*?>.*?</script>", "" },
          // Opening / closing tags with optional attributes.
          // NOTE(review): the nested quantifiers in this pattern look prone to heavy
          // backtracking on malformed input - confirm before using on untrusted markup.
          { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
          // A line break together with the whitespace that follows it.
          { @"([\r\n])[\s]+", "" },
          { @"&(quot|#34);", "\"" },
          { @"&(amp|#38);", "&" },
          { @"&(lt|#60);", "<" },
          { @"&(gt|#62);", ">" },
          { @"&(nbsp|#160);", " " },
          { @"&(iexcl|#161);", "\u00A1" },
          { @"&(cent|#162);", "\u00A2" },
          { @"&(pound|#163);", "\u00A3" },
          { @"&(copy|#169);", "\u00A9" },
          // Any other numeric entity is dropped.
          { @"&#(\d+);", "" },
          { @"-->", "\r\n" },
          { @"<!--.*\n", "" }
      };

      /// <summary>
      /// Runs <see cref="StripHTML"/> on a sample document and prints the result.
      /// </summary>
      public static void Main()
      {
          string s = StripHTML("<HTML><HEAD><TITLE>    </TITLE></HEAD><BODY>  </BODY></HTML>");
          Console.WriteLine(s);
      }

      /// <summary>
      /// Removes script blocks, tags and comments from HTML markup and decodes a fixed
      /// set of entities. Angle brackets that remain afterwards are deleted.
      /// </summary>
      /// <param name="strHtml">HTML markup to clean; may be null or empty.</param>
      /// <returns>The remaining text, or an empty string for null or empty input.</returns>
      public static string StripHTML(string strHtml)
      {
          if (string.IsNullOrEmpty(strHtml))
          {
              return string.Empty;
          }

          string strOutput = strHtml;
          for (int i = 0; i < Rules.GetLength(0); i++)
          {
              strOutput = Regex.Replace(strOutput, Rules[i, 0], Rules[i, 1], RegexOptions.IgnoreCase);
          }

          // Strings are immutable: the result of Replace has to be assigned back.
          // Without this, "&lt;script&gt;" would come out as a live "<script>".
          strOutput = strOutput.Replace("<", "");
          strOutput = strOutput.Replace(">", "");
          strOutput = strOutput.Replace("\r\n", "");
          return strOutput;
      }
  }

  // HTML tag removal
  #region HTML tag removal
  /// <summary>
  /// Removes every substring of the form <c>&lt;...&gt;</c> from the input.
  /// </summary>
  /// <param name="HTMLStr">Markup to process.</param>
  /// <returns>The input with all such substrings removed.</returns>
  public static string ParseTags(string HTMLStr)
  {
      return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
  }
  #endregion

 
           #region Image URL extraction
  /// <summary>
  /// Returns the <c>src</c> attribute value of the first <c>&lt;img&gt;</c> tag in the markup.
  /// </summary>
  /// <param name="HTMLStr">HTML markup to search; may be null or empty.</param>
  /// <returns>
  /// The attribute value without surrounding quotes and with its original casing,
  /// or an empty string when the input is null/empty or no matching tag is found.
  /// </returns>
  public static string GetImgUrl(string HTMLStr)
  {
      if (string.IsNullOrEmpty(HTMLStr))
      {
          return string.Empty;
      }

      // Match case-insensitively on the untouched input instead of lower-casing it,
      // so that case-sensitive URLs are returned unchanged. The value is captured
      // per quoting style so the closing quote never ends up in the result.
      // (?<=\s) keeps attributes such as "data-src" from being taken for "src".
      Match m = Regex.Match(
          HTMLStr,
          @"<img\s+[^>]*?(?<=\s)src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
          RegexOptions.IgnoreCase);

      return m.Success ? m.Groups["url"].Value : string.Empty;
  }
  #endregion

좋은 웹페이지 즐겨찾기