페이지 원본 가져오기 (페이지 인코딩 형식 자동으로 가져오기)

 #region Fetch page source (auto-detect encoding)
        /// <summary>
        ///  ( )                /// <param name="url">url</param>
        /// <returns> </returns>
/// </summary>
        private string GetHtmlAutoEncoding(string url)
        {
            HttpWebRequest req = null;
            HttpWebResponse resp = null;
            Stream stream = null;
            StreamReader read = null;
            try
            {
                string sUrl = url;
                req = (HttpWebRequest)HttpWebRequest.Create(sUrl);
                req.UserAgent = "Mozilla/5.0 (Windows NT 5.2; rv:6.0) Gecko/20100101 Firefox/6.0";
                req.Accept = "*/*";
                req.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
                req.ContentType = "text/xml";
                req.Timeout = 20000;

                resp = (HttpWebResponse)req.GetResponse();
                Encoding enc = Encoding.GetEncoding(resp.CharacterSet);
                string sHTML = string.Empty;
                stream = resp.GetResponseStream();
                if (resp.ContentEncoding.ToLower().Contains("gzip"))
                {
                    stream = new GZipStream(stream, CompressionMode.Decompress);
                    read = new StreamReader(stream, enc);
                    sHTML = read.ReadToEnd();
                }
                else
                {
                    string sChartSet = "";
                    read = new StreamReader(stream, enc);
                    sHTML = read.ReadToEnd();
                    Match charSetMatch = Regex.Match(sHTML, "charset=(?<code>[a-zA-Z0-9\\-]+)", RegexOptions.IgnoreCase);
                    sChartSet = charSetMatch.Groups["code"].Value;
                    //if it's not utf-8,we should redecode the html.
                    Regex rx = new Regex("([\u4e00-\u9fa5]{2,4})");
                    if (!rx.IsMatch(sHTML))
                    {
                        if (!string.IsNullOrEmpty(sChartSet.Trim()))
                            sHTML = Encoding.GetEncoding(sChartSet).GetString(enc.GetBytes(sHTML));
                    }
                }
                return sHTML;
            }
            catch
            {
                return "";
            }
            finally
            {
                if (resp != null)
                {
                    resp.Close();
                }
                if (stream != null)
                {
                    stream.Close();
                }
                if (read != null)
                {
                    read.Close();
                }
            }
        }
        #endregion

좋은 웹페이지 즐겨찾기