HttpClient와 jsoup을 이용한 웹 페이지 분석

2361 단어 자바JSoup
package jsoup;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads a page's HTML with Apache HttpClient and then parses that HTML
 * with jsoup to list the links found under a fixed CSS selector.
 *
 * @author Administrator
 */
public class JustTest {
	/**
	 * Charset used to decode a response body when the server's Content-Type
	 * header does not declare one. Without an explicit default,
	 * {@code EntityUtils.toString(HttpEntity)} falls back to ISO-8859-1 and
	 * garbles UTF-8 pages.
	 */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/**
	 * Fetches {@code http://www.iteye.com/} and prints each anchor matched by
	 * the selector below as {@code href,title}, one per line. Prints nothing
	 * when the page could not be fetched or is empty.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String html = getHtmlByUrl("http://www.iteye.com/");
		if (html == null || html.isEmpty()) {
			return;
		}
		Document doc = Jsoup.parse(html);
		// Child-combinator chain: the div with id "page" > div id "content" >
		// div id "main" > div class "left" > div id "recommend" > ul > li > a.
		Elements linksElements = doc
				.select("div#page>div#content>div#main>div.left>div#recommend>ul>li>a");
		for (Element ele : linksElements) {
			String href = ele.attr("href");
			String title = ele.text();
			System.out.println(href + "," + title);
		}
	}

	/**
	 * Issues an HTTP GET for {@code url} and returns the response body as text.
	 *
	 * @param url the address to fetch; {@code null} yields {@code null}
	 *            without issuing a request
	 * @return the response body, or {@code null} when {@code url} is
	 *         {@code null}, the status is not 200, the response carries no
	 *         entity, or the request fails with an exception (which is
	 *         reported on standard output)
	 */
	public static String getHtmlByUrl(String url) {
		if (url == null) {
			return null;
		}
		// Build the request before the client: a malformed URL throws here,
		// and doing it first means no client is left un-shut-down in that case.
		HttpGet httpget = new HttpGet(url);
		HttpClient httpClient = new DefaultHttpClient();
		String html = null;
		try {
			HttpResponse response = httpClient.execute(httpget);
			int statusCode = response.getStatusLine().getStatusCode();
			if (statusCode == HttpStatus.SC_OK) {
				HttpEntity entity = response.getEntity();
				if (entity != null) {
					// Decode with the charset declared by the server, falling
					// back to UTF-8 instead of the library's ISO-8859-1 default.
					html = EntityUtils.toString(entity, DEFAULT_CHARSET);
				}
			}
		} catch (Exception e) {
			// Best-effort fetch: report the failure and fall through to return null.
			System.out.println("  【" + url + "】    !");
			e.printStackTrace();
		} finally {
			// Releases all connections held by this single-use client.
			httpClient.getConnectionManager().shutdown();
		}
		return html;
	}
}

좋은 웹페이지 즐겨찾기