httpclient4 웹 페이지 캡처

4298 단어 httpclient
최근에 전문 검색 작업을 하고 있습니다. 그래서 데이터 때문에 골치가 아픕니다. 업계 뉴스를 수집하기 위해 httpclient를 사용합니다.
코드는 다음과 같습니다.
TEbInformationModel model = new TEbInformationModel();

			// Fetch the page at httpurl, then pull the title, body and keywords out of the HTML.
			HttpClient httpclient = new DefaultHttpClient();

			// "http.protocol.content-charset" is HttpClient's content-charset parameter.
			httpclient.getParams().setParameter("http.protocol.content-charset",HTTP.UTF_8);
			// NOTE(review): the keys below are header-name / charset constants rather than
			// HttpClient parameter names, so they look like no-ops — confirm before removing.
			httpclient.getParams().setParameter(HTTP.CONTENT_ENCODING, HTTP.UTF_8);
			httpclient.getParams().setParameter(HTTP.CHARSET_PARAM, HTTP.UTF_8);
			httpclient.getParams().setParameter(HTTP.DEFAULT_PROTOCOL_CHARSET,HTTP.UTF_8);
			httpclient.getParams().setParameter(HTTP.CONTENT_TYPE, HTTP.UTF_8);

			HttpPost httppost = new HttpPost(httpurl);

			httppost.setHeader("Accept-Language", "zh-cn,zh;q=0.5");
			httppost.setHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");

			// Same settings repeated at request level (see the review note above).
			httppost.getParams().setParameter("http.protocol.content-charset",HTTP.UTF_8);
			httppost.getParams().setParameter(HTTP.CONTENT_ENCODING, HTTP.UTF_8);
			httppost.getParams().setParameter(HTTP.CHARSET_PARAM, HTTP.UTF_8);
			httppost.getParams().setParameter(HTTP.DEFAULT_PROTOCOL_CHARSET, HTTP.UTF_8);
			httppost.getParams().setParameter(HTTP.CONTENT_TYPE, HTTP.UTF_8);

			HttpResponse response = httpclient.execute(httppost);

			// The body is decoded as GBK. readLine() drops line terminators, so the
			// whole page is accumulated as a single line of text.
			InputStream is = response.getEntity().getContent();
			BufferedReader br = new BufferedReader(new InputStreamReader(is,"GBK"));
			StringBuilder sbf = new StringBuilder();
			try {
				String line;
				while ((line = br.readLine()) != null) {
					sbf.append(line);
				}
			} finally {
				// Close even when reading fails so the stream (and its connection) is released.
				br.close();
			}

			// Build the page text once and reuse it for all three extractions.
			String html = sbf.toString();
			String title = getSubTitle(getStringNoBlank(getTitle(html,"title")));
			String context = getSubContext(getStringNoBlank(getTitle(html,"content")));
			String key = getSubKey(getStringNoBlank(getTitle(html,"key")));

			System.out.println("  :"+title);
			System.out.println("  :"+context);
			System.out.println("   :"+key);

정규식 매칭 부분

private  String getStringNoBlank(String str) {      
        if(str!=null && !"".equals(str)) {      
            Pattern p = Pattern.compile("\\s*|\t|\r|
"); Matcher m = p.matcher(str); String strNoBlank = m.replaceAll(""); return strNoBlank; }else { return str; } } public String getSubTitle(String str){ return str.substring(str.indexOf("<h1>")+4, str.lastIndexOf("</h1>")); } public String getSubContext(String str){ return str.substring(str.indexOf("<P>")+3, str.lastIndexOf("</P>")); } public String getSubKey(String str){ return str.substring(str.indexOf("</b>")+4, str.lastIndexOf("</p>")); } private String getTitle( String s,String type) { String regex = null; String title = ""; final List<String> list = new ArrayList<String>(); if("title".equals(type)){ regex = "<div class=\"zz_leftneirong1\">.*?</h1>"; }else if("content".equals(type)){ regex = "<div class=\"zz_leftneirong4\" id=\"content\" name=\"content\">.*? </div>"; }else{ regex = " <p class=\"key\"><b> :</b>.*?</p>"; } final Pattern pa = Pattern.compile(regex, Pattern.CANON_EQ); final Matcher ma = pa.matcher(s); while (ma.find()) { list.add(ma.group()); } for (int i = 0; i < list.size(); i++) { title = title + list.get(i); } return title; }

좋은 웹페이지 즐겨찾기