JSoup HTML 분석 및 이미지 다운로드

6160 단어 자바 html JSoup


import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

	/**
	 *   HTML  
	 * liguoliang
	 * 2015 9 23   3:02:56
	 * @param httpUrl
	 * @param encode
	 * @return String
	 */
	public static String getHtmlCode(String httpUrl, String encode) {
		StringBuffer content = new StringBuffer();
		URL url;
		BufferedReader br = null;
		try {
			url = new URL(httpUrl);
			br = new BufferedReader(new InputStreamReader(url.openStream(), encode)); // //  openStream             BufferedReader   
			String input;
			while ((input = br.readLine()) != null) { //       ，          
				content.append(input + "
");
			}
		} catch (MalformedURLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			try {
				br.close();
			} catch (IOException e) {
				// TODO Auto-generated catch block
				e.printStackTrace();
			}
		}
		return content.toString();
	}

/**
	 * Downloads the images referenced by {@code <img src="...">} tags of a page
	 * into a local directory.
	 *
	 * <p>The page is fetched and parsed with Jsoup. An image is downloaded only
	 * when its {@code src} is an absolute {@code http://} or {@code https://} URL
	 * whose last path segment contains a dot; the file is stored under that last
	 * segment's name inside {@code filePath}, overwriting any existing file of
	 * the same name. The target directory is created when it does not exist.
	 *
	 * <p>This is a best-effort call: a failure to fetch the page is reported on
	 * standard error, and a failure to download one image is reported and does
	 * not prevent the remaining images from being processed.
	 *
	 * <p>Original author: liguoliang (2015-09-23).
	 *
	 * @param httpUrl  URL of the HTML page to scan
	 * @param filePath directory the images are written to
	 * @return the {@code src} value of the last {@code <img>} element visited
	 *         (whether or not it was downloaded), or {@code null} when the page
	 *         had none or could not be fetched
	 */
	public static String getHtmlPicture(String httpUrl, String filePath) {
		String imageUrl = null;
		try {
			File imageDir = new File(filePath);
			if (!imageDir.exists()) {
				imageDir.mkdirs();
			}

			Document doc = Jsoup.connect(httpUrl).get();
			// Only <img> elements carrying a src attribute are of interest.
			for (Element img : doc.select("img[src]")) {
				imageUrl = img.attr("src");
				System.out.println("    ：" + imageUrl);

				String fileName = new File(imageUrl).getName();
				boolean absolute = imageUrl.startsWith("http://") || imageUrl.startsWith("https://");
				if (!absolute || !fileName.contains(".")) {
					continue;
				}

				// Resolve against the directory so a missing trailing separator in
				// filePath cannot produce a sibling path. FileOutputStream truncates
				// an existing file, so no explicit delete is required beforehand.
				File targetFile = new File(imageDir, fileName);
				try (InputStream in = new URL(imageUrl).openConnection().getInputStream();
						OutputStream out = new FileOutputStream(targetFile)) {
					byte[] buffer = new byte[1024];
					int len;
					while ((len = in.read(buffer)) != -1) {
						out.write(buffer, 0, len);
					}
				} catch (IOException e) {
					// One broken image must not abort the remaining downloads.
					e.printStackTrace();
				}
			}
		} catch (IOException e) {
			// The page itself could not be fetched or parsed.
			e.printStackTrace();
		}
		return imageUrl;
	}

	/**
	 * Downloads a single resource and stores it in a local directory.
	 *
	 * <p>The file name is the part of {@code imageUrl} after its last {@code '/'}
	 * (the whole string when it contains none). The target directory is created
	 * when it does not exist and an existing file of the same name is
	 * overwritten.
	 *
	 * <p>This is a best-effort call: I/O failures, including a malformed URL, are
	 * reported on standard error and the method returns normally.
	 *
	 * <p>Original author: liguoliang (2015-09-23).
	 *
	 * @param filePath directory the file is written to
	 * @param imageUrl URL of the resource to download
	 */
	public static void downImag(String filePath, String imageUrl) {
		// lastIndexOf yields -1 when there is no '/', so "+ 1" keeps the whole string
		// instead of throwing StringIndexOutOfBoundsException.
		String fileName = imageUrl.substring(imageUrl.lastIndexOf('/') + 1);
		if (fileName.isEmpty()) {
			System.err.println("No file name in image URL: " + imageUrl);
			return;
		}

		File dir = new File(filePath);
		if (!dir.exists()) {
			dir.mkdirs();
		}

		try (InputStream in = new URL(imageUrl).openConnection().getInputStream();
				OutputStream os = new FileOutputStream(new File(dir, fileName))) {
			byte[] buffer = new byte[1024];
			int length;
			while ((length = in.read(buffer)) != -1) {
				os.write(buffer, 0, length);
			}
		} catch (IOException e) {
			// Report instead of silently discarding the failure.
			e.printStackTrace();
		}
	}

	/**
	 * Demo driver: prints the source of a sample wallpaper page and then saves
	 * the images it references into a local directory.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String pageUrl = "http://bizhi.sogou.com/detail/info/1245069?f=index_dl";
		final String charset = "gb2312";
		final String saveDir = "e:/phone/newfile/111/";

		// Dump the raw page source first.
		final String pageSource = getHtmlCode(pageUrl, charset);
		System.out.println(pageSource);

		// Approach 1 (disabled): parse the already fetched source and download
		// each image individually through downImag.
		/*Document document = Jsoup.parse(pageSource);
		Elements images = document.getElementsByTag("img");
		for (Element image : images) {
			String imgUrl = image.attr("src");
			File file = new File(imgUrl);
			if (!"".equals(imgUrl) && imgUrl.startsWith("http://") && file.getName().contains(".")) {
				System.out.println("       ===========================");
				downImag(saveDir, imgUrl);
				System.out.println("    ：" + imgUrl);
			}
		}*/

		// Approach 2: let getHtmlPicture fetch the page and download its images.
		getHtmlPicture(pageUrl, saveDir);
	}

이 내용에 흥미가 있습니까?

현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:

Is Eclipse IDE dying?

In 2014 the Eclipse IDE is the leading development environment for Java with a market share of approximately 65%. but ac...

텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.

CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.

HSP3.5HGIMG4에 도움말에 없는 이벤트 동작 소개

Iterator 와 for... of 순환

좋은 웹페이지 즐겨찾기

개발자 우수 사이트 수집

개발자가 알아야 할 필수 사이트 100선 추천 우리는 당신을 위해 100개의 자주 사용하는 개발자 학습 사이트를 정리했습니다