자바로 웹 페이지의 그림 캡처하기

본문 소개
1. 정규 표현식을 사용하여 웹 페이지에 있는 그림의 경로와 제목을 추출합니다.
2. 추출한 그림을 다운로드합니다.
3. 서버에 업로드합니다.


/**
 * Plain data holder describing one image scraped from a web page.
 */
public class Picture {

	/** Title text captured for the image; empty when no title group is configured. */
	private String title;
	/** URL the image was found at. */
	private String source;
	/** Path the downloaded copy of the image was stored under. */
	private String upPath;

	/** @return the image title */
	public String getTitle() {
		return title;
	}

	/** @param title the image title */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the URL the image was found at */
	public String getSource() {
		return source;
	}

	/** @param source the URL the image was found at */
	public void setSource(String source) {
		this.source = source;
	}

	/** @return the path the downloaded image was stored under */
	public String getUpPath() {
		return upPath;
	}

	/** @param upPath the path the downloaded image was stored under */
	public void setUpPath(String upPath) {
		this.upPath = upPath;
	}

}


import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.sun.xml.internal.fastinfoset.stax.events.Util;

public class CatchPicture {

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		// TODO Auto-generated method stub
		//             
		String regular="[*]<b>.*?</b><br/><img src=\"(.*?)\" border=0 alt=\'(.*?)\' style=\".*?\" class=\".*?\">
";
		List<Picture> list=new CatchPicture().lookWeiboPic("http://gaoxiao.jokeji.cn/GrapHtml/dongtai/20120921221658.htm","GBK",regular,"2,1");
		System.out.println(list.size());
	}
	//  URL        
	public List<Picture> lookWeiboPic(String url,String charset,String regular,String attIndex){
		List<Picture> list=new ArrayList<Picture>();
		try {
			//     url
			//               
			//        list  
			if(!Util.isEmptyString(url)){
					String htmls = getPageSource(url.trim(),charset);
					Pattern pattern =null;
					pattern = Pattern.compile(regular.trim());
					if(!Util.isEmptyString(htmls)){
						Matcher matcher = pattern.matcher(htmls);
						
						//        
						String[] sort = regular.trim().split(","); //  :0      title , 1         
						//                http://www.moonbasa.com/p-032111106.html-->   http://www.moonbasa.com
						String[] suffix;
						suffix =url.trim().split("cn");
						String httphread = "";
						if (suffix.length > 1) {
							httphread = suffix[0] + "cn";
	
						} else {
							suffix = url.trim().split("com");
							httphread = suffix[0] + "com";
						}
						//       
						while(matcher.find()){
							Picture picture=new Picture();
							
							//   title
							if (-1 == Integer.parseInt(sort[0])) {
								//         
								picture.setTitle("");
							} else {
								//     #
								String title=matcher.group(Integer.parseInt(sort[0])).replace("#", " ");
								picture.setTitle(title);
							}
							
							//   source
							if (-1 == Integer.parseInt(sort[1])) {
								//           
								picture.setSource("");
							}else{
								String webImgUrl=matcher.group(Integer.parseInt(sort[1]));
								//             
								String[] pathType=webImgUrl.split(":");
								if(pathType.length>1){
									//    
									picture.setSource(webImgUrl);
								}else{
									//          ..
									pathType=webImgUrl.split("\\.\\.");
									if(pathType.length>1){
										picture.setSource(httphread+pathType[1]);
									}else{
										if(webImgUrl.startsWith("/")){
											picture.setSource(httphread+pathType[0]);
										}else{
											picture.setSource(httphread+"/"+pathType[0]);
										}
									}
								}
							}
							String upPath=upload(picture.getSource(),"d:\\image\\");
							picture.setUpPath(upPath);
							list.add(picture);
						}//--end while
					}
		
				}
			}catch (Exception e) {
				e.printStackTrace();
			}
		return list;
	} 
	
	/**
	 *              
	 * @param pageUrl
	 * @param encoding
	 * @return
	 */
	public String getPageSource(String pageUrl,String encoding) {    
    StringBuffer sb = new StringBuffer();    
    try {    
        //   URL      
        URL url = new URL(pageUrl);    
        //  openStream             BufferedReader      
        BufferedReader in = new BufferedReader(new InputStreamReader(url    
                .openStream(), encoding));    
        String line;    
        //  www      
        while ((line = in.readLine()) != null) {    
            sb.append(line);    
            sb.append("
"); } in.close(); } catch (Exception ex) { System.err.println(ex); } return sb.toString(); } /** * * @param urlStr * @param path * @return * @throws Exception */ public String upload(String urlStr,String path) throws Exception{ Calendar calendar = Calendar.getInstance(); String month = calendar.get(Calendar.YEAR) + "/" + (calendar.get(Calendar.MONTH) + 1); String filename = java.util.UUID.randomUUID().toString() + getExtension(urlStr); path =path + month + "/"; download(urlStr,path,filename); return path+month + "/" + filename; } /** * * @param urlString * @param filename * @param savePath * @return * @throws Exception */ public void download(String urlString, String filename,String savePath) throws Exception { // URL URL url = new URL(urlString); // URLConnection con = url.openConnection(); // con.setConnectTimeout(5*1000); // InputStream is = con.getInputStream(); // 1K byte[] bs = new byte[1024]; // int len; // File sf=new File(savePath); if(!sf.exists()){ sf.mkdirs(); } OutputStream os = new FileOutputStream(sf.getPath()+"\\"+filename); // while ((len = is.read(bs)) != -1) { os.write(bs, 0, len); } // , os.close(); is.close(); } /** * * @param fileUrl * @return */ public String getExtension(String fileUrl){ return fileUrl.substring(fileUrl.lastIndexOf("."), fileUrl.length()); } }

좋은 웹페이지 즐겨찾기