HtmlParser, 경량급 웹 캡처 분석 도구
                                            
 1868 단어  HtmlParser
                    
/**
 * Demo entry point: opens a connection to a sample news article, parses the
 * response as GBK, selects the nodes matching the {@code .left_zw} CSS
 * selector, prints their HTML to standard output and logs {@code "ok"}.
 *
 * @param args command-line arguments (unused)
 * @throws IOException     if the URL is malformed or the connection cannot be opened
 * @throws ParserException if the parser cannot be set up or the page cannot be parsed
 */
public static void main(String[] args) throws IOException, ParserException {
        // Two sample article pages; only the chinanews one is fetched below.
        String qqArticle = "http://tech.qq.com/a/20131112/011680.htm";
        String chinaNewsArticle = "http://www.chinanews.com/gn/2013/11-12/5492942.shtml";

        Parser pageParser = new Parser(new URL(chinaNewsArticle).openConnection());
        pageParser.setEncoding("GBK");

        // Alternatives that were tried for the qq.com page and left disabled:
        //  - a TextExtractingVisitor passed to visitAllNodesWith(...) to pull out plain text;
        //  - an AndFilter of TagNameFilter("div") and
        //    HasAttributeFilter("id", "Cnt-Main-Article-QQ");
        //  - a CssSelectorNodeFilter("#Cnt-Main-Article-QQ").
        NodeList matched = pageParser.parse(new CssSelectorNodeFilter(".left_zw"));
        String matchedHtml = matched.toHtml();
        System.out.println("html:[" + matchedHtml + "]");

        logger.info("ok");
    }
해당 pom 은:
<!-- Maven dependency on org.htmlparser:htmlparser, version 2.1.
     Presumably this supplies the Parser / filter classes used in the Java example; verify against the imports. -->
<dependency>
            <groupId>org.htmlparser</groupId>
            <artifactId>htmlparser</artifactId>
            <version>2.1</version>
        </dependency>