Beautiful Soup 모듈을 사용하여 Qiushibaike(糗事百科, qiushibaike.com)의 유머 글을 가져옵니다.

자세한 설명은 링크를 참고하세요. 여기서는 BeautifulSoup을 사용하여 페이지를 크롤링합니다.
#!/usr/bin/python
#coding: utf-8

from bs4 import BeautifulSoup
import re, sys, urllib, urllib2
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace
# during interpreter start-up, so the module is reloaded to get it back and
# the process-wide default codec is switched to UTF-8.
# NOTE(review): presumably done so the scraped unicode text can be written to
# stdout without explicit encoding -- confirm before porting to Python 3,
# where neither call exists.
reload(sys)
sys.setdefaultencoding( "utf-8" )

# Interactive loop: ask for a page number, download that page of the "hot"
# listing and write the text of every <div class="content"> element to stdout.
# Entering 0 leaves the loop.
# NOTE(review): the prompt/label literals below contain only blanks and
# punctuation; their non-ASCII wording appears to have been lost before this
# review. They are kept exactly as found -- restore them from the original.
while True:
    url = "http://www.qiushibaike.com/hot/page/"

    try:
        x = int(raw_input(u"       (  0  ),      35 :"))
    except EOFError:
        # stdin is closed: prompting again would fail the same way forever.
        break
    except Exception as e:
        # Typically a ValueError from int() on non-numeric input; ask again.
        print(e)
        print(u"     ")
        continue

    if x == 0:
        break
    url = url + str(x) + "/"

    # Browser-like User-Agent header sent with the request.
    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        html = html.decode("utf-8")

        soup = BeautifulSoup(html, "lxml")
        items = soup.find_all("div", {"class": "content"})

        # Page header: blank line, page label, blank line.
        sys.stdout.write("\n")
        sys.stdout.write(u" %d\n" % x)
        sys.stdout.write("\n")

        # Number the entries from 1; a separate name keeps the page number
        # in x intact while the entries are printed.
        for num, item in enumerate(items, 1):
            sys.stdout.write(u" %d " % num)
            sys.stdout.write("\n")
            sys.stdout.write(item.get_text())
            sys.stdout.write("\n")
    except Exception as e:
        # Best effort: report any download/parse/output error and prompt again.
        print(e)
        print(u" , !")

클래스를 사용하여 캡슐화한 버전
#!/usr/bin/python
#coding: utf-8

import re, sys, urllib, urllib2
from bs4 import BeautifulSoup

class Qiushi_spider(object):
    """Download one page of the qiushibaike.com "hot" listing and print it.

    Attributes:
        x: page number the spider was created for.
        url: URL of that page, built from the page number.
    """

    def __init__(self, x):
        """Remember the page number *x* and build the page URL from it."""
        self.x = x
        self.url = "http://www.qiushibaike.com/hot/page/" + str(self.x) + "/"

    def find_out(self):
        """Fetch the page and write every ``div.content`` text to stdout.

        Output is a blank line, the page label, a blank line, then for each
        entry its 1-based number followed by its text. Any exception raised
        while downloading, parsing or writing is caught and printed; nothing
        is returned and nothing propagates to the caller.
        """
        try:
            # Browser-like User-Agent header sent with the request.
            headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

            request = urllib2.Request(self.url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                html = response.read()
            finally:
                # Release the connection even if reading fails.
                response.close()

            soup = BeautifulSoup(html, "lxml")
            # attrs must be a dict mapping attribute name to value; the
            # previous set literal {"class", "content"} was not one.
            items = soup.find_all("div", {"class": "content"})

            sys.stdout.write("\n")
            sys.stdout.write(u" %d\n" % self.x)
            sys.stdout.write("\n")
            for num, item in enumerate(items, 1):
                sys.stdout.write(u" %d " % num)
                sys.stdout.write("\n")
                sys.stdout.write(item.get_text())
                sys.stdout.write("\n")
        except Exception as e:
            # Best effort: report the failure instead of raising.
            print(e)
            print(u" , ")


# NOTE(review): the prompt/message literals contain only blanks and
# punctuation; their non-ASCII wording appears to have been lost before this
# review. They are kept exactly as found -- restore them from the original.
if __name__ == "__main__":
    # Prompt for page numbers until the user enters 0.
    while True:
        try:
            x = int(raw_input(u" ( 0 ), 35 :"))
            if x == 0:
                break
            spider = Qiushi_spider(x)
            spider.find_out()
        except EOFError:
            # stdin is closed: prompting again would fail the same way forever.
            break
        except Exception as e:
            # Typically a ValueError from int() on non-numeric input.
            print(e)
            print(u" , ")

좋은 웹페이지 즐겨찾기