단순 코드 1

1370 단어 python3.x

from bs4 import BeautifulSoup
def getHtml(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    The request is sent with a browser-like ``user-agent`` header. A blank
    spacer line and the response status code are printed as progress output.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` included).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    import urllib.request

    print("     ")
    request = urllib.request.Request(url)
    request.add_header("user-agent", "Mozilla/5.0")
    # Open the prepared Request rather than the bare URL: opening the URL
    # string builds a fresh request and silently drops the header set above.
    # The ``with`` block closes the connection once the body has been read.
    with urllib.request.urlopen(request) as response2:
        print(response2.getcode())
        data = response2.read().decode("utf-8")
    return data


def urllist():
    """Return the listing-page URLs for category 69, pages 1 through 10.

    Returns:
        A list of ten URL strings in ascending page order.
    """
    base = "http://www.sanye.cx/?cate=69"
    return [f"{base}&page={page}" for page in range(1, 11)]

def logtext(content):
    """Append ``content`` followed by a ``"\\r\\n"`` line break to ``sanye.md``.

    The file is opened in append mode relative to the current working
    directory and is created if it does not exist.

    Args:
        content: Text to append.
    """
    # ``with`` closes the handle on every call; previously each call left
    # an open file object behind.
    with open('sanye.md', 'a+', encoding="utf-8") as f:
        f.write(content)
        f.write("\r\n")


def parsedata(data):
    """Print and log every entry title found in one listing page.

    Prints the page title, then looks up the element with class ``list``,
    its first ``<ul>``, and each ``<li>`` inside it. For every item the text
    of the ``<a>`` inside its ``<h2>`` is printed and then appended to the
    log file prefixed with ``##``.

    Args:
        data: HTML source of a listing page.

    Raises:
        AttributeError: If the page lacks a title or any of the elements
            above, since the chained lookups then operate on ``None``.
    """
    soup = BeautifulSoup(data, 'html.parser')
    print(soup.title.text)
    div = soup.find(class_='list')
    ul = div.find('ul')
    lis = ul.find_all('li')
    for li in lis:
        a = li.find('h2').find('a')
        name = a.get_text()
        print(name)
        name = "##" + name
        logtext(name)


def run():
    """Download every listing page and log the titles it contains."""
    urllists = urllist()
    print(urllists)
    for url in urllists:
        data = getHtml(url)
        parsedata(data)


# Guarded so that importing this module does not start the crawl; running
# the file as a script behaves as before.
if __name__ == "__main__":
    run()

좋은 웹페이지 즐겨찾기