MainInformation.py

함수 기능: 셋방 페이지 정보 얻기

import requests
from bs4 import BeautifulSoup
import time
def getMainInformation(url, timeout=10):
    """Scrape the main details of one listing page into a dict.

    The page at ``url`` is downloaded, parsed with BeautifulSoup's ``lxml``
    parser, and each field is extracted with its own CSS selector.

    Args:
        url: Address of the listing detail page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``; without
            one the call can wait indefinitely on an unresponsive server.

    Returns:
        A dict with the keys ``'title'``, ``'price'``, ``'sex'``, ``'name'``,
        ``'photo'``, ``'add'`` and ``'ownerPhoto'``. Any field whose selector
        matches nothing keeps the placeholder value ``'hello'``.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    # Every field starts as a placeholder so the result always has all keys.
    data = {
        'title': 'hello',
        'price': 'hello',
        'sex': 'hello',
        'name': 'hello',
        'photo': 'hello',
        'add': 'hello',
        'ownerPhoto': 'hello'
    }

    # Title text. When several nodes match, the last one wins (same for the
    # other overwrite loops below).
    for node in soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em'):
        data['title'] = node.get_text()

    # 'add' field (presumably the address - confirm against the page), with
    # surrounding whitespace removed.
    for node in soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span.pr5'):
        data['add'] = node.get_text().strip()

    # Price text, stored as a string exactly as it appears in the page.
    for node in soup.select('#pricePart > div.day_l > span'):
        data['price'] = node.get_text()

    # URL of the owner's picture, taken from the <img> 'src' attribute.
    for node in soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img'):
        data['ownerPhoto'] = node.get('src')

    # Owner's sex, derived from the first CSS class of the div inside
    # div.member_pic: 'member_ico' means male, anything else female.
    for node in soup.select('div[class="member_pic"] > div'):
        classes = node.get('class')
        if not classes:
            # A div without a (non-empty) class attribute cannot be
            # classified; indexing it would raise, so skip it.
            continue
        data['sex'] = 'male' if classes[0] == 'member_ico' else 'female'

    # Owner's display name.
    for node in soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a'):
        data['name'] = node.get_text()

    # Listing photo: only the first matching image is kept.
    photos = soup.select('#detailImageBox > div.pho_show_r > div > ul > li > img[data-width="800"]')
    if photos:
        data['photo'] = photos[0].get('data-src')

    # Pause one second before returning (presumably to throttle successive
    # requests to the site).
    time.sleep(1)
    return data


#  
# Sample listing page. The fetch runs only when this file is executed
# directly, so importing the module no longer triggers a network request.
url='http://bj.xiaozhu.com/fangzi/1466098635.html'
if __name__ == '__main__':
    getMainInformation(url)

getPageHref.py

함수 기능: 셋방 링크 가져오기

import requests
from bs4 import BeautifulSoup
import time
def getPageHref(url, timeout=10):
    """Collect the listing links found on one search-result page.

    Args:
        url: Address of the search-result page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``; without
            one the call can wait indefinitely on an unresponsive server.

    Returns:
        A list with the ``href`` attribute of every ``<a>`` element whose
        class attribute is exactly ``resule_img_a`` under ``#page_list``, in
        document order. The list is empty when nothing matches.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    # One anchor per listing entry in the result list.
    anchors = soup.select('#page_list > ul > li > a[class="resule_img_a"]')
    self_hrefs = [anchor.get('href') for anchor in anchors]

    # Pause one second before returning (presumably to throttle successive
    # requests to the site).
    time.sleep(1)
    return self_hrefs


#  
# Sample search page. The fetch runs only when this file is executed
# directly, so importing the module no longer triggers a network request.
url='http://bj.xiaozhu.com/search-duanzufang-p1-0/'
if __name__ == '__main__':
    getPageHref(url)

Main.py

함수 기능: 가격이 400 이상인 숙소 정보를 선별

import getPageHref
import MainInformation
import pymongo


def main():
    """Print every stored listing whose price is at least 400.

    Reads all documents from the ``sheet_tab`` collection of the ``houseMess``
    database on the local MongoDB server (``localhost:27017``), keeps those
    whose ``'price'`` parses as a number greater than or equal to 400, and
    prints each kept document.

    Documents whose price is missing or not numeric (for example the
    ``'hello'`` placeholder) are skipped instead of aborting the run.
    """
    client = pymongo.MongoClient('localhost', 27017)
    try:
        sheet_tab = client['houseMess']['sheet_tab']

        # One-off crawl step that fills the collection; left disabled, as in
        # the original, so a normal run only queries already stored data.
        # urls=['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(str(i)) for i in range(1,3)]
        # for url in urls:
        #     urlss=getPageHref.getPageHref(url)
        #     for i in urlss:
        #         data=MainInformation.getMainInformation(i)
        #         sheet_tab.insert_one(data)

        # Select the listings priced at 400 or more.
        house = []
        for record in sheet_tab.find():
            # NOTE(review): the price is text scraped from a web page. It used
            # to be passed to eval(), which would execute arbitrary
            # expressions; it is now parsed strictly as a number.
            try:
                price = float(record['price'])
            except (KeyError, TypeError, ValueError):
                continue
            if price >= 400:
                house.append(record)

        for record in house:
            print(record)
    finally:
        # Release the connection even if the query or printing fails.
        client.close()
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

이 내용에 흥미가 있습니까?

현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:

다양한 언어의 JSON

JSON은 Javascript 표기법을 사용하여 데이터 구조를 레이아웃하는 데이터 형식입니다. 그러나 Javascript가 코드에서 이러한 구조를 나타낼 수 있는 유일한 언어는 아닙니다. 저는 일반적으로 '객체'{}...

텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.

CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.

좋은 웹페이지 즐겨찾기

개발자 우수 사이트 수집

개발자가 알아야 할 필수 사이트 100선 추천 우리는 당신을 위해 100개의 자주 사용하는 개발자 학습 사이트를 정리했습니다