Python 학습 노트(6): MongoDB에서 숙소 매물을 선별합니다.

내 코드
from bs4 import BeautifulSoup
import requests
import pymongo

# Connect to the MongoDB server listening on localhost, port 27017.
client = pymongo.MongoClient(host='localhost', port=27017)

# Database 'duanzu_db' and its collection 'sheet_fy', where the scraped
# listings are stored.
duanzu_db = client['duanzu_db']
sheet_fy = duanzu_db['sheet_fy']

# Search-result pages to crawl: page numbers 1 and 2.
urls = list(map('http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format, range(1, 3)))

# Translate the host's gender-icon CSS class into a readable label.
def get_lorder_sex(class_name):
    """Return a gender label for the host's gender-icon CSS class.

    Args:
        class_name: The ``class`` attribute of the icon tag. Callers pass
            the result of ``tag.get('class')``, i.e. a list of class names,
            a whitespace-separated string, or ``None`` when the attribute
            is absent.

    Returns:
        ``'female'`` when ``member_girl_ico`` is present, ``'male'`` when
        ``member_boy_ico`` is present, otherwise ``None``.
    """
    if not class_name:
        return None
    if isinstance(class_name, str):
        class_name = class_name.split()
    # Membership test rather than exact list equality, so an additional
    # class on the tag does not hide the icon class.
    if 'member_girl_ico' in class_name:
        return 'female'
    if 'member_boy_ico' in class_name:
        return 'male'
    return None

# Crawl one search-result page and scrape every listing it links to.
def get_links(url):
    """Fetch a search-result page and scrape each listing linked from it.

    Every ``<a>`` element matched by ``#page_list > ul > li > a`` is
    followed by passing its ``href`` to ``get_attraction``.

    Args:
        url: Address of the search-result page to fetch.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the timeout expires).
    """
    # Without a timeout a stalled server would block the crawl forever.
    wq_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wq_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get('href')
        # An anchor without an href gives nothing to fetch; skip it instead
        # of passing None on to requests.
        if not href:
            continue
        get_attraction(href)

def get_attraction(url, data=None):
    """Scrape one listing page and store its details in ``sheet_fy``.

    Args:
        url: Address of the listing detail page.
        data: Unused; kept only so existing callers that pass it still work.

    Each stored document has the keys ``title``, ``add``, ``rend``, ``img``,
    ``img_householder``, ``name`` and ``gender``. ``rend`` is stored as the
    text of the matched element, not as a number.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the timeout expires).
    """
    # Without a timeout a stalled server would block the crawl forever.
    wb_data = requests.get(url, timeout=10)

    # Parse the response body with the lxml parser.
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # CSS selectors for each field of the listing page.
    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    adds = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    rends = soup.select('div.day_l > span')
    imgs = soup.select('div.pho_show_l > div.pho_show_big > div > img')
    img_householders = soup.select('div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('div.js_box.clearfix > div.w_240 > h6 > a')
    genders = soup.select('div.js_box.clearfix > div.w_240 > h6 > span')

    # zip stops at the shortest result list, so a page missing any one of
    # the fields produces no record.
    for title, add, rend, img, img_householder, name, gender in zip(
            titles, adds, rends, imgs, img_householders, names, genders):
        # get_text() reads an element's text; get() reads an attribute.
        record = {
            'title': title.get_text(),
            'add': add.get_text(),
            'rend': rend.get_text(),
            'img': img.get('src'),
            'img_householder': img_householder.get('src'),
            'name': name.get_text(),
            'gender': get_lorder_sex(gender.get('class')),
        }
        sheet_fy.insert_one(record)

def find_sheet_fy(min_rend=500):
    """Print every stored listing whose price is at least ``min_rend``.

    Args:
        min_rend: Lowest ``rend`` value to print. Defaults to 500, the
            threshold that used to be hard-coded.

    Raises:
        KeyError: If a stored document has no ``rend`` field.
        ValueError: If a document's ``rend`` cannot be converted by ``int``.
    """
    # Read every document in the sheet_fy collection; the price comparison
    # is done here rather than in the query, after converting with int().
    for info in sheet_fy.find():
        if int(info['rend']) >= min_rend:
            print(info)

# Crawl each search-result page in turn; the listings found are stored in
# MongoDB as they are scraped.
for page_url in urls:
    get_links(page_url)

# Then print the stored listings that pass the price filter.
find_sheet_fy()

총결산
find 메서드에 선별 조건을 직접 쓸 수 있습니다. (단, 위 코드는 rend를 문자열로 저장하므로, 아래의 숫자 비교 조건이 일치하려면 저장할 때 int로 변환해 두어야 합니다.)
for info in sheet_fy.find({'rend':{'$gte':500}}):

$lt/$lte/$gt/$gte/$ne는 순서대로 </<=/>/>=/!=에 해당합니다. (l은 less, g는 greater, e는 equal, n은 not을 뜻합니다.)

좋은 웹페이지 즐겨찾기