웹 크롤러 예제 - 여러 페이지, 함수 템플릿
http://bj.xiaozhu.com/
포함 정보: 여러 페이지; 각 페이지에 24개의 링크
크롤링 요구 사항: 모든 링크의 제목, 주소, 가격, 이미지 링크, 호스트 이름, 호스트 성별
from bs4 import BeautifulSoup
import requests
def get_info(page_number):
    """Download every listing found via the search pages and print its details.

    The listing URLs are obtained from ``get_page_link(page_number)``.  Each
    listing page is fetched with ``requests``, parsed with BeautifulSoup, and
    a dict with the keys ``title``, ``address``, ``price``, ``pic``,
    ``host_name`` and ``host_gender`` is printed.  A field whose element is
    missing from the page is reported as ``None`` instead of aborting the
    whole crawl.

    Args:
        page_number: Passed through unchanged to ``get_page_link``.

    Returns:
        None. The results are only printed.

    Raises:
        requests.exceptions.RequestException: If a listing page cannot be
            fetched (including when the 10-second timeout expires).
    """

    def print_gender(class_name):
        """Translate the CSS class of the host's gender icon into a label.

        Returns ``None`` for any class other than the two known ones.
        """
        # NOTE(review): both branches return the same single-space string in
        # this copy of the code; the original labels appear to have been lost
        # in extraction -- restore the intended values.
        if class_name == 'member_ico1':
            return ' '
        if class_name == 'member_ico':
            return ' '
        return None

    def first_match(soup, selector):
        """Return the first element matching ``selector``, or None."""
        found = soup.select(selector)
        return found[0] if found else None

    def attr_of(tag, name):
        """Return attribute ``name`` of ``tag``, or None when ``tag`` is None."""
        return tag.get(name) if tag is not None else None

    def text_of(tag):
        """Return the text of ``tag``, or None when ``tag`` is None."""
        return tag.text if tag is not None else None

    for url in get_page_link(page_number):
        # requests never times out by default; bound the wait so a single
        # stalled server cannot hang the crawl forever.
        wb_data = requests.get(url, timeout=10)
        soup = BeautifulSoup(wb_data.text, 'html.parser')

        # ``class`` is a multi-valued attribute, so BeautifulSoup returns a
        # list; the first entry is the one compared in print_gender.
        gender_classes = attr_of(first_match(soup, 'div.member_pic > div'), 'class')
        host_gender = gender_classes[0] if gender_classes else None

        data = {
            'title': text_of(soup.title),
            'address': attr_of(first_match(soup, 'div.pho_info > p'), 'title'),
            'price': text_of(first_match(soup, 'div.day_l > span')),
            'pic': attr_of(first_match(soup, '#curBigImage'), 'src'),
            'host_name': text_of(first_match(soup, 'a.lorder_name')),
            'host_gender': print_gender(host_gender),
        }
        print(data)
def get_page_link(page_number):
    """Collect the listing URLs from search pages 1 through ``page_number``.

    Args:
        page_number: Number of the last search-result page to read; pages
            are numbered from 1.  Values below 1 produce an empty list
            without any network access.

    Returns:
        A list with the ``href`` of every ``a.resule_img_a`` anchor found,
        in page order.  Anchors without an ``href`` are skipped.

    Raises:
        requests.exceptions.RequestException: If a search page cannot be
            fetched (including when the 10-second timeout expires).
    """
    page_link = []
    # range() excludes its stop value, so ``+ 1`` is required for the page
    # numbered ``page_number`` itself to be fetched.
    for each_number in range(1, page_number + 1):
        full_url = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(each_number)
        # requests never times out by default; bound the wait explicitly.
        wb_data = requests.get(full_url, timeout=10)
        soup = BeautifulSoup(wb_data.text, 'html.parser')
        hrefs = (anchor.get('href') for anchor in soup.select('a.resule_img_a'))
        page_link.extend(href for href in hrefs if href)
    # Return the list (rather than printing it) so the caller can iterate it.
    return page_link
if __name__ == "__main__":
    import sys

    # The original call passed ``page_number`` without ever defining it, which
    # raises NameError.  Take the value from the first command-line argument
    # instead, falling back to 2 when none is given.
    page_number = int(sys.argv[1]) if len(sys.argv) > 1 else 2
    get_info(page_number)
부분 출력:
{'title': ' - | - ', 'host_name': 'zoehh', 'price': '398', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,61,6262,1800,1200,05be8a2a.jpg', 'host_gender': ' '}
{'title': '【 】 Soho & - | - ', 'host_name': 'Liicy', 'price': '285', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,56,6219,1800,1200,27d55c0f.jpg', 'host_gender': ' '}
{'title': ' - | - ', 'host_name': ' ', 'price': '395', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,62,2913,1800,1200,4ecf03a3.jpg', 'host_gender': None}
{'title': ' 6 - | - ', 'host_name': ' ', 'price': '197', 'address': ' 6 ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/4,0,26,6729,1800,1200,768006fe.jpg', 'host_gender': ' '}
{'title': ' - | - ', 'host_name': ' ', 'price': '596', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,3,992,1800,1200,32297300.jpg', 'host_gender': ' '}
{'title': ' , - | - ', 'host_name': 'zhengfanwu', 'price': '998', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,99,6792,1800,1200,65ec7d49.jpg', 'host_gender': ' '}
{'title': '【 】 10 - | - ', 'host_name': ' ', 'price': '228', 'address': ' ( 500 , )', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/1,0,94,4002,825,550,d2a2390e.jpg', 'host_gender': ' '}
{'title': ' - 6 - | - ', 'host_name': ' ', 'price': '108', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,5,3548,1800,1200,9f2e73e2.jpg', 'host_gender': ' '}
{'title': ' , 5 , - | - ', 'host_name': ' ', 'price': '395', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/6,0,66,803,1800,1200,38a4c686.jpg', 'host_gender': None}
{'title': ' 798、 、 。- | - ', 'host_name': ' Sunny', 'price': '268', 'address': ' ', 'pic': 'http://image.xiaozhustatic1.com/00,800,533/2,0,71,458,1800,1200,a9c5ea82.jpg', 'host_gender': None}
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
Fortinet FortiWeb Web Application Firewall Policy BypassFrom: Geffrey Velasquez Date: Wed, 2 May 2012 20:33:23 -0500...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.