aiohttp와 asyncio 라이브러리로 자동차 판매량 순위를 수집한다.
주 코드:
from Test.Homework.configs import *
import aiohttp
import asyncio
import datetime
import re
import pymongo
from Test.Homework.configs_mysql import MYSQL
# Print the current date and time as soon as the module is loaded.
print(datetime.datetime.today(), ' ')
class AioCar(object):
    """Asynchronous scraper for the db.auto.sohu.com XML feeds.

    Each ``have_*`` coroutine downloads one XML document with aiohttp and
    pulls fields out of it with regular expressions. Parsing lives in the
    private ``_parse_*`` helpers so it can be exercised without network
    access.
    """

    # Compiled once at class creation. Raw strings keep \d from being read
    # as an (invalid) string escape sequence.
    _ID_RE = re.compile(r'id="(\d+)"', re.S)
    _BRAND_NAME_RE = re.compile(r'brand name="(.*?)"', re.S)
    _SALES_RE = re.compile(r'date="(.*?)" salesNum="(\d+)"', re.S)
    _NAME_RE = re.compile(r'name="(.*?)"', re.S)

    def __init__(self):
        # Entry-point feed: the XML brand list that brand ids are read from.
        self.url_1 = 'http://db.auto.sohu.com/cxdata/xml/basic/brandList.xml'

    @classmethod
    def _parse_ids(cls, xml):
        """Return every ``id="<digits>"`` value in *xml*, in document order."""
        return cls._ID_RE.findall(xml)

    @classmethod
    def _parse_brand_name(cls, xml):
        """Return the first ``brand name="..."`` value in *xml*.

        :raises IndexError: if *xml* contains no such attribute.
        """
        found = cls._BRAND_NAME_RE.search(xml)
        if found is None:
            raise IndexError(
                'no brand name="..." attribute found in brand document')
        return found.group(1)

    @classmethod
    def _parse_sales(cls, xml):
        """Return ``[name, date, sales_count]`` rows extracted from *xml*.

        ``name`` is the first ``name="..."`` value in the document and is
        repeated on every row.

        :raises IndexError: if sales rows exist but no name is present.
        """
        records = cls._SALES_RE.findall(xml)
        if not records:
            # Nothing to report, so a missing name is not an error here.
            return []
        found = cls._NAME_RE.search(xml)
        if found is None:
            raise IndexError(
                'no name="..." attribute found in sales document')
        name = found.group(1)
        return [[name, date, sale_nums] for date, sale_nums in records]

    async def have_brand_id(self, url):
        """Download the brand list and return every brand id found in it.

        :param url: address of the brand-list XML; ``self.url_1`` is used
            when a falsy value is passed.
        :return: list of id strings (empty when nothing matches).
        """
        target = url or self.url_1
        async with aiohttp.ClientSession() as session:
            async with session.get(target) as resp:
                # No explicit encoding is requested for this feed.
                result = await resp.text()
        return self._parse_ids(result)

    async def have_brand_name(self, url):
        """Download one brand document and return its brand name.

        Example address:
        http://db.auto.sohu.com/cxdata/xml/basic/brand145ModelListWithCorp.xml

        :param url: address of the brand document (decoded as GBK).
        :return: the first ``brand name`` value in the document.
        :raises IndexError: if the document carries no brand name.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                result = await resp.text(encoding="GBK")
        return self._parse_brand_name(result)

    async def have_ids(self, url):
        """Download one brand document and return every id found in it.

        :param url: address of the brand document (decoded as GBK).
        :return: list of id strings (empty when nothing matches).
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                result = await resp.text(encoding="gbk")
        return self._parse_ids(result)

    async def have_leardboard_message(self, url):
        """Download one sales document and return its sales rows.

        :param url: address of the sales document.
        :return: list of ``[name, date, sales_count]`` lists; empty when
            the document holds no sales rows.
        :raises IndexError: if sales rows exist but no name is present.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                try:
                    result = await resp.text(encoding="GBK")
                except UnicodeError:
                    # Retry the decode as UTF-8 when the payload is not
                    # valid GBK.
                    result = await resp.text(encoding="utf-8")
        return self._parse_sales(result)

    @staticmethod
    def save_message_to_mongodb(data):
        """Insert one document into the configured MongoDB collection.

        Reads the module-level names ``db`` and ``MONGO_COLLECTION``, which
        must be defined before the call. Failures are printed and then
        swallowed (best-effort write).

        :param data: document (dict) to store.
        """
        try:
            # Collection.insert was removed in PyMongo 4; insert_one is the
            # supported call for a single document.
            db[MONGO_COLLECTION].insert_one(data)
        except Exception as e:  # deliberate best-effort: report and go on
            print(e.args)

    async def main(self):
        """Fetch the brand ids from ``self.url_1``.

        :return: single-element list whose only item is the list of brand
            ids (the shape ``asyncio.gather`` produces for one awaitable).
        """
        # Reuse this instance rather than constructing a second AioCar.
        return await asyncio.gather(self.have_brand_id(self.url_1))
if __name__ == '__main__':
    # Crawl driver: brand list -> per-brand document -> per-model sales
    # document, storing one record per sales row in MongoDB and MySQL.
    aio_car = AioCar()
    mysql = MYSQL()

    # URL templates; "{}" receives a brand id and a model id respectively.
    url_2 = 'http://db.auto.sohu.com/cxdata/xml/basic/brand{}ModelListWithCorp.xml'
    url_3 = 'http://db.auto.sohu.com/cxdata/xml/sales/model/model{}sales.xml'

    # asyncio.get_event_loop() is deprecated when no loop is running, so
    # create the loop explicitly and close it when the crawl ends.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    client = pymongo.MongoClient(MONGO_URL)
    # Module-level name read by AioCar.save_message_to_mongodb.
    db = client[MONGO_DB]

    try:
        # main() returns a one-element list holding the brand ids.
        results = loop.run_until_complete(aio_car.main())
        for brand_id in results[0]:
            brand_url = url_2.format(brand_id)
            brand_name = loop.run_until_complete(
                aio_car.have_brand_name(brand_url))
            model_ids = loop.run_until_complete(aio_car.have_ids(brand_url))
            for model_id in model_ids:
                sales_url = url_3.format(model_id)
                rows = loop.run_until_complete(
                    aio_car.have_leardboard_message(sales_url))
                for name, date, sale_nums in rows:
                    record = {
                        'result': " ".join(
                            [brand_name, name, date, sale_nums]),
                    }
                    aio_car.save_message_to_mongodb(record)  # MongoDB
                    # NOTE(review): the same dict is handed to MySQL after
                    # the MongoDB write, which adds an "_id" key to it;
                    # confirm MYSQL.insert tolerates that.
                    mysql.insert(record)  # MySQL
    finally:
        # Release the MongoDB connection and the event loop even if the
        # crawl aborts part-way.
        client.close()
        loop.close()
# Print the current date and time again after the statements above have run.
# NOTE(review): indentation was lost in this paste; confirm whether this line
# originally sat inside the ``__main__`` guard.
print(' ', datetime.datetime.today())