징둥(JD.com) 상품 리뷰 100페이지 크롤링하기

4724 단어
#items.py
import scrapy
class InsistItem(scrapy.Item):
    """Scrapy item that carries a single scraped value in ``comment``."""

    # The only declared field; the spider stores one comment text per item.
    comment = scrapy.Field()

#pipelines.py
import json
class InsistPipeline(object):
    """Item pipeline that appends every scraped item to ``tencent.json``.

    Each item is serialised as one JSON object followed by a comma and a
    newline, so the output is a comma-separated sequence of objects rather
    than a single valid JSON document.
    """

    def __init__(self):
        # NOTE(review): the file is GBK-encoded while ``ensure_ascii=False``
        # writes raw characters; text outside GBK (e.g. emoji) would raise
        # UnicodeEncodeError on write. Kept as-is so existing consumers of
        # the file keep working -- confirm whether UTF-8 is acceptable.
        self.f = open('tencent.json', 'w', encoding='gbk')

    def process_item(self, item, spider):
        """Write *item* to the output file and return it unchanged.

        Args:
            item: The scraped item; converted to a plain ``dict`` for
                serialisation.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.
        """
        content = json.dumps(dict(item), ensure_ascii=False) + ",\n"
        self.f.write(content)
        return item

    def close_spider(self, spider):
        """Close the output file once the spider has finished.

        Without this hook the handle opened in ``__init__`` is never closed
        and buffered data may not be flushed to disk.
        """
        self.f.close()


#tengxun.py
import scrapy
from insist.items import InsistItem
import json


class TengxunSpider(scrapy.Spider):
    """Spider that walks the paginated JD comment API for one product.

    It starts at page ``offset`` (0) and requests the following pages one
    after another until ``max_pages`` pages have been requested, yielding
    one ``InsistItem`` per comment.
    """

    name = 'tengxun'
    allowed_domains = ['sclub.jd.com']
    # Product page the comments belong to: https://item.jd.com/4432058.html
    baseURL = (
        'https://sclub.jd.com/comment/productPageComments.action'
        '?productId=4432058&score=0&sortType=5&pageSize=10'
        '&isShadowSku=0&rid=0&fold=1&page='
    )
    # Zero-based index of the page requested most recently.
    offset = 0
    # Total number of pages to fetch (pages 0 .. max_pages - 1).
    max_pages = 100
    start_urls = [baseURL + str(offset)]

    def parse(self, response):
        """Yield one item per comment, then a request for the next page.

        Args:
            response: Response whose body is GBK-encoded JSON containing a
                ``comments`` list of objects with a ``content`` key.

        Yields:
            ``InsistItem`` objects with ``comment`` set to each comment's
            text, followed by a ``scrapy.Request`` for the next page while
            fewer than ``max_pages`` pages have been requested.
        """
        com = json.loads(response.body.decode('gbk'))
        comment = com['comments']
        for co in comment:
            item = InsistItem()
            item['comment'] = co['content']
            yield item

        # Pages are zero-based, so the last page to request is
        # max_pages - 1; comparing the *next* offset keeps the crawl at
        # exactly max_pages pages instead of one extra.
        if self.offset + 1 < self.max_pages:
            self.offset += 1
            yield scrapy.Request(
                self.baseURL + str(self.offset),
                callback=self.parse,
            )

 
출처: https://www.cnblogs.com/persistence-ok/p/11576574.html

좋은 웹페이지 즐겨찾기