Python 크롤러: 즈롄자오핀(智联招聘) 채용 정보 수집

15408 단어 개발 하 다.
목적: 검색할 직위명과 최대 5개의 희망 도시를 입력받아, 즈롄자오핀(智联招聘)에 올라온 해당 채용 정보를 수집하여 엑셀 표에 기록합니다.
#coding:utf-8
import urllib2
import re
import xlwt


class ZLZP(object):
    """Scrape Zhaopin job-search result pages into an ``.xls`` workbook.

    The instance builds a search URL on ``sou.zhaopin.com`` from a keyword
    and a list of cities, downloads the result pages, extracts four text
    fields per posting with regular expressions and writes one row per
    posting through ``xlwt``.

    NOTE(review): several message/header string literals in this class are
    whitespace-only. Their original (non-ASCII) wording appears to have been
    lost before this file was saved; they are kept as-is and should be
    restored by someone who knows the intended text.
    """

    # Number of postings the site lists on one result page.
    PAGE_SIZE = 60

    # NOTE(review): the original count pattern was truncated in this file.
    # This reconstruction assumes the total is rendered as
    # ``<span class="search_yx_tj">...<em>N</em>...`` -- confirm against a
    # real result page and adjust if the markup differs.
    TOTAL_PATTERN = re.compile(r'search_yx_tj.*?<em>\s*(\d+)\s*</em>', re.S)

    # NOTE(review): the original row pattern lost its HTML anchors and one of
    # its four capture groups. This reconstruction assumes each posting has
    # cells with classes zwmc / gsmc / zwyx / gzdd (presumably job title,
    # company, salary, location) -- confirm against a real result page.
    ROW_PATTERN = re.compile(
        r'<td class="zwmc".*?<a[^>]*>(.*?)</a>'
        r'.*?<td class="gsmc".*?<a[^>]*>(.*?)</a>'
        r'.*?<td class="zwyx">(.*?)</td>'
        r'.*?<td class="gzdd">(.*?)</td>',
        re.S)

    # Any HTML tag, opening or closing (e.g. the <b>...</b> keyword highlight).
    TAG_PATTERN = re.compile(r'<[^>]+>')

    def __init__(self, workname, citys):
        """Store the keyword and build the base search URL.

        Args:
            workname: job keyword to search for (``kw`` query parameter).
            citys: sequence of city names; they are joined with ``%2B``
                (a URL-encoded ``+``) into the ``jl`` query parameter.
        """
        self.workname = workname
        # Unknown until get_total() has parsed the first result page.
        self.totalPage = None
        # str.join puts a separator between every pair of cities, including
        # when the same city appears more than once in the list.
        query = 'kw=%s&jl=%s' % (workname, '%2B'.join(citys))
        self.url = 'http://sou.zhaopin.com/jobs/searchresult.ashx?' + query
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0'
        }

    @staticmethod
    def _to_text(value):
        """Return *value* as text, decoding UTF-8 byte strings."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def get_page(self, pageNum):
        """Download one result page.

        Args:
            pageNum: 1-based page number, sent as the ``p`` query parameter.

        Returns:
            The raw response body, or ``None`` if the request failed (the
            failure is printed, not raised, so one bad page does not abort
            the whole run).
        """
        getUrl = self.url + '&p=%s' % pageNum
        request = urllib2.Request(getUrl, headers=self.headers)
        try:
            response = urllib2.urlopen(request)
        except Exception as e:
            print('   %s     ,  %s' % (pageNum, e))
            return None
        # Always release the connection, even if read() fails.
        try:
            return response.read()
        finally:
            response.close()

    def get_total(self, html):
        """Parse the total posting count and derive the number of pages.

        Sets ``self.totalPage`` to the number of result pages (ceiling of
        count / ``PAGE_SIZE``), or to ``None`` when no count can be found in
        *html*.

        Args:
            html: body of a result page.

        Returns:
            The new value of ``self.totalPage``.
        """
        found = self.TOTAL_PATTERN.search(html)
        if found is None:
            print('Could not find the total number of results in the page.')
            self.totalPage = None
            return None
        total = int(found.group(1))
        print('    %s       ...' % total)
        # Ceiling division: a partially filled last page still counts.
        self.totalPage = (total + self.PAGE_SIZE - 1) // self.PAGE_SIZE
        return self.totalPage

    def get_data(self, html):
        """Extract the postings listed in one result page.

        Args:
            html: body of a result page.

        Returns:
            A list of 4-tuples, one per posting, in the order captured by
            ``ROW_PATTERN``. HTML tags are stripped from the first field.
            The list is empty when nothing matches.
        """
        return [
            (self.TAG_PATTERN.sub('', fields[0]),) + tuple(fields[1:])
            for fields in self.ROW_PATTERN.findall(html)
        ]

    def start(self, max_pages=10):
        """Run the scrape and save the results to an ``.xls`` file.

        Downloads the first page, reads the page count from it, then walks
        at most *max_pages* pages and writes every posting as one row. Pages
        that fail to download are skipped.

        Args:
            max_pages: upper bound on the number of result pages fetched.
        """
        first_html = self.get_page(1)
        if first_html is None:
            return
        self.get_total(first_html)
        if self.totalPage is None:
            # Page count unknown: fall back to the configured cap.
            last_page = max_pages
        else:
            # Never request pages beyond what the search actually returned.
            last_page = min(self.totalPage, max_pages)

        workname = self._to_text(self.workname)
        workbook = xlwt.Workbook(encoding='utf-8')
        # Excel limits worksheet names to 31 characters.
        sheet = workbook.add_sheet((u'   ' + workname)[:31])
        for col in range(4):
            sheet.write(0, col, '    ')

        # Row 0 holds the header, so data starts at row 1.
        row = 1
        for page in range(1, last_page + 1):
            print('     %s   ....' % page)
            # Page 1 is already in memory; do not download it twice.
            html = first_html if page == 1 else self.get_page(page)
            if html is None:
                continue
            for record in self.get_data(html):
                for col, value in enumerate(record):
                    sheet.write(row, col, value)
                row += 1

        workbook.save(u'  %s    .xls' % workname)

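'''
    Note for Python 3:
    Source files and string literals are UTF-8 / Unicode by default.
    In Python 3 every str is already Unicode text, so the explicit
    unicode(self.workname,encoding='utf-8')
    conversion is unnecessary; a plain str written with '' or "" can be used directly.
'''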


if __name__ == '__main__':
    # Script entry point: read the job keyword and up to five cities from
    # the console, then run the scraper.
    # NOTE(review): the prompt literals are whitespace-only apart from
    # punctuation; their original wording appears to have been lost.

    # Surrounding whitespace would otherwise end up in the URL and file name.
    workname = raw_input('           :').strip()
    citys = []
    # Collect at most 5 cities; entering 0 ends the list early.
    while len(citys) < 5:
        city = raw_input('       ,    5 ,  0  :').strip()
        if city == '0':
            break
        if not city:
            # A blank line is not a city; ask again instead of adding an
            # empty entry to the query.
            continue
        citys.append(city)

    zlzp = ZLZP(workname, citys)
    zlzp.start()

좋은 웹페이지 즐겨찾기