마오옌(猫眼) 영화 랭킹 크롤러 코드

3948 단어
from urllib import request, error, parse
import re, ssl
import pymysql
from fake_useragent import UserAgent


def maoyan_sipder():
    """Start the crawl at the first Maoyan board page (``offset=0``)."""
    start_request('https://maoyan.com/board/4?offset=0')


def start_request(url):
    """Download board pages starting at ``url`` and store every movie found.

    Each page is fetched with a random User-Agent, parsed by ``parse_data``
    and written by ``save_data_to_db``.  The next page address is built by
    adding 10 to the ``offset`` value found in the URL that was actually
    served.  The crawl ends at the first page that yields no rows; at that
    point the module-level ``cursor`` and ``mysql_client`` are closed.

    Pages are walked in a loop (not by recursion) and every response is
    closed as soon as its body has been read.

    Args:
        url: Address of the first page to request.

    Returns:
        None.  ``HTTPError`` / ``URLError`` are printed and end the crawl
        instead of being raised.
    """
    import time

    # Raw string so that ``\d`` reaches the regex engine unchanged.
    offset_pattern = re.compile(r'.*?offset=(\d+)')
    # NOTE: this context performs no certificate verification.
    context = ssl._create_unverified_context()

    while True:
        headers = {
            "User-Agent": UserAgent().random
        }
        req = request.Request(url=url, headers=headers)
        try:
            with request.urlopen(req, context=context, timeout=10) as response:
                if response.status != 200:
                    return
                print('    ')
                html_str = response.read().decode('utf-8')
                # Keep the served URL; it may differ from the requested one.
                current_url = response.url
        except error.HTTPError as err:
            print(err.code, err.reason)
            return
        except error.URLError as err:
            print(err.reason)
            return

        result = parse_data(html_str)
        if not result:
            # Nothing left to scrape: pause, then release the DB resources.
            time.sleep(5)
            cursor.close()
            mysql_client.close()
            print('     ')
            return

        save_data_to_db(result)

        # Derive the next page from the offset of the page just processed.
        offset_result = offset_pattern.findall(current_url)
        if not offset_result:
            return
        offset = offset_result[0]
        print('     ', offset)
        next_offset = int(offset) + 10
        url = 'https://maoyan.com/board/4?offset=' + str(next_offset)
        print(url)


def parse_data(html_str):
    """Extract the movie entries from one board page.

    Args:
        html_str: Decoded HTML text of a board page.

    Returns:
        A list with one 7-tuple of strings per ``<dd>`` entry, in the order
        consumed by ``save_data_to_db``: rank, cover image URL, title, actor
        text, release text, first score part, second score part.  An empty
        list is returned when nothing matches.

    NOTE(review): the previous pattern consisted only of ``(.*?)``/``.*?``
    pieces (six groups, no literal text), so it matched the empty string at
    every position.  The tag literals below were reconstructed from the
    board's list markup (``board-index``, ``data-src``, ``name``, ``star``,
    ``releasetime``, ``integer``, ``fraction``) -- confirm against a live
    page before relying on it.
    """
    pattern = re.compile(
        r'<dd>.*?<i class="board-index.*?>(.*?)</i>.*?'
        r'<img.*?data-src="(.*?)".*?>.*?'
        r'<p class="name">.*?<a.*?>(.*?)</a>.*?'
        r'<p class="star">(.*?)</p>.*?'
        r'<p class="releasetime">(.*?)</p>.*?'
        r'<i class="integer">(.*?)</i>.*?'
        r'<i class="fraction">(.*?)</i>',
        re.S,  # entries span several lines, so "." must match newlines
    )
    return pattern.findall(html_str)


def save_data_to_db(result):
    """Insert parsed movie rows into the ``maoyanmovie`` table.

    Args:
        result: Iterable of indexable rows.  Items 0-4 are stored as
            ``rank`` (converted with ``int``), ``coverImage``, ``title``,
            ``actor`` and ``publishTime``; items 5 and 6 are concatenated
            and converted with ``float`` to form ``score``.

    Every row is executed and committed on its own through the module-level
    ``cursor`` and ``mysql_client``.  A row that cannot be converted or
    inserted is printed and skipped so that the remaining rows are still
    stored; a failed insert is rolled back.
    """
    for movieInfo in result:
        # Conversion happens inside its own try: one malformed row must not
        # abort the whole batch with an uncaught exception.
        try:
            info = {
                'rank': int(movieInfo[0]),
                'coverImage': movieInfo[1],
                'title': movieInfo[2],
                'actor': movieInfo[3],
                'publishTime': movieInfo[4],
                # presumably the integer and fractional halves of the score
                'score': float(movieInfo[5] + movieInfo[6]),
            }
        except (ValueError, IndexError, TypeError) as err:
            print(err)
            continue

        # Backtick-quote the column names: ``rank`` is a reserved word in
        # MySQL 8.0.2+ and is rejected when left unquoted.
        columns = ','.join('`%s`' % name for name in info)
        placeholders = ','.join(['%s'] * len(info))
        insert_sql = """
        INSERT INTO maoyanmovie(%s)
        VALUES(%s)
        """ % (columns, placeholders)

        try:
            cursor.execute(insert_sql, list(info.values()))
            mysql_client.commit()
        except Exception as err:
            print(err)
            mysql_client.rollback()

        # insert_sql = """
        # INSERT INTO maoyanmovie(rank,coverImage,title,actor,publishTime,score)
        # VALUES(%s,%s,%s,%s,%s,%s)
        # """
        # try:
        #     cursor.execute(
        #         insert_sql,
        #         [
        #             int(movieInfo[0]),
        #             movieInfo[1],
        #             movieInfo[2],
        #             movieInfo[3].replace('','').replace('\n',''),
        #             movieInfo[4],
        #             float(movieInfo[5]+movieInfo[6])
        #         ]
        #     )
        #     mysql_client.commit()
        # except Exception as err:
        #     print(err)
        #     mysql_client.rollback()


if __name__ == "__main__":
    # Open the MySQL connection; start_request() and save_data_to_db() read
    # it as the module-level name `mysql_client`.
    # NOTE(review): credentials are hard-coded here.
    mysql_client = pymysql.Connect(host='localhost', port=3306, user='root', password='201314', database='maoyan', charset='utf8')
    # Cursor shared by the functions above as the module-level name `cursor`.
    cursor = mysql_client.cursor()
    maoyan_sipder()

좋은 웹페이지 즐겨찾기