간단한 Python 크롤러로 펀드 정보 얻기

2308 단어

간단한 크롤러로 펀드 정보를 추출하고, MySQL 데이터베이스를 사용해 저장했습니다. 코드는 다음과 같습니다.

import pymysql
from urllib.error import HTTPError
from urllib.request import urlopen
from bs4 import BeautifulSoup
import uuid
import datetime

# Open the MySQL connection and the module-level cursor used by the rest of
# the script. `password` / `database` are the current PyMySQL keyword names;
# the older `passwd` / `db` spellings are deprecated aliases.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='1234',
    database='mysql',
    charset='utf8',
)
cur = conn.cursor()
# Make `fund` the default schema for every statement issued afterwards.
cur.execute("USE fund")

def getFundNumStr(str):
    """Left-pad a fund number string with zeros to a width of six.

    Args:
        str: The fund number as a string. The parameter shadows the builtin
            ``str``; the name is kept because it is part of the signature.

    Returns:
        The input prefixed with as many ``"0"`` characters as needed to reach
        six characters. Inputs of six or more characters are returned
        unchanged.
    """
    return str.rjust(6, "0")

def getFundData(url):
    """Download a fund page and extract its name, value and timestamp.

    Args:
        url: Address of the fund page to fetch.

    Returns:
        A list ``[name, value, time]`` of strings, where ``name`` is the page
        title text up to the first ``"("``, ``value`` is the text of the
        element with id ``gz_gsz`` and ``time`` is derived from the element
        with id ``gz_gztime`` (``"20"`` plus the text between its
        parentheses, or the raw text when it is ``'--'``).
        ``None`` is returned when the request fails with an HTTP error or
        the page does not have the expected structure.
    """
    # Request the page; an HTTP error status means there is nothing to parse.
    try:
        response = urlopen(url)
    except HTTPError:
        return None

    # Parse the page. A missing element surfaces as AttributeError (calling a
    # method on None) and a missing parenthesis as ValueError from index();
    # both mean the page is not in the expected format.
    try:
        # Close the HTTP response as soon as the document has been parsed so
        # a long crawl does not accumulate open sockets.
        with response:
            page = BeautifulSoup(response, "lxml")
        title = page.find("div", {"class": "fundDetail-tit"}).div.get_text()
        name = title[:title.index("(")]
        value = page.find("", {"id": "gz_gsz"}).get_text()
        stamp = page.find("", {"id": "gz_gztime"}).get_text()
        if stamp != '--':
            # Keep only the text inside the parentheses and prefix it with
            # "20" (presumably expanding a two-digit year - confirm against
            # the live page format).
            stamp = "20" + stamp[stamp.index("(") + 1:stamp.index(")")]
    except (AttributeError, ValueError):
        return None
    return [name, value, stamp]

def saveNewFundInfo(code,name):
    """Insert a fund into ``fund_info`` unless a row with its code exists.

    Both statements run on the module-level cursor ``cur``. This function
    does not commit; the transaction is left open for the caller.

    Args:
        code: Fund code used to look up an existing row.
        name: Fund name stored alongside ``code`` when a row is inserted.
    """
    # Query parameters are passed as a real one-element tuple: ``(code)`` is
    # just the bare string, which only works through a driver-specific
    # fallback for scalar arguments.
    cur.execute("SELECT * FROM fund_info WHERE code = %s", (code,))
    # Fetching a row is a driver-independent existence check; ``rowcount``
    # after a SELECT is not guaranteed to be meaningful for every cursor type.
    if cur.fetchone() is None:
        cur.execute("INSERT INTO fund_info (code,name) VALUES (%s, %s)", (code, name))

# Walk the candidate fund numbers, fetch each fund page and store what was
# found. The cursor and connection are always closed, even if the crawl fails.
try:
    for num in range(419,1000000):
        funCode = getFundNumStr(str(num))
        url = "http://fund.eastmoney.com/"+funCode+".html"
        data = getFundData(url)
        if data is None:
            # No usable page for this code; move on to the next one.
            continue
        # 32-character hex form of a UUID1, used as the row's primary key value.
        row_id = uuid.uuid1().hex
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        saveNewFundInfo(funCode, data[0])
        # '--' is the page's placeholder for a missing value, so no daily
        # row is written in that case.
        if data[1] != '--':
            cur.execute("INSERT INTO fund_day_data (id,code,data,data_time,create_time,update_time) \
                  VALUES (%s, %s,%s, %s, %s, %s)", (row_id, funCode, float(data[1]), data[2], today, today))
        # Commit once per fund so completed work survives a later failure.
        conn.commit()
    # Report when the full range has been processed.
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
finally:
    cur.close()
    conn.close()

좋은 웹페이지 즐겨찾기