영화천국(Movie Heaven, dytt8.net)을 크롤링하여 데이터베이스에 저장하기
5615 단어 · 크롤러 · 국내 주요 영화 사이트
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup
from time import sleep
import re
import pymysql
import time
# Hrefs of movie detail pages that have already been handed to
# getPageImformation; getLinks consults it to avoid processing a link twice.
moviesLinks = set()

# Module-wide MySQL connection and cursor shared by the functions below.
# NOTE(review): passwd is the literal string 'None', not Python's None --
# confirm this is the intended credential.
conn = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='None',
    db='mysql',
    charset='utf8',
)
cur = conn.cursor()
# The connection is opened against the 'mysql' schema, so switch to the
# 'movies' database before any query runs.
cur.execute('use movies')
# Relative hrefs of movie detail pages, shaped like
# "/html/gndy/<letters>/<digits>/<digits>.html". Written as a raw string so
# that "\." is a regex escape rather than an invalid string escape.
_MOVIE_LINK_RE = re.compile(r"/html/gndy/+[a-z]+/[0-9]+/[0-9]+\.html")


def getLinks(pageUrl):
    """Scan one page for movie links and process each one not seen before.

    Every <a> element whose href matches the movie-page pattern is checked
    against the module-level ``moviesLinks`` set. New hrefs are recorded
    there and passed to ``getPageImformation``.

    Args:
        pageUrl: Absolute URL of the page to scan.

    Raises:
        Whatever ``urlopen`` raises when the page cannot be fetched; no
        error is handled here.
    """
    # BeautifulSoup consumes the response while constructing the tree, so the
    # connection can be released as soon as parsing is done.
    with urlopen(pageUrl) as response:
        soup = BeautifulSoup(response, "xml")

    for anchor in soup.findAll("a", {"href": _MOVIE_LINK_RE}):
        href = anchor.attrs['href']
        if href in moviesLinks:
            continue
        # Record the link before processing it so it is never handled twice,
        # even if it appears several times on the same page.
        moviesLinks.add(href)
        getPageImformation(href)
def getPageImformation(pageUrl):
    """Fetch one movie page, extract its name and download link, and store them.

    The name is taken from the first <p> inside the element with id "Zoom";
    the download link is the text of the <a> inside the <td> whose bgcolor is
    "#fdfddf". Both are printed and then passed to ``store``.

    Any failure (network error, unexpected page layout, database error) is
    reported on stdout and swallowed, so one bad page does not stop the crawl.

    Args:
        pageUrl: Href of the movie page relative to http://www.dytt8.net/.
    """
    try:
        # Hrefs collected by getLinks begin with "/"; strip it so the joined
        # URL does not contain a double slash after the host.
        url = 'http://www.dytt8.net/' + pageUrl.lstrip('/')
        # Parsing finishes inside the constructor, so the response can be
        # closed immediately afterwards.
        with urlopen(url) as response:
            soup = BeautifulSoup(response, "xml")
        # The paragraph text is split on the '◎' marker; the first field
        # after the marker, minus its first four characters, is the name.
        name = soup.find("div", {"id": "Zoom"}).p.get_text().split('◎')[1][4:].strip()
        downloadLink = soup.find("td", {"bgcolor": "#fdfddf"}).a.get_text()
        print(name + " " + downloadLink)
        store(name, downloadLink)
    except Exception as exc:
        # Deliberately best-effort: say which page failed and why, then let
        # the caller continue with the next link.
        print("Failed to process " + repr(pageUrl) + ": " + repr(exc))
        print("--------------------------------\n")
def store(name, downloadLink):
    """Insert a movie into Movie_heaven unless a row with that name exists.

    The looked-up row (or None) is printed before the decision is made. A new
    row is committed immediately after the insert.

    Args:
        name: Movie name, used as the duplicate-detection key.
        downloadLink: Download link text stored alongside the name.
    """
    # Query parameters are passed as a sequence, which is the form the DB-API
    # specifies; a bare string only works by driver-specific leniency.
    cur.execute('select * from Movie_heaven where name = %s', (name,))
    row = cur.fetchone()
    print(row)
    if row is None:
        cur.execute(
            'insert into Movie_heaven(name,downloadLink) values (%s,%s)',
            (name, downloadLink),
        )
        cur.connection.commit()
    else:
        print(' !')
# Re-scan the site's front page once a minute, forever. The loop only ends
# through an exception (including Ctrl-C), so cleanup lives in ``finally``;
# placed after the loop it could never be reached.
try:
    while True:
        getLinks('http://www.dytt8.net/')
        time.sleep(60)
finally:
    cur.close()
    conn.close()
# 3
# 4
# 5
# 6
# 7
# 8IMDb
#
다음은 조회 기능입니다.
# Connection and cursor used by the lookup code below.
# NOTE(review): passwd is the literal string 'None', not Python's None --
# confirm this is the intended credential.
conn = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='None',
    db='mysql',
    charset='utf8',
)
cur = conn.cursor()
# The connection is opened against the 'mysql' schema, so switch to the
# 'movies' database before querying.
cur.execute('use movies')
def search(name):
    """Print the first Movie_heaven row whose name contains ``name``.

    Prints None when no row matches.

    Args:
        name: Substring to look for. It is wrapped in ``%`` wildcards and
            bound as a query parameter, so quotes in user input cannot
            alter the SQL statement.
    """
    # Bind the LIKE pattern as a parameter instead of splicing user input
    # into the SQL text, which allowed SQL injection and broke on quotes.
    sql = "select * from Movie_heaven where name LIKE %s"
    cur.execute(sql, ('%' + name + '%',))
    print(cur.fetchone())
# Prompt for a search term and run the lookup. The cursor and connection are
# released in ``finally`` so they are closed even if the prompt is
# interrupted or the query fails.
try:
    name = input(" :")
    search(name)
finally:
    cur.close()
    conn.close()