[Python crawler] Scraping stock trading concepts
The stock-concept crawler code I wrote is attached below.
Open the site in Firefox, press F12, and pick the Network tab to work out the address the page requests its data from.
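Before wiring up the full crawler, it can help to hit the AJAX address found in the Network panel directly and look at what comes back. The snippet below is a minimal sketch of that probe; it assumes, as the script further down does, that http://www.iwencai.com/yike/index-page-ajax/ accepts p and filterTag query parameters and answers with JSON whose 'list' entries carry 'title' and 'URL' fields.

import requests

# Probe the list endpoint spotted in the F12 Network panel (one page only).
# The 'list', 'title' and 'URL' fields are the ones the crawler below relies on.
probe_url = "http://www.iwencai.com/yike/index-page-ajax/"
resp = requests.get(probe_url,
                    params={"p": 1, "filterTag": 37},
                    headers={"User-Agent": "Mozilla/5.0"})
resp.raise_for_status()
for article in resp.json().get("list", [])[:3]:
    print(article.get("title"), "->", article.get("URL"))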
import urllib.request
import urllib.error
import re
import requests
# First attempt: fetch a single detail page and pull the fields out with regexes.
# def main():
#     # url = "http://www.iwencai.com/school/dictionary?qs=study_dictonary_stock"
#     # url = 'http://www.iwencai.com/yike/article-class-list?tagId=37'
#     url = "http://www.iwencai.com/yike/detail/auid/716981f756614a79"
#     try:
#         data = urllib.request.urlopen(url).read()
#         content = data.decode('UTF-8')
#
#         # pattern = re.compile('.*?(.*?) .*?'
#         #                      '(.*?)',
#         #                      re.S)
#         pattern = re.compile('*?(.*?)',
#                              re.S)
#         items = re.findall(pattern, content)
#         print(items[0])
#         # for item in items:
#         #     print(item[0], item[1])
#
#     except urllib.error.HTTPError as e:
#         print(e.code)
#         print(e.reason)
def main():
    # walk the first 300 list pages and print every concept article found
    for i in range(1, 300):
        con_list = getPage(i)
        for item in con_list:
            subUrl = item['URL']
            concrete = getConcrete(subUrl)
            # skip entries whose body could not be extracted
            if concrete:
                print(item['title'])
                # squeeze out spaces before printing the body text
                print(concrete.replace(' ', ''))
                print("================================")
def getConcrete(subUrl):
    # fetch one concept detail page and cut the article body out with a regex
    concrete_url = "http://www.iwencai.com/" + subUrl
    # print(concrete_url)
    try:
        data = urllib.request.urlopen(concrete_url).read()
        content = data.decode('UTF-8')
        pattern = re.compile('(.*?)', re.S)
        items = re.findall(pattern, content)
        if len(items) == 0:
            # fall back to the alternate page layout
            pattern = re.compile('(.*?)', re.S)
            items = re.findall(pattern, content)
        return items[0]
    except Exception:
        print(" --------------")
        return []
#
def getPage(pageIndex):
    # request one page of the concept list through the site's AJAX interface
    siteURL = "http://www.iwencai.com/yike/index-page-ajax/"
    url = siteURL + "?p=" + str(pageIndex) + "&filterTag=37"
    # request = urllib2.Request(url)
    # response = urllib2.urlopen(request)
    # return response.read().decode('gbk')
    # data = urllib.request.urlopen(url).read()
    # content = data.decode('gbk')
    # return content
    headers = {
        'Referer': 'http://www.sse.com.cn/disclosure/credibility/supervision/inquiries/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    r = requests.get(url, headers=headers)
    return r.json()['list']
    # print(r.json()['list'][1]['summ'])
if __name__ == '__main__':
    main()