-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnianbao.py
90 lines (79 loc) · 3.18 KB
/
nianbao.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#! /usr/bin/python3
# -*- coding:utf-8 -*-
# pip3 list --outdated --format=freeze | grep -v '^\-e' | cut -d = -f 1 |
# xargs -n1 pip3 install -U
import requests
import re
import logging
import openpyxl
import time
import concurrent.futures
import os
# Pulls the file name out of a Content-Disposition header value
# (everything after "filename=").
file_name_replan = re.compile(r'.*filename=(.*)')
# Captures six-digit codes starting with 6 that sit between a newline and a
# tab; applied to the text of the Shanghai stock list download.
# Raw string: '\d' in a normal literal is an invalid escape sequence.
A_stock_replan = re.compile(r'\n(6\d{5})\t')
# Number of worker threads used for the concurrent downloads.
THREAD_NUM = 25
# Request headers sent when fetching the Shanghai stock list.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/55.0.2883.87 Safari/537.36',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/assortment/stock/list/share/',
}
# Examples of URLs produced from the template below:
# zcfzb_url = 'http://quotes.money.163.com/service/zcfzb_603789.html'
# lrb_url = 'http://quotes.money.163.com/service/lrb_600001.html'
# xjllb_url = 'http://quotes.money.163.com/service/xjllb_600001.html'
# URL template: {zlx} is the statement kind (zcfzb / lrb / xjllb) and
# {num} is the stock code.
m163_plan = 'http://quotes.money.163.com/service/{zlx}_{num}.html'
# Download endpoints for the Shanghai and Shenzhen stock lists.
SH_list_url = 'http://query.sse.com.cn/security/stock/downloadStockListFile.do?csrcCode=&stockCode=&areaName=&stockType=1'
SZ_list_url = 'http://www.szse.cn/szseWeb/ShowReport.szse?SHOWTYPE=xlsx&CATALOGID=1110&tab1PAGENO=1&ENCODE=1&TABKEY=tab1'
# Console logging for the script: a single stream handler attached to the
# '163_stock' logger, both at DEBUG level, with timestamped records.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)
logger = logging.getLogger('163_stock')
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)
def download_from_url(url, timeout=30):
    """Download ``url`` and save the body under the server-supplied name.

    The file name is taken from the ``filename=`` part of the response's
    ``Content-Disposition`` header and the file is written to the current
    working directory.  Any failure is logged with its traceback and is not
    propagated, so one failed download does not abort the caller.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds to wait for the server before giving up; without
            it a stalled connection would block the calling thread forever.
    """
    try:
        req = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx as failures instead of trying to save an error page.
        req.raise_for_status()
        raw_name = file_name_replan.search(
            req.headers['Content-Disposition']).group(1)
        # The header is controlled by the server: strip surrounding quotes
        # and any directory components so the write cannot leave the
        # working directory.
        cleaned = raw_name.strip().strip('"\'').replace('\\', '/')
        file_name = os.path.basename(cleaned)
        if not file_name:
            raise ValueError(
                'no usable file name in Content-Disposition: {!r}'.format(
                    raw_name))
        with open(file_name, 'wb') as f:
            f.write(req.content)
        logger.info('downloaded {}'.format(url))
    except Exception:
        # Deliberately best-effort: record the traceback and carry on.
        logger.exception('requests error in {}'.format(url))
def download_one(i):
    """Download the three financial statements for stock code ``i``.

    The statements are fetched in this order: balance sheet (zcfzb),
    income statement (lrb), cash flow statement (xjllb).
    """
    for statement in ('zcfzb', 'lrb', 'xjllb'):
        download_from_url(m163_plan.format(zlx=statement, num=i))
def _fetch_stock_list():
    """Download the Shanghai and Shenzhen listings and return their codes."""
    list_timeout = 30  # seconds; keeps a stalled server from hanging the run
    # Shanghai: codes are extracted from the text response with a regex.
    sh_req = requests.get(SH_list_url, headers=headers, timeout=list_timeout)
    sh_stock_list = A_stock_replan.findall(sh_req.text)
    logger.info('SH has {} items'.format(len(sh_stock_list)))
    # Shenzhen: the listing is an xlsx workbook, saved to disk then parsed.
    sz_req = requests.get(SZ_list_url, timeout=list_timeout)
    with open('SZ.xlsx', 'wb') as f:
        f.write(sz_req.content)
    W = openpyxl.load_workbook('SZ.xlsx')
    # 'A' selects the whole first column ('A0' is not a valid selector, as
    # rows start at 1); [1:] drops the first cell, presumably the header.
    column_a = W.worksheets[0]['A'][1:]
    # Coerce to str and skip empty cells so the codes can be written out
    # and formatted into URLs regardless of how the sheet typed them.
    sz_stock_list = [
        str(cell.value) for cell in column_a if cell.value is not None]
    logger.info('SZ has {} items'.format(len(sz_stock_list)))
    return sh_stock_list + sz_stock_list


def main():
    """Load (or build and cache) the stock code list, then download all.

    Codes are read from ``target_num.txt`` when that file exists; otherwise
    they are fetched from the exchanges and written to that file.  Each
    code is then handed to ``download_one`` on a thread pool.
    """
    if os.path.exists('target_num.txt'):
        with open('target_num.txt', 'r') as f:
            stock_list = f.read().split()
    else:
        stock_list = _fetch_stock_list()
        logger.info('Totally {} items'.format(len(stock_list)))
        with open('target_num.txt', 'w') as f:
            f.writelines(code + '\r\n' for code in stock_list)
    # Use the configured logger: logging.info() targets the root logger,
    # which drops this INFO record and implicitly installs a root handler
    # that would duplicate every later message.
    logger.info('start~~~~')
    start_t = time.time()
    # Leaving the with-block waits for every submitted download to finish.
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=THREAD_NUM) as executor:
        for code in stock_list:
            executor.submit(download_one, code)
    end_t = time.time()
    logger.info('All done in {} S'.format(end_t - start_t))


if __name__ == '__main__':
    main()