From 48a43d99dfdc091d1c262ab4fe179817c1559da2 Mon Sep 17 00:00:00 2001 From: Rockyzsu Date: Mon, 9 Nov 2020 09:21:50 +0800 Subject: [PATCH] update --- BaseService.py | 38 ++++- StockAnalyze.py | 8 +- fetch_each_day.py | 2 +- fund/fund_info_spider.py | 295 ++++++++++++++++++++++++--------------- fupan.py | 2 +- jubi.py | 2 +- k_line.py | 4 +- plot_line.py | 2 +- recordMyChoice.py | 4 +- sqlite_database.py | 6 +- store_news.py | 2 +- zdt.py | 2 +- 12 files changed, 231 insertions(+), 136 deletions(-) diff --git a/BaseService.py b/BaseService.py index 3909cd0..858a603 100644 --- a/BaseService.py +++ b/BaseService.py @@ -1,12 +1,42 @@ #-*-coding=utf-8-*- import os -import tushare as ts +from loguru import logger +import requests +import config -def changeDir(): - cwnd = os.getcwd() - os.chdir(os.path.join(cwnd, 'data')) +class BaseService: + def __init__(self, logfile='default.log'): + self.logger = logger + self.logger.add(logfile) + def check_path(self, path): + if not os.path.exists(path): + try: + os.makedirs(path) + except Exception as e: + self.logger.error(e) + def get_filename(self, url): + return url.split('/')[-1] + def notify(self,text): + url = f"https://sc.ftqq.com/{config.WECHAT_ID}.send?text=" + text + try: + res = requests.get(url) + except Exception as e: + print(e) + return False + else: + return True + + def save_iamge(self, content, path): + with open(path, 'wb') as fp: + fp.write(content) + + def get(self,url): + raise NotImplemented + + def post(self): + raise NotImplemented diff --git a/StockAnalyze.py b/StockAnalyze.py index e91208e..3433239 100644 --- a/StockAnalyze.py +++ b/StockAnalyze.py @@ -246,12 +246,12 @@ def main(): # print(v, ratio) ## 涨跌幅分布 ##### - # today=datetime.datetime.now().strftime("%Y-%m-%d") - # today_tendency(today) + # TODAY=datetime.datetime.now().strftime("%Y-%m-%d") + # today_tendency(TODAY) ## 分析涨停的区域分布 #### - # today = datetime.datetime.now().strftime("%Y%m%d") - # zt_location(today) + # TODAY = datetime.datetime.now().strftime("%Y%m%d") + # zt_location(TODAY) ## 显示百分比价格 # show_percentage(121) diff --git a/fetch_each_day.py b/fetch_each_day.py index d4598ee..b4060de 100644 --- a/fetch_each_day.py +++ b/fetch_each_day.py @@ -19,7 +19,7 @@ class FetchDaily(object): def __init__(self): self.today = datetime.datetime.now().strftime('%Y-%m-%d') - # self.today = '2020-02-07' + # self.TODAY = '2020-02-07' self.path = DATA_PATH if not os.path.exists(self.path): try: diff --git a/fund/fund_info_spider.py b/fund/fund_info_spider.py index 2ef8548..71fd3ff 100644 --- a/fund/fund_info_spider.py +++ b/fund/fund_info_spider.py @@ -9,20 +9,21 @@ import sys sys.path.append('..') -from settings import DBSelector, _json_data, send_from_aliyun, llogger +from settings import DBSelector, _json_data, send_from_aliyun +from BaseService import BaseService # 基金数据爬虫 now = datetime.datetime.now() -today = now.strftime('%Y-%m-%d') +TODAY = now.strftime('%Y-%m-%d') _time = now.strftime('%H:%M:%S') if _time < '11:30:00': - today += 'morning' + TODAY += 'morning' elif _time < '14:45:00': - today += 'noon' + TODAY += 'noon' else: - today += 'close' + TODAY += 'close' NOTIFY_HOUR = 13 MAX_PAGE = 114 @@ -41,30 +42,30 @@ DB = DBSelector() conn = DB.get_mysql_conn('db_fund', 'qq') cursor = conn.cursor() -base_dir = os.path.dirname(os.path.abspath(__file__)) -log_path = os.path.join(base_dir, '..', 'log') -logger = llogger(os.path.join(log_path, 'fund_info.log')) -class FundSpider(object): +class FundSpider(BaseService): def __init__(self): - self.create_tb() + super(FundSpider, self).__init__('fundspider.log') + self.create_table() + self.session = requests.Session() - def create_tb(self): + def create_table(self): create_table = 'create table if not EXISTS `{}` (`基金代码` varchar(20) PRIMARY KEY,`基金简称` varchar(100),`最新规模-万` float,' \ '`实时价格` float,`涨跌幅` float,`成交额-万` float,`净值日期` VARCHAR(10),`单位净值` float,`累计净值` float,`折溢价率` float ,`申购状态` VARCHAR(20),`申赎状态` varchar(20),`基金经理` VARCHAR(200),' \ '`成立日期` VARCHAR(20), `管理人名称` VARCHAR(200),`实时估值` INT,`QDII` INT ,`更新时间` VARCHAR(20));'.format( - today) + TODAY) try: cursor.execute(create_table) except Exception as e: conn.rollback() - print(e) + self.logger.error(e) else: conn.commit() def convert(self, float_str): + try: return_float = float(float_str) except: @@ -73,7 +74,7 @@ def convert(self, float_str): def insert_data(self, jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwjz, ljjz, zyjl, sgzt, shzt, jjjl, clrq, glrmc): - insert_data = 'insert into `{}` VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'.format(today) + update_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') is_realtime = 1 zxgm = self.convert(zxgm) @@ -84,27 +85,48 @@ def insert_data(self, jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwj ljjz = self.convert(ljjz) zyjl = self.convert(zyjl) + insert_data = 'insert into `{}` VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'.format(TODAY) try: cursor.execute(insert_data, ( jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwjz, ljjz, zyjl, sgzt, shzt, jjjl, clrq, glrmc, is_realtime, update_time)) except Exception as e: - logger.info(e) + self.logger.info(e) conn.rollback() else: conn.commit() def check_exist(self, code): - check_code_exists = 'select count(*) from `{}` WHERE `基金代码`=%s'.format(today) - + check_code_exists = 'select count(*) from `{}` WHERE `基金代码`=%s'.format(TODAY) cursor.execute(check_code_exists, (code[2:])) ret = cursor.fetchone() return ret + def get(self, url, params, retry=5): + start = 0 + while start < retry: + try: + response = self.session.get(url, headers=headers, params=params, + verify=False) + except Exception as e: + self.logger.error(e) + start += 1 + + else: + content = response.text + + return content + + if start == retry: + return None + def crawl(self): code_set = set() + url = 'http://stock.gtimg.cn/data/index.php' + for p in range(1, MAX_PAGE): + params = ( ('appn', 'rank'), ('t', 'ranklof/chr'), @@ -114,14 +136,15 @@ def crawl(self): ('v', 'list_data'), ) - session = requests.Session() - response = session.get('http://stock.gtimg.cn/data/index.php', headers=headers, params=params, verify=False) - ls_data = re.search('var list_data=(.*?);', response.text, re.S) + content = self.get(url, params) + ls_data = re.search('var list_data=(.*?);', content, re.S) if ls_data: ret = ls_data.group(1) + else: + continue - js = demjson.decode(ret) # 解析json的库 + js = demjson.decode(ret) # 解析json的库 detail_url = 'http://gu.qq.com/{}' query_string = js.get('data') time.sleep(5 * random.random()) @@ -136,41 +159,46 @@ def crawl(self): if ret[0] > 0: continue try: - r = session.get(detail_url.format(code), headers=headers) + r = self.session.get(detail_url.format(code), headers=headers) except: time.sleep(10) try: - r = session.get(detail_url.format(code), headers=headers) + r = self.session.get(detail_url.format(code), headers=headers) except: continue - search_str = re.search('', r.text) - - if search_str: - s = search_str.group(1) - js_ = demjson.decode(s) - - sub_js = js_.get('data').get('data').get('data') - zxjg = sub_js.get('zxjg') - jgzffd = sub_js.get('jgzffd') - cj_total_amount = sub_js.get('cj_total_amount') - - zyjl = float(sub_js.get('zyjl', 0)) * 100 - - info = js_.get('data').get('data').get('info') - jjdm = info.get('jjdm') - jjjc = info.get('jjjc') - zxgm = info.get('zxgm') - dwjz = info.get('dwjz') - ljjz = info.get('ljjz') - sgzt = info.get('sgzt') - shzt = info.get('shzt') - jjjl = info.get('jjjl') - clrq = info.get('clrq') - glrmc = info.get('glrmc') - jzrq = info.get('jzrq') - self.insert_data(jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwjz, ljjz, zyjl, sgzt, - shzt, jjjl, clrq, glrmc) + jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwjz, ljjz, zyjl, sgzt, shzt, jjjl, clrq, glrmc = self.parse_html( + r) + self.insert_data(jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwjz, ljjz, zyjl, sgzt, + shzt, jjjl, clrq, glrmc) + + def parse_html(self, r): + search_str = re.search('', r.text) + + if search_str: + s = search_str.group(1) + js_ = demjson.decode(s) + + sub_js = js_.get('data').get('data').get('data') + zxjg = sub_js.get('zxjg') + jgzffd = sub_js.get('jgzffd') + cj_total_amount = sub_js.get('cj_total_amount') + + zyjl = float(sub_js.get('zyjl', 0)) * 100 + + info = js_.get('data').get('data').get('info') + jjdm = info.get('jjdm') + jjjc = info.get('jjjc') + zxgm = info.get('zxgm') + dwjz = info.get('dwjz') + ljjz = info.get('ljjz') + sgzt = info.get('sgzt') + shzt = info.get('shzt') + jjjl = info.get('jjjl') + clrq = info.get('clrq') + glrmc = info.get('glrmc') + jzrq = info.get('jzrq') + return jjdm, jjjc, zxgm, zxjg, jgzffd, cj_total_amount, jzrq, dwjz, ljjz, zyjl, sgzt, shzt, jjjl, clrq, glrmc def change_table_field(self, table): add_column1 = 'alter table `{}` add column `实时净值` float'.format(table) @@ -178,7 +206,7 @@ def change_table_field(self, table): try: cursor.execute(add_column1) except Exception as e: - logger.error(e) + self.logger.error(e) conn.rollback() else: conn.commit() @@ -186,7 +214,7 @@ def change_table_field(self, table): try: cursor.execute(add_column2) except Exception as e: - logger.error(e) + self.logger.error(e) conn.rollback() else: conn.commit() @@ -202,18 +230,17 @@ def udpate_db(self, table, jz, yjl, is_realtime, code): cursor.execute(update_sql, (jz, yjl, is_realtime, code)) conn.commit() - def update_jinzhi(self, table): + def update_netvalue(self, table): ''' 更新净值 :param table: :return: ''' # table='2020-02-25' # 用于获取code列 - # today=datetime.datetime.now().strftime('%Y-%m-%d') + # TODAY=datetime.datetime.now().strftime('%Y-%m-%d') self.change_table_field(table) url = 'http://web.ifzq.gtimg.cn/fund/newfund/fundSsgz/getSsgz?app=web&symbol=jj{}' - session = requests.Session() ret = self.get_fund_info(table) for item in ret: @@ -221,11 +248,11 @@ def update_jinzhi(self, table): is_realtime = 1 realtime_price = item[2] try: - resp = session.get(url.format(code), headers=headers) + resp = self.session.get(url.format(code), headers=headers) except: time.sleep(5) try: - resp = session.get(url.format(code), headers=headers) + resp = self.session.get(url.format(code), headers=headers) except: continue @@ -235,7 +262,7 @@ def update_jinzhi(self, table): try: data_list = data.get('data') except Exception as e: - logger.error(e) + self.logger.error(e) continue else: @@ -245,13 +272,13 @@ def update_jinzhi(self, table): else: is_realtime = 0 - yjl, jz = self.get_jj(table, code) + yjl, jz = self.get_fund(table, code) self.udpate_db(table, jz, yjl, is_realtime, code) - logger.info('更新成功') + self.logger.info('更新成功') - def get_jj(self, table, code): + def get_fund(self, table, code): query = f'select `折溢价率`,`单位净值` from `{table}` where `基金代码`=%s' cursor.execute(query, code) ret = cursor.fetchone() @@ -260,102 +287,140 @@ def get_jj(self, table, code): jz = round(jz, 3) return yjl, jz + def query_fund_data(self, today, order): + query_sql = '''select `基金代码`,`基金简称`,`实时价格`,`实时净值`,`溢价率`,`净值日期` from `{}` where `申购状态`='开放' and `申赎状态`='开放' and `溢价率` is not null and !(`实时价格`=1 and `涨跌幅`=0 and `成交额-万`=0) order by `溢价率` {} limit 10'''.format( + today, order) + cursor.execute(query_sql) + result = cursor.fetchall() + return result + + def html_formator(self, ret, html): + + for row in ret: + html += f'{row[0]}{row[1]}{row[2]}{row[3]}{row[4]}{row[5]}' + html += '' + return html + + def combine_html(self, html, today): + + body = '
' \ + '' + + html += body + result_asc = self.query_fund_data(today, 'asc') + html = self.html_formator(html, result_asc) + + html += body + + result_desc = self.query_fund_data(today, 'desc') + html = self.html_formator(html, result_desc) + return html + def notify(self, today): now = datetime.datetime.now() if now.hour > NOTIFY_HOUR: # 下午才会发通知 - query_sql = '''select `基金代码`,`基金简称`,`实时价格`,`实时净值`,`溢价率`,`净值日期` from `{}` where `申购状态`='开放' and `申赎状态`='开放' and !(`实时价格`=1 and `涨跌幅`=0 and `成交额-万`=0) order by `溢价率` limit 10'''.format( - today) - cursor.execute(query_sql) - ret = cursor.fetchall() - html = '
基金代码基金简称实时价格实时净值溢价率净值日期
' \ - '' - for i in ret: - html += f'' - html += '
基金代码基金简称实时价格实时净值溢价率净值日期
{i[0]}{i[1]}{i[2]}{i[3]}{i[4]}{i[5]}
' - - query_sql = '''select `基金代码`,`基金简称`,`实时价格`,`实时净值`,`溢价率`,`净值日期` from `{}` where `申购状态`='开放' and `申赎状态`='开放' and `溢价率` is not null and !(`实时价格`=1 and `涨跌幅`=0 and `成交额-万`=0) order by `溢价率` desc limit 10'''.format( - today) - cursor.execute(query_sql) - ret = cursor.fetchall() - html += '
' \ - '' - for i in ret: - html += f'' - html += '
基金代码基金简称实时价格实时净值溢价率净值日期
{i[0]}{i[1]}{i[2]}{i[3]}{i[4]}{i[5]}
' title = f'{today} 基金折溢价' + html = '' + html = self.combine_html(html, TODAY) + try: send_from_aliyun(title, html, types='html') except Exception as e: - logger.error(e) - logger.info('发送失败') + self.logger.error(e) + self.logger.info('发送失败') else: - logger.info('发送成功') + self.logger.info('发送成功') -class JSLFund(object): +class JSLFund(BaseService): ''' 集思录的指数 ''' def __init__(self): + super(JSLFund, self).__init__('jslfund.log') - host = _json_data['mongo']['qq']['host'] - # host='127.0.0.1' - port = _json_data['mongo']['qq']['port'] - user = _json_data['mongo']['qq']['user'] - password = _json_data['mongo']['qq']['password'] - connect_uri = f'mongodb://{user}:{password}@{host}:{port}' - client = pymongo.MongoClient(connect_uri) today_ = datetime.datetime.now().strftime('%Y-%m-%d') - self.doc1 = client['fund_daily'][f'jsl_stock_lof_{today_}'] - self.doc2 = client['fund_daily'][f'jsl_index_lof_{today_}'] - self.url = 'https://www.jisilu.cn/data/lof/stock_lof_list/?___jsl=LST___t=1582355333844&rp=25' - self.index_lof = 'https://www.jisilu.cn/data/lof/index_lof_list/?___jsl=LST___t=1582356112906&rp=25' + client = self.mongo_client() + + self.jsl_stock_lof = client['fund_daily'][f'jsl_stock_lof_{today_}'] + self.jsl_index_lof = client['fund_daily'][f'jsl_index_lof_{today_}'] + + self.stock_url = 'https://www.jisilu.cn/data/lof/stock_lof_list/?___jsl=LST___t=1582355333844&rp=25' + self.index_lof_url = 'https://www.jisilu.cn/data/lof/index_lof_list/?___jsl=LST___t=1582356112906&rp=25' self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'} - def crawl(self): + def mongo_client(self): + mongo = _json_data['mongo']['qq'] + host = ['host'] + port = mongo['port'] + user = mongo['user'] + password = mongo['password'] - r = requests.get( - url=self.url, - headers=self.headers) + connect_uri = f'mongodb://{user}:{password}@{host}:{port}' + client = pymongo.MongoClient(connect_uri) + return client - js = r.json() - rows = js.get('rows') + def get(self, url, retry=5): - for item in rows: - cell = item.get('cell') + start = 0 + while start < retry: try: - self.doc1.insert_one(cell) + r = requests.get( + url=url, + headers=self.headers) except Exception as e: - logger.error(e) + self.logger.error(e) + start += 1 - r2 = requests.get( - url=self.index_lof, - headers=self.headers) + else: + js = r.json() + return js - js = r2.json() - rows = js.get('rows') + if start == retry: + return None + + def crawl(self): + self.parse_json(types='stock') + self.parse_json(types='index') + + def parse_json(self, types): + + if types == 'stock': + url = self.stock_url + mongo_doc = self.jsl_stock_lof + else: + url = self.index_lof_url + mongo_doc = self.jsl_stock_lof + + return_js = self.get(url=url) + rows = return_js.get('rows') for item in rows: cell = item.get('cell') try: - self.doc2.insert_one(cell) + mongo_doc.insert_one(cell) except Exception as e: - logger.error(e) + self.logger.error(e) + self.notify(f'{self.__class__} 写入mongodb出错') -if __name__ == '__main__': +def main(): tencent_spider = FundSpider() tencent_spider.crawl() - tencent_spider.update_jinzhi(today) - tencent_spider.notify(today) + tencent_spider.update_netvalue(TODAY) + tencent_spider.notify(TODAY) jsl_spider = JSLFund() jsl_spider.crawl() + + +if __name__ == '__main__': + main() diff --git a/fupan.py b/fupan.py index 4d9acd3..6efab3d 100644 --- a/fupan.py +++ b/fupan.py @@ -17,7 +17,7 @@ db = client['stock'] doc = db['industry'] today = '2018-05-08' -# today = datetime.datetime.now().strftime('%Y-%m-%d') +# TODAY = datetime.datetime.now().strftime('%Y-%m-%d') daily_engine = DBSelector().get_engine('db_daily','qq') daily_df = pd.read_sql(today, daily_engine, index_col='index') diff --git a/jubi.py b/jubi.py index 171233d..f113383 100644 --- a/jubi.py +++ b/jubi.py @@ -149,7 +149,7 @@ def get_signiture(self): signature = hmac.new(md5, message, digestmod=hashlib.sha256).digest() # print(signature) - # req=requests.post(url,data={'signature':signature,'key':public_key,'nonce':nonce,'coin':'zet'}) + # req=requests.post(stock_url,data={'signature':signature,'key':public_key,'nonce':nonce,'coin':'zet'}) req = requests.post(url, data={'coin': coin}) print(req.status_code) print(req.text) diff --git a/k_line.py b/k_line.py index c4f1db5..75ef60b 100644 --- a/k_line.py +++ b/k_line.py @@ -177,8 +177,8 @@ def update_daily(): today = datetime.datetime.now().strftime('%Y-%m-%d') cmd = 'select * from `{}`;'.format(today) cursor.execute(cmd) - #today = '2017-11-17' - #daily_df = pd.read_sql_table(today,daily_conn,index_col='index') + #TODAY = '2017-11-17' + #daily_df = pd.read_sql_table(TODAY,daily_conn,index_col='index') days_info = cursor.fetchall() for i in days_info: code = i[1] diff --git a/plot_line.py b/plot_line.py index 5a4d556..a1fe962 100644 --- a/plot_line.py +++ b/plot_line.py @@ -102,7 +102,7 @@ def plot_stock_line(api,code, name, table_name, current, start='2019-10-01', sav plt.subplots_adjust(hspace=0.3) if save: - # path = os.path.join(os.path.dirname(__file__),'data',today) + # path = os.path.join(os.path.dirname(__file__),'data',TODAY) fig.savefig(title + '.png') else: plt.show() diff --git a/recordMyChoice.py b/recordMyChoice.py index 55967b2..3165f5c 100644 --- a/recordMyChoice.py +++ b/recordMyChoice.py @@ -25,7 +25,7 @@ def __init__(self): # weekday=now+datetime.timedelta(days=-2) # weekday=weekday.strftime("%Y-%m-%d") # print(weekday) - # today=now.strftime('%Y-%m-%d') + # TODAY=now.strftime('%Y-%m-%d') self.path = os.path.join(os.getcwd(), 'data') self.filename = os.path.join(self.path, 'recordMyChoice.xls') @@ -77,7 +77,7 @@ def __init__(self): self.cur = self.conn.cursor() self.table_name = 'tb_profit' self.today = datetime.datetime.now().strftime('%Y-%m-%d') - # self.today = '2018-04-13' + # self.TODAY = '2018-04-13' def holding_stock_sql(self): path = os.path.join(os.path.dirname(__file__), 'data', 'mystock.csv') diff --git a/sqlite_database.py b/sqlite_database.py index 319ab54..19ce456 100644 --- a/sqlite_database.py +++ b/sqlite_database.py @@ -11,8 +11,8 @@ class SqliteDb(): def __init__(self,dbname): ''' - self.today = time.strftime("%Y-%m-%d") - self.DBname = self.today + '.db' + self.TODAY = time.strftime("%Y-%m-%d") + self.DBname = self.TODAY + '.db' self.conn = sqlite3.connect(self.DBname) ''' today = time.strftime("_%Y_%m_%d") @@ -27,7 +27,7 @@ def __init__(self,dbname): def store_break(self,price_data): #data 是创新高(低)的个股信息 dataframe - #print(today) + #print(TODAY) #create_tb = 'CREATE TABLE STOCK (date TEXT,id text PRIMARY KEY, p_change REAL,turnover REAL);' #conn.commit() diff --git a/store_news.py b/store_news.py index e1ea9ff..006db3a 100644 --- a/store_news.py +++ b/store_news.py @@ -70,7 +70,7 @@ def save_sql(): # pass url_link = re.findall(r'---> (.*)', s)[0] # if url_link: - # print('url', url_link[0]) + # print('stock_url', url_link[0]) # date_times='h' # titles='h' # url_link='h' diff --git a/zdt.py b/zdt.py index 73c42d1..4612590 100644 --- a/zdt.py +++ b/zdt.py @@ -21,7 +21,7 @@ class GetZDT(object): def __init__(self,today): ''' - today 格式 20200701 + TODAY 格式 20200701 :param today: ''' self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/64.0.3282.167 Chrome/64.0.3282.167 Safari/537.36"