diff --git a/README.MD b/README.MD
index 42cf54a..2a08b8e 100644
--- a/README.MD
+++ b/README.MD
@@ -20,11 +20,12 @@
 * fund/fund_share_update.py 上交所,深交所 基金场内基金份额监控
 * fund/fund_share_monitor.py 上交所,深交所 基金基金份额查询,规模变动
 * fund/fund_info_spider.py 集思录基金,腾讯证券基金折价率,溢价率 爬虫
-* etf_info.py 市场指数基金的持仓股监控
+* fund/etf_info.py 市场指数基金的持仓股监控
+* fund/ttjj.py 天天基金数据获取
 
 ### 已有:
 
-* black_list_sql.py 记录A股市场上所有有黑历史的股票名单,并存入数据库
+* datahub/black_list_sql.py 记录A股市场上所有有黑历史的股票名单,并存入数据库
 * big_deal.py 监控每天A股市场上的大单交易
 * bond_monitor 可转债监控
 * ceiling_break.py 新股一直板开板后多少天能够重新回到开板价格
diff --git a/fund/danjuan_fund.py b/fund/danjuan_fund.py
index 13038c0..0ad6e53 100644
--- a/fund/danjuan_fund.py
+++ b/fund/danjuan_fund.py
@@ -34,7 +34,7 @@ def mongo_init(self):
         try:
             self.main_doc.ensure_index('plan_code', unique=True)
         except Exception as e:
-            print(e)
+            self.logger.error(e)
 
     @property
     def headers(self):
@@ -55,22 +55,16 @@ def save_data(self, data_list):
             try:
                 self.main_doc.insert_one(item)
             except Exception as e:
-                pass
-            else:
-                pass
+                self.logger.error(e)
+
 
     def get_plan_code(self):
-        for page in range(1, 50):
+        for page in range(1, 50):  # 50 pages for now; the real data set is much smaller
             content = self.crawl(page)
             return_data = self.parse(content)
             self.save_data(return_data)
             time.sleep(1)
-            print('page ', page)
 
-    def run(self):
-        self.get_plan_code() # 获取plan code 并入库
-        self.get_detail() # 获取具体持仓
-        self.plan_detail() # 方案的持有信息,收益等
 
     @property
     def code_list(self):
@@ -81,8 +75,7 @@ def update_data(self, condition, data):
         try:
             self.main_doc.update_one(condition, {'$set': data})
         except Exception as e:
-            # pass
-            print(e)
+            self.logger.error(e)
         else:
             pass
 
@@ -97,7 +90,7 @@ def plan_detail(self):
                 detail_info = self.post_process(detail_info)
                 self.update_data({'plan_code': code}, detail_info)
             else:
-                print('code {} is empty'.format(code))
+                self.logger.error('code {} is empty'.format(code))
 
     def post_process(self, detail_info):
         '''
@@ -127,7 +120,13 @@ def get_detail(self):
                 holdings = content.get('data').get('items')
                 self.update_data({'plan_code': code}, {"holding": holdings})
             else:
-                print('code {} is empty'.format(code))
+                self.logger.error('code {} is empty'.format(code))
+
+
+    def run(self):
+        self.get_plan_code()  # fetch the plan codes and store them
+        self.get_detail()  # fetch the holdings of each plan
+        self.plan_detail()  # fetch plan-level details (holding info, returns, ...)
 
 
 if __name__ == '__main__':
diff --git a/fund/danjuan_fund_data_analysis.py b/fund/danjuan_fund_data_analysis.py
new file mode 100644
index 0000000..ae1f348
--- /dev/null
+++ b/fund/danjuan_fund_data_analysis.py
@@ -0,0 +1,130 @@
+# -*- coding: utf-8 -*-
+# @Time : 2021/4/20 12:54
+# @File : danjuan_fund_data_analysis.py
+# @Author : Rocky C@www.30daydo.com
+# Analysis of the fund holdings stored in the Danjuan plan snapshots.
+import datetime
+import sys
+from collections import defaultdict
+sys.path.append('..')
+from configure.settings import DBSelector
+from common.BaseService import BaseService
+import pandas as pd
+
+
+WEEK_DAY = -7  # day offset from today to last week's snapshot
+# Last week's snapshot was never crawled, so this historical one stands in
+# for it; set to None to use the date computed from WEEK_DAY instead.
+HISTORY_DATE = '2021-03-18'
+
+
+class DanjuanAnalyser(BaseService):
+    """Rank the funds held by the plans of one dated Danjuan snapshot."""
+
+    def __init__(self):
+        super(DanjuanAnalyser, self).__init__('../log/Danjuan_analysis.log')
+
+    def select_collection(self, current_date):
+        """Return the ``danjuan_fund_<current_date>`` collection of ``db_danjuan``."""
+        self.db = DBSelector().mongo(location_type='qq')
+        return self.db['db_danjuan'][f'danjuan_fund_{current_date}']
+
+    @staticmethod
+    def _iter_holdings(collection):
+        """Yield every holding entry of every plan document in ``collection``."""
+        for item in collection.find({}, {'holding': 1}):
+            # The crawler may never set ``holding`` or may store None for it,
+            # so both cases are treated as "no holdings" instead of crashing.
+            for hold in item.get('holding') or []:
+                yield hold
+
+    @staticmethod
+    def _top(fund_dict, top):
+        """Return the ``top`` largest (name, value) pairs, largest first."""
+        ranked = sorted(fund_dict.items(), key=lambda x: x[1], reverse=True)
+        return ranked[:top]
+
+    def get_top_plan(self, collection, top=10):
+        """Rank fund names by the number of holding entries that name them."""
+        fund_dict = defaultdict(int)
+        for hold in self._iter_holdings(collection):
+            fund_dict[hold['fd_name']] += 1
+        return self._top(fund_dict, top)
+
+    def get_top_plan_percent(self, collection, top=10):
+        """Rank fund names by the sum of ``percent`` over all holding entries."""
+        fund_dict = defaultdict(int)
+        for hold in self._iter_holdings(collection):
+            fund_dict[hold['fd_name']] += hold['percent']
+        return self._top(fund_dict, top)
+
+    def clear_warehouse_fund(self, collection, top):
+        """Rank fund names by how many entries hold them at a non-positive percent."""
+        fund_dict = defaultdict(int)
+        for hold in self._iter_holdings(collection):
+            name = hold['fd_name']
+            if hold['percent'] > 0:
+                continue
+            fund_dict[name] += 1
+        return self._top(fund_dict, top)
+
+    def start(self):
+        """Print the funds that today's snapshot holds at a non-positive percent."""
+        last_week = datetime.datetime.now() + datetime.timedelta(days=WEEK_DAY)
+        last_week_str = HISTORY_DATE or last_week.strftime('%Y-%m-%d')
+
+        # NOTE(review): self.today is not assigned in this file; presumably it
+        # is provided by BaseService -- confirm.
+        today_doc = self.select_collection(self.today)
+        # last_week_doc is only needed by the disabled comparisons below.
+        last_week_doc = self.select_collection(last_week_str)
+
+        # Ranking by number of holding entries (disabled).
+        # fund_dict = self.get_top_plan(today_doc,20)
+        # self.pretty(fund_dict,self.today)
+
+        # old_fund_dict = self.get_top_plan(last_week_doc,20)
+        # self.pretty(old_fund_dict,last_week_str)
+        # diff_set = self.new_fund(fund_dict,old_fund_dict)
+        # print(diff_set)
+
+        # Ranking by summed holding percentage (disabled).
+        # new_fund_percent = self.get_top_plan_percent(today_doc,20)
+        # old_fund_percent = self.get_top_plan_percent(last_week_doc,20)
+        #
+        # self.pretty(new_fund_percent,self.today,'percent')
+        # self.pretty(old_fund_percent,last_week_str,'percent')
+
+        # Cleared positions: funds listed with a non-positive percent.
+        clean_fund = self.clear_warehouse_fund(today_doc, 200)
+        self.simple_display(clean_fund)
+
+    def simple_display(self, data):
+        """Print each element of ``data`` on its own line."""
+        for i in data:
+            print(i)
+
+    def pretty(self, fund_dict, date, kind='count'):
+        """Print up to 100 ranking rows and export all to ``<date>-<kind>.xlsx``."""
+        df = pd.DataFrame(fund_dict, columns=['fund', 'holding_num'])
+        print(df.head(100))
+        df.to_excel(f'{date}-{kind}.xlsx')
+
+    def new_fund(self, new_fund_dict, old_fund_dict):
+        """Return the fund names present in the old ranking but not the new one.
+
+        NOTE(review): the method name suggests the opposite direction
+        (new minus old) -- confirm which one callers expect.
+        """
+        new_names = {pair[0] for pair in new_fund_dict}
+        old_names = {pair[0] for pair in old_fund_dict}
+        return old_names - new_names
+
+
+def main():
+    app = DanjuanAnalyser()
+    app.start()
+
+
+if __name__ == '__main__':
+    main()