diff --git a/tushare/stock/cons.py b/tushare/stock/cons.py index 64e4d086..62a8d7fb 100644 --- a/tushare/stock/cons.py +++ b/tushare/stock/cons.py @@ -1,53 +1,61 @@ -# -*- coding:utf-8 -*- +# -*- coding:utf-8 -*- """ Created on 2014/07/31 @author: Jimmy Liu @group : waditu @contact: jimmysoa@sina.cn """ -@property -def data_path(): - import os - import inspect - caller_file = inspect.stack()[1][1] - pardir = os.path.abspath(os.path.join(os.path.dirname(caller_file), os.path.pardir)) - return os.path.abspath(os.path.join(pardir, os.path.pardir)) VERSION = '0.1.6' -K_LABELS = ['D','W','M'] -K_MIN_LABELS = ['5','15','30','60'] -K_TYPE = {'D':'akdaily','W':'akweekly','M':'akmonthly'} -INDEX_LABELS = ['sh','sz','hs300','sz50','cyb','zxb'] -INDEX_LIST = {'sh':'sh000001','sz':'sz399001','hs300':'sz399300','sz50':'sh000016','zxb':'sz399005','cyb':'sz399006'} -P_TYPE = {'http':'http://','ftp':'ftp://'} -DAY_PRICE_PAGES = 38 -DOMAINS = {'sina':'sina.com.cn','sinahq':'sinajs.cn','ifeng':'ifeng.com'} -TICK_COLUMNS = ['time','price','change','volume','amount','type'] -DAY_TRADING_COLUMNS = ['code','symbol','name','changepercent','trade','open','high','low','settlement','volume','turnoverratio'] -REPORT_COLS = ['code','name','eps','eps_yoy','bvps','roe','epcf','net_profits','profits_yoy','distrib','report_date'] -FORECAST_COLS = ['code','name','type','report_date','pre_eps','range'] -PROFIT_COLS = ['code','name','roe','net_profit_ratio','gross_profit_rate','net_profits','eps','business_income','bips'] -OPERATION_COLS = ['code', 'name','arturnover','arturndays','inventory_turnover','inventory_days','currentasset_turnover','currentasset_days'] -GROWTH_COLS = ['code','name','mbrg','nprg','nav','targ','epsg','seg'] -DEBTPAYING_COLS = ['code','name','currentratio','quickratio','cashratio','icratio','sheqratio','adratio'] -CASHFLOW_COLS = ['code','name','cf_sales','rateofreturn','cf_nm','cf_liabilities','cashflowratio'] -DAY_PRICE_COLUMNS = ['date','open','high','close','low','volume','price_change','p_change', - 'ma5','ma10','ma20','v_ma5','v_ma10','v_ma20','turnover'] -INX_DAY_PRICE_COLUMNS = ['date','open','high','close','low','volume','price_change','p_change', - 'ma5','ma10','ma20','v_ma5','v_ma10','v_ma20'] -LIVE_DATA_COLS = ['name','open','pre_close','price','high','low','bid','ask','volume','amount', - 'b1_v','b1_p','b2_v','b2_p','b3_v','b3_p','b4_v','b4_p','b5_v','b5_p', - 'a1_v','a1_p','a2_v','a2_p','a3_v','a3_p','a4_v','a4_p','a5_v','a5_p','date','time','s'] -TICK_PRICE_URL = '%smarket.finance.%s/downxls.php?date=%s&symbol=%s' +K_LABELS = ['D', 'W', 'M'] +K_MIN_LABELS = ['5', '15', '30', '60'] +K_TYPE = {'D': 'akdaily', 'W': 'akweekly', 'M': 'akmonthly'} +INDEX_LABELS = ['sh', 'sz', 'hs300', 'sz50', 'cyb', 'zxb'] +INDEX_LIST = {'sh': 'sh000001', 'sz': 'sz399001', 'hs300': 'sz399300', + 'sz50': 'sh000016', 'zxb': 'sz399005', 'cyb': 'sz399006'} +P_TYPE = {'http': 'http://', 'ftp': 'ftp://'} +PAGE_NUM = [38, 60, 80, 100] +DOMAINS = {'sina': 'sina.com.cn', 'sinahq': 'sinajs.cn', + 'ifeng': 'ifeng.com', 'sf': 'finance.sina.com.cn', + 'vsf': 'vip.stock.finance.sina.com.cn'} +PAGES = {'fd': 'index.phtml', 'dl': 'downxls.php', 'jv': 'json_v2.php', + 'cpt': 'newFLJK.php', 'ids': 'newSinaHy.php'} +TICK_COLUMNS = ['time', 'price', 'change', 'volume', 'amount', 'type'] +DAY_TRADING_COLUMNS = ['code', 'symbol', 'name', 'changepercent', + 'trade', 'open', 'high', 'low', 'settlement', 'volume', 'turnoverratio'] +REPORT_COLS = ['code', 'name', 'eps', 'eps_yoy', 'bvps', 'roe', + 'epcf', 'net_profits', 'profits_yoy', 'distrib', 'report_date'] +FORECAST_COLS = ['code', 'name', 'type', 'report_date', 'pre_eps', 'range'] +PROFIT_COLS = ['code', 'name', 'roe', 'net_profit_ratio', + 'gross_profit_rate', 'net_profits', 'eps', 'business_income', 'bips'] +OPERATION_COLS = ['code', 'name', 'arturnover', 'arturndays', 'inventory_turnover', + 'inventory_days', 'currentasset_turnover', 'currentasset_days'] +GROWTH_COLS = ['code', 'name', 'mbrg', 'nprg', 'nav', 'targ', 'epsg', 'seg'] +DEBTPAYING_COLS = ['code', 'name', 'currentratio', + 'quickratio', 'cashratio', 'icratio', 'sheqratio', 'adratio'] +CASHFLOW_COLS = ['code', 'name', 'cf_sales', 'rateofreturn', + 'cf_nm', 'cf_liabilities', 'cashflowratio'] +DAY_PRICE_COLUMNS = ['date', 'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change', + 'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover'] +INX_DAY_PRICE_COLUMNS = ['date', 'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change', + 'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20'] +LIVE_DATA_COLS = ['name', 'open', 'pre_close', 'price', 'high', 'low', 'bid', 'ask', 'volume', 'amount', + 'b1_v', 'b1_p', 'b2_v', 'b2_p', 'b3_v', 'b3_p', 'b4_v', 'b4_p', 'b5_v', 'b5_p', + 'a1_v', 'a1_p', 'a2_v', 'a2_p', 'a3_v', 'a3_p', 'a4_v', 'a4_p', 'a5_v', 'a5_p', 'date', 'time', 's'] +TICK_PRICE_URL = '%smarket.%s/%s?date=%s&symbol=%s' DAY_PRICE_URL = '%sapi.finance.%s/%s/?code=%s&type=last' LIVE_DATA_URL = '%shq.%s/rn=%s&list=%s' DAY_PRICE_MIN_URL = '%sapi.finance.%s/akmin?scode=%s&type=%s' -SINA_DAY_PRICE_URL = '%svip.stock.finance.%s/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?num=80&sort=changepercent&asc=0&node=hs_a&symbol=&_s_r_a=page&page=%s' -REPORT_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/mainindex/index.phtml?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=60' -FORECAST_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/performance/index.phtml?s_i=&s_a=&s_c=&s_type=&reportdate=%s&quarter=%s&p=%s&num=60' -PROFIT_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/profit/index.phtml?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=60' -OPERATION_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/operation/index.phtml?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=60' -GROWTH_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/grow/index.phtml?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=60' -DEBTPAYING_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/debtpaying/index.phtml?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=60' -CASHFLOW_URL = '%svip.stock.finance.%s/q/go.php/vFinanceAnalyze/kind/cashflow/index.phtml?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=60' -ALL_STOCK_BASICS_FILE = '%s/tushare/data/all.csv'%data_path \ No newline at end of file +SINA_DAY_PRICE_URL = '%s%s/quotes_service/api/%s/Market_Center.getHQNodeData?num=80&sort=changepercent&asc=0&node=hs_a&symbol=&_s_r_a=page&page=%s' +REPORT_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/mainindex/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s' +FORECAST_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/performance/%s?s_i=&s_a=&s_c=&s_type=&reportdate=%s&quarter=%s&p=%s&num=%s' +PROFIT_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/profit/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s' +OPERATION_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/operation/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s' +GROWTH_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/grow/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s' +DEBTPAYING_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/debtpaying/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s' +CASHFLOW_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/cashflow/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s' +ALL_STOCK_BASICS_FILE = '%s/tushare/data/all.csv' + +SINA_CONCEPTS_INDEX_URL = '%smoney.%s/q/view/%s?param=class' +SINA_INDUSTRY_INDEX_URL = '%s%s/q/view/%s' +SINA_DATA_DETAIL_URL = '%s%s/quotes_service/api/%s/Market_Center.getHQNodeData?page=1&num=400&sort=symbol&asc=1&node=%s&symbol=&_s_r_a=page' \ No newline at end of file diff --git a/tushare/stock/fundamental.py b/tushare/stock/fundamental.py index af47d062..50cf4592 100644 --- a/tushare/stock/fundamental.py +++ b/tushare/stock/fundamental.py @@ -11,7 +11,7 @@ import lxml.html import re -def get_stock_basics(file_path=ct.ALL_STOCK_BASICS_FILE): +def get_stock_basics(file_path=None): """ 获取沪深上市公司基本情况 Parameters @@ -38,6 +38,7 @@ def get_stock_basics(file_path=ct.ALL_STOCK_BASICS_FILE): pb,市净率 timeToMarket,上市日期 """ + file_path = file_path if file_path else ct.ALL_STOCK_BASICS_FILE%_data_path() df = pd.read_csv(file_path,dtype={'code':'object'},encoding='GBK') df = df.set_index('code') return df @@ -71,8 +72,10 @@ def get_report_data(year,quarter): df = pd.DataFrame(data,columns=ct.REPORT_COLS) return df -def _get_report_data(year,quarter,pageNo,dataArr): - url = ct.REPORT_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_report_data(year, quarter, pageNo, dataArr): + url = ct.REPORT_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], + year, quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -127,8 +130,10 @@ def get_forecast_data(year,quarter): df = pd.DataFrame(data,columns=ct.FORECAST_COLS) return df -def _get_forecast_data(year,quarter,pageNo,dataArr): - url = ct.FORECAST_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_forecast_data(year, quarter, pageNo, dataArr): + url = ct.FORECAST_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, + quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -179,8 +184,10 @@ def get_profit_data(year,quarter): df = pd.DataFrame(data,columns=ct.PROFIT_COLS) return df -def _get_profit_data(year,quarter,pageNo,dataArr): - url = ct.PROFIT_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_profit_data(year, quarter, pageNo, dataArr): + url = ct.PROFIT_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, + quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -238,8 +245,10 @@ def get_operation_data(year,quarter): df = pd.DataFrame(data,columns=ct.OPERATION_COLS) return df -def _get_operation_data(year,quarter,pageNo,dataArr): - url = ct.OPERATION_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_operation_data(year, quarter, pageNo, dataArr): + url = ct.OPERATION_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, + quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -295,8 +304,10 @@ def get_growth_data(year,quarter): df = pd.DataFrame(data,columns=ct.GROWTH_COLS) return df -def _get_growth_data(year,quarter,pageNo,dataArr): - url = ct.GROWTH_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_growth_data(year, quarter, pageNo, dataArr): + url = ct.GROWTH_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, + quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -352,8 +363,10 @@ def get_debtpaying_data(year,quarter): df = pd.DataFrame(data,columns=ct.DEBTPAYING_COLS) return df -def _get_debtpaying_data(year,quarter,pageNo,dataArr): - url = ct.DEBTPAYING_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_debtpaying_data(year, quarter, pageNo, dataArr): + url = ct.DEBTPAYING_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, + quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -408,8 +421,10 @@ def get_cashflow_data(year,quarter): df = pd.DataFrame(data,columns=ct.CASHFLOW_COLS) return df -def _get_cashflow_data(year,quarter,pageNo,dataArr): - url = ct.CASHFLOW_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],year,quarter,pageNo) + +def _get_cashflow_data(year, quarter, pageNo, dataArr): + url = ct.CASHFLOW_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, + quarter, pageNo, ct.PAGE_NUM[1]) print 'getting page %s ...'%pageNo try: html = lxml.html.parse(url) @@ -445,4 +460,9 @@ def _check_input(year,quarter): else: return True - \ No newline at end of file +def _data_path(): + import os + import inspect + caller_file = inspect.stack()[1][1] + pardir = os.path.abspath(os.path.join(os.path.dirname(caller_file), os.path.pardir)) + return os.path.abspath(os.path.join(pardir, os.path.pardir)) \ No newline at end of file diff --git a/tushare/stock/macro.py b/tushare/stock/macro.py index f9485a5d..a012c7fd 100644 --- a/tushare/stock/macro.py +++ b/tushare/stock/macro.py @@ -15,6 +15,7 @@ import re import json + def get_gdp_year(): """ 获取年度国内生产总值数据 @@ -34,20 +35,22 @@ def get_gdp_year(): lbdy :批发零售贸易及餐饮业(亿元) """ rdint = vs.random() - url = vs.MACRO_URL%(vs.P_TYPE['http'],vs.DOMAINS['sina'],rdint,vs.MACRO_TYPE[0],0,70,rdint) + url = vs.MACRO_URL%(vs.P_TYPE['http'], vs.DOMAINS['sina'], rdint, vs.MACRO_TYPE[0], + 0, 70, rdint) request = urllib2.Request(url) - text = urllib2.urlopen(request,timeout=10).read() + text = urllib2.urlopen(request, timeout=10).read() regSym = re.compile(r'\,count:(.*?)\}') datastr = regSym.findall(text) datastr = datastr[0] datastr = datastr.split('data:')[1] - datastr = datastr.replace('"','').replace('null','0') + datastr = datastr.replace('"', '').replace('null','0') js = json.loads(datastr) - df = pd.DataFrame(js,columns=vs.GDP_YEAR_COLS) + df = pd.DataFrame(js, columns=vs.GDP_YEAR_COLS) df[df==0] = np.NaN return df - + + def get_gdp_quarter(): """ 获取季度国内生产总值数据 @@ -65,7 +68,8 @@ def get_gdp_quarter(): ti_yoy :第三产业增加值同比增长(%) """ rdint = vs.random() - url = vs.MACRO_URL%(vs.P_TYPE['http'],vs.DOMAINS['sina'],rdint,vs.MACRO_TYPE[0],1,250,rdint) + url = vs.MACRO_URL%(vs.P_TYPE['http'], vs.DOMAINS['sina'], rdint, vs.MACRO_TYPE[0], + 1, 250, rdint) request = urllib2.Request(url) text = urllib2.urlopen(request,timeout=10).read() @@ -73,9 +77,9 @@ def get_gdp_quarter(): datastr = regSym.findall(text) datastr = datastr[0] datastr = datastr.split('data:')[1] - datastr = datastr.replace('"','').replace('null','0') + datastr = datastr.replace('"', '').replace('null', '0') js = json.loads(datastr) - df = pd.DataFrame(js,columns=vs.GDP_QUARTER_COLS) + df = pd.DataFrame(js, columns=vs.GDP_QUARTER_COLS) df['quarter'] = df['quarter'].astype(object) df[df==0] = np.NaN return df diff --git a/tushare/stock/trading.py b/tushare/stock/trading.py index a50e229c..bb1bf620 100644 --- a/tushare/stock/trading.py +++ b/tushare/stock/trading.py @@ -84,7 +84,7 @@ def _parsing_dayprice_json(pageNum=1): DataFrame 当日所有股票交易数据(DataFrame) """ print 'getting page %s ...'%pageNum - url = ct.SINA_DAY_PRICE_URL%(ct.P_TYPE['http'],ct.DOMAINS['sina'],pageNum) + url = ct.SINA_DAY_PRICE_URL%(ct.P_TYPE['http'],ct.DOMAINS['vsf'],ct.PAGES['jv'],pageNum) request = urllib2.Request(url) text = urllib2.urlopen(request,timeout=10).read() if text == 'null': @@ -124,7 +124,8 @@ def get_tick_data(code=None, date=None, retry_count=3, pause=0.001): if code is None or len(code)!=6 or date is None: return None symbol = code_to_symbol(code) - url = ct.TICK_PRICE_URL % (ct.P_TYPE['http'],ct.DOMAINS['sina'],date,symbol) + url = ct.TICK_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['dl'], + date, symbol) for _ in range(retry_count): time.sleep(pause) try: @@ -148,7 +149,7 @@ def get_today_all(): """ df = _parsing_dayprice_json(1) if df is not None: - for i in range(2,ct.DAY_PRICE_PAGES): + for i in range(2,ct.PAGE_NUM[0]): newdf = _parsing_dayprice_json(i) df = df.append(newdf,ignore_index=True) return df