Skip to content

Commit

Permalink
新增沪深300成份股及所占权重数据
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmysoa committed Mar 5, 2015
1 parent 68285cd commit ddbd793
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 19 deletions.
64 changes: 48 additions & 16 deletions tushare/stock/classifying.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ def get_industry_classified():
name :股票名称
c_name :行业名称
"""
df = _get_type_data(ct.SINA_INDUSTRY_INDEX_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['ids']))
df = _get_type_data(ct.SINA_INDUSTRY_INDEX_URL%(ct.P_TYPE['http'],
ct.DOMAINS['vsf'], ct.PAGES['ids']))
data = []
for row in df.values:
rowDf = _get_detail(row[0])
rowDf['c_name'] = row[1]
data.append(rowDf)
data = pd.concat(data,ignore_index=True)
data = pd.concat(data, ignore_index=True)
return data


Expand All @@ -48,7 +49,8 @@ def get_concept_classified():
name :股票名称
c_name :概念名称
"""
df = _get_type_data(ct.SINA_CONCEPTS_INDEX_URL%(ct.P_TYPE['http'], ct.DOMAINS['sf'], ct.PAGES['cpt']))
df = _get_type_data(ct.SINA_CONCEPTS_INDEX_URL%(ct.P_TYPE['http'],
ct.DOMAINS['sf'], ct.PAGES['cpt']))
data = []
for row in df.values:
rowDf = _get_detail(row[0])
Expand All @@ -69,7 +71,7 @@ def get_area_classified(file_path=None):
area :地域名称
"""
df = fd.get_stock_basics(file_path)
df = df[['name','area']]
df = df[['name', 'area']]
df.reset_index(level=0, inplace=True)
df = df.sort('area').reset_index(drop=True)
return df
Expand All @@ -86,7 +88,7 @@ def get_gem_classified(file_path=None):
"""
df = fd.get_stock_basics(file_path)
df.reset_index(level=0, inplace=True)
df = df[['code','name']]
df = df[ct.FOR_CLASSIFY_B_COLS]
df = df.ix[df.code.str[0] == '3']
df = df.sort('code').reset_index(drop=True)
return df
Expand All @@ -103,7 +105,7 @@ def get_sme_classified(file_path=None):
"""
df = fd.get_stock_basics(file_path)
df.reset_index(level=0, inplace=True)
df = df[['code','name']]
df = df[ct.FOR_CLASSIFY_B_COLS]
df = df.ix[df.code.str[0:3] == '002']
df = df.sort('code').reset_index(drop=True)
return df
Expand All @@ -119,7 +121,7 @@ def get_st_classified(file_path=None):
"""
df = fd.get_stock_basics(file_path)
df.reset_index(level=0, inplace=True)
df = df[['code','name']]
df = df[ct.FOR_CLASSIFY_B_COLS]
df = df.ix[df.name.str.contains('ST')]
df = df.sort('code').reset_index(drop=True)
return df
Expand All @@ -130,20 +132,21 @@ def _get_detail(tag,retry_count=3,pause=0.001):
time.sleep(pause)
try:
print 'getting tag : %s'%tag
request = urllib2.Request(ct.SINA_DATA_DETAIL_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],ct.PAGES['jv'],tag))
text = urllib2.urlopen(request,timeout=10).read()
request = urllib2.Request(ct.SINA_DATA_DETAIL_URL%(ct.P_TYPE['http'],
ct.DOMAINS['vsf'], ct.PAGES['jv'],
tag))
text = urllib2.urlopen(request, timeout=10).read()
except _network_error_classes:
pass
else:
reg = re.compile(r'\,(.*?)\:')
text = reg.sub(r',"\1":', text)
text = text.replace('"{symbol','{"symbol')
text = text.replace('{symbol','{"symbol"')
jstr = json.dumps(text,encoding='GBK')
text = text.replace('"{symbol', '{"symbol')
text = text.replace('{symbol', '{"symbol"')
jstr = json.dumps(text, encoding='GBK')
js = json.loads(jstr)
the_fields = ['code','symbol','name','changepercent','trade','open','high','low','settlement','volume','turnoverratio']
df = pd.DataFrame(pd.read_json(js,dtype={'code':object}),columns=the_fields)
df = df[['code','name']]
df = pd.DataFrame(pd.read_json(js, dtype={'code':object}), columns=ct.THE_FIELDS)
df = df[ct.FOR_CLASSIFY_B_COLS]
return df
raise IOError("%s获取失败,请检查网络和URL:%s" % (code, url))

Expand All @@ -155,8 +158,37 @@ def _get_type_data(url):
data_str = data_str.decode('GBK')
data_str = data_str.split('=')[1]
data_json = json.loads(data_str)
df = pd.DataFrame([[row.split(',')[0],row.split(',')[1]] for row in data_json.values()], columns=['tag', 'name'])
df = pd.DataFrame([[row.split(',')[0], row.split(',')[1]] for row in data_json.values()],
columns=['tag', 'name'])
return df
except Exception as er:
print str(er)


def get_hs300s():
    """
    Fetch the current CSI 300 (HS300) constituent stocks and their weights.

    Two Excel sheets are read from the URL built with
    ``ct.HS300_CLASSIFY_URL``: the constituent list (``ct.PAGES['hs300b']``)
    and the closing-weight table (``ct.PAGES['hs300w']``).  Both get their
    ``code`` column left-padded with zeros to six characters and are then
    merged on the column(s) they have in common.

    Return
    --------
    DataFrame
        code   : stock code
        name   : stock name
        date   : date
        weight : weight

    If any step raises, the error message is printed and ``None`` is
    returned implicitly.
    """
    try:
        # Constituent sheet: keep columns 0 and 1 only.
        # NOTE(review): ``parse_cols`` was later renamed ``usecols`` in
        # pandas -- confirm against the pandas version this project pins.
        cons_url = ct.HS300_CLASSIFY_URL % (ct.P_TYPE['http'],
                                            ct.DOMAINS['idx'],
                                            ct.INDEX_C_COMM,
                                            ct.PAGES['hs300b'])
        members = pd.read_excel(cons_url, parse_cols=[0, 1])
        members.columns = ct.FOR_CLASSIFY_B_COLS
        # Presumably the sheet yields numeric codes, hence the zero-padding
        # back to six characters -- TODO confirm.
        members['code'] = members['code'].map(
            lambda raw: str(raw).zfill(6))

        # Weight sheet: keep columns 0, 3 and 6 only.
        weight_url = ct.HS300_CLASSIFY_URL % (ct.P_TYPE['http'],
                                              ct.DOMAINS['idx'],
                                              ct.INDEX_C_COMM,
                                              ct.PAGES['hs300w'])
        weights = pd.read_excel(weight_url, parse_cols=[0, 3, 6])
        weights.columns = ct.FOR_CLASSIFY_W_COLS
        weights['code'] = weights['code'].map(
            lambda raw: str(raw).zfill(6))

        # No explicit key: pandas joins on the shared column names.
        return pd.merge(members, weights)
    except Exception as er:
        # Best-effort, like the other fetchers here: report and return None.
        print(str(er))


if __name__ == '__main__':
    # Manual smoke test: fetch the HS300 table and dump it to stdout.
    hs300 = get_hs300s()
    print(hs300)
14 changes: 11 additions & 3 deletions tushare/stock/cons.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
PAGE_NUM = [38, 60, 80, 100]
DOMAINS = {'sina': 'sina.com.cn', 'sinahq': 'sinajs.cn',
'ifeng': 'ifeng.com', 'sf': 'finance.sina.com.cn',
'vsf': 'vip.stock.finance.sina.com.cn'}
'vsf': 'vip.stock.finance.sina.com.cn',
'idx':'www.csindex.com.cn'}
PAGES = {'fd': 'index.phtml', 'dl': 'downxls.php', 'jv': 'json_v2.php',
'cpt': 'newFLJK.php', 'ids': 'newSinaHy.php', 'lnews':'rollnews_ch_out_interface.php',
'ntinfo':'vCB_BulletinGather.php'}
'ntinfo':'vCB_BulletinGather.php', 'hs300b':'000300cons.xls',
'hs300w':'000300closeweight.xls'}
TICK_COLUMNS = ['time', 'price', 'change', 'volume', 'amount', 'type']
DAY_TRADING_COLUMNS = ['code', 'symbol', 'name', 'changepercent',
'trade', 'open', 'high', 'low', 'settlement', 'volume', 'turnoverratio']
Expand All @@ -43,6 +45,9 @@
LIVE_DATA_COLS = ['name', 'open', 'pre_close', 'price', 'high', 'low', 'bid', 'ask', 'volume', 'amount',
'b1_v', 'b1_p', 'b2_v', 'b2_p', 'b3_v', 'b3_p', 'b4_v', 'b4_p', 'b5_v', 'b5_p',
'a1_v', 'a1_p', 'a2_v', 'a2_p', 'a3_v', 'a3_p', 'a4_v', 'a4_p', 'a5_v', 'a5_p', 'date', 'time', 's']
FOR_CLASSIFY_B_COLS = ['code','name']
FOR_CLASSIFY_W_COLS = ['date','code','weight']
THE_FIELDS = ['code','symbol','name','changepercent','trade','open','high','low','settlement','volume','turnoverratio']
TICK_PRICE_URL = '%smarket.%s/%s?date=%s&symbol=%s'
DAY_PRICE_URL = '%sapi.finance.%s/%s/?code=%s&type=last'
LIVE_DATA_URL = '%shq.%s/rn=%s&list=%s'
Expand All @@ -59,4 +64,7 @@

SINA_CONCEPTS_INDEX_URL = '%smoney.%s/q/view/%s?param=class'
SINA_INDUSTRY_INDEX_URL = '%s%s/q/view/%s'
SINA_DATA_DETAIL_URL = '%s%s/quotes_service/api/%s/Market_Center.getHQNodeData?page=1&num=400&sort=symbol&asc=1&node=%s&symbol=&_s_r_a=page'
SINA_DATA_DETAIL_URL = '%s%s/quotes_service/api/%s/Market_Center.getHQNodeData?page=1&num=400&sort=symbol&asc=1&node=%s&symbol=&_s_r_a=page'

INDEX_C_COMM = 'sseportal/ps/zhs/hqjt/csi'
HS300_CLASSIFY_URL = '%s%s/%s/%s'

0 comments on commit ddbd793

Please sign in to comment.