Skip to content

Commit

Permalink
Merge branch 'main' of github.com:datawhalechina/torch-rechub into main
Browse files (browse the repository at this point in the history)
  • Loading branch information
bokang-ugent committed Jul 4, 2022
2 parents 0cfda9c + c246a82 commit 4e07525
Show file tree
Hide file tree
Showing 7 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion examples/matching/run_ml_dssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_movielens_data(data_path, load_cache=False):
data = pd.read_csv(data_path)
data["cate_id"] = data["genres"].apply(lambda x: x.split("|")[0])
sparse_features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip', "cate_id"]
user_col, item_col, label_col = "user_id", "movie_id", "label"
user_col, item_col = "user_id", "movie_id"

feature_max_idx = {}
for feature in sparse_features:
Expand Down
2 changes: 1 addition & 1 deletion examples/matching/run_ml_youtube_dnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get_movielens_data(data_path, load_cache=False):
neg_ratio=3,
min_item=0)
x_train = gen_model_input(df_train, user_profile, user_col, item_profile, item_col, seq_max_len=50)
y_train = np.array([0] * df_train.shape[0]) #label=0 means the first pred value is positiva sample
y_train = np.array([0] * df_train.shape[0]) #label=0 means the first pred value is positive sample
x_test = gen_model_input(df_test, user_profile, user_col, item_profile, item_col, seq_max_len=50)
np.save("./data/ml-1m/saved/data_cache.npy", np.array((x_train, y_train, x_test), dtype=object))

Expand Down
2 changes: 1 addition & 1 deletion examples/matching/run_ml_youtube_sbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def main(dataset_path, epoch, learning_rate, batch_size, weight_decay, device, s
sample_weight_feature,
user_params={"dims": [128, 64, 16]},
item_params={"dims": [128, 64, 16]},
batch_size=batch_size,
batch_size=batch_size, # !! should be same as batch size of dataloader
n_neg=3,
temperature=0.02)
#mode=2 means use list-wise loss: softmax
Expand Down
2 changes: 1 addition & 1 deletion examples/ranking/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ test:用于测试模型预测的1天广告数据,约四百六十万。
- 预处理之后的数据下载地址:https://cowtransfer.com/s/e8b67418ce044c
- 使用方法

```python
```shell
python run_census.py --model_name SharedBottom
python run_census.py --model_name ESMM
python run_census.py --model_name MMOE
Expand Down
2 changes: 1 addition & 1 deletion examples/ranking/run_ali_ccp_multi_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def get_ali_ccp_data_dict(model_name, data_path='./data/ali-ccp'):

col_names = data.columns.values.tolist()
dense_cols = ['D109_14', 'D110_14', 'D127_14', 'D150_14', 'D508', 'D509', 'D702', 'D853']
sparse_cols = [col for col in col_names if col not in dense_cols and col not in ['cvr_label', 'ctr_label']]
sparse_cols = [col for col in col_names if col not in dense_cols and col not in ['cvr_label', 'ctr_label', 'ctcvr_label']]
print("sparse cols:%d dense cols:%d" % (len(sparse_cols), len(dense_cols)))
#define dense and sparse features
if model_name == "ESMM":
Expand Down
2 changes: 1 addition & 1 deletion examples/ranking/run_census.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_census_data_dict(model_name, data_path='./data/census-income'):

col_names = data.columns.values.tolist()
dense_cols = ['age', 'wage per hour', 'capital gains', 'capital losses', 'divdends from stocks', 'num persons worked for employer', 'weeks worked in year']
sparse_cols = [col for col in col_names if col not in dense_cols and col not in ['cvr_label', 'ctr_label']]
sparse_cols = [col for col in col_names if col not in dense_cols and col not in ['cvr_label', 'ctr_label', 'ctcvr_label']]
print("sparse cols:%d dense cols:%d" % (len(sparse_cols), len(dense_cols)))
#define dense and sparse features
if model_name == "ESMM":
Expand Down
4 changes: 2 additions & 2 deletions examples/ranking/run_criteo.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def get_criteo_data_dict(data_path):
dense_features = [f for f in data.columns.tolist() if f[0] == "I"]
sparse_features = [f for f in data.columns.tolist() if f[0] == "C"]

data[sparse_features] = data[sparse_features].fillna('-996',)
data[dense_features] = data[dense_features].fillna(0,)
data[sparse_features] = data[sparse_features].fillna('0')
data[dense_features] = data[dense_features].fillna(0)

for feat in tqdm(dense_features): #discretize dense feature and as new sparse feature
sparse_features.append(feat + "_cat")
Expand Down

0 comments on commit 4e07525

Please sign in to comment.