Skip to content

Commit

Permalink
code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
dkatz23238 committed Nov 9, 2022
1 parent 16506a0 commit 9752333
Show file tree
Hide file tree
Showing 14 changed files with 6,411 additions and 5,136 deletions.
2 changes: 1 addition & 1 deletion .linucb
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4
9
4,793 changes: 0 additions & 4,793 deletions analytics.txt

This file was deleted.

144 changes: 0 additions & 144 deletions cbandit.py

This file was deleted.

11 changes: 0 additions & 11 deletions experimental_results.md

This file was deleted.

47 changes: 35 additions & 12 deletions linearucb_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import random
from time import perf_counter
import os

from sklearn.utils.extmath import randomized_svd

if os.path.exists(".linucb"):
with open(".linucb") as f:
Expand Down Expand Up @@ -32,6 +32,10 @@ def default(self, obj):

use_wiki_data = True
use_only_wiki_data = True
center_data = False
binarize_tags = False
svd_tags = True
svd_tags_components = 100

# Top k artists
k = 1500
Expand All @@ -48,7 +52,13 @@ def get_user_artist_list(U_pp: pd.DataFrame, user_id: int):
A_v = (pd.read_csv(
"./processed_data/artist_vectors.csv", encoding="utf-8").set_index("name")
.astype(np.int8)
)
).apply(np.log1p)

original_Av_columns = A_v.columns

if binarize_tags:
A_v = (A_v > 0).astype(int)


U_pp = (pd.read_csv(
"./processed_data/user_play_pair.csv", encoding="utf-8").set_index("userID")
Expand All @@ -73,19 +83,26 @@ def get_user_artist_list(U_pp: pd.DataFrame, user_id: int):
experiment_data = {}
experiment_data["use_wiki_data"] = use_wiki_data
experiment_data["use_only_wiki_data"] = use_only_wiki_data
experiment_data["svd_tags"] = svd_tags
experiment_data["center_data"] = center_data
experiment_data["svd_tags_components"] = svd_tags_components
experiment_data["binarize_tags"] = binarize_tags

if center_data:
# Center design matrix
A_v = A_v - A_v.mean()


if use_wiki_data:
if svd_tags:
U, D, VT = randomized_svd(A_v, svd_tags_components)
A_v = pd.DataFrame(U, index=A_v.index)

if use_wiki_data and not use_only_wiki_data:
A_v = A_v.merge(Udf, right_index=True, left_index=True)

if use_only_wiki_data:
A_v = Udf


# Center design matrix
A_v = A_v - A_v.mean()


for uid in user_ids[:J]:

X = A_v
Expand Down Expand Up @@ -131,11 +148,11 @@ def get_user_artist_list(U_pp: pd.DataFrame, user_id: int):

x_cand = X.iloc[nth_artist]
reward = target[nth_artist]

x = x_cand
# Bayesian Update rule
A = A + x_cand @ x_cand.T
# A = A + x_cand.values[:, None].dot(x_cand.values[:, None].T)
b = b + reward * x_cand
A = A + x @ x.T

b = b + reward * x

seen.append(nth_artist)
decision_sequence.append((X.index[nth_artist], reward))
Expand All @@ -156,3 +173,9 @@ def get_user_artist_list(U_pp: pd.DataFrame, user_id: int):

with open(f"linearucb_experiment_data_{rounds}.json", "w") as f:
f.write(json.dumps(experiment_data, cls=NpEncoder))


component_interpretation_matrix = pd.DataFrame(
VT, columns=original_Av_columns).T

component_interpretation_matrix.to_csv("component_interpretation_matrix.csv")
1 change: 0 additions & 1 deletion linearucb_experiment_data_0.json

This file was deleted.

6,309 changes: 6,308 additions & 1 deletion linearucb_experiment_data_1.json

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion linearucb_experiment_data_2.json

This file was deleted.

1 change: 0 additions & 1 deletion linearucb_experiment_data_3.json

This file was deleted.

1 change: 0 additions & 1 deletion linearucb_experiment_data_4.json

This file was deleted.

76 changes: 63 additions & 13 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,84 @@

![Alt text](britney_vs_madona.png "Title")

## Scripts
## Experimentation

Create `Udf.csv` file and run experiment.
Process the Wikipedia data to create a `Udf.csv` file.

```sh
python wiki_text.py
```

Update the global variables to modify experiment parameters.

```py
use_wiki_data = True
use_only_wiki_data = True
center_data = False
binarize_tags = False
svd_tags = True
svd_tags_components = 100

# Top k artists
k = 1500
# J experiments
J = 150
# H trials
H = 35
```

```sh
python linearucb_experiment.py
```

Then analyze the results using the results analysis script.

```sh
python results_analysis.py
```

## Outcomes
### (Newer code)

```sh
use_wiki_data True
use_wiki_data False
use_only_wiki_data False
control: 0.02 | reward: 0.17 | lift: 0.15000000000000002
svd_tags True
center_data False
svd_tags_components 50
binarize_tags False
control: 0.02 | reward: 0.23 | lift: 0.21000000000000002

use_wiki_data True
use_only_wiki_data True
control: 0.02 | reward: 0.02 | lift: 0.0
use_wiki_data False
use_only_wiki_data False
svd_tags True
center_data False
svd_tags_components 3
binarize_tags True
control: 0.02 | reward: 0.16 | lift: 0.14

use_wiki_data True
use_only_wiki_data True
control: 0.02 | reward: 0.03 | lift: 0.009999999999999998
use_wiki_data False
use_only_wiki_data False
svd_tags True
center_data False
svd_tags_components 30
binarize_tags False
control: 0.02 | reward: 0.2 | lift: 0.18000000000000002

use_wiki_data True
use_only_wiki_data True
control: 0.02 | reward: 0.02 | lift: 0.0
use_wiki_data False
use_only_wiki_data False
svd_tags True
center_data False
svd_tags_components 100
binarize_tags False
control: 0.02 | reward: 0.19 | lift: 0.17

use_wiki_data False
use_only_wiki_data False
svd_tags True
center_data False
svd_tags_components 3
binarize_tags False
control: 0.02 | reward: 0.19 | lift: 0.17

```
6 changes: 4 additions & 2 deletions results_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import glob

runs = []
meta_keys = ["use_wiki_data", "use_only_wiki_data"]

meta_keys = ["use_wiki_data", "use_only_wiki_data",
"svd_tags", "center_data", "svd_tags_components", "binarize_tags"]

for f in glob.glob("linearucb_experiment_data_*.json"):
with open(f) as fhandle:
Expand All @@ -17,7 +19,7 @@
]

for mk in meta_keys:
print(f"{mk} {run[mk]}")
print(f"{mk} {run.get(mk,None)}")

r = round(sum(reward) / (35*150), 2)
c = round(sum(control) / (35*150), 2)
Expand Down
Loading

0 comments on commit 9752333

Please sign in to comment.