Description
Hi!
Thanks for a really impressive package.
In my world there are two common scenarios when building recommendation systems. You either want to recommend products that a customer has never liked (or bought) from your whole catalogue, or you want to recommend products from a subset of the catalogue, e.g. products that are discounted. Most implementations of collaborative filtering focus on the first scenario. My question is how to use the items_exclude argument to tackle the second scenario. This is somewhat related to a previous issue.
For instance, say that we have 60 artists in the lastfm dataset whose albums are on sale, and those are the artists we want to recommend.
Example code from: http://dsnotes.com/post/2017-06-28-matrix-factorization-for-recommender-systems-part-2/
# Fix the RNG seed so the sample() calls further down are reproducible.
set.seed(1)
library(data.table)
# Read the raw play-count table from the lastfm 360K TSV file.
raw_data = fread("lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv",
showProgress = FALSE, encoding = "UTF-8",
quote = "")
setnames(raw_data, c("user_id", "artist_id", "artist_name", "number_plays"))
# One row per distinct user_id, with an integer group counter as uid.
user_encoding <- raw_data[, .(uid = .GRP), keyby = user_id]
# One row per distinct artist_id, with an integer group counter as iid and
# the first artist_name found for that id.
item_encoding = raw_data[, .(iid = .GRP, artist_name = artist_name[[1]]), keyby = artist_id]
Here I'll sample 60 artists "on sale" and create a table of items to exclude from the predictions.
# Draw 60 artist names at random to play the role of the "on sale" items.
# NOTE(review): selection is by artist_name, not artist_id; if names are not
# unique across ids this may match more than 60 rows -- confirm.
on_sale <- sample(item_encoding$artist_name, 60)
# Every row of item_encoding whose name is NOT on sale goes into the
# exclusion table.
items_exclude <- item_encoding[!(artist_name %in% on_sale)]
on_sale
[1] "the bridge" "snippet"
[3] "v.o.s." "the ullulators"
[5] "藤井フミヤ" "erika jo"
[7] "gore" "amaral"
[9] "ceili rain" "schwarze puppen"
[11] "dan wheeler" "yuki suzuki"
[13] "krymplings" "olivia ruiz"
[15] "edgewater" "karl johan"
[17] "pamela z" "global spirit"
[19] "damien youth" "fires of babylon"
[21] "comic relief" "emmanuel horvilleur"
[23] "sandra stephens" "cyclopede"
[25] "Михаил Боярский" "the great eastern"
[27] "radwimps" "papa austin with the great peso"
[29] "phasen" "mari menari"
[31] "Холодне Сонце" "laura story"
[33] "mugwart" "errand boy"
[35] "erlend krauser" "göran fristorp"
[37] "mousse t & emma lanford" "dj vlad & dirty harry"
[39] "denim" "thomas leer & robert rental"
[41] "the underdog project vs the sunclub" "sense club"
[43] "mary kiani" "ladies night"
[45] "tresk" "the peddlers"
[47] "quatuor ysaÿe" "brandhärd"
[49] "bittor aiape" "prince francis"
[51] "alex klaasen & martine sandifort" "peppermint petty"
[53] "dave ramsey" "müşfik kenter"
[55] "shima & shikou duo" "jimmy j & cru-l-t"
[57] "ankarali yasemin" "marian opania"
[59] "madita" "zoltar"
Below is some data manipulation to put the data in a sparse matrix.
library(Matrix)
# Drop the artist_name column from raw_data by reference; item_encoding
# already holds a name per artist_id.
raw_data[, artist_name := NULL]
# Join the integer uid onto every play record.
dt = user_encoding[raw_data, .(artist_id, uid, number_plays), on = .(user_id = user_id)]
# Join the integer iid, leaving (iid, uid, number_plays) triplets.
dt = item_encoding[dt, .(iid, uid, number_plays), on = .(artist_id = artist_id)]
rm(raw_data)
# Sparse matrix of play counts: rows indexed by uid, columns by iid, with
# user_id as row names and artist_name as column names.
X = sparseMatrix(i = dt$uid, j = dt$iid, x = dt$number_plays,
dimnames = list(user_encoding$user_id, item_encoding$artist_name))
# Hold out N_CV randomly chosen user rows as the cross-validation set; the
# remaining rows form the training set.
N_CV = 1000L
cv_uid = sample(nrow(user_encoding), N_CV)
X_train = X[-cv_uid, ]
X_cv = X[cv_uid, ]
rm(X)
Here we fit the model.
# Turn raw interaction values into confidence weights.
#
# x:     an object inheriting from "sparseMatrix"; its stored values live
#        in the `x` slot.
# alpha: multiplier applied to each stored value.
#
# Returns a copy of `x` whose stored values are 1 + alpha * value. The
# caller's object is not modified.
make_confidence <- function(x, alpha) {
  stopifnot(inherits(x, "sparseMatrix"))
  scaled_values <- 1 + alpha * x@x
  # Assigning into the slot only changes this function's local copy.
  x@x <- scaled_values
  x
}
library(rsparse)
# Create a rank-8 WRMF model for implicit feedback.
# NOTE(review): `x_train` (lowercase) is not defined anywhere in the visible
# snippet -- only `X_train` is. Confirm whether these constructor arguments
# are intended or used at all.
model = WRMF$new(x_train = x_train, x_cv = X_cv, rank = 8, feedback = "implicit")
set.seed(1)
alpha = 0.01
# Convert raw play counts into confidence weights (1 + alpha * plays).
X_train_conf = make_confidence(X_train, alpha)
# NOTE(review): `X_cv_history` is not defined in the visible snippet; it
# presumably comes from a train/validation split step that was left out.
X_cv_history_conf = make_confidence(X_cv_history, alpha)
# Fit on the training confidences, then embed the held-out users' history
# with the fitted model.
user_embeddings = model$fit_transform(X_train_conf, n_iter = 10L, n_threads = 8)
new_user_embeddings = model$transform(X_cv_history_conf)
Now, I want to recommend only the artists that are on sale, so I pass the excluded artists to the items_exclude
argument.
# Take the first held-out user; drop = FALSE keeps the result a one-row
# matrix instead of collapsing it to a vector.
new_user_1 = X_cv[1:1, , drop = FALSE]
# Request the top 60 items, passing the names of all artists that are not
# on sale as items_exclude.
new_user_predictions = model$predict(new_user_1, k = 60, items_exclude = items_exclude$artist_name)
# Show the first few recommended ids, read from the "ids" attribute of the
# prediction result.
head(data.frame(segmentid = t(attr(new_user_predictions, "ids"))))
e9dc15dfabe0bdac615143623e1fe83ba4e2daa5
1 björk
2 einstürzende neubauten
3 isis
4 frédéric chopin
5 sigur rós
6 ë\u008f™ë°©ì‹ 기
However, these recommendations are not the ones on sale?
I suppose this would be clearer to me with a vignette, which I can see is on its way; in the meantime, however, how should one use the items_exclude argument?
Furthermore, say we want to maximize the recommendations here, i.e. set k = 60; would that work for multiple users?