From 2002819149c0fb126d2548b826fb71cf920176cb Mon Sep 17 00:00:00 2001 From: srivani vinnakota <164530944+sv410@users.noreply.github.com> Date: Wed, 9 Oct 2024 19:48:29 +0530 Subject: [PATCH] Create content_based_filtering.py --- content_based_filtering.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 content_based_filtering.py diff --git a/content_based_filtering.py b/content_based_filtering.py new file mode 100644 index 00000000..c1943d15 --- /dev/null +++ b/content_based_filtering.py @@ -0,0 +1,28 @@ +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import linear_kernel + +# Example item descriptions +items = [ + {'id': 101, 'description': 'Vintage camera from the 1950s'}, + {'id': 102, 'description': 'Classic vinyl record'}, + {'id': 103, 'description': 'Retro gaming console'} +] + +# Create TF-IDF matrix +tfidf = TfidfVectorizer(stop_words='english') +tfidf_matrix = tfidf.fit_transform([item['description'] for item in items]) + +# Compute cosine similarity +cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix) + +# Function to get recommendations +def get_recommendations(item_id, cosine_sim=cosine_sim): + idx = next(index for (index, d) in enumerate(items) if d["id"] == item_id) + sim_scores = list(enumerate(cosine_sim[idx])) + sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True) + sim_scores = sim_scores[1:4] + item_indices = [i[0] for i in sim_scores] + return [items[i]['id'] for i in item_indices] + +# Example usage +print(get_recommendations(101))