Код:

Набор данных: https://www.kaggle.com/datasets/arhamrumi/amazon-product-reviews

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
# Load the reviews CSV from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/amazon-product-reviews/Reviews.csv')
# Keep only the columns referenced by the rest of this script.
df  = df[['Id','ProductId','Score','Summary','Text']]
# Bar chart of how many rows carry each distinct Score value.
df['Score'].value_counts().plot(kind='bar')
# Select the required columns for recommendation
ratings_df = df[['Id', 'ProductId', 'Score']]
# Bare expression: presumably left in so a notebook cell displays the frame.
ratings_df
# Dense table with one row per 'Id' and one column per 'ProductId' holding the
# Score (pivot_table averages duplicates by default); absent pairs become 0.
# NOTE(review): 'Id' looks like a per-review key rather than a user key — if so,
# every row holds a single rating and the table is extremely sparse; confirm
# whether a user identifier column was intended here.
pivot_table = ratings_df.pivot_table(index='Id', columns='ProductId', values='Score', fill_value=0)
# NOTE(review): cosine_similarity compares the *rows* of its input, so this is
# an Id-by-Id matrix despite the name `items_similarity`; item-to-item
# similarity would need the transposed table. The result is not used in the
# code visible here, and its size grows with the square of the row count —
# confirm intent before relying on it.
items_similarity = cosine_similarity(pivot_table)
# Example: find the k rows of pivot_table most similar to one chosen row.
# pivot_table is indexed by the 'Id' column, so `user_id` is looked up as an
# 'Id' label (a missing label raises KeyError).
user_id = 4
k = 5

# Ratings of the chosen row as a 1 x n_products array, the 2-D shape
# cosine_similarity expects.
user_ratings = pivot_table.loc[user_id, :].values.reshape(1, -1)
user_ratings
# Similarity between the chosen row and every row of pivot_table
# (shape: 1 x n_rows). These are row-to-row scores, not row-to-item scores.
user_item_similarity = cosine_similarity(user_ratings, pivot_table)

# Row positions ordered from most to least similar.
ranked_positions = user_item_similarity.argsort()[0, ::-1]

# The chosen row is compared against itself as well, so it would otherwise
# occupy one of the top-k slots; drop it before truncating to k.
own_position = pivot_table.index.get_loc(user_id)
similar_item_indices = ranked_positions[ranked_positions != own_position][:k]

# The values above are positional offsets into pivot_table, not 'Id' labels;
# translate them so the result can be joined back to the ratings data.
similar_ids = pivot_table.index[similar_item_indices]

similar_item_indices

Товары с самым высоким рейтингом (оценка не ниже 3, первые 50 записей)

def recommend_items(
    ratings_df: pd.DataFrame,
    min_score: float = 3,
    limit: int = 50,
) -> pd.DataFrame:
    """Return the first ``limit`` rows whose 'Score' is at least ``min_score``.

    Rows keep the order they have in ``ratings_df``; the result is a filter
    followed by a truncation, not a ranking by score.

    Args:
        ratings_df: Frame containing a 'Score' column.
        min_score: Lowest score a row may have to be kept (inclusive).
        limit: Maximum number of rows to return, passed to ``DataFrame.head``.

    Returns:
        A frame with the same columns as ``ratings_df``; empty when no row
        reaches ``min_score``.
    """
    meets_threshold = ratings_df['Score'] >= min_score
    return ratings_df[meets_threshold].head(limit)


recommendations = recommend_items(ratings_df)

# Print one line per recommended product, or a notice when nothing qualified.
if recommendations.empty:
    print("No recommendations found.")
else:
    product_ids = recommendations['ProductId']
    scores = recommendations['Score']
    for product_id, score in zip(product_ids, scores):
        print("Product ID:", product_id, "Score:", score)