Код:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
# Read the reviews CSV and immediately narrow it to the five columns that
# the rest of the script works with.
df = pd.read_csv(
    '/kaggle/input/amazon-product-reviews/Reviews.csv'
)[['Id', 'ProductId', 'Score', 'Summary', 'Text']]

# Bar chart of how many rows carry each distinct Score value.
df['Score'].value_counts().plot.bar()
# Keep only the three columns needed to build the rating matrix.
ratings_df = df[['Id', 'ProductId', 'Score']]
ratings_df

# Rating matrix: one row per Id, one column per ProductId; each cell holds
# the Score for that pair (aggregated with pivot_table's default aggfunc),
# and pairs with no rating are filled with 0.
# NOTE(review): if `Id` identifies a single review rather than a user, every
# row contains exactly one rating -- confirm the intended index column.
pivot_table = ratings_df.pivot_table(
    index='Id', columns='ProductId', values='Score', fill_value=0
)

# cosine_similarity compares the *rows* of its input. Products are the
# columns of pivot_table, so transpose first to obtain a
# product-by-product similarity matrix (the original call produced an
# Id-by-Id matrix instead).
items_similarity = cosine_similarity(pivot_table.T)
# Example query: find the k rows of the rating matrix that are closest to
# one chosen row.
user_id = 4
k = 5

# Take the row labelled `user_id` and give it a leading axis so it becomes a
# 1 x n_columns array, the 2-D shape cosine_similarity requires.
user_ratings = pivot_table.loc[user_id].to_numpy()[np.newaxis, :]
user_ratings

# Cosine similarity of that single row against every row of pivot_table;
# the result has one row and one column per pivot_table row.
user_item_similarity = cosine_similarity(user_ratings, pivot_table)

# argsort is ascending, so reverse the only row to get descending similarity
# and keep the first k positions.
# NOTE(review): these are positional indices into the rows of pivot_table
# (its `Id` axis), not product identifiers, and the query row is itself one
# of the compared rows.
similar_item_indices = user_item_similarity.argsort()[0][::-1][:k]
similar_item_indices
Товары с самым высоким рейтингом
def recommend_items(ratings_df, min_score=3, limit=50):
    """Return the first ``limit`` ratings whose Score is at least ``min_score``.

    Rows keep the order they have in ``ratings_df``; the result is a
    threshold filter followed by a cut-off, not a ranking by score.

    Args:
        ratings_df: DataFrame with a ``'Score'`` column.
        min_score: Inclusive lower bound on ``'Score'``. Defaults to 3.
        limit: Maximum number of rows to return, passed to
            ``DataFrame.head``. Defaults to 50.

    Returns:
        A DataFrame with the matching rows (empty if none qualify).
    """
    meets_threshold = ratings_df['Score'] >= min_score
    return ratings_df[meets_threshold].head(limit)
# Build the filtered recommendation table and print one line per row, or a
# fallback message when nothing passed the filter.
recommendations = recommend_items(ratings_df)
if recommendations.empty:
    print("No recommendations found.")
else:
    for _, rec in recommendations.iterrows():
        print("Product ID:", rec['ProductId'], "Score:", rec['Score'])