# Original version (原版)
import math
from random import randrange
# 原始数据,格式为{用户1:{电影名称1:打分1, 电影名称2:打分2,...}, 用户2:{...}}
"""
data = {
"User1": {"film1": 5, "film2": 4, "film3": 3},
"User2": {"film1": 4, "film2": 5, "film3": 4},
"User3": {"film1": 4, "film2": 3, "film3": 5},
"User4": {"film1": 2, "film2": 3, "film3": 5},
"NewUser": {"film1": 4, "film2": 5}
}
"""
def user_sim(user1, user2):
    """Return the cosine similarity between two users' rating dicts.

    Each argument maps a movie name to a numeric rating. The dot product
    runs over the movies both users rated, while each norm runs over all
    of that user's own ratings.

    Returns 0 when the product of the two norms is zero (for example when
    either user has no ratings), otherwise the similarity as a float.
    """
    shared = set(user1) & set(user2)
    dot = sum(user1[title] * user2[title] for title in shared)
    norm1 = math.sqrt(sum(rating ** 2 for rating in user1.values()))
    norm2 = math.sqrt(sum(rating ** 2 for rating in user2.values()))
    scale = norm1 * norm2
    if scale:
        return dot / scale
    # A zero norm would mean dividing by zero; report "no similarity".
    return 0
def rec_mov(data, user):
    """Recommend movies to *user* from the single most similar other user.

    Args:
        data: Mapping of user name to that user's ``{movie: rating}`` dict.
        user: Key in *data* identifying the user to recommend for.

    Returns:
        A list of ``(movie, rating)`` tuples for the movies the most
        similar user rated and *user* has not, ordered by that user's
        rating from highest to lowest. An empty list is returned when
        *data* contains no user other than *user*.

    Raises:
        KeyError: If *user* is not a key of *data*.

    Side effects:
        Prints the name of the best-matching user followed by a blank line.
    """
    own_ratings = data[user]

    # Score every other user against the target user.
    similarities = {
        other_user: user_sim(own_ratings, ratings)
        for other_user, ratings in data.items()
        if other_user != user
    }

    # With nobody to compare against there is nothing to recommend;
    # previously this case indexed an empty list and raised IndexError.
    if not similarities:
        return []

    # max() yields the first maximal entry in insertion order, which is the
    # same user a stable descending sort would place first.
    most_similar_user = max(similarities, key=similarities.get)
    print("最匹配的用户为:", most_similar_user)
    print()

    # Keep only the movies the matched user rated that the target has not.
    recommendations = {
        movie: rating
        for movie, rating in data[most_similar_user].items()
        if movie not in own_ratings
    }
    return sorted(recommendations.items(), key=lambda item: item[1], reverse=True)
# Build a random data set, then recommend movies for NewUser.
# Ten existing users, each with up to 14 random ratings (0-9) on film1..film9.
data = {
    f"User{i}": {
        f"film{randrange(1, 10)}": randrange(10)
        for _ in range(randrange(15))
    }
    for i in range(10)
}
# The new user has at most 4 random ratings.
NewUser = {
    f"film{randrange(1, 10)}": randrange(10)
    for _ in range(randrange(5))
}

print("已有的数据为:")
for name, ratings in data.items():
    print(name, "=", dict(ratings))
print()

# Register the new user so rec_mov can look the ratings up by name.
data["NewUser"] = NewUser
print("需要推荐电影的用户数据为: NewUser =", NewUser)
print()

recommendations = rec_mov(data, "NewUser")
print("为用户推荐的电影为: ", end="")
for title, _rating in recommendations:
    print(title, end=" ")
# Condensed version (精简版)
import math
from random import randrange
def jisuan(user1, user2):
    """Compute the cosine similarity of two ``{movie: rating}`` dicts.

    The numerator sums rating products over the movies present in both
    dicts; the denominator multiplies the Euclidean norms of each user's
    full rating set. Returns 0 if that denominator is zero.
    """
    common = set(user1) & set(user2)
    dot_product = sum(user1[film] * user2[film] for film in common)
    squares1 = sum(score ** 2 for score in user1.values())
    squares2 = sum(score ** 2 for score in user2.values())
    magnitude = math.sqrt(squares1) * math.sqrt(squares2)
    return dot_product / magnitude if magnitude else 0
def tuijian(data, user):
    """Recommend movies for *user* using the most similar other user.

    Args:
        data: Mapping of user name to a ``{movie: rating}`` dict.
        user: Key in *data* naming the user who receives recommendations.

    Returns:
        ``(movie, rating)`` tuples taken from the best-matching user for
        movies *user* has not rated, sorted by rating in descending order.
        Returns an empty list when *data* has no user besides *user*.

    Raises:
        KeyError: If *user* is not a key of *data*.

    Side effects:
        Prints the best-matching user's name followed by a blank line.
    """
    target = data[user]

    # Similarity of every other user to the target user.
    scores = {}
    for other, ratings in data.items():
        if other != user:
            scores[other] = jisuan(target, ratings)

    # No other users: nothing to match against. This case used to fail
    # with IndexError when taking the first element of an empty ranking.
    if not scores:
        return []

    # First maximal entry in insertion order, matching what a stable
    # descending sort would rank first.
    best_match = max(scores, key=scores.get)
    print("最匹配的用户为:", best_match)
    print()

    # Movies the best match rated that the target user has not.
    unseen = [
        (movie, rating)
        for movie, rating in data[best_match].items()
        if movie not in target
    ]
    unseen.sort(key=lambda pair: pair[1], reverse=True)
    return unseen
# Generate the data set from random numbers: ten users, each with up to
# 14 random ratings (0-9) on film1..film9.
data = {}
for index in range(10):
    data["User" + str(index)] = {
        "film" + str(randrange(1, 10)): randrange(10)
        for _ in range(randrange(15))
    }
NewUser = {
    "film" + str(randrange(1, 10)): randrange(10)
    for _ in range(randrange(5))
}

print("已有的数据为:")
for user_name in data:
    print(user_name, "=", data[user_name])
print()

# Add the new user to the data set.
data["NewUser"] = NewUser
print("需要推荐电影的用户数据为: NewUser =", NewUser)
print()

rs = tuijian(data, "NewUser")
print("为用户推荐的电影为: ", end="")
for film, _score in rs:
    print(film, end=" ")