You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1.6 KiB
1.6 KiB
10.4 动手实现基于矩阵分解的协同过滤
弄清楚了基于矩阵分解的协同过滤算法的原理和算法流程之后,我们可以很容易地使用 Python 来实现该算法。代码如下:
# -*- coding: utf-8 -*-
import numpy as np
def recommend(userID,lr,alpha,d,n_iter,data):
    '''
    Print five movie recommendations for one user, using collaborative
    filtering based on matrix factorization.

    The rating table is approximated by the product of a user-factor matrix
    ``x`` (users x d) and a movie-factor matrix ``w`` (d x movies), trained by
    gradient descent on the entries that carry a rating. The user's predicted
    scores for already-rated movies are then excluded and the five highest
    remaining scores are reported.

    userID(int): ID of the user to recommend for; 1-based, row ``userID-1`` of ``data`` is used
    lr(float): learning rate
    alpha(float): weight decay coefficient; parameters are multiplied by it on every update
    d(int): number of latent factors of the factorization
    n_iter(int): number of training iterations
    data(ndarray): movie rating table (users x movies); entries > 0 count as rated

    Returns None. The result is printed; titles are read from the module-level
    ``movies_df`` as ``movies_df['title'][column_index]``.
    NOTE(review): ``movies_df`` is not defined in this block -- presumably a
    pandas DataFrame whose index matches the columns of ``data``; confirm.
    Raises IndexError when ``data`` has fewer than five movie columns.
    '''
    # Number of users and number of movies
    m,n = data.shape
    # Random initialisation of both factor matrices
    x = np.random.uniform(0,1,(m,d))
    w = np.random.uniform(0,1,(d,n))
    # Rating mask: 1 where a rating exists, 0 where it does not
    record = np.array(data>0,dtype=int)
    # Gradient descent on the rated entries only
    for _ in range(n_iter):
        # Prediction error restricted to rated entries; computed once from the
        # current x and w and shared by both gradients
        err = np.multiply(record,np.dot(x,w)-data)
        x_grads = np.dot(err,w.T)
        w_grads = np.dot(x.T,err)
        x = alpha*x - lr*x_grads
        w = alpha*w - lr*w_grads
    # Predicted scores of the requested user for every movie
    scores = np.dot(x,w)[userID-1]
    # Exclude movies the user has already rated. -inf (rather than 0) keeps
    # them below every unseen movie even when a predicted score is negative.
    scores[record[userID-1] == 1] = -np.inf
    # Movie indices ordered by ascending predicted score
    ranking = np.argsort(scores)
    # The five best movies, best first
    top_five = [ranking[-rank] for rank in range(1,6)]
    titles = tuple(movies_df['title'][idx] for idx in top_five)
    print('为用户%d推荐的电影为:\n1:%s\n2:%s\n3:%s\n4:%s\n5:%s。'\
          %((userID,) + titles))