Bohrium
robot
新建

空间站广场

论文
Notebooks
比赛
课程
Apps
我的主页
我的Notebooks
我的论文库
我的足迹

我的工作空间

任务
节点
文件
数据集
镜像
项目
数据库
公开
Lab 02 : Collaborative recommendation
Machine Learning
Machine Learning
xuxh@dp.tech
更新于 2024-10-15
推荐镜像 :Basic Image:bohrium-notebook:2023-04-07
推荐机型 :c2_m4_cpu
Lecture : Recommendation on Graphs
Lab 02 : Collaborative recommendation
Xavier Bresson
Synthetic dataset
Real-world dataset SWEETRS

Lecture : Recommendation on Graphs

Lab 02 : Collaborative recommendation

Xavier Bresson

代码
文本
[1]
# For Google Colaboratory
import sys, os
if 'google.colab' in sys.modules:
# mount google drive
from google.colab import drive
drive.mount('/content/gdrive')
path_to_file = '/content/gdrive/My Drive/GML2023_codes/codes/05_Recommendation'
print(path_to_file)
# change current path to the folder containing "path_to_file"
os.chdir(path_to_file)
!pwd
代码
文本
[2]
# Load libraries
import numpy as np
import scipy.io
%matplotlib inline
#%matplotlib notebook
from IPython.display import display, clear_output
from matplotlib import pyplot
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import time
import sys; sys.path.insert(0, 'lib/')
from lib.utils import shrink
import scipy.sparse.linalg
import warnings; warnings.filterwarnings("ignore")

代码
文本

Synthetic dataset

代码
文本
[3]
# Load graphs of rows/users and columns/movies
mat = scipy.io.loadmat('datasets/synthetic_netflix.mat')
M = mat['M']
Otraining = mat['Otraining']
Otest = mat['Otest']
Wrow = mat['Wrow']
Wcol = mat['Wcol']
n,m = M.shape
print('n,m=',n,m)

Mgt = M # Ground truth
O = Otraining
M = O* Mgt
perc_obs_training = np.sum(Otraining) / (n*m)
print('perc_obs_training=',perc_obs_training)

n,m= 150 200
perc_obs_training= 0.03
代码
文本
[4]
# Viusalize the rating matrix
plt.figure(1)
plt.imshow(Mgt, interpolation='nearest', cmap='jet')
plt.title('Low-rank Matrix M.\nNote: We NEVER observe it\n in real-world applications')
plt.show()

plt.figure(2)
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet')
plt.title('Observed values of M\n for TRAINING.\n Percentage=' + str(perc_obs_training))
plt.show()

<Figure size 640x480 with 1 Axes>
<Figure size 640x480 with 1 Axes>
代码
文本
[5]
# Collaborative filtering / low-rank approximation by nuclear norm

# Norm of the operator
OM = O*M
normOM = np.linalg.norm(OM,2)

#######################################
# Select the set of hyper-parameters
#######################################

# scenario : very low number of ratings, 0.03%, error metric = 138.75
lambdaNuc = normOM/4; lambdaDF = 1e3 * 1e-2


# Indentify zero columns and zero rows in the data matrix X
idx_zero_cols = np.where(np.sum(Otraining,axis=0)<1e-9)[0]
idx_zero_rows = np.where(np.sum(Otraining,axis=1)<1e-9)[0]
nb_zero_cols = len(idx_zero_cols)
nb_zero_rows = len(idx_zero_rows)
# Initialization
X = M; Xb = X;
Y = np.zeros([n,m])
normA = 1.
sigma = 1./normA
tau = 1./normA
diffX = 1e10
min_nm = np.min([n,m])
k = 0
while (k<2000) & (diffX>1e-1):
# Update iteration
k += 1
# Update dual variable y
Y = Y + sigma* Xb
U,S,V = np.linalg.svd(Y/sigma)
Sdiag = shrink( S , lambdaNuc/ sigma )
I = np.array(range(min_nm))
Sshrink = np.zeros([n,m])
Sshrink[I,I] = Sdiag
Y = Y - sigma* U.dot(Sshrink.dot(V))
# Update primal variable x
Xold = X
X = X - tau* Y
X = ( X + tau* lambdaDF* O* M)/ (1 + tau* lambdaDF* O)
# Fix issue with no observations along some rows and columns
r,c = np.where(X>0.0); median = np.median(X[r,c])
if nb_zero_cols>0: X[:,idx_zero_cols] = median
if nb_zero_rows>0: X[nb_zero_rows,:] = median

# Update primal variable xb
Xb = 2.* X - Xold
# Difference between two iterations
diffX = np.linalg.norm(X-Xold)
# Reconstruction error
err_test = np.sqrt(np.sum((Otest*(X-Mgt))**2)) / np.sum(Otest) * (n*m)
# Plot
if not k%50:
clear_output(wait=True)
plt.figure(1)
plt.imshow(X, interpolation='nearest', cmap='jet')
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
plt.show()
print('diffX',diffX)


clear_output(wait=True)
print('Reconstruction Error: '+ str(round(err_test,5)))

# Final plot
plt.figure(2)
plt.imshow(Mgt, interpolation='nearest', cmap='jet')
plt.title('Ground truth low-rank matrix M')

plt.figure(3)
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet')
plt.title('Observed values of M')

plt.figure(4)
plt.imshow(X, interpolation='nearest', cmap='jet')
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,2)))
plt.show()


Reconstruction Error: 138.75954
<Figure size 640x480 with 1 Axes>
<Figure size 640x480 with 1 Axes>
<Figure size 640x480 with 1 Axes>
代码
文本
[ ]

代码
文本

Real-world dataset SWEETRS

代码
文本
[6]
# Load graphs of rows/users and columns/products
mat = scipy.io.loadmat('datasets/real_sweetrs_scenario1.mat')
mat = scipy.io.loadmat('datasets/real_sweetrs_scenario2.mat')
# mat = scipy.io.loadmat('datasets/real_sweetrs_scenario3.mat')
M = mat['M']
Otraining = mat['Otraining']
Otest = mat['Otest']
Wrow = mat['Wrow']
Wcol = mat['Wcol']
print('M', M.shape)
print('Otraining', Otraining.shape)
print('Otest', Otest.shape)
print('Wrow', Wrow.shape)
print('Wcol', Wcol.shape)

n,m = M.shape
print('n,m=',n,m)

Mgt = M # Ground truth
O = Otraining
M = O* Mgt
perc_obs_training = np.sum(Otraining) / (n*m)
print('perc_obs_training=',perc_obs_training)
perc_obs_test = np.sum(Otest) / (n*m)

M (664, 77)
Otraining (664, 77)
Otest (664, 77)
Wrow (664, 664)
Wcol (77, 77)
n,m= 664 77
perc_obs_training= 0.1317868878109842
代码
文本
[7]
# Visualize the original rating matrix
plt.figure(1,figsize=(10,10))
plt.imshow(Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Original rating matrix\n Percentage observed ratings: ' + str(100*np.sum(Mgt>0)/(n*m))[:5])

# Visualize the observed rating matrix
plt.figure(2, figsize=(10,10))
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Observed rating matrix\n Percentage observed ratings: ' + str(100*perc_obs_training)[:5])
plt.show()

<Figure size 1000x1000 with 2 Axes>
<Figure size 1000x1000 with 2 Axes>
代码
文本
[8]
# Collaborative filtering / low-rank approximation by nuclear norm

# Norm of the operator
OM = O*M
normOM = np.linalg.norm(OM,2)

#######################################
# Select the set of hyper-parameters
#######################################

# scenario 1 : low number of ratings, 1.3%, error metric = 744.10
lambdaNuc = normOM/4; lambdaDF = 1e3 * 1e-2

# scenario 2 : intermediate number of ratings, 13.1%, error metric = 412.01
lambdaNuc = normOM/4 * 1e2; lambdaDF = 1e3 * 1e0

# scenario 3 : large number of ratings, 52.7%, error metric = 698.97
# lambdaNuc = normOM/4 * 1e2; lambdaDF = 1e3


# Indentify zero columns and zero rows in the data matrix X
idx_zero_cols = np.where(np.sum(Otraining,axis=0)<1e-9)[0]
idx_zero_rows = np.where(np.sum(Otraining,axis=1)<1e-9)[0]
nb_zero_cols = len(idx_zero_cols)
nb_zero_rows = len(idx_zero_rows)
# Initialization
X = M; Xb = X;
Y = np.zeros([n,m])
normA = 1.
sigma = 1./normA
tau = 1./normA
diffX = 1e10
min_nm = np.min([n,m])
k = 0
while (k<2000) & ( diffX>1e-1 or k<100 ) :
# Update iteration
k += 1
# Update dual variable y
Y = Y + sigma* Xb
U,S,V = np.linalg.svd(Y/sigma)
Sdiag = shrink( S , lambdaNuc/ sigma )
I = np.array(range(min_nm))
Sshrink = np.zeros([n,m])
Sshrink[I,I] = Sdiag
Y = Y - sigma* U.dot(Sshrink.dot(V))
# Update primal variable x
Xold = X
X = X - tau* Y
X = ( X + tau* lambdaDF* O* M)/ (1 + tau* lambdaDF* O)
# Fix issue with no observations along some rows and columns
r,c = np.where(X>0.0); median = np.median(X[r,c])
if nb_zero_cols>0: X[:,idx_zero_cols] = median
if nb_zero_rows>0: X[nb_zero_rows,:] = median

# Update primal variable xb
Xb = 2.* X - Xold
# Difference between two iterations
diffX = np.linalg.norm(X-Xold)
# Reconstruction error
err_test = np.sqrt(np.sum((Otest*(X-Mgt))**2)) / np.sum(Otest) * (n*m)
# Plot
if not k%50:
clear_output(wait=True)
plt.figure(figsize=(10,10))
plt.imshow(X, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
plt.show()
print('diffX',diffX)

clear_output(wait=True)
print('Reconstruction Error: '+ str(round(err_test,5)))

# Final plots
plt.figure(2, figsize=(10,10))
plt.imshow(Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Original rating matrix\n Percentage observed ratings: ' + str(100*np.sum(Mgt>0)/(n*m))[:5])
plt.show()

plt.figure(3, figsize=(10,10))
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Observed rating matrix\n Percentage observed ratings: ' + str(100*perc_obs_training)[:5])
plt.show()

plt.figure(4, figsize=(10,10))
plt.imshow(X, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
plt.show()


Reconstruction Error: 409.76023
<Figure size 1000x1000 with 2 Axes>
<Figure size 1000x1000 with 2 Axes>
<Figure size 1000x1000 with 2 Axes>
代码
文本
[ ]

代码
文本
[ ]

代码
文本
Machine Learning
Machine Learning
点个赞吧
本文被以下合集收录
Graph Machine learning
xuxh@dp.tech
更新于 2024-10-08
44 篇0 人关注
推荐阅读
公开
Lab 04 : Hybrid recommendation
Machine Learning
Machine Learning
xuxh@dp.tech
更新于 2024-10-15
公开
Lab 01 : PageRank
Machine Learning
Machine Learning
xuxh@dp.tech
更新于 2024-10-15