Lecture : Recommendation on Graphs
Lab 02 : Collaborative recommendation
Xavier Bresson
xuxh@dp.tech
Recommended image: Basic Image: bohrium-notebook:2023-04-07
Recommended machine type: c2_m4_cpu
[1]
# For Google Colaboratory
import sys, os
if 'google.colab' in sys.modules:
    # mount google drive
    from google.colab import drive
    drive.mount('/content/gdrive')
    path_to_file = '/content/gdrive/My Drive/GML2023_codes/codes/05_Recommendation'
    print(path_to_file)
    # change current path to the folder containing "path_to_file"
    os.chdir(path_to_file)
    !pwd
[2]
# Load libraries
import numpy as np
import scipy.io
%matplotlib inline
#%matplotlib notebook
from IPython.display import display, clear_output
from matplotlib import pyplot
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import time
import sys; sys.path.insert(0, 'lib/')
from lib.utils import shrink
import scipy.sparse.linalg
import warnings; warnings.filterwarnings("ignore")
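The shrink function imported from lib/utils.py is used later to soft-threshold singular values. Its source is not shown in this notebook; a minimal sketch of the soft-thresholding operator it is expected to compute (an assumption, the library implementation may differ) is:

import numpy as np

def shrink_sketch(s, tau):
    # hypothetical stand-in for lib.utils.shrink:
    # soft-thresholding, i.e. move each entry of s toward zero by tau and clip at zero
    return np.sign(s) * np.maximum(np.abs(s) - tau, 0.0)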
Synthetic dataset
[3]
# Load graphs of rows/users and columns/movies
mat = scipy.io.loadmat('datasets/synthetic_netflix.mat')
M = mat['M']
Otraining = mat['Otraining']
Otest = mat['Otest']
Wrow = mat['Wrow']
Wcol = mat['Wcol']
n,m = M.shape
print('n,m=',n,m)
Mgt = M # Ground truth
O = Otraining
M = O* Mgt
perc_obs_training = np.sum(Otraining) / (n*m)
print('perc_obs_training=',perc_obs_training)
n,m= 150 200
perc_obs_training= 0.03
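As a quick sanity check (added here, not part of the original lab), the singular values of the ground-truth matrix can be inspected to confirm that Mgt is indeed low-rank; only numpy and the variables defined in the cell above are used:

import numpy as np

s = np.linalg.svd(Mgt, compute_uv=False)               # singular values, largest first
print('numerical rank of Mgt:', int(np.sum(s > 1e-6 * s[0])))
print('largest 10 singular values:', np.round(s[:10], 2))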
[4]
# Visualize the rating matrix
plt.figure(1)
plt.imshow(Mgt, interpolation='nearest', cmap='jet')
plt.title('Low-rank Matrix M.\nNote: We NEVER observe it\n in real-world applications')
plt.show()
plt.figure(2)
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet')
plt.title('Observed values of M\n for TRAINING.\n Percentage=' + str(perc_obs_training))
plt.show()
[5]
# Collaborative filtering / low-rank approximation by nuclear norm
# Norm of the operator
OM = O*M
normOM = np.linalg.norm(OM,2)

#######################################
# Select the set of hyper-parameters
#######################################
# scenario : very low number of ratings, 3% observed (perc_obs_training = 0.03), error metric = 138.75
lambdaNuc = normOM/4; lambdaDF = 1e3 * 1e-2

# Identify zero columns and zero rows in the data matrix X
idx_zero_cols = np.where(np.sum(Otraining,axis=0)<1e-9)[0]
idx_zero_rows = np.where(np.sum(Otraining,axis=1)<1e-9)[0]
nb_zero_cols = len(idx_zero_cols)
nb_zero_rows = len(idx_zero_rows)

# Initialization
X = M; Xb = X
Y = np.zeros([n,m])
normA = 1.
sigma = 1./normA
tau = 1./normA
diffX = 1e10
min_nm = np.min([n,m])
k = 0
while (k<2000) & (diffX>1e-1):

    # Update iteration
    k += 1

    # Update dual variable y (proximal step of the nuclear norm, via SVD soft-thresholding)
    Y = Y + sigma* Xb
    U,S,V = np.linalg.svd(Y/sigma)  # note: np.linalg.svd returns V transposed
    Sdiag = shrink( S , lambdaNuc/ sigma )
    I = np.array(range(min_nm))
    Sshrink = np.zeros([n,m])
    Sshrink[I,I] = Sdiag
    Y = Y - sigma* U.dot(Sshrink.dot(V))

    # Update primal variable x (proximal step of the data-fidelity term)
    Xold = X
    X = X - tau* Y
    X = ( X + tau* lambdaDF* O* M)/ (1 + tau* lambdaDF* O)

    # Fix issue with no observations along some rows and columns
    r,c = np.where(X>0.0); median = np.median(X[r,c])
    if nb_zero_cols>0: X[:,idx_zero_cols] = median
    if nb_zero_rows>0: X[idx_zero_rows,:] = median

    # Update primal variable xb
    Xb = 2.* X - Xold

    # Difference between two iterations
    diffX = np.linalg.norm(X-Xold)

    # Reconstruction error
    err_test = np.sqrt(np.sum((Otest*(X-Mgt))**2)) / np.sum(Otest) * (n*m)

    # Plot
    if not k%50:
        clear_output(wait=True)
        plt.figure(1)
        plt.imshow(X, interpolation='nearest', cmap='jet')
        plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
        plt.show()
        print('diffX',diffX)

clear_output(wait=True)
print('Reconstruction Error: '+ str(round(err_test,5)))

# Final plots
plt.figure(2)
plt.imshow(Mgt, interpolation='nearest', cmap='jet')
plt.title('Ground truth low-rank matrix M')
plt.figure(3)
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet')
plt.title('Observed values of M')
plt.figure(4)
plt.imshow(X, interpolation='nearest', cmap='jet')
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,2)))
plt.show()
Reconstruction Error: 138.75954
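For reference (added note, not part of the original notebook; the algorithm name is an interpretation of the code): the loop above implements a primal-dual iteration of Chambolle-Pock type for the nuclear-norm matrix-completion problem

$$
\min_X \;\; \lambda_{\mathrm{Nuc}}\,\|X\|_{*} \;+\; \frac{\lambda_{\mathrm{DF}}}{2}\,\|O \odot (X-M)\|_F^2 ,
$$

where $\|X\|_{*}$ is the nuclear norm (sum of singular values), $O$ the binary training mask and $\odot$ the entrywise product. With $\sigma=\tau=1$, the dual update applies the prox of the conjugate of $\lambda_{\mathrm{Nuc}}\|\cdot\|_{*}$ through the Moreau identity, i.e. SVD soft-thresholding of $Y/\sigma$ at level $\lambda_{\mathrm{Nuc}}/\sigma$ (the shrink call); the primal update is the closed-form prox of the data-fidelity term, followed by extrapolation:

$$
X \;\leftarrow\; \frac{(X-\tau Y) + \tau\lambda_{\mathrm{DF}}\, O\odot M}{1+\tau\lambda_{\mathrm{DF}}\, O},
\qquad \bar X \;\leftarrow\; 2X - X_{\mathrm{old}} .
$$

The printed reconstruction error is exactly the quantity computed in the code, $\dfrac{nm}{\sum_{ij}O^{\mathrm{test}}_{ij}}\sqrt{\sum_{ij}\big(O^{\mathrm{test}}_{ij}(X_{ij}-M^{\mathrm{gt}}_{ij})\big)^2}$.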
Real-world dataset SWEETRS
[6]
# Load graphs of rows/users and columns/products
# mat = scipy.io.loadmat('datasets/real_sweetrs_scenario1.mat')
mat = scipy.io.loadmat('datasets/real_sweetrs_scenario2.mat')
# mat = scipy.io.loadmat('datasets/real_sweetrs_scenario3.mat')
M = mat['M']
Otraining = mat['Otraining']
Otest = mat['Otest']
Wrow = mat['Wrow']
Wcol = mat['Wcol']
print('M', M.shape)
print('Otraining', Otraining.shape)
print('Otest', Otest.shape)
print('Wrow', Wrow.shape)
print('Wcol', Wcol.shape)
n,m = M.shape
print('n,m=',n,m)
Mgt = M # Ground truth
O = Otraining
M = O* Mgt
perc_obs_training = np.sum(Otraining) / (n*m)
print('perc_obs_training=',perc_obs_training)
perc_obs_test = np.sum(Otest) / (n*m)
M (664, 77)
Otraining (664, 77)
Otest (664, 77)
Wrow (664, 664)
Wcol (77, 77)
n,m= 664 77
perc_obs_training= 0.1317868878109842
[7]
# Visualize the original rating matrix
plt.figure(1,figsize=(10,10))
plt.imshow(Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Original rating matrix\n Percentage observed ratings: ' + str(100*np.sum(Mgt>0)/(n*m))[:5])
# Visualize the observed rating matrix
plt.figure(2, figsize=(10,10))
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Observed rating matrix\n Percentage observed ratings: ' + str(100*perc_obs_training)[:5])
plt.show()
[8]
# Collaborative filtering / low-rank approximation by nuclear norm
# Norm of the operator
OM = O*M
normOM = np.linalg.norm(OM,2)

#######################################
# Select the set of hyper-parameters
#######################################
# scenario 1 : low number of ratings, 1.3%, error metric = 744.10
# lambdaNuc = normOM/4; lambdaDF = 1e3 * 1e-2
# scenario 2 : intermediate number of ratings, 13.1%, error metric = 412.01
lambdaNuc = normOM/4 * 1e2; lambdaDF = 1e3 * 1e0
# scenario 3 : large number of ratings, 52.7%, error metric = 698.97
# lambdaNuc = normOM/4 * 1e2; lambdaDF = 1e3

# Identify zero columns and zero rows in the data matrix X
idx_zero_cols = np.where(np.sum(Otraining,axis=0)<1e-9)[0]
idx_zero_rows = np.where(np.sum(Otraining,axis=1)<1e-9)[0]
nb_zero_cols = len(idx_zero_cols)
nb_zero_rows = len(idx_zero_rows)

# Initialization
X = M; Xb = X
Y = np.zeros([n,m])
normA = 1.
sigma = 1./normA
tau = 1./normA
diffX = 1e10
min_nm = np.min([n,m])
k = 0
while (k<2000) & ( diffX>1e-1 or k<100 ):

    # Update iteration
    k += 1

    # Update dual variable y (proximal step of the nuclear norm, via SVD soft-thresholding)
    Y = Y + sigma* Xb
    U,S,V = np.linalg.svd(Y/sigma)  # note: np.linalg.svd returns V transposed
    Sdiag = shrink( S , lambdaNuc/ sigma )
    I = np.array(range(min_nm))
    Sshrink = np.zeros([n,m])
    Sshrink[I,I] = Sdiag
    Y = Y - sigma* U.dot(Sshrink.dot(V))

    # Update primal variable x (proximal step of the data-fidelity term)
    Xold = X
    X = X - tau* Y
    X = ( X + tau* lambdaDF* O* M)/ (1 + tau* lambdaDF* O)

    # Fix issue with no observations along some rows and columns
    r,c = np.where(X>0.0); median = np.median(X[r,c])
    if nb_zero_cols>0: X[:,idx_zero_cols] = median
    if nb_zero_rows>0: X[idx_zero_rows,:] = median

    # Update primal variable xb
    Xb = 2.* X - Xold

    # Difference between two iterations
    diffX = np.linalg.norm(X-Xold)

    # Reconstruction error
    err_test = np.sqrt(np.sum((Otest*(X-Mgt))**2)) / np.sum(Otest) * (n*m)

    # Plot
    if not k%50:
        clear_output(wait=True)
        plt.figure(figsize=(10,10))
        plt.imshow(X, interpolation='nearest', cmap='jet', aspect=0.1)
        plt.colorbar(shrink=0.65)
        plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
        plt.show()
        print('diffX',diffX)

clear_output(wait=True)
print('Reconstruction Error: '+ str(round(err_test,5)))

# Final plots
plt.figure(2, figsize=(10,10))
plt.imshow(Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Original rating matrix\n Percentage observed ratings: ' + str(100*np.sum(Mgt>0)/(n*m))[:5])
plt.show()
plt.figure(3, figsize=(10,10))
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Observed rating matrix\n Percentage observed ratings: ' + str(100*perc_obs_training)[:5])
plt.show()
plt.figure(4, figsize=(10,10))
plt.imshow(X, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
plt.show()
Reconstruction Error: 409.76023
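Once the filled-in matrix X is available, it can be turned into actual recommendations. The sketch below is not part of the original lab; it only assumes the variables X and Otraining from the cells above, masks the items a user already rated in the training set, and returns the indices of the k highest predicted ratings.

import numpy as np

def recommend_top_k(X, Otraining, user, k=5):
    # predicted ratings for this user, with already-observed training items hidden
    scores = np.asarray(X[user], dtype=float).ravel()
    observed = np.asarray(Otraining[user]).ravel() > 0
    scores[observed] = -np.inf
    # indices of the k largest remaining scores, best first
    return np.argsort(-scores)[:k]

print('Top-5 recommended item indices for user 0:', recommend_top_k(X, Otraining, user=0))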