空间站广场

论文

Notebooks

比赛

课程

Apps

我的主页

我的Notebooks

我的论文库

我的足迹

我的工作空间

任务

节点

文件

数据集

镜像

项目

数据库

公开

Lab 02 : Collaborative recommendation

Machine Learning

xuxh@dp.tech

更新于 2024-10-15

推荐镜像 :Basic Image:bohrium-notebook:2023-04-07

推荐机型 :c2_m4_cpu

Lecture : Recommendation on Graphs

Lab 02 : Collaborative recommendation

Xavier Bresson

Synthetic dataset

Real-world dataset SWEETRS

Lecture : Recommendation on Graphs

Lab 02 : Collaborative recommendation

Xavier Bresson

代码

文本

[1]

# For Google Colaboratory

import sys, os

if 'google.colab' in sys.modules:

# mount google drive

from google.colab import drive

drive.mount('/content/gdrive')

path_to_file = '/content/gdrive/My Drive/GML2023_codes/codes/05_Recommendation'

print(path_to_file)

# change current path to the folder containing "path_to_file"

os.chdir(path_to_file)

!pwd

代码

文本

[2]

# Load libraries

import numpy as np

import scipy.io

%matplotlib inline

#%matplotlib notebook

from IPython.display import display, clear_output

from matplotlib import pyplot

import matplotlib.pyplot as plt

plt.rcParams.update({'figure.max_open_warning': 0})

import time

import sys; sys.path.insert(0, 'lib/')

from lib.utils import shrink

import scipy.sparse.linalg

import warnings; warnings.filterwarnings("ignore")

代码

文本

Synthetic dataset

代码

文本

[3]

# Load graphs of rows/users and columns/movies

# Reads every array of the synthetic dataset from a MATLAB .mat file.

mat = scipy.io.loadmat('datasets/synthetic_netflix.mat')

M = mat['M']

# Presumably 0/1 masks selecting the training / test entries of M -- TODO confirm against the dataset

Otraining = mat['Otraining']

Otest = mat['Otest']

# Wrow / Wcol are loaded here but are not used by the cells visible on this page

Wrow = mat['Wrow']

Wcol = mat['Wcol']

n,m = M.shape

print('n,m=',n,m)

Mgt = M # Ground truth

O = Otraining

# From here on M holds only the training entries of the ground truth
# NOTE(review): assumes dense arrays, so that * is an element-wise product

M = O* Mgt

# Stored as a fraction of the n*m entries (the run shown prints 0.03), not as a percentage

perc_obs_training = np.sum(Otraining) / (n*m)

print('perc_obs_training=',perc_obs_training)

n,m= 150 200
perc_obs_training= 0.03

代码

文本

[4]

# Visualize the rating matrix

# Figure 1: the full ground-truth matrix Mgt

plt.figure(1)

plt.imshow(Mgt, interpolation='nearest', cmap='jet')

plt.title('Low-rank Matrix M.\nNote: We NEVER observe it\n in real-world applications')

plt.show()

# Figure 2: ground truth multiplied by the training mask, i.e. only the training entries

plt.figure(2)

plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet')

# perc_obs_training is a fraction of the entries, so the title shows e.g. 0.03 rather than 3

plt.title('Observed values of M\n for TRAINING.\n Percentage=' + str(perc_obs_training))

plt.show()

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

代码

文本

[5]

# Collaborative filtering / low-rank approximation by nuclear norm
#
# Iterative scheme with three variables: the dual variable Y (updated through
# an SVD followed by `shrink` on the singular values), the primal variable X
# (updated with a data-fidelity step on the entries selected by the mask O),
# and the extrapolated primal variable Xb = 2*X - Xold.
# NOTE(review): `shrink` comes from lib.utils and is presumably a
# soft-thresholding operator -- confirm in lib/utils.py.

# Norm of the operator (matrix 2-norm of the observed data)
OM = O*M
normOM = np.linalg.norm(OM,2)

#######################################
# Select the set of hyper-parameters
#######################################

# scenario : very low number of ratings, 3% (perc_obs_training = 0.03), error metric = 138.75
# NOTE: this error value was recorded before the empty-row fix further below;
# it can change if some rows have no training rating.
lambdaNuc = normOM/4
lambdaDF = 1e3 * 1e-2

# Identify zero columns and zero rows in the data matrix X
# (columns / rows of the training mask whose sum is zero, i.e. no observation)
idx_zero_cols = np.where(np.sum(Otraining,axis=0)<1e-9)[0]
idx_zero_rows = np.where(np.sum(Otraining,axis=1)<1e-9)[0]
nb_zero_cols = len(idx_zero_cols)
nb_zero_rows = len(idx_zero_rows)

# Initialization
X = M
Xb = X
Y = np.zeros([n,m])
normA = 1.
sigma = 1./normA
tau = 1./normA
diffX = 1e10
min_nm = np.min([n,m])
# Indices of the leading diagonal of an n x m matrix (loop-invariant)
I = np.arange(min_nm)
k = 0

# Stop after 2000 iterations or once two successive iterates are close enough
while k < 2000 and diffX > 1e-1:

    # Update iteration
    k += 1

    # Update dual variable y
    Y = Y + sigma* Xb
    U,S,V = np.linalg.svd(Y/sigma)
    Sdiag = shrink( S , lambdaNuc/ sigma )
    # Rebuild the rectangular n x m "diagonal" matrix of processed singular values
    Sshrink = np.zeros([n,m])
    Sshrink[I,I] = Sdiag
    Y = Y - sigma* U.dot(Sshrink.dot(V))

    # Update primal variable x
    Xold = X
    X = X - tau* Y
    X = ( X + tau* lambdaDF* O* M)/ (1 + tau* lambdaDF* O)

    # Fix issue with no observations along some rows and columns:
    # fill them with the median of the strictly positive entries of X.
    if nb_zero_cols > 0 or nb_zero_rows > 0:
        median = np.median(X[X > 0.0])
        if nb_zero_cols > 0:
            X[:,idx_zero_cols] = median
        if nb_zero_rows > 0:
            # Index with the row indices; indexing with the count
            # (nb_zero_rows) would overwrite a single unrelated row.
            X[idx_zero_rows,:] = median

    # Update primal variable xb
    Xb = 2.* X - Xold

    # Difference between two iterations
    diffX = np.linalg.norm(X-Xold)

    # Reconstruction error on the entries selected by Otest
    err_test = np.sqrt(np.sum((Otest*(X-Mgt))**2)) / np.sum(Otest) * (n*m)

    # Plot every 50 iterations
    if k % 50 == 0:
        clear_output(wait=True)
        plt.figure(1)
        plt.imshow(X, interpolation='nearest', cmap='jet')
        plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
        plt.show()
        print('diffX',diffX)

clear_output(wait=True)
print('Reconstruction Error: '+ str(round(err_test,5)))

# Final plot
plt.figure(2)
plt.imshow(Mgt, interpolation='nearest', cmap='jet')
plt.title('Ground truth low-rank matrix M')
plt.figure(3)
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet')
plt.title('Observed values of M')
plt.figure(4)
plt.imshow(X, interpolation='nearest', cmap='jet')
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,2)))
plt.show()

Reconstruction Error: 138.75954

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

代码

文本

[ ]

代码

文本

Real-world dataset SWEETRS

代码

文本

[6]

# Load graphs of rows/users and columns/products
#
# Exactly one scenario file is loaded; uncomment the one to use. Loading
# scenario 1 and then immediately overwriting `mat` with scenario 2 was a
# wasted read, so the unused loads are kept as comments.
# mat = scipy.io.loadmat('datasets/real_sweetrs_scenario1.mat')
mat = scipy.io.loadmat('datasets/real_sweetrs_scenario2.mat')
# mat = scipy.io.loadmat('datasets/real_sweetrs_scenario3.mat')

M = mat['M']
# Presumably 0/1 masks selecting the training / test entries of M -- TODO confirm
Otraining = mat['Otraining']
Otest = mat['Otest']
# Wrow / Wcol are loaded and their shapes printed, but they are not used by
# the cells visible on this page
Wrow = mat['Wrow']
Wcol = mat['Wcol']
print('M', M.shape)
print('Otraining', Otraining.shape)
print('Otest', Otest.shape)
print('Wrow', Wrow.shape)
print('Wcol', Wcol.shape)

n,m = M.shape
print('n,m=',n,m)

Mgt = M # Ground truth
O = Otraining
# From here on M holds only the training entries of the ground truth
# NOTE(review): assumes dense arrays, so that * is an element-wise product
M = O* Mgt
# Both ratios are fractions of the n*m entries, not percentages
perc_obs_training = np.sum(Otraining) / (n*m)
print('perc_obs_training=',perc_obs_training)
# Computed for reference; it is not printed in this cell
perc_obs_test = np.sum(Otest) / (n*m)

M (664, 77)
Otraining (664, 77)
Otest (664, 77)
Wrow (664, 664)
Wcol (77, 77)
n,m= 664 77
perc_obs_training= 0.1317868878109842

代码

文本

[7]

# Visualize the original rating matrix

plt.figure(1,figsize=(10,10))

plt.imshow(Mgt, interpolation='nearest', cmap='jet', aspect=0.1)

# `shrink` here is matplotlib's colorbar keyword, unrelated to lib.utils.shrink

plt.colorbar(shrink=0.65)

# Percentage of strictly positive entries of Mgt; the string is cut to its first 5 characters

plt.title('Original rating matrix\n Percentage observed ratings: ' + str(100*np.sum(Mgt>0)/(n*m))[:5])

# Visualize the observed rating matrix

plt.figure(2, figsize=(10,10))

plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet', aspect=0.1)

plt.colorbar(shrink=0.65)

# perc_obs_training is a fraction, hence the factor 100; again cut to 5 characters

plt.title('Observed rating matrix\n Percentage observed ratings: ' + str(100*perc_obs_training)[:5])

plt.show()

<Figure size 1000x1000 with 2 Axes>

<Figure size 1000x1000 with 2 Axes>

代码

文本

[8]

# Collaborative filtering / low-rank approximation by nuclear norm
#
# Iterative scheme with three variables: the dual variable Y (updated through
# an SVD followed by `shrink` on the singular values), the primal variable X
# (updated with a data-fidelity step on the entries selected by the mask O),
# and the extrapolated primal variable Xb = 2*X - Xold.
# NOTE(review): `shrink` comes from lib.utils and is presumably a
# soft-thresholding operator -- confirm in lib/utils.py.

# Norm of the operator (matrix 2-norm of the observed data)
OM = O*M
normOM = np.linalg.norm(OM,2)

#######################################
# Select the set of hyper-parameters
#######################################
# Keep exactly one scenario active, matching the dataset file that was loaded.
# NOTE: the error values quoted below were recorded before the empty-row fix
# further down; they can change if some rows have no training rating.

# scenario 1 : low number of ratings, 1.3%, error metric = 744.10
# lambdaNuc = normOM/4; lambdaDF = 1e3 * 1e-2

# scenario 2 : intermediate number of ratings, 13.1%, error metric = 412.01
lambdaNuc = normOM/4 * 1e2
lambdaDF = 1e3 * 1e0

# scenario 3 : large number of ratings, 52.7%, error metric = 698.97
# lambdaNuc = normOM/4 * 1e2; lambdaDF = 1e3

# Identify zero columns and zero rows in the data matrix X
# (columns / rows of the training mask whose sum is zero, i.e. no observation)
idx_zero_cols = np.where(np.sum(Otraining,axis=0)<1e-9)[0]
idx_zero_rows = np.where(np.sum(Otraining,axis=1)<1e-9)[0]
nb_zero_cols = len(idx_zero_cols)
nb_zero_rows = len(idx_zero_rows)

# Initialization
X = M
Xb = X
Y = np.zeros([n,m])
normA = 1.
sigma = 1./normA
tau = 1./normA
diffX = 1e10
min_nm = np.min([n,m])
# Indices of the leading diagonal of an n x m matrix (loop-invariant)
I = np.arange(min_nm)
k = 0

# Run at least 100 iterations, then stop once two successive iterates are
# close enough; never exceed 2000 iterations.
while k < 2000 and (diffX > 1e-1 or k < 100):

    # Update iteration
    k += 1

    # Update dual variable y
    Y = Y + sigma* Xb
    U,S,V = np.linalg.svd(Y/sigma)
    Sdiag = shrink( S , lambdaNuc/ sigma )
    # Rebuild the rectangular n x m "diagonal" matrix of processed singular values
    Sshrink = np.zeros([n,m])
    Sshrink[I,I] = Sdiag
    Y = Y - sigma* U.dot(Sshrink.dot(V))

    # Update primal variable x
    Xold = X
    X = X - tau* Y
    X = ( X + tau* lambdaDF* O* M)/ (1 + tau* lambdaDF* O)

    # Fix issue with no observations along some rows and columns:
    # fill them with the median of the strictly positive entries of X.
    if nb_zero_cols > 0 or nb_zero_rows > 0:
        median = np.median(X[X > 0.0])
        if nb_zero_cols > 0:
            X[:,idx_zero_cols] = median
        if nb_zero_rows > 0:
            # Index with the row indices; indexing with the count
            # (nb_zero_rows) would overwrite a single unrelated row.
            X[idx_zero_rows,:] = median

    # Update primal variable xb
    Xb = 2.* X - Xold

    # Difference between two iterations
    diffX = np.linalg.norm(X-Xold)

    # Reconstruction error on the entries selected by Otest
    err_test = np.sqrt(np.sum((Otest*(X-Mgt))**2)) / np.sum(Otest) * (n*m)

    # Plot every 50 iterations
    if k % 50 == 0:
        clear_output(wait=True)
        plt.figure(figsize=(10,10))
        plt.imshow(X, interpolation='nearest', cmap='jet', aspect=0.1)
        plt.colorbar(shrink=0.65)
        plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
        plt.show()
        print('diffX',diffX)

clear_output(wait=True)
print('Reconstruction Error: '+ str(round(err_test,5)))

# Final plots
plt.figure(2, figsize=(10,10))
plt.imshow(Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Original rating matrix\n Percentage observed ratings: ' + str(100*np.sum(Mgt>0)/(n*m))[:5])
plt.show()

plt.figure(3, figsize=(10,10))
plt.imshow(Otraining*Mgt, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Observed rating matrix\n Percentage observed ratings: ' + str(100*perc_obs_training)[:5])
plt.show()

plt.figure(4, figsize=(10,10))
plt.imshow(X, interpolation='nearest', cmap='jet', aspect=0.1)
plt.colorbar(shrink=0.65)
plt.title('Collaborative Filtering\nIteration='+ str(k)+'\nReconstruction Error= '+ str(round(err_test,5)))
plt.show()

Reconstruction Error: 409.76023

<Figure size 1000x1000 with 2 Axes>

<Figure size 1000x1000 with 2 Axes>

<Figure size 1000x1000 with 2 Axes>

代码

文本

[ ]

代码

文本

[ ]

代码

文本

Machine Learning

点个赞吧

本文被以下合集收录

Graph Machine learning

xuxh@dp.tech

更新于 2024-10-08

44 篇0 人关注