新建
Lab 04 : Graph SVM
xuxh@dp.tech
推荐镜像 :Basic Image:bohrium-notebook:2023-04-07
推荐机型 :c2_m4_cpu
赞
目录
Lecture : Graph SVM
Lab 04 : Graph SVM
Xavier Bresson
代码
文本
[1]
# For Google Colaboratory
import sys, os
if 'google.colab' in sys.modules:
# mount google drive
from google.colab import drive
drive.mount('/content/gdrive')
path_to_file = '/content/gdrive/My Drive/GML2023_codes/codes/04_Graph_SVM'
print(path_to_file)
# change current path to the folder containing "path_to_file"
os.chdir(path_to_file)
!pwd
代码
文本
[2]
# Load libraries
import numpy as np
import scipy.io
%matplotlib inline
#%matplotlib notebook
from matplotlib import pyplot
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
plt.rcParams.update({'figure.max_open_warning': 0})
import time
import sys; sys.path.insert(0, 'lib/')
from lib.utils import compute_purity
from lib.utils import compute_SVM
from lib.utils import construct_knn_graph
from lib.utils import graph_laplacian
import warnings; warnings.filterwarnings("ignore")
import sklearn.metrics.pairwise
代码
文本
[3]
# Dataset: load the two-moons semi-supervised data from the .mat file
mat = scipy.io.loadmat('datasets/data_twomoons_graphSVM.mat')

# Training features, ground-truth classes and the label vector used for training.
# The "- 1" shifts the stored class ids down by one (presumably 1-based in the file).
Xtrain = mat['Xtrain']
Cgt_train = (mat['Cgt_train'] - 1).squeeze()
l_train = mat['l'].squeeze()
nb_labeled_data_per_class = mat['nb_labeled_data_per_class'].squeeze()

# Problem sizes: number of training points, feature dimension, number of classes
n, d = Xtrain.shape
nc = np.unique(Cgt_train).size
print(n,d,nc)

# Test features and ground-truth classes (same "- 1" shift as above)
Xtest = mat['Xtest']
Cgt_test = (mat['Cgt_test'] - 1).squeeze()

# Summary of the labeling: l_train holds 0 for unlabeled points (see printed vector)
print('l_train:',l_train)
print('number of labeled data per class:',nb_labeled_data_per_class)
print('number of unlabeled data:',n-2*nb_labeled_data_per_class)
500 2 2 l_train: [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] number of labeled data per class: 1 number of unlabeled data: 498
代码
文本
[4]
# Plot the training set (coloured by l_train) next to the test set
# (coloured by its ground-truth class).
# The colormap is passed explicitly with cmap='jet'. The previous
# `color=pyplot.jet()` only worked by side effect: pyplot.jet() switches the
# global default colormap and returns None, which was then passed as `color`.
# The marker size is a scalar so it is valid whatever the number of points in
# each panel (the old array was always sized by the training count n).
size_vertex_plot = 33
plt.figure(figsize=(12,4))

# Left panel: training points
p1 = plt.subplot(121)
plt.scatter(Xtrain[:,0], Xtrain[:,1], s=size_vertex_plot, c=l_train, cmap='jet')
plt.title('Training Data: Labeled Data in red (first class)\n and blue (second class), \n and unlabeled Data in green (data geometry)')
plt.colorbar()

# Right panel: test points
p2 = plt.subplot(122)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=Cgt_test, cmap='jet')
plt.title('Test Data')
plt.colorbar()

#plt.tight_layout()
plt.show()
<Figure size 1200x400 with 4 Axes>
代码
文本
Run kernel SVM
代码
文本
[5]
# Run kernel SVM
# Iterative solver for the dual variables alpha (box-constrained to [0, lamb])
# and the scalar beta, using only the Gaussian kernel (no graph term).
# Every 100 iterations, and at convergence, the test score and the test
# classification are plotted.
import inspect
import scipy.sparse.linalg  # used below; do not rely on another package importing it

# Compute Gaussian kernel (train/train and train/test), L, Q
sigma = 0.5; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)
l = l_train
L = np.diag(l)
Q = L.dot(Ker.dot(L))

# Time steps
tau_alpha = 10/ np.linalg.norm(Q,2)
tau_beta = 0.1/ np.linalg.norm(L,2)

# For conjugate gradient: system matrix of the alpha update
Acg = tau_alpha* Q + np.eye(n)

# SciPy 1.12 introduced `rtol` as the replacement for cg's `tol` keyword and
# SciPy 1.14 removed `tol`; use whichever keyword the installed SciPy accepts.
cg_tol = {('rtol' if 'rtol' in inspect.signature(scipy.sparse.linalg.cg).parameters else 'tol'): 1e-3}

# Pre-compute L.K(Xtest) for test data
LKXtest = L.dot(KXtest)

# Error parameter
lamb = 3 # acc: 95.4

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha.copy()

# Loop
k = 0
diff_alpha = 1e6
num_iter = 201
while diff_alpha > 1e-3 and k < num_iter:

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, maxiter=50, **cg_tol)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition (snapshot alpha so later in-place edits cannot alias it)
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha.copy()

    # Plot
    if k % 100 == 0 or diff_alpha < 1e-3:

        # Indicator function of support vectors:
        # entries of alpha above 25% of the largest entry
        idx = np.where( np.abs(alpha)>0.25* np.max(np.abs(alpha)) )
        Isv = np.zeros([n]); Isv[idx] = 1
        nb_sv = len(Isv.nonzero()[0])

        # Offset: residual l - K L alpha averaged over the support vectors
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(L.dot(alpha)) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = alpha.T.dot(LKXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

        # Plot (explicit colormap, scalar marker size; '\\phi' avoids the
        # invalid '\p' escape while producing the same title text)
        size_vertex_plot = 33
        plt.figure(figsize=(12,4))
        p1 = plt.subplot(121)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=f_test, cmap='jet')
        plt.title('Score function $s(x)=w^T\\phi(x)+b$ \n iter=' + str(k)+ ', diff_alpha=' + str(diff_alpha)[:7])
        plt.colorbar()
        p2 = plt.subplot(122)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=C_test, cmap='jet')
        plt.title('Classification function $f(x)=sign(w^T\\phi(x)+b)$\n iter=' + str(k) + ', acc=' + str(accuracy_test)[:5])
        #plt.tight_layout()
        plt.colorbar()
        plt.show()

        # Replace intermediate figures by the next one, keep the last figure
        if k<num_iter-1:
            clear_output(wait=True)
<Figure size 1200x400 with 4 Axes>
代码
文本
Run Graph SVM
代码
文本
[6]
# Run Graph SVM
# Same iterative solver as the kernel SVM, with a k-NN graph Laplacian term:
# T = I + gamma * Lap * K and Q = L H K T^{-1} H L.
# Every 100 iterations, and at convergence, the test score and the test
# classification are plotted.
import inspect
import scipy.sparse.linalg  # used below; do not rely on another package importing it

# Compute Gaussian kernel (train/train and train/test)
sigma = 0.15; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)

# Compute kNN graph and its Laplacian
kNN = 10
gamma = 25
A = construct_knn_graph(Xtrain, kNN, 'euclidean')
Lap = graph_laplacian(A).todense()

# Compute Indicator function of labels: diagonal is 1 where l_train is non-zero
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)

# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
T += gamma* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))

# Time steps
tau_alpha = 1/ np.linalg.norm(Q,2)
tau_beta = 1/ np.linalg.norm(L,2)

# For conjugate gradient: system matrix of the alpha update
Acg = tau_alpha* Q + np.eye(n)

# SciPy 1.12 introduced `rtol` as the replacement for cg's `tol` keyword and
# SciPy 1.14 removed `tol`; use whichever keyword the installed SciPy accepts.
cg_tol = {('rtol' if 'rtol' in inspect.signature(scipy.sparse.linalg.cg).parameters else 'tol'): 1e-3}

# Error parameter
lamb = 1 # acc: 98.6

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha.copy()

# Loop
k = 0
diff_alpha = 1e6
num_iter = 201
while diff_alpha > 1e-3 and k < num_iter:

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, maxiter=50, **cg_tol)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition (snapshot alpha so later in-place edits cannot alias it)
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha.copy()

    # Plot
    if k % 100 == 0 or diff_alpha < 1e-3:

        # xi vector
        xi = Tinv.dot(H.dot(L.dot(alpha)))

        # Offset
        # Work on a copy: the original aliased alpha here, so zeroing the
        # unlabeled entries silently modified the optimisation variable (and
        # alpha_old) and disturbed the next iterations' stopping test.
        idx_unlabeled_data = np.where( np.abs(l)<1./2 )
        alpha_labels = alpha.copy(); alpha_labels[idx_unlabeled_data] = 0
        idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
        Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
        nb_sv = len(Isv.nonzero()[0])
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(np.squeeze(np.array(xi))) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = xi.dot(KXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

        # Plot (explicit colormap, scalar marker size; '\\phi' avoids the
        # invalid '\p' escape while producing the same title text)
        size_vertex_plot = 33
        plt.figure(figsize=(12,4))
        p1 = plt.subplot(121)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=f_test, cmap='jet')
        plt.title('Score function $s(x)=w^T\\phi(x)+b$ \n iter=' + str(k)+ ', diff_alpha=' + str(diff_alpha)[:7])
        plt.colorbar()
        p2 = plt.subplot(122)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=C_test, cmap='jet')
        plt.title('Classification function $f(x)=sign(w^T\\phi(x)+b)$\n iter=' + str(k) + ', acc=' + str(accuracy_test)[:5])
        #plt.tight_layout()
        plt.colorbar()
        plt.show()

        # Replace intermediate figures by the next one, keep the last figure
        if k<num_iter-1:
            clear_output(wait=True)
k-NN graph with euclidean distance
<Figure size 1200x400 with 4 Axes>
代码
文本
Real-world graph of articles
The dataset has 10 labeled data points and 40 unlabeled data points.
代码
文本
[7]
# Dataset: load the articles data (file name suggests 20 Newsgroups with
# 10 labeled and 40 unlabeled training points)
mat = scipy.io.loadmat('datasets/data_20news_10labels_40unlabels.mat')

# Training features and label vector (0 marks an unlabeled point, see printed vector)
Xtrain = mat['Xtrain']
l_train = mat['l'].squeeze()
n, d = Xtrain.shape

# Test features and ground-truth classes; "- 1" shifts the stored class ids down by one
Xtest = mat['Xtest']
Cgt_test = (mat['Cgt_test'] - 1).squeeze()
nc = np.unique(Cgt_test).size
print(n,d,nc)

# Number of labeled training points = non-zero entries of l_train
num_labels = (np.abs(l_train) > 0.0).sum()
print('l_train:',l_train)
print('number of labeled data per class:',num_labels//2)
print('number of unlabeled data:',n-num_labels)
50 3684 2 l_train: [-1 0 -1 -1 -1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 -1 0 0 0 0 0 0 1 1 0] number of labeled data per class: 5 number of unlabeled data: 40
代码
文本
Run Kernel SVM (no graph information)
代码
文本
[8]
# Run Kernel SVM (no graph information)
# Same solver as the graph SVM with gamma = 0, so T = I and the Laplacian
# term vanishes. The test accuracy is evaluated every 100 iterations and at
# convergence; the last evaluated value is printed after the loop.
import inspect
import scipy.sparse.linalg  # used below; do not rely on another package importing it

# Compute Gaussian kernel (train/train and train/test)
sigma = 0.5; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)

# Compute kNN graph (built for symmetry with the graph SVM cell; unused since gamma = 0)
kNN = 5
gamma = 0 # <= no graph information
A = construct_knn_graph(Xtrain, kNN, 'cosine')
Lap = graph_laplacian(A).todense()

# Compute Indicator function of labels: diagonal is 1 where l_train is non-zero
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)

# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
T += gamma* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))

# Time steps
tau_alpha = 1/ np.linalg.norm(Q,2)
tau_beta = 1/ np.linalg.norm(L,2)

# For conjugate gradient: system matrix of the alpha update
Acg = tau_alpha* Q + 1* np.eye(n)

# SciPy 1.12 introduced `rtol` as the replacement for cg's `tol` keyword and
# SciPy 1.14 removed `tol`; use whichever keyword the installed SciPy accepts.
cg_tol = {('rtol' if 'rtol' in inspect.signature(scipy.sparse.linalg.cg).parameters else 'tol'): 1e-3}

# Error parameter
lamb = 100

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha.copy()

# Loop
k = 0
diff_alpha = 1e6
num_iter = 1001
while diff_alpha > 1e-3 and k < num_iter:

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, maxiter=50, **cg_tol)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition (snapshot alpha so later in-place edits cannot alias it)
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha.copy()

    # Evaluate on the test set
    if k % 100 == 0 or diff_alpha < 1e-3:

        # xi vector
        xi = Tinv.dot(H.dot(L.dot(alpha)))

        # Offset
        # Work on a copy: the original aliased alpha here, so zeroing the
        # unlabeled entries silently modified the optimisation variable (and
        # alpha_old) and disturbed the next iterations' stopping test.
        idx_unlabeled_data = np.where( np.abs(l)<1./2 )
        alpha_labels = alpha.copy(); alpha_labels[idx_unlabeled_data] = 0
        idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
        Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
        nb_sv = len(Isv.nonzero()[0])
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(np.squeeze(np.array(xi))) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = xi.dot(KXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

# Print the final iteration count, last alpha change and last evaluated accuracy
print('Kernel SVM iter, diff_alpha :',str(k),str(diff_alpha)[:7])
print(' acc :',str(accuracy_test)[:5])
k-NN graph with cosine distance Kernel SVM iter, diff_alpha : 1001 0.68597 acc : 65.5
代码
文本
Run Graph SVM
代码
文本
[9]
# Run Graph SVM
# Iterative solver with a cosine k-NN graph Laplacian term:
# T = I + gamma * Lap * K and Q = L H K T^{-1} H L.
# The test accuracy is evaluated every 100 iterations and at convergence;
# the last evaluated value is printed after the loop.
import inspect
import scipy.sparse.linalg  # used below; do not rely on another package importing it

# Compute Gaussian kernel (train/train and train/test)
sigma = 0.5; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)

# Compute kNN graph and its Laplacian
kNN = 8
gamma = 100
A = construct_knn_graph(Xtrain, kNN, 'cosine')
Lap = graph_laplacian(A).todense()

# Compute Indicator function of labels: diagonal is 1 where l_train is non-zero
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)

# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
T += gamma* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))

# Time steps
tau_alpha = 1/ np.linalg.norm(Q,2)
tau_beta = 1/ np.linalg.norm(L,2)

# For conjugate gradient: system matrix of the alpha update
Acg = tau_alpha* Q + 1* np.eye(n)

# SciPy 1.12 introduced `rtol` as the replacement for cg's `tol` keyword and
# SciPy 1.14 removed `tol`; use whichever keyword the installed SciPy accepts.
cg_tol = {('rtol' if 'rtol' in inspect.signature(scipy.sparse.linalg.cg).parameters else 'tol'): 1e-3}

# Error parameter
lamb = 1

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha.copy()

# Loop
k = 0
diff_alpha = 1e6
num_iter = 1001
while diff_alpha > 1e-3 and k < num_iter:

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, maxiter=50, **cg_tol)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition (snapshot alpha so later in-place edits cannot alias it)
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha.copy()

    # Evaluate on the test set
    if k % 100 == 0 or diff_alpha < 1e-3:

        # xi vector
        xi = Tinv.dot(H.dot(L.dot(alpha)))

        # Offset
        # Work on a copy: the original aliased alpha here, so zeroing the
        # unlabeled entries silently modified the optimisation variable (and
        # alpha_old) and disturbed the next iterations' stopping test.
        idx_unlabeled_data = np.where( np.abs(l)<1./2 )
        alpha_labels = alpha.copy(); alpha_labels[idx_unlabeled_data] = 0
        idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
        Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
        nb_sv = len(Isv.nonzero()[0])
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(np.squeeze(np.array(xi))) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = xi.dot(KXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

# Print the final iteration count, last alpha change and last evaluated accuracy
print('Graph SVM iter, diff_alpha :',str(k),str(diff_alpha)[:7])
print(' acc :',str(accuracy_test)[:5])
k-NN graph with cosine distance Graph SVM iter, diff_alpha : 2 0.0 acc : 78.5
代码
文本
Plot graph of test data points
代码
文本
[10]
# Plot graph of test data points:
# build a cosine k-NN graph on Xtest and draw it with networkx, colouring
# each node by C_test (the classification left by the last SVM cell that ran).
import networkx as nx

kNN = 8
A = construct_knn_graph(Xtest, kNN, 'cosine')
print(type(A),A.shape)

# Remove self-loops: zero the diagonal, then drop the explicit zeros it leaves
A.setdiag(0)
A.eliminate_zeros()

# Convert the sparse adjacency matrix to a networkx graph and draw it
G_nx = nx.from_scipy_sparse_array(A)
plt.figure(figsize=[40,40])
nx.draw_networkx(
    G_nx,
    with_labels=True,
    node_color=np.array(C_test),
    cmap='jet',
)
k-NN graph with cosine distance <class 'scipy.sparse._csr.csr_matrix'> (200, 200)
<Figure size 4000x4000 with 1 Axes>
代码
文本
[ ]
代码
文本
[ ]
代码
文本
点个赞吧
本文被以下合集收录
Graph Machine learning
xuxh@dp.tech
更新于 2024-10-08
44 篇0 人关注
推荐阅读
公开
Lab 03 : Kernel/Non-Linear SVMxuxh@dp.tech
更新于 2024-10-15
公开
Lab 01 : Standard/Linear SVMxuxh@dp.tech
更新于 2024-10-15