空间站广场

论文

Notebooks

比赛

课程

Apps

我的主页

我的Notebooks

我的论文库

我的足迹

我的工作空间

任务

节点

文件

数据集

镜像

项目

数据库

公开

Lab 04 : Graph SVM

Machine Learning

xuxh@dp.tech

更新于 2024-10-15

推荐镜像 :Basic Image:bohrium-notebook:2023-04-07

推荐机型 :c2_m4_cpu

Lecture : Graph SVM

Lab 04 : Graph SVM

Xavier Bresson

Run kernel SVM

Run Graph SVM

Real-world graph of articles

Dataset has 10 labeled data and 40 unlabeled data

Run Kernel SVM (no graph information)

Run Graph SVM

Plot graph of test data points

Lecture : Graph SVM

Lab 04 : Graph SVM

Xavier Bresson

代码

文本

[1]

# For Google Colaboratory

import sys, os

if 'google.colab' in sys.modules:

# mount google drive

from google.colab import drive

drive.mount('/content/gdrive')

path_to_file = '/content/gdrive/My Drive/GML2023_codes/codes/04_Graph_SVM'

print(path_to_file)

# change current path to the folder containing "path_to_file"

os.chdir(path_to_file)

!pwd

代码

文本

[2]

# Load libraries

import numpy as np

import scipy.io

%matplotlib inline

#%matplotlib notebook

from matplotlib import pyplot

import matplotlib.pyplot as plt

from IPython.display import display, clear_output

plt.rcParams.update({'figure.max_open_warning': 0})

import time

import sys; sys.path.insert(0, 'lib/')

from lib.utils import compute_purity

from lib.utils import compute_SVM

from lib.utils import construct_knn_graph

from lib.utils import graph_laplacian

import warnings; warnings.filterwarnings("ignore")

import sklearn.metrics.pairwise

代码

文本

[3]

# Dataset: two-moons train/test data stored in a MATLAB file
mat = scipy.io.loadmat('datasets/data_twomoons_graphSVM.mat')

# Training points, ground-truth classes and labels
Xtrain = mat['Xtrain']
# Class ids are shifted by -1 (presumably from 1-based to 0-based -- confirm
# against the .mat file) and flattened to a 1-D vector
Cgt_train = (mat['Cgt_train'] - 1).squeeze()
# Label vector used by the SVM cells; the printout below shows its content
l_train = mat['l'].squeeze()
nb_labeled_data_per_class = mat['nb_labeled_data_per_class'].squeeze()

# n = number of training points, d = feature dimension, nc = number of classes
n = Xtrain.shape[0]
d = Xtrain.shape[1]
nc = len(np.unique(Cgt_train))
print(n,d,nc)

# Test points and their ground-truth classes (same -1 shift as above)
Xtest = mat['Xtest']
Cgt_test = (mat['Cgt_test'] - 1).squeeze()

print('l_train:',l_train)
print('number of labeled data per class:',nb_labeled_data_per_class)
# Use the class count found in the data instead of a hard-coded 2
print('number of unlabeled data:',n-nc*nb_labeled_data_per_class)

500 2 2
l_train: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
number of labeled data per class: 1
number of unlabeled data: 498

代码

文本

[4]

# Plot: training labels (left panel) and test ground truth (right panel)
size_vertex_plot = 33
plt.figure(figsize=(12,4))

# The colormap is passed explicitly with cmap='jet'. The previous
# color=pyplot.jet() form handed None to scatter and only worked because
# calling pyplot.jet() switches the global default colormap as a side effect.
# A scalar marker size also avoids tying the size array to the training
# count n when plotting the test points.
p1 = plt.subplot(121)
plt.scatter(Xtrain[:,0], Xtrain[:,1], s=size_vertex_plot, c=l_train, cmap='jet')
plt.title('Training Data: Labeled Data in red (first class)\n and blue (second class), \n and unlabeled Data in green (data geometry)')
plt.colorbar()

p2 = plt.subplot(122)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=Cgt_test, cmap='jet')
plt.title('Test Data')
plt.colorbar()

#plt.tight_layout()
plt.show()

<Figure size 1200x400 with 4 Axes>

代码

文本

Run kernel SVM

代码

文本

[5]

# Run kernel SVM
# Iterative solver for the kernel SVM dual variables alpha (with a scalar
# variable beta), evaluated on the test set every 100 iterations and at
# convergence.

# cg() is called below; import its module explicitly instead of relying on
# another package having loaded it
import scipy.sparse.linalg

# Compute Gaussian kernel, L, Q
sigma = 0.5; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)        # train/train kernel
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)     # train/test kernel
l = l_train
L = np.diag(l)
Q = L.dot(Ker.dot(L))

# Time steps
tau_alpha = 10/ np.linalg.norm(Q,2)
tau_beta = 0.1/ np.linalg.norm(L,2)

# For conjugate gradient
Acg = tau_alpha* Q + np.eye(n)

# Pre-compute L.K(Xtest) for test data
LKXtest = L.dot(KXtest)

# Error parameter
lamb = 3 # acc: 95.4

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha

# Loop
k = 0
diff_alpha = 1e6
num_iter = 201
# Plain boolean 'and' (short-circuit) rather than bitwise '&' for loop control
while (diff_alpha > 1e-3) and (k < num_iter):

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    # NOTE(review): recent SciPy releases rename cg's 'tol' keyword to 'rtol';
    # 'tol' is kept here to match the recommended (2023) image -- confirm
    # against the installed SciPy version.
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, tol=1e-3, maxiter=50)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha

    # Plot
    if not(k%100) or (diff_alpha<1e-3):

        # Indicator function of support vectors: entries of alpha above
        # 25% of the largest magnitude
        idx = np.where( np.abs(alpha)>0.25* np.max(np.abs(alpha)) )
        Isv = np.zeros([n]); Isv[idx] = 1
        nb_sv = len(Isv.nonzero()[0])

        # Offset: mean residual over the support vectors
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(L.dot(alpha)) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = alpha.T.dot(LKXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

        # Plot
        # cmap='jet' replaces color=pyplot.jet() (which passed None and relied
        # on a global side effect); '\\phi' keeps the same title text without
        # the invalid '\p' escape sequence.
        size_vertex_plot = 33
        plt.figure(figsize=(12,4))
        p1 = plt.subplot(121)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=f_test, cmap='jet')
        plt.title('Score function $s(x)=w^T\\phi(x)+b$ \n iter=' + str(k)+ ', diff_alpha=' + str(diff_alpha)[:7])
        plt.colorbar()
        p2 = plt.subplot(122)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=C_test, cmap='jet')
        plt.title('Classification function $f(x)=sign(w^T\\phi(x)+b)$\n iter=' + str(k) + ', acc=' + str(accuracy_test)[:5])
        #plt.tight_layout()
        plt.colorbar()
        plt.show()
        # Replace the previous figure on the next display, except near the end
        if k<num_iter-1:
            clear_output(wait=True)

<Figure size 1200x400 with 4 Axes>

代码

文本

Run Graph SVM

代码

文本

[6]

# Run Graph SVM
# Same iterative scheme as the kernel SVM cell, with the kernel combined
# with a k-NN graph Laplacian through T = I + gamma * Lap * Ker.

# cg() is called below; import its module explicitly instead of relying on
# another package having loaded it
import scipy.sparse.linalg

# Compute Gaussian kernel
sigma = 0.15; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)        # train/train kernel
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)     # train/test kernel

# Compute kNN graph
kNN = 10
gamma = 25
A = construct_knn_graph(Xtrain, kNN, 'euclidean')
Lap = graph_laplacian(A).todense()

# Compute Indicator function of labels (1 where l_train is non-zero)
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)

# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
T += gamma* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))

# Time steps
tau_alpha = 1/ np.linalg.norm(Q,2)
tau_beta = 1/ np.linalg.norm(L,2)

# For conjugate gradient
Acg = tau_alpha* Q + np.eye(n)

# Error parameter
lamb = 1 # acc: 98.6

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha

# Loop
k = 0
diff_alpha = 1e6
num_iter = 201
# Plain boolean 'and' (short-circuit) rather than bitwise '&' for loop control
while (diff_alpha > 1e-3) and (k < num_iter):

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    # NOTE(review): recent SciPy releases rename cg's 'tol' keyword to 'rtol';
    # 'tol' is kept here to match the recommended (2023) image -- confirm
    # against the installed SciPy version.
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, tol=1e-3, maxiter=50)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha

    # Plot
    if not(k%100) or (diff_alpha<1e-3):

        # xi vector
        xi = Tinv.dot(H.dot(L.dot(alpha)))

        # Offset
        idx_unlabeled_data = np.where( np.abs(l)<1./2 )
        # Work on a copy: 'alpha_labels = alpha' would alias the iterate, so
        # zeroing the unlabeled entries would also overwrite alpha and
        # alpha_old and disturb the next iterations and the stopping test.
        alpha_labels = alpha.copy(); alpha_labels[idx_unlabeled_data] = 0
        idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
        Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
        nb_sv = len(Isv.nonzero()[0])
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(np.squeeze(np.array(xi))) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = xi.dot(KXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

        # Plot
        # cmap='jet' replaces color=pyplot.jet() (which passed None and relied
        # on a global side effect); '\\phi' keeps the same title text without
        # the invalid '\p' escape sequence.
        size_vertex_plot = 33
        plt.figure(figsize=(12,4))
        p1 = plt.subplot(121)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=f_test, cmap='jet')
        plt.title('Score function $s(x)=w^T\\phi(x)+b$ \n iter=' + str(k)+ ', diff_alpha=' + str(diff_alpha)[:7])
        plt.colorbar()
        p2 = plt.subplot(122)
        plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot, c=C_test, cmap='jet')
        plt.title('Classification function $f(x)=sign(w^T\\phi(x)+b)$\n iter=' + str(k) + ', acc=' + str(accuracy_test)[:5])
        #plt.tight_layout()
        plt.colorbar()
        plt.show()
        # Replace the previous figure on the next display, except near the end
        if k<num_iter-1:
            clear_output(wait=True)

k-NN graph with euclidean distance

<Figure size 1200x400 with 4 Axes>

代码

文本

Real-world graph of articles

Dataset has 10 labeled data and 40 unlabeled data

代码

文本

[7]

# Dataset: article data with a few labeled and many unlabeled training points
mat = scipy.io.loadmat('datasets/data_20news_10labels_40unlabels.mat')

# Training features and label vector
Xtrain = mat['Xtrain']
n = Xtrain.shape[0]            # number of training points
l_train = mat['l'].squeeze()
d = Xtrain.shape[1]            # feature dimension

# Test features and ground-truth classes, shifted by -1 (presumably from
# 1-based to 0-based ids -- confirm against the .mat file)
Xtest = mat['Xtest']
Cgt_test = (mat['Cgt_test'] - 1).squeeze()
nc = len(np.unique(Cgt_test))  # number of classes
print(n,d,nc)

# Labeled points are those with a non-zero entry in l_train
num_labels = np.sum(np.abs(l_train)>0.0)
print('l_train:',l_train)
# Divide by the class count found in the data instead of a hard-coded 2
# (assumes the labels are spread evenly over the classes)
print('number of labeled data per class:',num_labels//nc)
print('number of unlabeled data:',n-num_labels)

50 3684 2
l_train: [-1  0 -1 -1 -1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  1  0  0  0  0  0  0  1  0  0  0  0 -1  0  0  0  0  0  0  1
  1  0]
number of labeled data per class: 5
number of unlabeled data: 40

代码

文本

Run Kernel SVM (no graph information)

代码

文本

[8]

# Run Kernel SVM (no graph information)
# Same code path as the graph SVM, with gamma = 0 so that T reduces to the
# identity and the graph Laplacian has no influence.

# cg() is called below; import its module explicitly instead of relying on
# another package having loaded it
import scipy.sparse.linalg

# Compute Gaussian kernel
sigma = 0.5; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)        # train/train kernel
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)     # train/test kernel

# Compute kNN graph
kNN = 5
gamma = 0 # <= no graph information
A = construct_knn_graph(Xtrain, kNN, 'cosine')
Lap = graph_laplacian(A).todense()

# Compute Indicator function of labels (1 where l_train is non-zero)
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)

# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
T += gamma* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))

# Time steps
tau_alpha = 1/ np.linalg.norm(Q,2)
tau_beta = 1/ np.linalg.norm(L,2)

# For conjugate gradient
Acg = tau_alpha* Q + 1* np.eye(n)

# Error parameter
lamb = 100

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha

# Loop
k = 0
diff_alpha = 1e6
num_iter = 1001
# Plain boolean 'and' (short-circuit) rather than bitwise '&' for loop control
while (diff_alpha > 1e-3) and (k < num_iter):

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    # NOTE(review): recent SciPy releases rename cg's 'tol' keyword to 'rtol';
    # 'tol' is kept here to match the recommended (2023) image -- confirm
    # against the installed SciPy version.
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, tol=1e-3, maxiter=50)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha

    # Evaluate on the test set every 100 iterations and at convergence
    if not(k%100) or (diff_alpha<1e-3):

        # xi vector
        xi = Tinv.dot(H.dot(L.dot(alpha)))

        # Offset
        idx_unlabeled_data = np.where( np.abs(l)<1./2 )
        # Work on a copy: 'alpha_labels = alpha' would alias the iterate, so
        # zeroing the unlabeled entries would also overwrite alpha and
        # alpha_old and disturb the next iterations and the stopping test.
        alpha_labels = alpha.copy(); alpha_labels[idx_unlabeled_data] = 0
        idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
        Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
        nb_sv = len(Isv.nonzero()[0])
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(np.squeeze(np.array(xi))) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = xi.dot(KXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

# Print once after the loop; the accuracy is the one from the most recent
# evaluation performed inside the loop
print('Kernel SVM iter, diff_alpha :',str(k),str(diff_alpha)[:7])
print(' acc :',str(accuracy_test)[:5])

k-NN graph with cosine distance
Kernel SVM  iter, diff_alpha : 1001 0.68597
            acc : 65.5

代码

文本

Run Graph SVM

代码

文本

[9]

# Run Graph SVM
# Kernel combined with a cosine k-NN graph Laplacian through
# T = I + gamma * Lap * Ker, solved with the same iterative scheme as above.

# cg() is called below; import its module explicitly instead of relying on
# another package having loaded it
import scipy.sparse.linalg

# Compute Gaussian kernel
sigma = 0.5; sigma2 = sigma**2
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtrain, metric='euclidean', n_jobs=1)
Ker = np.exp(- Ddist**2 / sigma2)        # train/train kernel
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain, Xtest, metric='euclidean', n_jobs=1)
KXtest = np.exp(- Ddist**2 / sigma2)     # train/test kernel

# Compute kNN graph
kNN = 8
gamma = 100
A = construct_knn_graph(Xtrain, kNN, 'cosine')
Lap = graph_laplacian(A).todense()

# Compute Indicator function of labels (1 where l_train is non-zero)
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)

# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
T += gamma* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))

# Time steps
tau_alpha = 1/ np.linalg.norm(Q,2)
tau_beta = 1/ np.linalg.norm(L,2)

# For conjugate gradient
Acg = tau_alpha* Q + 1* np.eye(n)

# Error parameter
lamb = 1

# Initialization
alpha = np.zeros([n])
beta = 0.0
alpha_old = alpha

# Loop
k = 0
diff_alpha = 1e6
num_iter = 1001
# Plain boolean 'and' (short-circuit) rather than bitwise '&' for loop control
while (diff_alpha > 1e-3) and (k < num_iter):

    # Update iteration
    k += 1

    # Update alpha
    # Approximate solution with conjugate gradient
    # NOTE(review): recent SciPy releases rename cg's 'tol' keyword to 'rtol';
    # 'tol' is kept here to match the recommended (2023) image -- confirm
    # against the installed SciPy version.
    b0 = alpha + tau_alpha - tau_alpha* l* beta
    alpha, _ = scipy.sparse.linalg.cg(Acg, b0, x0=alpha, tol=1e-3, maxiter=50)
    alpha[alpha<0.0] = 0 # Projection on [0,+infty]
    alpha[alpha>lamb] = lamb # Projection on [-infty,lamb]

    # Update beta
    beta = beta + tau_beta* l.T.dot(alpha)

    # Stopping condition
    diff_alpha = np.linalg.norm(alpha-alpha_old)
    alpha_old = alpha

    # Evaluate on the test set every 100 iterations and at convergence
    if not(k%100) or (diff_alpha<1e-3):

        # xi vector
        xi = Tinv.dot(H.dot(L.dot(alpha)))

        # Offset
        idx_unlabeled_data = np.where( np.abs(l)<1./2 )
        # Work on a copy: 'alpha_labels = alpha' would alias the iterate, so
        # zeroing the unlabeled entries would also overwrite alpha and
        # alpha_old and disturb the next iterations and the stopping test.
        alpha_labels = alpha.copy(); alpha_labels[idx_unlabeled_data] = 0
        idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
        Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
        nb_sv = len(Isv.nonzero()[0])
        if nb_sv > 1:
            b = (Isv.T).dot( l - Ker.dot(np.squeeze(np.array(xi))) )/ nb_sv
        else:
            b = 0

        # Continuous score function
        f_test = xi.dot(KXtest) + b

        # Binary classification function
        C_test = np.sign(f_test) # decision function in {-1,1}
        accuracy_test = compute_purity(0.5*(1+C_test),Cgt_test,nc) # 0.5*(1+C_test) in {0,1}

# Print once after the loop; the accuracy is the one from the most recent
# evaluation performed inside the loop
print('Graph SVM iter, diff_alpha :',str(k),str(diff_alpha)[:7])
print(' acc :',str(accuracy_test)[:5])

k-NN graph with cosine distance
Graph SVM  iter, diff_alpha : 2 0.0
           acc : 78.5

代码

文本

Plot graph of test data points

代码

文本

[10]

# Plot graph of test data points
# Builds a cosine k-NN graph over the test points and draws it with
# networkx, colouring each node by its entry in C_test (the most recently
# computed predictions).
import networkx as nx

kNN = 8
A = construct_knn_graph(Xtest, kNN, 'cosine')
print(type(A),A.shape)

# Zero the diagonal and drop the stored zeros so the graph has no self-loops
A.setdiag(0)
A.eliminate_zeros()
G_nx = nx.from_scipy_sparse_array(A)

node_colors = np.array(C_test)
plt.figure(figsize=[40,40])
nx.draw_networkx(G_nx, with_labels=True, node_color=node_colors, cmap='jet')

k-NN graph with cosine distance
<class 'scipy.sparse._csr.csr_matrix'> (200, 200)

<Figure size 4000x4000 with 1 Axes>

代码

文本

[ ]

代码

文本

[ ]

代码

文本

Machine Learning

点个赞吧

本文被以下合集收录

Graph Machine learning

xuxh@dp.tech

更新于 2024-10-08

44 篇0 人关注