spectral clustering

1833 days ago by nepia

The process of spectral clustering 

# Load mlbench, installing it only when it is missing, so that re-running
# this cell does not download and rebuild the package every time.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
library(mlbench)

# Fix the RNG seed so the generated points are the same on every run.
set.seed(111)

# Generate the spirals benchmark data; the positional arguments are
# n = 100 points, cycles = 1 and sd = 0.025 (noise).
obj <- mlbench.spirals(100, 1, 0.025)

# Work with the point coordinates only, scaled by a factor of 4.
my.data <- 4 * obj$x

# Scatter plot of the raw (unclustered) points.
plot(my.data)
       
URL 'http://cran.r-project.org/src/contrib/mlbench_2.1-1.tar.gz'를 시도하고
있습니다
Content type 'application/x-gzip' length 920768 bytes (899 Kb)
열린  URL
==================================================
downloaded 899 Kb

* installing *source* package ‘mlbench’ ...
** 패키지 ‘mlbench’ 가 성공적으로 압축해제 되었고, MD5 sums  가 확인되었습니다 
** libs
gcc -std=gnu99 -I/root/sage-5.8/local/lib/R/include -DNDEBUG      -fpic 
-g -O2   -c waveform.c -o waveform.o
gcc -std=gnu99 -shared -o mlbench.so waveform.o
-L/root/sage-5.8/local/lib/R//lib -lR
다음 부분에 설치  /root/sage-5.8/local/lib/R/library/mlbench/libs
** R
** data
** inst
** preparing package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded

* DONE (mlbench)

다운로드된 소스 패키지들은 다음에 위치해 있습니다
	‘/tmp/RtmpO7DdUB/downloaded_packages’
URL 'http://cran.r-project.org/src/contrib/mlbench_2.1-1.tar.gz'를 시도하고 있습니다
Content type 'application/x-gzip' length 920768 bytes (899 Kb)
열린  URL
==================================================
downloaded 899 Kb

* installing *source* package ‘mlbench’ ...
** 패키지 ‘mlbench’ 가 성공적으로 압축해제 되었고, MD5 sums  가 확인되었습니다 
** libs
gcc -std=gnu99 -I/root/sage-5.8/local/lib/R/include -DNDEBUG      -fpic  -g -O2   -c waveform.c -o waveform.o
gcc -std=gnu99 -shared -o mlbench.so waveform.o -L/root/sage-5.8/local/lib/R//lib -lR
다음 부분에 설치  /root/sage-5.8/local/lib/R/library/mlbench/libs
** R
** data
** inst
** preparing package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded

* DONE (mlbench)

다운로드된 소스 패키지들은 다음에 위치해 있습니다
	‘/tmp/RtmpO7DdUB/downloaded_packages’

# Similarity between two points: exp(-alpha * d), where d is the Frobenius
# norm of the difference taken as a one-column matrix (i.e. the Euclidean
# distance for plain numeric vectors).
#
# x1, x2  numeric vectors of equal length
# alpha   decay rate; larger values make similarity fall off faster
# Returns a single number in (0, 1], equal to 1 when x1 and x2 coincide.
s <- function(x1, x2, alpha = 1) {
  exp(-alpha * norm(as.matrix(x1 - x2), type = "F"))
}

# Build the N x N matrix of pairwise similarities between the rows of a
# data matrix.
#
# my.data     matrix with one observation per row
# similarity  function(x1, x2) returning the similarity of two rows
# Returns an N x N matrix whose [i, j] entry is
# similarity(my.data[i, ], my.data[j, ]); a 0 x 0 matrix for empty input.
make.similarity <- function(my.data, similarity) {
  N <- nrow(my.data)
  S <- matrix(NA_real_, nrow = N, ncol = N)
  # seq_len() yields an empty sequence when N == 0, whereas 1:N would
  # iterate over c(1, 0) and index rows that do not exist.
  for (i in seq_len(N)) {
    for (j in seq_len(N)) {
      S[i, j] <- similarity(my.data[i, ], my.data[j, ])
    }
  }
  S
}

S <- make.similarity(my.data, s)

# Inspect the top-left 8 x 8 corner.
S[1:8, 1:8]
       
           [,1]        [,2]       [,3]       [,4]        [,5]       
[,6]       [,7]        [,8]
[1,] 1.00000000 0.064179185 0.74290158 0.63193426 0.098831073
0.094897744 0.56549848 0.033550836
[2,] 0.06417919 1.000000000 0.06938066 0.04276431 0.214229495
0.275123731 0.04833520 0.008796359
[3,] 0.74290158 0.069380663 1.00000000 0.61054893 0.089569089
0.088641808 0.66577557 0.043420466
[4,] 0.63193426 0.042764307 0.61054893 1.00000000 0.062517586
0.059982837 0.71959220 0.044260673
[5,] 0.09883107 0.214229495 0.08956909 0.06251759 1.000000000
0.776556494 0.05973178 0.005091154
[6,] 0.09489774 0.275123731 0.08864181 0.05998284 0.776556494
1.000000000 0.05901605 0.005548028
[7,] 0.56549848 0.048335201 0.66577557 0.71959220 0.059731778
0.059016049 1.00000000 0.058059785
[8,] 0.03355084 0.008796359 0.04342047 0.04426067 0.005091154
0.005548028 0.05805979 1.000000000
           [,1]        [,2]       [,3]       [,4]        [,5]        [,6]       [,7]        [,8]
[1,] 1.00000000 0.064179185 0.74290158 0.63193426 0.098831073 0.094897744 0.56549848 0.033550836
[2,] 0.06417919 1.000000000 0.06938066 0.04276431 0.214229495 0.275123731 0.04833520 0.008796359
[3,] 0.74290158 0.069380663 1.00000000 0.61054893 0.089569089 0.088641808 0.66577557 0.043420466
[4,] 0.63193426 0.042764307 0.61054893 1.00000000 0.062517586 0.059982837 0.71959220 0.044260673
[5,] 0.09883107 0.214229495 0.08956909 0.06251759 1.000000000 0.776556494 0.05973178 0.005091154
[6,] 0.09489774 0.275123731 0.08864181 0.05998284 0.776556494 1.000000000 0.05901605 0.005548028
[7,] 0.56549848 0.048335201 0.66577557 0.71959220 0.059731778 0.059016049 1.00000000 0.058059785
[8,] 0.03355084 0.008796359 0.04342047 0.04426067 0.005091154 0.005548028 0.05805979 1.000000000
# Turn a similarity matrix into a sparse, symmetric affinity matrix by
# keeping, for every row, only its n.neighboors largest similarities.
#
# S             square similarity matrix
# n.neighboors  number of entries kept per row; because S[i, i] is normally
#               the row maximum, this count includes the point itself
# Returns S unchanged when n.neighboors >= nrow(S) (fully connected graph);
# otherwise an N x N matrix that is zero except for the kept entries and
# their mirror images.
#
# Neighbours are selected by position with order(), so exactly n.neighboors
# entries are picked per row (ties are broken in favour of the lower index)
# and n.neighboors = 0 yields an all-zero matrix.
make.affinity <- function(S, n.neighboors = 2) {
  N <- nrow(S)
  if (n.neighboors >= N) {
    # Fully connected: every similarity is kept.
    return(S)
  }

  A <- matrix(0, nrow = N, ncol = N)
  for (i in seq_len(N)) {
    # Column indices of the largest similarities in row i.
    nearest <- order(S[i, ], decreasing = TRUE)[seq_len(n.neighboors)]
    A[i, nearest] <- S[i, nearest]
    # Mirror the entries so the graph is undirected (A stays symmetric).
    A[nearest, i] <- S[i, nearest]
  }
  A
}

# Use 3 neighbours (the count includes the point itself).
A <- make.affinity(S, 3)

# Inspect the top-left 8 x 8 corner.
A[1:8, 1:8]
       
          [,1] [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
[,8]
[1,] 1.0000000    0 0.7429016 0.6319343 0.0000000 0.0000000 0.0000000   
0
[2,] 0.0000000    1 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000   
0
[3,] 0.7429016    0 1.0000000 0.0000000 0.0000000 0.0000000 0.6657756   
0
[4,] 0.6319343    0 0.0000000 1.0000000 0.0000000 0.0000000 0.7195922   
0
[5,] 0.0000000    0 0.0000000 0.0000000 1.0000000 0.7765565 0.0000000   
0
[6,] 0.0000000    0 0.0000000 0.0000000 0.7765565 1.0000000 0.0000000   
0
[7,] 0.0000000    0 0.6657756 0.7195922 0.0000000 0.0000000 1.0000000   
0
[8,] 0.0000000    0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000   
1
          [,1] [,2]      [,3]      [,4]      [,5]      [,6]      [,7] [,8]
[1,] 1.0000000    0 0.7429016 0.6319343 0.0000000 0.0000000 0.0000000    0
[2,] 0.0000000    1 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000    0
[3,] 0.7429016    0 1.0000000 0.0000000 0.0000000 0.0000000 0.6657756    0
[4,] 0.6319343    0 0.0000000 1.0000000 0.0000000 0.0000000 0.7195922    0
[5,] 0.0000000    0 0.0000000 0.0000000 1.0000000 0.7765565 0.0000000    0
[6,] 0.0000000    0 0.0000000 0.0000000 0.7765565 1.0000000 0.0000000    0
[7,] 0.0000000    0 0.6657756 0.7195922 0.0000000 0.0000000 1.0000000    0
[8,] 0.0000000    0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000    1
# Degree matrix: a diagonal matrix holding the row sums of the affinity
# matrix A. nrow is given explicitly because diag() called with a single
# number would otherwise build an identity matrix of that size instead of
# a 1 x 1 matrix.
D <- diag(rowSums(A), nrow = nrow(A))

# Inspect the top-left 8 x 8 corner.
D[1:8, 1:8]
       
         [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]    
[,8]
[1,] 2.374836 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000
[2,] 0.000000 2.597451 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000
[3,] 0.000000 0.000000 2.408677 0.000000 0.000000 0.000000 0.000000
0.000000
[4,] 0.000000 0.000000 0.000000 2.351526 0.000000 0.000000 0.000000
0.000000
[5,] 0.000000 0.000000 0.000000 0.000000 2.523175 0.000000 0.000000
0.000000
[6,] 0.000000 0.000000 0.000000 0.000000 0.000000 2.519936 0.000000
0.000000
[7,] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.170424
0.000000
[8,] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2.302241
         [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
[1,] 2.374836 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
[2,] 0.000000 2.597451 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
[3,] 0.000000 0.000000 2.408677 0.000000 0.000000 0.000000 0.000000 0.000000
[4,] 0.000000 0.000000 0.000000 2.351526 0.000000 0.000000 0.000000 0.000000
[5,] 0.000000 0.000000 0.000000 0.000000 2.523175 0.000000 0.000000 0.000000
[6,] 0.000000 0.000000 0.000000 0.000000 0.000000 2.519936 0.000000 0.000000
[7,] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.170424 0.000000
[8,] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.302241
# Unnormalized graph Laplacian: degree matrix minus affinity matrix.
U <- D - A

# Show the leading 12 x 12 corner, rounded to one decimal place.
round(U[seq_len(12), seq_len(12)], digits = 1)
       
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
 [1,]  1.4  0.0 -0.7 -0.6  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [2,]  0.0  1.6  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [3,] -0.7  0.0  1.4  0.0  0.0  0.0 -0.7  0.0  0.0   0.0   0.0   0.0
 [4,] -0.6  0.0  0.0  1.4  0.0  0.0 -0.7  0.0  0.0   0.0   0.0   0.0
 [5,]  0.0  0.0  0.0  0.0  1.5 -0.8  0.0  0.0  0.0   0.0   0.0   0.0
 [6,]  0.0  0.0  0.0  0.0 -0.8  1.5  0.0  0.0  0.0   0.0   0.0   0.0
 [7,]  0.0  0.0 -0.7 -0.7  0.0  0.0  2.2  0.0  0.0  -0.8   0.0   0.0
 [8,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.3  0.0   0.0   0.0   0.0
 [9,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.5   0.0   0.0   0.0
[10,]  0.0  0.0  0.0  0.0  0.0  0.0 -0.8  0.0  0.0   1.6  -0.8   0.0
[11,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  -0.8   1.5  -0.8
[12,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  -0.8   1.5
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
 [1,]  1.4  0.0 -0.7 -0.6  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [2,]  0.0  1.6  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [3,] -0.7  0.0  1.4  0.0  0.0  0.0 -0.7  0.0  0.0   0.0   0.0   0.0
 [4,] -0.6  0.0  0.0  1.4  0.0  0.0 -0.7  0.0  0.0   0.0   0.0   0.0
 [5,]  0.0  0.0  0.0  0.0  1.5 -0.8  0.0  0.0  0.0   0.0   0.0   0.0
 [6,]  0.0  0.0  0.0  0.0 -0.8  1.5  0.0  0.0  0.0   0.0   0.0   0.0
 [7,]  0.0  0.0 -0.7 -0.7  0.0  0.0  2.2  0.0  0.0  -0.8   0.0   0.0
 [8,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.3  0.0   0.0   0.0   0.0
 [9,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.5   0.0   0.0   0.0
[10,]  0.0  0.0  0.0  0.0  0.0  0.0 -0.8  0.0  0.0   1.6  -0.8   0.0
[11,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  -0.8   1.5  -0.8
[12,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  -0.8   1.5
# Normalized asymmetric Laplacian: I - D^{-1} A.
# solve(D, A) solves D X = A directly, which avoids forming the explicit
# inverse of D and then multiplying by it. The identity matrix is sized
# from A so the block depends only on the matrices it combines.
NL <- diag(nrow(A)) - solve(D, A)

# Show the leading 12 x 12 corner, rounded to one decimal place.
round(NL[1:12, 1:12], 1)
       
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
 [1,]  0.6  0.0 -0.3 -0.3  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [2,]  0.0  0.6  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [3,] -0.3  0.0  0.6  0.0  0.0  0.0 -0.3  0.0  0.0   0.0   0.0   0.0
 [4,] -0.3  0.0  0.0  0.6  0.0  0.0 -0.3  0.0  0.0   0.0   0.0   0.0
 [5,]  0.0  0.0  0.0  0.0  0.6 -0.3  0.0  0.0  0.0   0.0   0.0   0.0
 [6,]  0.0  0.0  0.0  0.0 -0.3  0.6  0.0  0.0  0.0   0.0   0.0   0.0
 [7,]  0.0  0.0 -0.2 -0.2  0.0  0.0  0.7  0.0  0.0  -0.2   0.0   0.0
 [8,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.6  0.0   0.0   0.0   0.0
 [9,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.6   0.0   0.0   0.0
[10,]  0.0  0.0  0.0  0.0  0.0  0.0 -0.3  0.0  0.0   0.6  -0.3   0.0
[11,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  -0.3   0.6  -0.3
[12,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  -0.3   0.6
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
 [1,]  0.6  0.0 -0.3 -0.3  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [2,]  0.0  0.6  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
 [3,] -0.3  0.0  0.6  0.0  0.0  0.0 -0.3  0.0  0.0   0.0   0.0   0.0
 [4,] -0.3  0.0  0.0  0.6  0.0  0.0 -0.3  0.0  0.0   0.0   0.0   0.0
 [5,]  0.0  0.0  0.0  0.0  0.6 -0.3  0.0  0.0  0.0   0.0   0.0   0.0
 [6,]  0.0  0.0  0.0  0.0 -0.3  0.6  0.0  0.0  0.0   0.0   0.0   0.0
 [7,]  0.0  0.0 -0.2 -0.2  0.0  0.0  0.7  0.0  0.0  -0.2   0.0   0.0
 [8,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.6  0.0   0.0   0.0   0.0
 [9,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.6   0.0   0.0   0.0
[10,]  0.0  0.0  0.0  0.0  0.0  0.0 -0.3  0.0  0.0   0.6  -0.3   0.0
[11,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  -0.3   0.6  -0.3
[12,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  -0.3   0.6
# Eigendecomposition of the unnormalized Laplacian U. With symmetric = TRUE
# eigen() returns real eigenvalues sorted in decreasing order.
evL <- eigen(U, symmetric = TRUE)

# Plot the (up to) ten smallest eigenvalues on a log scale. local() keeps
# the plotting helpers out of the global environment.
local({
  n_shown <- min(10L, length(evL$values))
  # rev() puts the smallest eigenvalue first.
  smallest <- rev(evL$values)[seq_len(n_shown)]
  # A logarithmic axis cannot display values <= 0, and the smallest
  # eigenvalue of a Laplacian is (numerically) zero. Leave such points out
  # explicitly instead of letting plot() drop them with a warning; xlim
  # keeps the remaining points at their original positions.
  on_log_axis <- smallest > 0
  plot(
    seq_len(n_shown)[on_log_axis], smallest[on_log_axis],
    log = "y", xlim = c(1, n_shown),
    xlab = "index", ylab = "eigenvalue"
  )
  # Dashed marker between the 2nd and 3rd eigenvalue
  # (presumably the eigengap suggesting k = 2 -- confirm).
  abline(v = 2.25, col = "red", lty = 2)
})
       
경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot

경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot
경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot

경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot

# Eigendecomposition of the normalized Laplacian NL. NL is not symmetric,
# so eigen() orders the eigenvalues by decreasing modulus and may return
# them as complex numbers.
evNL <- eigen(NL)

# Plot the (up to) ten smallest eigenvalues on a log scale. local() keeps
# the plotting helpers out of the global environment.
local({
  n_shown <- min(10L, length(evNL$values))
  # rev() puts the smallest-modulus eigenvalue first. Only real parts can
  # be plotted; Re() leaves an already-real vector unchanged.
  smallest <- Re(rev(evNL$values))[seq_len(n_shown)]
  # A logarithmic axis cannot display values <= 0. Leave such points out
  # explicitly instead of letting plot() drop them with a warning; xlim
  # keeps the remaining points at their original positions.
  on_log_axis <- smallest > 0
  plot(
    seq_len(n_shown)[on_log_axis], smallest[on_log_axis],
    log = "y", xlim = c(1, n_shown),
    xlab = "index", ylab = "eigenvalue"
  )
  # Dashed marker between the 2nd and 3rd eigenvalue
  # (presumably the eigengap suggesting k = 2 -- confirm).
  abline(v = 2.1, col = "red", lty = 2)
})
       
경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot

경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot
경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot

경고 메시지가 손실되었습니다
In xy.coords(x, y, xlabel, ylabel, log) :
  1 y value <= 0 omitted from logarithmic plot

# Spectral clustering with the unnormalized Laplacian.
k <- 2

# Take the last k eigenvector columns. evL comes from
# eigen(U, symmetric = TRUE), whose eigenvalues are in decreasing order, so
# these columns belong to the k smallest eigenvalues. drop = FALSE keeps Z
# a matrix even when k is 1.
Z <- evL$vectors[, tail(seq_len(ncol(evL$vectors)), k), drop = FALSE]

# Cluster the rows of the spectral embedding. kmeans() picks its starting
# centres at random, so the labels depend on the RNG state.
km <- kmeans(Z, centers = k)

# Original points coloured by their spectral cluster.
plot(my.data, col = km$cluster)
       

# Spectral clustering with the normalized Laplacian.
k <- 2

# Take the last k eigenvector columns. evNL comes from eigen(NL) on a
# non-symmetric matrix, whose eigenvalues are ordered by decreasing
# modulus, so these columns belong to the k smallest-modulus eigenvalues.
# drop = FALSE keeps NZ a matrix even when k is 1.
NZ <- evNL$vectors[, tail(seq_len(ncol(evNL$vectors)), k), drop = FALSE]

# Cluster the rows of the spectral embedding. kmeans() picks its starting
# centres at random, so the labels depend on the RNG state.
Nkm <- kmeans(NZ, centers = k)

# Original points coloured by their spectral cluster.
plot(my.data, col = Nkm$cluster)
       

Comparison with k-means clustering applied directly to the raw coordinates

# Baseline: k-means with 2 clusters on the raw coordinates. The outer
# parentheses print the fitted object as well as assigning it.
(cl <- kmeans(my.data, 2))

# Points coloured by assigned cluster, drawn once.
plot(my.data, col = cl$cluster)

# Overlay the two cluster centres as large stars (pch = 8) in the matching
# cluster colours.
points(cl$centers, col = 1:2, pch = 8, cex = 2)
       
K-means clustering with 2 clusters of sizes 48, 52

Cluster means:
       [,1]      [,2]
1  1.077452 -1.504055
2 -1.027031  1.371337

Clustering vector:
  [1] 1 1 1 1 1 1 1 2 1 1 2 2 2 1 1 2 2 2 2 2 1 2 1 2 1 1 1 2 2 1 2 2 2
2 1 2 2 1 2 2 2 2 1 2 2 2 2
 [48] 2 1 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1
1 1 2 1 1 1 2 1 2 1 1 2 1 1
 [95] 1 2 1 2 1 1

Within cluster sum of squares by cluster:
[1] 207.0643 240.7847
 (between_SS / total_SS =  41.4 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
"tot.withinss" "betweenss"   
[7] "size"        
K-means clustering with 2 clusters of sizes 48, 52

Cluster means:
       [,1]      [,2]
1  1.077452 -1.504055
2 -1.027031  1.371337

Clustering vector:
  [1] 1 1 1 1 1 1 1 2 1 1 2 2 2 1 1 2 2 2 2 2 1 2 1 2 1 1 1 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 1 2 2 2 2
 [48] 2 1 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 2 1 1
 [95] 1 2 1 2 1 1

Within cluster sum of squares by cluster:
[1] 207.0643 240.7847
 (between_SS / total_SS =  41.4 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"   
[7] "size"        

# Baseline: k-means with 3 clusters on the raw coordinates. The outer
# parentheses print the fitted object as well as assigning it.
(c2 <- kmeans(my.data, 3))

# Points coloured by the 3-cluster assignment. The colours must come from
# c2, the same fit whose centres are overlaid below, not from the earlier
# 2-cluster fit cl.
plot(my.data, col = c2$cluster)

# Overlay the three cluster centres as large stars (pch = 8) in the
# matching cluster colours.
points(c2$centers, col = 1:3, pch = 8, cex = 3)
       
K-means clustering with 3 clusters of sizes 34, 32, 34

Cluster means:
        [,1]      [,2]
1 -0.2995602  2.183563
2 -1.7926388 -1.054059
3  1.9371052 -1.217540

Clustering vector:
  [1] 3 2 3 3 3 2 3 1 2 3 1 1 1 3 3 1 1 1 1 1 3 1 3 1 3 3 3 1 1 3 1 1 1
1 3 1 2 3 1 2 2 1 3 1 1 2 1
 [48] 1 3 2 2 1 2 2 2 2 3 2 2 2 2 2 1 2 2 1 2 1 2 2 2 2 1 3 2 2 2 2 3 3
3 3 1 3 3 3 1 3 1 3 3 1 3 3
 [95] 2 1 2 2 3 3

Within cluster sum of squares by cluster:
[1] 98.27141 86.32112 98.67226
 (between_SS / total_SS =  63.0 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
"tot.withinss" "betweenss"   
[7] "size"        
K-means clustering with 3 clusters of sizes 34, 32, 34

Cluster means:
        [,1]      [,2]
1 -0.2995602  2.183563
2 -1.7926388 -1.054059
3  1.9371052 -1.217540

Clustering vector:
  [1] 3 2 3 3 3 2 3 1 2 3 1 1 1 3 3 1 1 1 1 1 3 1 3 1 3 3 3 1 1 3 1 1 1 1 3 1 2 3 1 2 2 1 3 1 1 2 1
 [48] 1 3 2 2 1 2 2 2 2 3 2 2 2 2 2 1 2 2 1 2 1 2 2 2 2 1 3 2 2 2 2 3 3 3 3 1 3 3 3 1 3 1 3 3 1 3 3
 [95] 2 1 2 2 3 3

Within cluster sum of squares by cluster:
[1] 98.27141 86.32112 98.67226
 (between_SS / total_SS =  63.0 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"   
[7] "size"