# spectral clustering

## 1833 days ago by nepia

The process of spectral clustering

# Setup ----
# Install mlbench only when it is missing, so re-running the script does not
# download and rebuild the package every time.
# (In the recorded session mlbench_2.1-1.tar.gz was fetched from
# http://cran.r-project.org/src/contrib/ and built from source.)
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
library(mlbench)

# Two intertwined spirals, 100 points, scaled by 4.
set.seed(111)
obj <- mlbench.spirals(100, 1, 0.025)
my.data <- 4 * obj$x
plot(my.data)

# Similarity matrix ----

# Similarity between two points: exp(-alpha * ||x1 - x2||), where the
# Frobenius norm of the difference vector is its Euclidean length.
#   x1, x2: numeric vectors of equal length.
#   alpha:  decay rate; larger values make similarity fall off faster.
s <- function(x1, x2, alpha = 1) {
  exp(-alpha * norm(as.matrix(x1 - x2), type = "F"))
}

# Build the N x N matrix of pairwise similarities between the rows of
# `my.data`.
#   my.data:    matrix with one observation per row.
#   similarity: function(row_i, row_j) returning a single number.
# Returns an N x N numeric matrix with entry [i, j] = similarity(row i, row j).
make.similarity <- function(my.data, similarity) {
  N <- nrow(my.data)
  S <- matrix(NA_real_, nrow = N, ncol = N)
  # seq_len() keeps the loops empty when N == 0 (1:N would yield c(1, 0)).
  for (i in seq_len(N)) {
    for (j in seq_len(N)) {
      S[i, j] <- similarity(my.data[i, ], my.data[j, ])
    }
  }
  S
}

S <- make.similarity(my.data, s)
S[1:8, 1:8]
#>            [,1]        [,2]       [,3]       [,4]        [,5]        [,6]       [,7]        [,8]
#> [1,] 1.00000000 0.064179185 0.74290158 0.63193426 0.098831073 0.094897744 0.56549848 0.033550836
#> [2,] 0.06417919 1.000000000 0.06938066 0.04276431 0.214229495 0.275123731 0.04833520 0.008796359
#> [3,] 0.74290158 0.069380663 1.00000000 0.61054893 0.089569089 0.088641808 0.66577557 0.043420466
#> [4,] 0.63193426 0.042764307 0.61054893 1.00000000 0.062517586 0.059982837 0.71959220 0.044260673
#> [5,] 0.09883107 0.214229495 0.08956909 0.06251759 1.000000000 0.776556494 0.05973178 0.005091154
#> [6,] 0.09489774 0.275123731 0.08864181 0.05998284 0.776556494 1.000000000 0.05901605 0.005548028
#> [7,] 0.56549848 0.048335201 0.66577557 0.71959220 0.059731778 0.059016049 1.00000000 0.058059785
#> [8,] 0.03355084 0.008796359 0.04342047 0.04426067 0.005091154 0.005548028 0.05805979 1.000000000

# Affinity matrix (k-nearest-neighbour graph) ----

# Sparsify a similarity matrix by keeping, for every row, only the entries
# whose similarity is among that row's `n.neighboors` largest values, then
# mirroring them so the graph is undirected (symmetric matrix).
#   S:            square similarity matrix.
#   n.neighboors: number of top similarities kept per row (the point itself
#                 counts when it is among its own top similarities); when
#                 >= nrow(S) the graph is fully connected and S is returned.
# Returns a matrix of the same size as S with zeros for dropped edges.
make.affinity <- function(S, n.neighboors = 2) {
  N <- nrow(S)

  if (n.neighboors >= N) {
    # Fully connected graph.
    return(S)
  }

  A <- matrix(0, nrow = N, ncol = N)
  for (i in seq_len(N)) {
    top <- head(sort(S[i, ], decreasing = TRUE), n.neighboors)
    if (length(top) == 0L) {
      next
    }
    # Everything at least as large as the n-th best value is a neighbour.
    # This selects the same entries as testing `S[i, ] == value` for each top
    # value, without exact floating-point equality and without a per-value
    # scan of the row.
    j <- which(S[i, ] >= min(top))
    A[i, j] <- S[i, j]
    A[j, i] <- S[i, j] # mirror the edge: undirected graph, symmetric matrix
  }
  A
}

A <- make.affinity(S, 3) # use 3 neighboors (includes self)
A[1:8, 1:8]
#>           [,1] [,2]      [,3]      [,4]      [,5]      [,6]      [,7] [,8]
#> [1,] 1.0000000    0 0.7429016 0.6319343 0.0000000 0.0000000 0.0000000    0
#> [2,] 0.0000000    1 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000    0
#> [3,] 0.7429016    0 1.0000000 0.0000000 0.0000000 0.0000000 0.6657756    0
#> [4,] 0.6319343    0 0.0000000 1.0000000 0.0000000 0.0000000 0.7195922    0
#> [5,] 0.0000000    0 0.0000000 0.0000000 1.0000000 0.7765565 0.0000000    0
#> [6,] 0.0000000    0 0.0000000 0.0000000 0.7765565 1.0000000 0.0000000    0
#> [7,] 0.0000000    0 0.6657756 0.7195922 0.0000000 0.0000000 1.0000000    0
#> [8,] 0.0000000    0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000    1

# Degree matrix and Laplacians ----

D <- diag(rowSums(A)) # degree matrix: row sums of A on the diagonal
D[1:8, 1:8]
#>          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
#> [1,] 2.374836 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
#> [2,] 0.000000 2.597451 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
#> [3,] 0.000000 0.000000 2.408677 0.000000 0.000000 0.000000 0.000000 0.000000
#> [4,] 0.000000 0.000000 0.000000 2.351526 0.000000 0.000000 0.000000 0.000000
#> [5,] 0.000000 0.000000 0.000000 0.000000 2.523175 0.000000 0.000000 0.000000
#> [6,] 0.000000 0.000000 0.000000 0.000000 0.000000 2.519936 0.000000 0.000000
#> [7,] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.170424 0.000000
#> [8,] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.302241

U <- D - A # unnormalized graph Laplacian
round(U[1:12, 1:12], 1)
#>       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
#>  [1,]  1.4  0.0 -0.7 -0.6  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
#>  [2,]  0.0  1.6  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
#>  [3,] -0.7  0.0  1.4  0.0  0.0  0.0 -0.7  0.0  0.0   0.0   0.0   0.0
#>  [4,] -0.6  0.0  0.0  1.4  0.0  0.0 -0.7  0.0  0.0   0.0   0.0   0.0
#>  [5,]  0.0  0.0  0.0  0.0  1.5 -0.8  0.0  0.0  0.0   0.0   0.0   0.0
#>  [6,]  0.0  0.0  0.0  0.0 -0.8  1.5  0.0  0.0  0.0   0.0   0.0   0.0
#>  [7,]  0.0  0.0 -0.7 -0.7  0.0  0.0  2.2  0.0  0.0  -0.8   0.0   0.0
#>  [8,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.3  0.0   0.0   0.0   0.0
#>  [9,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.5   0.0   0.0   0.0
#> [10,]  0.0  0.0  0.0  0.0  0.0  0.0 -0.8  0.0  0.0   1.6  -0.8   0.0
#> [11,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  -0.8   1.5  -0.8
#> [12,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  -0.8   1.5

NL <- diag(nrow(my.data)) - solve(D) %*% A # Normalized asymmetric Laplacian
round(NL[1:12, 1:12], 1)
#>       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
#>  [1,]  0.6  0.0 -0.3 -0.3  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
#>  [2,]  0.0  0.6  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0
#>  [3,] -0.3  0.0  0.6  0.0  0.0  0.0 -0.3  0.0  0.0   0.0   0.0   0.0
#>  [4,] -0.3  0.0  0.0  0.6  0.0  0.0 -0.3  0.0  0.0   0.0   0.0   0.0
#>  [5,]  0.0  0.0  0.0  0.0  0.6 -0.3  0.0  0.0  0.0   0.0   0.0   0.0
#>  [6,]  0.0  0.0  0.0  0.0 -0.3  0.6  0.0  0.0  0.0   0.0   0.0   0.0
#>  [7,]  0.0  0.0 -0.2 -0.2  0.0  0.0  0.7  0.0  0.0  -0.2   0.0   0.0
#>  [8,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.6  0.0   0.0   0.0   0.0
#>  [9,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.6   0.0   0.0   0.0
#> [10,]  0.0  0.0  0.0  0.0  0.0  0.0 -0.3  0.0  0.0   0.6  -0.3   0.0
#> [11,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  -0.3   0.6  -0.3
#> [12,]  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  -0.3   0.6

# Eigen-gap plots ----
# The ten smallest eigenvalues are plotted on a log axis; the dashed red line
# marks where the session's author placed the gap. The recorded session
# warned for each plot:
#>   In xy.coords(x, y, xlabel, ylabel, log) :
#>     1 y value <= 0 omitted from logarithmic plot
# i.e. one of the plotted eigenvalues is not positive and cannot be drawn on
# the log scale.

evL <- eigen(U, symmetric = TRUE)
plot(1:10, rev(evL$values)[1:10], log = "y")
abline(v = 2.25, col = "red", lty = 2)

evNL <- eigen(NL)
plot(1:10, rev(evNL$values)[1:10], log = "y")
abline(v = 2.1, col = "red", lty = 2)

# Clustering in the spectral embedding (unnormalized Laplacian) ----
# eigen() returned the eigenvalues of U in decreasing order, so the last k
# columns of the eigenvector matrix belong to the k smallest eigenvalues.
k <- 2
Z <- evL$vectors[, (ncol(evL$vectors) - k + 1):ncol(evL$vectors)]
km <- kmeans(Z, centers = k)
plot(my.data, col = km$cluster)
# Clustering in the spectral embedding (normalized Laplacian) ----
# Take the last k eigenvector columns of `evNL` and cluster the embedded
# points with k-means; the original points are coloured by that assignment.
# NOTE(review): assumes eigen() returned the eigenvalues of NL in decreasing
# order, so the last columns belong to the smallest ones - confirm.
k <- 2
NZ <- evNL$vectors[, (ncol(evNL$vectors) - k + 1):ncol(evNL$vectors)]
Nkm <- kmeans(NZ, centers = k)
plot(my.data, col = Nkm$cluster)

# Compare with K-means Clustering ----
# Plain k-means applied directly to the coordinates, for comparison with the
# spectral result above. Stars mark the cluster centres.
(cl <- kmeans(my.data, 2))
plot(my.data, col = cl$cluster)
points(cl$centers, col = 1:2, pch = 8, cex = 2)
#> K-means clustering with 2 clusters of sizes 48, 52
#>
#> Cluster means:
#>        [,1]      [,2]
#> 1  1.077452 -1.504055
#> 2 -1.027031  1.371337
#>
#> Clustering vector:
#>   [1] 1 1 1 1 1 1 1 2 1 1 2 2 2 1 1 2 2 2 2 2 1 2 1 2 1 1 1 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 1 2 2 2 2
#>  [48] 2 1 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 2 1 1
#>  [95] 1 2 1 2 1 1
#>
#> Within cluster sum of squares by cluster:
#> [1] 207.0643 240.7847
#>  (between_SS / total_SS =  41.4 %)
#>
#> Available components:
#>
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"
#> [7] "size"

# Same comparison with 3 clusters. The points must be coloured by the
# 3-cluster assignment `c2$cluster`; using the 2-cluster `cl$cluster` here
# would pair 3 centres with a 2-colour labelling.
(c2 <- kmeans(my.data, 3))
plot(my.data, col = c2$cluster)
points(c2$centers, col = 1:3, pch = 8, cex = 3)
#> K-means clustering with 3 clusters of sizes 34, 32, 34
#>
#> Cluster means:
#>         [,1]      [,2]
#> 1 -0.2995602  2.183563
#> 2 -1.7926388 -1.054059
#> 3  1.9371052 -1.217540
#>
#> Clustering vector:
#>   [1] 3 2 3 3 3 2 3 1 2 3 1 1 1 3 3 1 1 1 1 1 3 1 3 1 3 3 3 1 1 3 1 1 1 1 3 1 2 3 1 2 2 1 3 1 1 2 1
#>  [48] 1 3 2 2 1 2 2 2 2 3 2 2 2 2 2 1 2 2 1 2 1 2 2 2 2 1 3 2 2 2 2 3 3 3 3 1 3 3 3 1 3 1 3 3 1 3 3
#>  [95] 2 1 2 2 3 3
#>
#> Within cluster sum of squares by cluster:
#> [1] 98.27141 86.32112 98.67226
#>  (between_SS / total_SS =  63.0 %)
#>
#> Available components:
#>
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"
#> [7] "size"