SOM

In [14]:
library(kohonen)
library(RColorBrewer) # to use brewer.pal
library(fields) # to use designer.colors
library(ggplot2)
In [17]:
# Seed the RNG first so the SOM training below is repeatable.
set.seed(1)
# Fit a self-organising map on cdata.n (defined outside this excerpt) using a
# 10 x 10 rectangular grid of units, then inspect the fitted object.
sm <- som(
  data = cdata.n,
  grid = somgrid(xdim = 10, ydim = 10, topo = "rectangular")
)
str(sm)
List of 10
 $ data        : num [1:1000, 1:4] -0.227 -0.463 -0.15 -0.466 -0.414 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:1000] "1" "2" "3" "4" ...
  .. ..$ : chr [1:4] "MONEY" "VISIT" "CROSS" "API"
  ..- attr(*, "scaled:center")= Named num [1:4] 659823.1 15.5 5.6 28.3
  .. ..- attr(*, "names")= chr [1:4] "MONEY" "VISIT" "CROSS" "API"
  ..- attr(*, "scaled:scale")= Named num [1:4] 1.29e+06 1.92e+01 4.67 3.22e+01
  .. ..- attr(*, "names")= chr [1:4] "MONEY" "VISIT" "CROSS" "API"
 $ grid        :List of 5
  ..$ pts   : int [1:100, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : NULL
  .. .. ..$ : chr [1:2] "x" "y"
  ..$ xdim  : num 10
  ..$ ydim  : num 10
  ..$ topo  : chr "rectangular"
  ..$ n.hood: chr "square"
  ..- attr(*, "class")= chr "somgrid"
 $ codes       : num [1:100, 1:4] -0.12611 -0.00927 -0.13581 0.13115 0.11957 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : NULL
  .. ..$ : chr [1:4] "MONEY" "VISIT" "CROSS" "API"
 $ changes     : num [1:100, 1] 0.0159 0.0182 0.0205 0.0169 0.0213 ...
 $ alpha       : num [1:2] 0.05 0.01
 $ radius      : num [1:2] 6 -6
 $ toroidal    : logi FALSE
 $ unit.classif: int [1:1000] 44 89 11 97 86 51 16 43 93 4 ...
 $ distances   : num [1:1000] 0.02133 0.00983 0.04259 0.01541 0.00165 ...
 $ method      : chr "som"
 - attr(*, "class")= chr "kohonen"
In [18]:
sm$codes # Codebook weights: one weight vector per node of the 10x10 grid (as many rows as nodes)
MONEY VISIT CROSS API
-0.126113696677975-0.4601064180334571.0962386374253 -0.635203710149349
-0.00926530944671553-0.0891701832129362 0.620560568153708 -0.592037719385386
-0.1358148759828740.176327466682718 0.834903423791145 -0.47188203570459
0.131150501860887 0.624023735227139 0.920701891934675 -0.614157693178954
0.11956774858212 0.376301496956898 1.28193023150052 -0.561695195509611
0.358336688868957 0.371139599216933 1.72403131328679 -0.654979492773109
0.545411364919319 0.721370067999354 2.09129022400005 -0.736763119567566
1.41166064887447 0.950220535210303 2.44855450240069 -0.793099065326606
6.42507905497517 3.22312808511671 2.54469330010098 -0.837261372671094
11.9976452409556 2.94753576368691 3.34937062603138 -0.873401908752385
-0.10393671841944 -0.3756151049884190.364459118104695 -0.607860057752683
-0.216396918402362-0.1620818093253890.586990568771098 -0.291517363303764
-0.09331595013662480.135090218130413 0.514533726730472 -0.504419735913228
0.178006818051934 0.192418598289977 0.84952078458582 -0.577107644588882
0.323217810661099 -0.1079317297634791.04678620459412 -0.639661300858667
0.9460654660302180.1657490716208751.1972685131191 -0.70475336260882
0.886736630650586 1.18364660793038 1.55002693495428 -0.751392546731405
2.66611256002742 1.45043483585612 1.72051710974574 -0.793327082157556
3.21222157027378 3.38169907873746 1.68793777193081 -0.823774565840559
4.46875396659166 6.96186585313981 3.89238657006453 -0.846816853616542
-0.26137101317088 -0.3806066609043730.300443085134404 -0.313253584917871
-0.240507362248708-0.1004449451726580.278951628506069 -0.358063074057133
-0.06995365992677630.0602932284066852 0.300492823923219 -0.503011894969671
0.30454499964856 -0.06875269430939680.344104713772376 -0.594354813138283
0.3153062724120660.3864619121697340.584226379878098-0.63803118938819
0.90967116480658 0.359929466409933 0.461858575777535 -0.560407474694253
1.236445829094 0.947589323469318 0.906769033693981 -0.693735864777624
1.75324716474117 1.79896198185395 0.927656869181429 -0.769338250274773
1.08663894944222 3.29713223876782 1.94579734839154 -0.765288745897034
2.21432008052784 3.30297597515901 3.23867138859579 -0.813293789583382
............
-0.42500198957515 -0.680801398859028-0.555613287794553-0.677218001608139
-0.396169919155737-0.628078979879803-0.53588256170776 -0.450362976264678
-0.399031617131415-0.602427499078275-0.545184147331798-0.169023856258494
-0.383519173477543-0.547020392119644-0.5556121176291060.0244012168771308
-0.375026671345776-0.562054370054334-0.3415678649258590.0686018565797179
-0.394817210574703-0.486829400913991-0.5556121146125420.300818131470824
-0.340047768311409-0.409451097965499-0.5512042090525060.5537057724216
-0.388440222435519-0.520273721178019-0.5556426432766451.15482116693713
-0.446905889967341-0.552657413362555-0.72866318003191 1.91701270748279
-0.441072737379069-0.59216302741576 -0.9648978660714592.16649579524909
-0.445912281351437-0.658574898744607-0.769638353114367-0.746204169814957
-0.386810763332557-0.567481740481707-0.8044750750578 -0.482974581428281
-0.349945270009256 -0.409117219499571 -0.810579341150762 -0.0882575301826337
-0.413364700801349-0.605973332177141-0.7696383530641070.183906604146614
-0.327362344857632-0.339550580135561-0.8006808358386840.291305964214293
-0.419443484520285-0.580836040466932-0.75607239016101 0.551660473227989
-0.371346522126949-0.50434425480095 -0.7926510659308720.823648312709547
-0.422164458583513-0.569515638159386-0.7696372782030691.11641863293373
-0.460879901423903-0.636841496276855-0.8863411451757512.49947092158801
-0.466941053748124-0.654227793165738-0.9625531240631414.07135077512873
-0.441208710292067-0.690833850403092-0.983664591444997-0.767694634828594
-0.462891527696559-0.686773118070266-0.983664380793678-0.369574246804685
-0.411941527753377-0.622429940149898-0.769638353146001-0.156360813684342
-0.441781417484389 -0.684144521166902 -0.983664590710246 0.00692540173748636
-0.450081178624105-0.659335975667006-0.9836643990146610.41682693297374
-0.443964170625545-0.690747200506753-0.9836450106738410.76592626004942
-0.459416507193678-0.640168551399217-0.9836638346663111.17339694694465
-0.451179134241936-0.618696259405063-0.9836645872279421.46467884045457
-0.423158655883884-0.66026786781696 -0.9300786125051463.06496072664205
-0.489097913392668-0.663241835207435-0.9836258232631746.00711674809974
In [19]:
# Set the notebook's figure size options before drawing.
options(repr.plot.width = 6, repr.plot.height = 5)
# Default kohonen plot of the fitted map, titled "feature distribution".
plot(sm, main = "feature distribution")
In [20]:
# "counts" view of the map, used here to show how many observations land in each unit.
plot(sm, type = "counts", main = "cluster size")
In [21]:
# "quality" view of the map (per the kohonen docs: mean distance of the mapped
# observations to their unit's codebook vector).
plot(sm, type = "quality", main = "mapping quality")
In [22]:
# Palette function running from blue ("cool") to red ("hot").
#
# Args:
#   n:     number of colours to generate.
#   alpha: alpha value forwarded unchanged to rainbow().
# Returns:
#   The colours of rainbow(n, end = 4/6, alpha = alpha) in reverse order;
#   an empty character vector when n is 0.
coolBlueHotRed <- function(n, alpha = 1) {
  # rev() instead of indexing with n:1: for n == 0 the sequence 0:1 would
  # index an empty vector and produce NA rather than an empty palette.
  rev(rainbow(n, end = 4 / 6, alpha = alpha))
}
# Draw one "property" heatmap per input variable: column i of the codebook is
# plotted on the map, titled with the matching column name of the training
# data and coloured with the blue-to-red palette.
# seq_len() keeps the loop from running over c(1, 0) if there are no columns.
for (i in seq_len(ncol(sm$data))) {
  plot(
    sm,
    type = "property",
    property = sm$codes[, i],
    main = colnames(sm$data)[i],
    palette.name = coolBlueHotRed
  )
}

ggplot2 패키지를 이용하여 SPSS Modeler와 유사한 Grid 도식

In [23]:
# Attach to every observation the grid coordinates (x, y) of the unit it was
# assigned to (sm$unit.classif indexes the rows of sm$grid$pts).
cdata[["clusterX"]] <- sm$grid$pts[sm$unit.classif, "x"]
cdata[["clusterY"]] <- sm$grid$pts[sm$unit.classif, "y"]

# Scatter the observations over the grid; the jitter (0.2 in each direction)
# spreads out points that share the same unit so they do not overplot.
p <- ggplot(data = cdata, mapping = aes(x = clusterX, y = clusterY))
p + geom_jitter(position = position_jitter(width = 0.2, height = 0.2))
In [24]:
# Group the SOM units into k clusters by running k-means on the codebook
# matrix, then draw the map with every unit coloured by its cluster.
k <- 6
somClusters <- kmeans(sm$codes, centers = k)

# Empty canvas spanning the grid; the cells are drawn onto it below.
plot(
  0, 0,
  type = "n", axes = FALSE,
  xlim = c(0, sm$grid$xdim), ylim = c(0, sm$grid$ydim),
  xlab = "", ylab = "", asp = 1
)
# k colours derived from the "Set1" brewer palette, in reversed order.
ColRamp <- rev(designer.colors(n = k, col = brewer.pal(k, "Set1")))
# One colour per unit, looked up by the unit's cluster id.
ColorCode <- ColRamp[somClusters$cluster]
# Horizontal shift of the current row; it alternates between 0.5 and 0 so
# that consecutive rows of hexagons interlock.
offset <- 0.5
# NOTE(review): Hexagon() is not defined in the visible part of this notebook;
# presumably it is a drawing helper defined elsewhere -- confirm.
for (row in seq_len(sm$grid$ydim)) {
  for (column in seq_len(sm$grid$xdim) - 1) {
    # Units are numbered with x varying fastest (sm$grid$pts lists
    # x = 1..xdim for y = 1 first), so the unit at grid position
    # (x = column + 1, y = row) has index x + xdim * (y - 1).
    Hexagon(
      column + offset, row - 1,
      col = ColorCode[(column + 1) + sm$grid$xdim * (row - 1)]
    )
  }
  offset <- if (offset > 0) 0 else 0.5
}