#-*- R -*- # generation of populations predicted from data library(MASS) library(class) library(nnet) #decrease len if you have little memory. decplot <- function(xp, yp, Z, t) { plot(Pop[, 1], Pop[, 2], type = "n", xlab = "x1", ylab = "x2") title(t) for(il in 1:2) { set <- Pop$y==levels(Pop$y)[il] text(Pop[set, 1], Pop[set, 2], labels = as.character(Pop$y[set]), col = 1 + il) } zp <- Z[, 1] - Z[, 2] contour(xp, yp, matrix(zp, np), add = T, levels = 0, labex = 0) points(pT, pch=".",col=Z[,2]+2) invisible() } # total number of datapoints N <- 500 # total number of iterations j <- 1 # read in data - with class information c1<-read.table("ex23a.dat") names(c1)<-c("x1","x2") c2<-read.table("ex23b.dat") names(c2)<-c("x1","x2") # set the cov matrix and get the eigenvectors using chol() c.cov<-rbind(c(3,1),c(0.5,2)) c.r <- chol(c.cov) # set the covariance and mean ones <- matrix( c(rep(1,N), rep(1,N)), ncol=2) # set up ploting parameters if( j > 6 ) j<- 1 if( j == 1) par(mfcol=c(1,1)) if( j == 2 ) par(mfcol=c(2,1)) if( j == 3 ) par(mfcol=c(3,1)) if( j == 4 ) par(mfcol=c(2,2)) if( j == 5 ) par(mfcol=c(2,3)) if( j == 6 ) par(mfcol=c(2,3)) # loop through process i times for(i in 1:j) { for(r in 1:2) { if(r==1){cx=c1;cl="a"} if(r==2){cx=c2;cl="b"} # generate the data: normal distributions pop1 <- data.frame(rnorm(N),rnorm(N)) names(pop1)<-c("x1","x2") pop2 <- data.frame(rnorm(N),rnorm(N)) names(pop2)<-c("x1","x2") pop3 <- data.frame(rnorm(N),rnorm(N)) names(pop3)<-c("x1","x2") pop4 <- data.frame(rnorm(N),rnorm(N)) names(pop4)<-c("x1","x2") pop5 <- data.frame(rnorm(N),rnorm(N)) names(pop5)<-c("x1","x2") pop1.r <- as.matrix(pop1) %*% c.r pop2.r <- as.matrix(pop2) %*% c.r pop3.r <- as.matrix(pop3) %*% c.r pop4.r <- as.matrix(pop4) %*% c.r pop5.r <- as.matrix(pop5) %*% c.r # shift the data pop1.r.s <- pop1.r + (ones %*% diag(c(cx[1,1],cx[1,2]))) pop2.r.s <- pop2.r + (ones %*% diag(c(cx[2,1],cx[2,2]))) pop3.r.s <- pop3.r + (ones %*% diag(c(cx[3,1],cx[3,2]))) pop4.r.s <- pop4.r + (ones %*% diag(c(cx[4,1],cx[4,2]))) pop5.r.s <- pop5.r + (ones %*% diag(c(cx[5,1],cx[5,2]))) # add in the class information pop1.r.s.c = data.frame(pop1.r.s, rep(cl,length(pop1.r.s[,1]))) names(pop1.r.s.c)<-c("x1","x2","y") pop2.r.s.c = data.frame(pop2.r.s, rep(cl,length(pop2.r.s[,1]))) names(pop2.r.s.c)<-c("x1","x2","y") pop3.r.s.c = data.frame(pop3.r.s, rep(cl,length(pop3.r.s[,1]))) names(pop3.r.s.c)<-c("x1","x2","y") pop4.r.s.c = data.frame(pop4.r.s, rep(cl,length(pop4.r.s[,1]))) names(pop4.r.s.c)<-c("x1","x2","y") pop5.r.s.c = data.frame(pop5.r.s, rep(cl,length(pop5.r.s[,1]))) names(pop5.r.s.c)<-c("x1","x2","y") PopT <- data.frame(rbind(pop1.r.s.c, pop2.r.s.c, pop3.r.s.c, pop4.r.s.c, pop5.r.s.c)) if(r==1){Pop1<-PopT} } Pop <- rbind( Pop1, PopT) # Pop[, 3] would return just the third column, the class value # Pop[,-3] returns everything but the third column p <- as.matrix(Pop[, -3]) tp <- Pop$y xp <- seq(min(Pop$x1), max(Pop$x1), length = 50); np <- length(xp) yp <- seq(min(Pop$x2), max(Pop$x2), length = 50); pT <- expand.grid(x1 = xp, x2 = yp) Z <- knn(p, pT, tp, k = 30) decplot(xp, yp, class.ind(Z),"k=30") }