set.seed(111) n = 20 x = matrix(rnorm(2*n),n,2) S1 = matrix(c(1,.2,.2,1),2,2) tmp = eigen(S1) S1s = tmp$vectors%*% sqrt(diag(tmp$values))%*%t(tmp$vectors) S2 = matrix(c(1,-.5,-.5,1),2,2) tmp = eigen(S2) S2s = tmp$vectors%*% sqrt(diag(tmp$values))%*%t(tmp$vectors) x[1:(n/2),] = -1 +x[1:(n/2),]%*%S1s x[(n/2+1):n,] = 1 + x[(n/2+1):n,]%*%S2s plot(x,col="white",axes=FALSE,xlab=" ",ylab=" ") points(x[1:(n/2),1],x[1:(n/2),2],pch=19,col="black") points(x[(n/2+1):n,1],x[(n/2+1):n,2],pch=17,col="red") box() # Single linkage lines(c(x[8,1],x[16,1]),c(x[8,2],x[16,2]),lwd = 2) text((x[8,1]+x[16,1])/2,(x[8,2]+x[16,2])/2,pos =2 ,labels="single linkage") # Complete linkage lines(c(x[7,1],x[13,1]),c(x[7,2],x[13,2]),lwd = 2,col="gray") text((x[7,1]+x[13,1])/2,(x[7,2]+x[13,2])/2,pos =4 ,labels="complete linkage",col="gray") m1 = apply(x[1:(n/2),],2,mean) m2 = apply(x[(n/2+1):n,],2,mean) points(m1[1],m1[2],col="black",pch=3) points(m2[1],m2[2],col="red",pch=3) lines(c(m1[1],m2[1]),c(m1[2],m2[2]),lwd = 2,col="blue") text((m1[1]+m2[1])/2,(m1[2]+m2[2])/2,pos =2 ,labels="average",col="blue") dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Figures/linkages.eps",width=6,height=6) # A voir : projection sur le premier plan factoriel (FactoMineR) library(FactoMineR) ACP = PCA(USArrests) hc <- hclust(dist(USArrests), "ave") plot(hc) barplot(hc$height) #install.packages("dendextend") library(dendextend) # Ce package crée un nouveau type d'objet : le dendrogramme. # On doit préciser que h1 est un dendrogramme : h1 = as.dendrogram(hc) h1 = color_branches(h1, k=3) #on considère qu'il y a 3 classes plot(h1, main = "Classification", horiz = F) par(mfrow=c(1,2)) memb <- cutree(hc, k = 3) plot(ACP$ind$coord[,1],ACP$ind$coord[,2],col="white",axes=FALSE,xlabel=" ",ylabel=" ") m=matrix(0,3,dim(USArrests)[2]) for (k in 1:4) { w = which(memb==k) text(ACP$ind$coord[w,1],ACP$ind$coord[w,2],labels=attributes(USArrests)$row.names[w],col=k) m[k,] = apply(USArrests[w,],2,mean) } title("Average (PCA)") D = dist(USArrests) mds2 <- -cmdscale(D) plot(mds2, type="n", axes=FALSE, ann=FALSE) text(mds2, labels=rownames(mds2), xpd = NA,col=memb) title("Average (MDS)") hc <- hclust(dist(USArrests), "sing") plot(hc) method = "mcquitty" # "median","centroid" hc <- hclust(dist(USArrests), "mcquitty") plot(hc) par(mfrow=c(1,2)) memb <- cutree(hc, k = 4) plot(ACP$ind$coord[,1],ACP$ind$coord[,2],col="white",axes=FALSE,xlabel=" ",ylabel=" ") m=matrix(0,3,dim(USArrests)[2]) for (k in 1:4) { w = which(memb==k) text(ACP$ind$coord[w,1],ACP$ind$coord[w,2],labels=attributes(USArrests)$row.names[w],col=k) m[k,] = apply(USArrests[w,],2,mean) } title(paste(method,"(PCA)")) D = dist(USArrests) mds2 <- -cmdscale(D) plot(mds2, type="n", axes=FALSE, ann=FALSE) text(mds2, labels=rownames(mds2), xpd = NA,col=memb) title(paste(method,"(MDS)")) km = kmeans(USArrests, centers = m) s = sample(1:dim(USArrests)[1],3) km = kmeans(USArrests, centers = USArrests[s,]) dev.new() plot(ACP$ind$coord[,1],ACP$ind$coord[,2],col="white") m=matrix(0,3,dim(USArrests)[2]) for (k in 1:3) { w = which(km$cluster==k) text(ACP$ind$coord[w,1],ACP$ind$coord[w,2],labels=attributes(USArrests)$row.names[w],col=k) m[k,] = apply(USArrests[w,],2,mean) } hc <- hclust(dist(USArrests), "ward") res = PCA(USArrests) hcpc.ward = HCPC(res,nb.clust=4) hc <- hclust(dist(USArrests)^2, "cen") memb <- cutree(hc, k = 4) plot(ACP$ind$coord[,1],ACP$ind$coord[,2],col="white") for (k in 1:4) { w = which(memb==k) text(ACP$ind$coord[w,1],ACP$ind$coord[w,2],labels=attributes(USArrests)$row.names[w],col=k) } library(maps) Quakes = read.csv("~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/DATASETS/quake.dat",skip=7,header=FALSE,col.names=c("Depth","Latitude","Longitude","Richter")) cutree(hc, k = 5) for (cl in 1:5){} km = kmeans(x, centers = ) hc <- hclust(dist(scale(Quakes)), "ward.D") plot(hc) memb <- cutree(hc, k = 6) dev.new() map() points(Quakes$Longitude,Quakes$Latitude,pch=20,col=memb,cex=.7) center = matrix(0,length(unique(memb)),4) for (k in 1:length(unique(memb))){ k.cl = which(memb==k) center[k,] = apply(Quakes[k.cl,],2,mean) } km = kmeans(Quakes,center) dev.new() map() points(Quakes$Longitude,Quakes$Latitude,pch=20,col=km$cluster,cex=.7) library(mclust) BIC = mclustBIC(Quakes[,c(1,4)]) plot(BIC) mod1 = Mclust(Quakes[,c(1,4)], x = BIC) summary(mod1, parameters = TRUE) install.packages("maps") library(maps) dev.new() map() points(Quakes$Longitude,Quakes$Latitude,pch=20,col=mod1$classification,cex=.7) library(mclust) BIC = mclustBIC(scale(Quakes)) plot(BIC) mod1 = Mclust(scale(Quakes), x = BIC) summary(mod1, parameters = TRUE) install.packages("maps") library(maps) dev.new() #map() plot(Quakes$Longitude,Quakes$Latitude,pch=20,col=mod1$classification,cex=.7) data(diabetes) class = diabetes$class table(class) X = diabetes[,-1] head(X) clPairs(X, class) km = kmeans(X,3) clPairs(X, km$cluster) table(class,km$cluster) BIC = mclustBIC(X) plot(BIC) summary(BIC) mod1 = Mclust(X, x = BIC) summary(mod1, parameters = TRUE) dev.new() clPairs(X, class) dev.new() plot(mod1, what = "classification") table(class,mod1$classification)