#library(mclust) library(MASS) library(mvtnorm) # algorithme kmeans avec calcul de la log vraisemblance kmeans_vm<-function(x,M,theta,MaxIter=100,epsilon=1e-3,aff=0,pl=0) { # Entrees # x : donnees (matrice nxd) # M : nombre de lois # theta : valeur initiale des centres # theta$mu : moyennes (matrice dxM) # MaxIter : nombre maximum d'itÈration de l'algorithme EM # epsilon : tolerance pour l'arret del'algorithme # aff : affichage de l'erreur ‡ chaque itÈration # pl : graphiques # Sorties # theta : parametre # loglik : log vraisemblance (?) # nbiter : nombre d'itÈrations t1 <- seq(min(x[,1]),max(x[,1]), length=100) t2 <- seq(min(x[,2]),max(x[,2]), length=100) d1 <- expand.grid(x=t1,y=t2) d <- dim(x)[2] if (d==1) {pl = 0} if (pl) {par(mfrow= c(1,1))} col <- rainbow(M) p <- matrix(0,M,1) mu <- matrix(0,d,M) sigma <- matrix(0,d*d,M) for (m in 1:M) { mu[,m] <- theta$mu[,m] } n <- dim(x)[1] kiter <- 1 err <- 2*epsilon loglik <- NULL while ((kiterepsilon)) { distance <- matrix(0,n,M) for (k in 1:M) # Calcul des paramËtres { for (jj in 1:d) { # indic <- (x[,jj]==NaN) distance[,k] <- distance[,k]+(x[,jj] - mu[jj,k])^2} } cl <- apply(distance,1,which.min) for (k in 1:M) # Calcul des paramËtres { if (sum(cl==k)>0) { mu[,k] <- apply(x[cl==k,],2, mean) sigma[,k]<- cov(x[cl==k,]) p[k] <- sum(cl==k)/n } } # if (pl) { par(mfrow = c(1, M)) } if (pl) { # graphique pour les 2 premiËres dimensions plot(x[,1],x[,2],pch=21,bg=col[cl]) points(mu[1,],mu[2,],col="black",pch = 19 ) pdf <- matrix(0,dim(d1)[1],1) for (ktmp in 1:M) { pdf <- pdf + p[ktmp]*dmvnorm(d1, mean = mu[,ktmp], sigma = matrix(sigma[,ktmp],d,d)) } #contour(t1,t2,matrix(pdf,length(t1),length(t2)),col="black",add=TRUE) title("kmeans's groups") scan() } err <- sum((theta$mu-mu)^2) theta$pi <- p theta$mu <- mu theta$sigma <- sigma kiter<-kiter+1 } denom <- matrix(0,n,1) for (k in 1:M) { denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } kmeans <- NULL kmeans$theta <- theta kmeans$nbiter <- kiter-1 kmeans$err <- err kmeans$loglik <- sum(log(denom)) return(kmeans) }