#library(mclust) library(MASS) library(mvtnorm) # algorithme EM pour un melange de lois de Gauss EM_gauss<-function(x,M,theta,MaxIter=100,epsilon=1e-3,aff=0,pl=0) { # Entrees # x : donnees (matrice nxd) # M : nombre de lois # theta : valeur initiale du parametre # de la forme theta$pi : probabilites a priori (matrice 1xM) # theta$mu : moyennes (matrice dxM) # theta$sigma : variances (matrice (d*d)xM) # MaxIter : nombre maximum d'itération de l'algorithme EM # epsilon : tolerance pour l'arret de l'EM # aff : affiche la log-vraisemblance si aff=1 # pl : trace la densité a posteriori des M groupes (rouge=0,bleu=1) si pl==1 # Sorties # theta : parametre # loglik : log vraisemblance (?) # nbiter : nombre d'itérations d <- dim(x)[2] if (d==1) {pl = 0} L <- 128 col <- rainbow(L) col <- col[length(col) :1] p <- matrix(0,M,1) mu <- matrix(0,dim(x)[2],M) sigma <- matrix(0,dim(x)[2]^2,M) for (m in 1:M) { p[m] <- theta$pi[m] mu[,m] <- theta$mu[,m] sigma[,m] <- theta$sigma[,m] } if (abs(sum(p)-1)>1e-6) {stop("sum of a priori probabilities should be equal to 1")} n <- dim(x)[1] kiter <- 1 err <- 2*epsilon loglik <- NULL pY <- matrix(0,n,M) while ((kiterepsilon)) { denom <- matrix(0,n,1) for (k in 1:M) { denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } loglik[kiter] <- sum(log(denom)) if (pl) { par(mfrow = c(1, M)) } for (k in 1:M) # Calcul des paramètres { pY[,k] <- p[k]*dmvnorm(x, mu[,k], matrix(sigma[,k],d,d))/denom E <- sum(pY[,k]) p[k] <- E/n mu[,k] <- pY[,k]%*%as.matrix(x)/E xck <-as.matrix(x) for (jj in 1:d) {xck[,jj] <- xck[,jj] - mu[jj,k]} sigma[,k]<- (t(matrix(pY[,k],n,d)*xck)%*%xck)/E + matrix(c(1e-5,0,0,1e-5),d,d) if (pl) { # graphique pour les 2 premières dimensions ind <- round( (pY[,k]-min(pY[,k]))/(max(pY[,k])-min(pY[,k]))*(L/2-1))+1 plot(x[,1],x[,2],col=col[ind],pch=20) points(mu[1,],mu[2,],col="black",pch = 19 ) title(c("EM classe",k)) } } if (pl) { Sys.sleep(1)} err <- sum((theta$pi-p)^2)+sum((theta$mu-mu)^2)+sum((theta$sigma-sigma)^2) theta$pi <- p theta$mu <- mu theta$sigma <- sigma if (aff) {(c(sum(log(denom)),err))} kiter<-kiter+1 } denom <- matrix(0,n,1) for (k in 1:M) { denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } em_gauss <- NULL em_gauss$theta <- theta em_gauss$nbiter <- kiter-1 em_gauss$err <- err em_gauss$loglik <- loglik return(em_gauss) } proba_pos <- function(x,M,theta){ n <- dim(x)[1] d <- dim(x)[2] p <- matrix(0,M,1) mu <- matrix(0,d,M) sigma <- matrix(0,d^2,M) for (m in 1:M) { p[m] <- theta$pi[m] mu[,m] <- theta$mu[,m] sigma[,m] <- theta$sigma[,m] } if (abs(sum(p)-1)>1e-6) {stop("sum of a priori probabilities should be equal to 1")} denom <- matrix(0,n,1) for (k in 1:M) { denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } pY <- matrix(0,n,M) for (k in 1:M) # Calcul des paramètres { pY[,k] <- p[k]*dmvnorm(x, mu[,k], matrix(sigma[,k],d,d))/denom } return(pY) }