# algorithme EM pour un melange de lois de Gauss EM_gauss <- function(x,M,theta,MaxIter=100,epsilon=1e-3,aff=0,pl=0){ # Entrees # x : donnees (matrice nxd) # M : nombre de lois # theta : valeur initiale du parametre # de la forme theta$pi : probabilites a priori (matrice 1xM) # theta$mu : moyennes (matrice dxM) # theta$sigma : variances (matrice (d*d)xM) # MaxIter : nombre maximum d'itÈration de l'algorithme EM # epsilon : tolerance pour l'arret de l'EM # aff : affiche la log-vraisemblance si aff=1 # pl : trace la densitÈ a posteriori des M groupes (rouge=0,bleu=1) si pl==1 # Sorties # theta : parametre # loglik : log vraisemblance (?) # nbiter : nombre d'itÈrations d <- dim(x)[2] if (d==1 | d>2) {pl = 0} col <- rainbow(M) t1 <- seq(min(x[,1]),max(x[,1]), length=100) t2 <- seq(min(x[,2]),max(x[,2]), length=100) d1 <- expand.grid(x=t1,y=t2) p <- matrix(0,M,1) mu <- matrix(0,dim(x)[2],M) sigma <- matrix(0,dim(x)[2]^2,M) for (m in 1:M){ p[m] <- theta$pi[m] mu[,m] <- theta$mu[,m] sigma[,m] <- theta$sigma[,m] } if (abs(sum(p)-1)>1e-6) {stop("sum of priors should be equal to 1")} n <- dim(x)[1] kiter <- 1 err <- 2*epsilon loglik <- NULL pY <- matrix(0,n,M) while ((kiterepsilon)){ denom <- matrix(0,n,1) for (k in 1:M){ denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } loglik[kiter] <- sum(log(denom)) for (k in 1:M){ pY[,k] <- p[k]*dmvnorm(x, mu[,k], matrix(sigma[,k],d,d))/denom E <- sum(pY[,k]) p[k] <- E/n mu[,k] <- pY[,k]%*%as.matrix(x)/E xck <-as.matrix(x) for (jj in 1:d) {xck[,jj] <- xck[,jj] - mu[jj,k]} sigma[,k]<- (t(matrix(pY[,k],n,d)*xck)%*%xck)/E + matrix(c(1e-5,0,0,1e-5),d,d) } if (pl){ # graphique pour les 2 premiËres dimensions par(mfrow=c(1,1)) cl <- apply(pY,1,which.max) plot(x[,1],x[,2],pch = 21,bg = col[cl]) points(mu[1,],mu[2,],col="black",pch = 19 ) pdf <- matrix(0,dim(d1)[1],1) for (ktmp in 1:M){ pdf <- pdf + p[ktmp]*dmvnorm(d1, mean = mu[,ktmp], sigma = matrix(sigma[,ktmp],d,d)) } contour(t1,t2,matrix(pdf,length(t1),length(t2)),col="black",add=TRUE) title("EM's group") scan() } if (pl) { Sys.sleep(1)} err <- sum((theta$pi-p)^2)+sum((theta$mu-mu)^2)+sum((theta$sigma-sigma)^2) theta$pi <- p theta$mu <- mu theta$sigma <- sigma if (aff) {(c(sum(log(denom)),err))} kiter<-kiter+1 } denom <- matrix(0,n,1) for (k in 1:M){ denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } em_gauss <- NULL em_gauss$theta <- theta em_gauss$nbiter <- kiter-1 em_gauss$err <- err em_gauss$loglik <- loglik return(em_gauss) } proba_pos <- function(x,M,theta){ n <- dim(x)[1] d <- dim(x)[2] p <- matrix(0,M,1) mu <- matrix(0,d,M) sigma <- matrix(0,d^2,M) for (m in 1:M){ p[m] <- theta$pi[m] mu[,m] <- theta$mu[,m] sigma[,m] <- theta$sigma[,m] } if (abs(sum(p)-1)>1e-6) {stop("sum of a priori probabilities should be equal to 1")} denom <- matrix(0,n,1) for (k in 1:M){ denom <- denom + p[k]*dmvnorm(x, mean = mu[,k], sigma = matrix(sigma[,k],d,d)) } pY <- matrix(0,n,M) for (k in 1:M){ pY[,k] <- p[k]*dmvnorm(x, mu[,k], matrix(sigma[,k],d,d))/denom } return(pY) }