library(mvtnorm)
library(MASS)
library(class)
library(caret)   # for confusionMatrix()

M = 3
set.seed(123)
mu = matrix(rnorm(2*M), 2, M)
plot(t(mu), pch = 20)

# Homoscedastic case
Sigma = list()
Sigma[[1]] = matrix(c(1, .4, .4, .5)/2, 2, 2)
Sigma[[2]] = Sigma[[1]]
Sigma[[3]] = Sigma[[1]]
pi = c(1/4, 1/2, 1/4)
cpi = c(0, cumsum(pi))
ts = seq(-4, 4, length.out = 50)
tt = cbind(c(matrix(ts, 50, 50)), c(t(matrix(ts, 50, 50))))

n = 1000
u = runif(n)
X = NULL
Y = NULL
p = list()
for (m in 1:M){
  # assign each uniform draw to component m with probability pi[m]
  w = which(u > cpi[m] & u <= cpi[m+1])
  X = rbind(X, rmvnorm(length(w), mean = mu[,m], sigma = Sigma[[m]]))
  Y = c(Y, rep(m, length(w)))
  p[[m]] = dmvnorm(tt, mean = mu[,m], sigma = Sigma[[m]], log = FALSE)
}
plot(X, pch = 20, col = Y)
for (m in 1:M){
  contour(ts, ts, matrix(p[[m]], 50, 50), add = TRUE, nlevels = 6, col = m)
}

mod = lda(Y ~ X)
points(mod$means, col = c(1,2,3), pch = 3, cex = 2)
abline(h = mod$means[1,2], v = mod$means[1,1], col = 1, lty = 2)
abline(h = mod$means[2,2], v = mod$means[2,1], col = 2, lty = 2)
abline(h = mod$means[3,2], v = mod$means[3,1], col = 3, lty = 2)

dev.new()
projected.data <- X %*% mod$scaling
plot(projected.data, col = Y, pch = 20)

library(klaR)   # classification and visualization package
drawparti(as.factor(Y), X[,1], X[,2], method = "lda",
          image.colors = c("lightgrey", "red", "green"))

# Heteroscedastic case
set.seed(123)
Sigma = list()
Sigma[[1]] = matrix(c(1, .4, .4, .5)/2, 2, 2)
Sigma[[2]] = matrix(c(.3, -.2, -.2, 1)/2, 2, 2)
Sigma[[3]] = matrix(c(1, 0, 0, 1)/2, 2, 2)
pi = c(1/4, 1/2, 1/4)
cpi = c(0, cumsum(pi))
ts = seq(-4, 4, length.out = 50)
tt = cbind(c(matrix(ts, 50, 50)), c(t(matrix(ts, 50, 50))))

n = 200
u = runif(n)
X = NULL
Y = NULL
p = list()
for (m in 1:M){
  w = which(u > cpi[m] & u <= cpi[m+1])
  X = rbind(X, rmvnorm(length(w), mean = mu[,m], sigma = Sigma[[m]]))
  Y = c(Y, rep(m, length(w)))
  p[[m]] = dmvnorm(tt, mean = mu[,m], sigma = Sigma[[m]], log = FALSE)
}
plot(X, pch = 20, col = Y)
for (m in 1:M){
  contour(ts, ts, matrix(p[[m]], 50, 50), add = TRUE, nlevels = 6, col = m)
}

dev.new()
data.train = data.frame(Y = Y, X1 = X[,1], X2 = X[,2])
data.train$Y = as.factor(Y)
lda.fit = lda(Y ~ ., data = data.train)
drawparti(as.factor(Y), X[,1], X[,2], method = "lda",
          image.colors = c("lightgrey", "red", "green"))
tlda = table(Y, predict(lda.fit, data.frame(X))$class)
sum(tlda - diag(diag(tlda)))   # number of training misclassifications
confusionMatrix(predict(lda.fit, data.frame(X))$class, as.factor(Y))

dev.new()
qda.fit = qda(Y ~ ., data = data.train)
drawparti(as.factor(Y), X[,1], X[,2], method = "qda",
          image.colors = c("lightgrey", "red", "green"))
tqda = table(Y, predict(qda.fit, data.frame(X))$class)
sum(tqda - diag(diag(tqda)))
confusionMatrix(predict(qda.fit, data.frame(X))$class, as.factor(Y))

# Validation on a test sample
n.test = 2000
u = runif(n.test)
X.test = NULL
Y.test = NULL
for (m in 1:M){
  w = which(u > cpi[m] & u <= cpi[m+1])
  X.test = rbind(X.test, rmvnorm(length(w), mean = mu[,m], sigma = Sigma[[m]]))
  Y.test = c(Y.test, rep(m, length(w)))
}
data.test = data.frame(Y = Y.test, X1 = X.test[,1], X2 = X.test[,2])
data.test$Y = as.factor(Y.test)
(cM.qda = confusionMatrix(predict(qda.fit, data.test)$class, as.factor(Y.test)))
(cM.lda = confusionMatrix(predict(lda.fit, data.test)$class, as.factor(Y.test)))

Acc = NULL
for (k in 1:20){
  Knn = knn(X, X.test, as.factor(Y), k, prob = TRUE, use.all = TRUE)
  Acc = c(Acc, confusionMatrix(Knn, as.factor(Y.test))$overall[1])
}
plot(1:20, Acc, pch = 20, ylim = range(Acc, cM.qda$overall[1]),
     xlab = "k", ylab = "Accuracy", col = "blue")
lines(1:20, Acc, lty = 3, col = "blue")
text(2, Acc[2], labels = "knn", pos = 4, col = "blue")
grid()
abline(h = cM.qda$overall[1], lty = 2)
text(x = 2, y = cM.qda$overall[1], labels = "qda", pos = 1)
abline(h = cM.lda$overall[1], col = "gray")
text(2, cM.lda$overall[1], labels = "lda", col = "gray", pos = 1)
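# Optional sketch, not part of the original script: rather than judging k on
# the test sample above, k could be tuned by leave-one-out cross-validation
# on the training set with class::knn.cv. Uses X and Y as defined above;
# Acc.cv and k.best are names introduced here.
Acc.cv = sapply(1:20, function(k) mean(knn.cv(X, as.factor(Y), k = k) == as.factor(Y)))
(k.best = which.max(Acc.cv))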
### Nonparametric estimation
hist(X[,1], prob = TRUE, main = "Density", xlab = " ")
lines(density(X[,1]), col = "red")
hist(X[,1], prob = TRUE, main = "Density", xlab = " ")
lines(density(X[,1], bw = .3), col = "red")
hist(X[,1], prob = TRUE, main = "Density: too much detail", xlab = " ")
lines(density(X[,1], bw = .08), col = "red")
hist(X[,1], prob = TRUE, main = "Density: too smooth", xlab = " ")
lines(density(X[,1], bw = 1), col = "red")

phat = kde2d(X[,1], X[,2])
plot(X, pch = 20, col = Y)
contour(phat$x, phat$y, phat$z, add = TRUE)

# Bayes rule
plot(X, pch = 20, col = Y)
for (m in 1:M){
  phat = kde2d(X[Y==m,1], X[Y==m,2])
  contour(phat$x, phat$y, phat$z, add = TRUE, col = m)
}
plot(X, pch = 20, col = Y)
for (m in 1:M){
  phat = kde2d(X[Y==m,1], X[Y==m,2], h = 1.5)
  contour(phat$x, phat$y, phat$z, add = TRUE, col = m)
}
plot(X, pch = 20, col = Y)
for (m in 1:M){
  phat = kde2d(X[Y==m,1], X[Y==m,2], h = 1.5)
  contour(phat$x, phat$y, phat$z, add = TRUE, col = m)
  contour(ts, ts, matrix(p[[m]], 50, 50), add = TRUE, nlevels = 6, col = m, lty = 3)
}

# Bayes rule: class-conditional kernel density estimates at the test points
h = c(1.5, 1.5)
z = list()
pi.est = NULL
for (m in 1:M){
  ax <- outer(X.test[,1], X[Y==m,1], "-")/h[1L]
  ay <- outer(X.test[,2], X[Y==m,2], "-")/h[2L]
  nx = sum(Y==m)
  # z[[m]][i,j] is the product-kernel estimate at (X.test[i,1], X.test[j,2]),
  # so its diagonal gives the density at the test points themselves
  z[[m]] <- tcrossprod(matrix(dnorm(ax), , nx),
                       matrix(dnorm(ay), , nx)) / (nx * h[1L] * h[2L])
  pi.est[m] = nx/n
}
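# A possible completion, not in the original script: plug-in Bayes
# classification from the quantities computed above. Assumes z, pi.est and
# Y.test as built in the loop; diag(z[[m]]) extracts the estimated class-m
# density at each test point (X.test[i,1], X.test[i,2]). The names post and
# Y.hat are introduced here.
post = sapply(1:M, function(m) pi.est[m] * diag(z[[m]]))
Y.hat = apply(post, 1, which.max)
mean(Y.hat == Y.test)   # test accuracy of the kernel-based Bayes rule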