BW = read.table(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/BirthWeights.dat", header=TRUE)
BW$smoke = as.factor(BW$smoke)
BW$parity = as.factor(BW$parity)
summary(BW)
# Recode the missing-value codes as NA
BW$smoke[which(BW$smoke==9)] = NA
BW$gestation[which(BW$gestation>360)] = NA
BW$age[which(BW$age==99)] = NA
BW$height[which(BW$height==99)] = NA
BW$weight[which(BW$weight==999)] = NA
pairs(BW, panel=panel.smooth, main="Birth Weights", col=3+(BW$smoke==1), pch=20)

D = read.table("~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/LifeExp.dat", header=TRUE)
pairs(D[,-1], panel=panel.smooth, main="Life expectancy", pch=20)

Ozone = read.table("~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/TECHNICOLOR/Ozone.dat", header=TRUE, row.names=1)
pairs(Ozone[,c(1,2,5,8,11)], panel=panel.smooth, main="Ozone", pch=20)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_pairs.eps", width=6, height=6)

# Loess fit of maxO3 on T9, with +/- 2 standard-error bands
L = loess(maxO3 ~ T9, data=Ozone)
PL = predict(L, data.frame(T9=seq(8, 30, 1)), se=TRUE)
plot(Ozone$T9, Ozone$maxO3, pch=20, xlab="T9", ylab="maxO3")
lines(seq(8, 30, 1), PL$fit, col="red", lwd=2)
lines(seq(8, 30, 1), PL$fit + 2*PL$se.fit, col="red", lwd=2, lty=2)
lines(seq(8, 30, 1), PL$fit - 2*PL$se.fit, col="red", lwd=2, lty=2)
#dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_loess.eps",width=6,height=6)

# Same thing with the previous day's maximum (maxO3v)
L = loess(maxO3 ~ maxO3v, data=Ozone)
PL = predict(L, data.frame(maxO3v=seq(40, 160, 1)), se=TRUE)
plot(Ozone$maxO3v, Ozone$maxO3, pch=20, xlab="maxO3v", ylab="maxO3")
lines(seq(40, 160, 1), PL$fit, col="red", lwd=2)
lines(seq(40, 160, 1), PL$fit + 2*PL$se.fit, col="red", lwd=2, lty=2)
lines(seq(40, 160, 1), PL$fit - 2*PL$se.fit, col="red", lwd=2, lty=2)

# Simple linear regression of maxO3 on T9
plot(Ozone$T9, Ozone$maxO3, pch=20, xlab="Temperature t-9h", ylab="Ozone, max. concentration")
mod = lm(maxO3 ~ T9, data=Ozone)
lines(Ozone$T9, mod$fitted.values, col="red")
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_line.eps", width=5, height=5)

# Normal QQ-plots of the standardized residuals
qqnorm((mod$residuals - mean(mod$residuals))/sd(mod$residuals),
       xlab="Theoretical quantiles", ylab="Residual quantiles", pch=20)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_line_qq.eps", width=4, height=4)
mod.comp = lm(maxO3 ~ ., data=Ozone)
qqnorm((mod.comp$residuals - mean(mod.comp$residuals))/sd(mod.comp$residuals),
       xlab="Theoretical quantiles", ylab="Residual quantiles", pch=20)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_qq.eps", width=4, height=4)

# Residuals vs fitted values
#mod = lm(maxO3~Vx9,data=Ozone)
plot(mod$fitted.values, mod$residuals, pch=20, xlab="Fitted values", ylab="Residuals")
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_line_res_fit.eps", width=4, height=4)
plot(mod.comp$fitted.values, mod.comp$residuals, pch=20, xlab="Fitted values", ylab="Residuals")
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_res_fit.eps", width=4, height=4)
mod = lm(maxO3 ~ Vx9, data=Ozone)
plot(mod$fitted.values, mod$residuals, pch=20, xlab="Fitted values", ylab="Residuals")
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_Vx9_res_fit.eps", width=4, height=4)
mod = lm(log(maxO3) ~ log(Vx9), data=Ozone)
plot(mod$fitted.values, mod$residuals, pch=20, xlab="Fitted values", ylab="Residuals")
#dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Ozone_Vx9_res_fit.eps",width=4,height=4)

# Conference lecture material
m = lm(maxO3 ~ T12, data=Ozone)
summary(m)
plot(Ozone$maxO3, m$fitted.values, pch=20, xlab="Observed y", ylab="Predicted y")
grid()
# dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_lmT12.eps",width=6,height=6)
m1 = lm(maxO3 ~ ., data=Ozone)
plot(Ozone$maxO3, m1$fitted.values, pch=20, xlab="Observed y", ylab="Predicted y")
grid()
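# Not in the original script: a quick hold-out comparison of the two fits above
# (maxO3 ~ T12 vs. the full linear model). A minimal sketch; the split
# proportion and the seed are arbitrary choices.
set.seed(1)
n.oz = nrow(Ozone)
tr = sample(1:n.oz, round(2/3*n.oz))
te = setdiff(1:n.oz, tr)
m.T12 = lm(maxO3 ~ T12, data=Ozone, subset=tr)
m.full = lm(maxO3 ~ ., data=Ozone, subset=tr)
rmse = function(obs, pred) sqrt(mean((obs - pred)^2))
c(T12 = rmse(Ozone$maxO3[te], predict(m.T12, Ozone[te,])),
  full = rmse(Ozone$maxO3[te], predict(m.full, Ozone[te,])))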
# Best subset selection (Mallows' Cp) on repeated train/test splits
library(leaps)
n = dim(Ozone)[1]
B = 50
best.subsets = vector("list", B)
for (b in 1:B){
  train = sample(1:n, round(n*2/3))
  X = Ozone[train, -c(1,12,13)]   # numeric predictors only
  y = Ozone[train, 1]             # response: maxO3
  res = leaps(X, y, method="Cp")
  best.subsets[[b]] = colnames(X)[res$which[which.min(res$Cp), ]]
}

# Ridge regression with lm.ridge (MASS): coefficient paths and GCV
library(MASS)
mod.ridge = lm.ridge(maxO3 ~ ., data=Ozone, lambda=seq(0, 1000, 1))
w = which(abs(mod.ridge$coef[,100]) > 2.5)   # highlight the largest coefficients
matplot(log(mod.ridge$lambda), t(mod.ridge$coef[w,]), xlab="log(lambda)", ylab="coefficients",
        typ="l", lty=1, col=1:5)
matplot(log(mod.ridge$lambda), t(mod.ridge$coef[-c(w),]), typ="l", col="gray", lty=1, add=TRUE)
legend("topright", rownames(mod.ridge$coef[w,]), col=1:5, lty=1)
abline(v=log(mod.ridge$lambda[which.min(mod.ridge$GCV)]))
# dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ridge_ozone_coef.eps",width=6,height=6)

# Rough Mallows' Cp along (part of) the ridge path, reusing the lm object for prediction
Cp.ridge = NULL
mod.tmp = mod.comp
x.new = cbind(matrix(1, length(Ozone[,1]), 1), Ozone[,-1])
for (k in 1:101){
  beta = coef(mod.ridge)[k,]
  mod.tmp$coefficients = beta
  Cp.ridge[k] = sum((Ozone$maxO3 - predict(mod.tmp, x.new))^2) + 2*length(beta)*var(mod.comp$residuals)
}

# Ridge "by hand" on the standardized numeric predictors
x = Ozone[,-c(1)]
y = Ozone[,1]
n = length(y)
p = dim(x)[2]
Z = matrix(NA, n, p-2)
for (j in 1:(p-2)) {Z[,j] = as.numeric(x[,j])}
colnames(Z) = colnames(x)[1:(p-2)]
Z = scale(Z)
pZ = ncol(Z)
y = y - mean(y)
lambda = seq(1, 60, 5)

# CV over random train/test splits
B = 50
napp = round(n*2/3)
err = matrix(0, B, length(lambda))
df = NULL
R = solve(t(Z)%*%Z)
d = svd(Z)
for (b in 1:B){
  train = sample(1:n, napp)
  test = setdiff(1:n, train)
  Zv = as.matrix(Z[train,])
  for (k in 1:length(lambda)){
    beta = solve(t(Zv)%*%Zv + lambda[k]*diag(1, pZ)) %*% (t(Zv) %*% matrix(y[train], length(train), 1))
    # effective degrees of freedom
    df[k] = sum(d$d^2/(d$d^2 + lambda[k]))
    yhat.test = Z[test,] %*% beta
    err[b,k] = mean((y[test] - yhat.test)^2)
  }
}
plot(df[1:length(lambda)], apply(sqrt(err), 2, mean), pch=20, xlab="Nb degrees of freedom", ylab="RMSE")
grid()
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_Ridge_RMSE.eps",width=6,height=6)

# Full-data coefficient paths (direct formula, and shrinkage of the OLS coefficients)
b <- bp <- matrix(NA, p-2, length(lambda))
yhat = matrix(0, n, length(lambda))
df <- err <- NULL
R = solve(t(Z)%*%Z)
d = svd(Z)
for (k in 1:length(lambda)){
  b[,k] = solve(t(Z)%*%Z + lambda[k]*diag(1, p-2)) %*% (t(Z) %*% matrix(y, n, 1))
  bp[,k] = solve(diag(1, p-2) + lambda[k]*R) %*% matrix(mod.comp$coefficients[2:(p-1)], p-2, 1)
  # effective degrees of freedom
  df[k] = sum(d$d^2/(d$d^2 + lambda[k]))
  yhat[,k] = Z %*% b[,k]
  err[k] = mean((y - yhat[,k])^2)
}
matplot(lambda, t(b), typ="l")
#dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Figures/Ridge_ozone_coef.eps",width=6,height=6)
matplot(lambda, t(bp), typ="l")

# Cross-validation (8 folds) of ridge, evaluated on the lambda sequence of the
# lasso fit below (mod.lars): run the lasso section first
Z = as.matrix(Ozone[,2:11])
pZ = ncol(Z)
n = nrow(Z)
n.f = n/8
lambda = mod.lars$lambda
err = matrix(0, 8, length(lambda))
b = matrix(NA, pZ, length(lambda))
yhat = matrix(NA, n, length(lambda))
df = NULL
ii = sample(1:n, n)
for (v in 1:8) {
  ii.test = ii[(n.f*(v-1)) + (1:n.f)]
  ii.app = setdiff(1:n, ii.test)
  Zv = as.matrix(Z[ii.app,])
  for (k in 1:length(lambda)){
    b[,k] = solve(t(Zv)%*%Zv + lambda[k]*diag(1, pZ)) %*% (t(Zv) %*% matrix(y[ii.app], length(ii.app), 1))
    # effective degrees of freedom
    df[k] = sum(d$d^2/(d$d^2 + lambda[k]))
    yhat[ii.test,k] = Z[ii.test,] %*% b[,k]
    err[v,k] = mean((y[ii.test] - yhat[ii.test,k])^2)
  }
}
plot(df, sqrt(apply(err, 2, mean)), typ="l", ylab="CV RMSE (8 folds)", xlab="df")
#dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Figures/Ridge_CV.eps",width=6,height=6)
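# Not in the original script: the same ridge path and a cross-validated lambda
# can also be obtained with the glmnet package (alpha = 0 for ridge, alpha = 1
# for the lasso fitted with lars below). A minimal sketch, assuming glmnet is
# installed; cv.glmnet chooses its own lambda grid and uses 10 folds by default.
# install.packages("glmnet")
library(glmnet)
x.gl = as.matrix(Ozone[, 2:11])
y.gl = Ozone$maxO3
cv.ridge = cv.glmnet(x.gl, y.gl, alpha=0)
plot(cv.ridge)                   # CV error against log(lambda)
coef(cv.ridge, s="lambda.min")   # coefficients at the CV optimum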
# Lasso (LARS)
library(lars)
mod.lars = lars(as.matrix(Ozone[,-c(1,12,13)]), Ozone[,1], type="lasso")
matplot(c(mod.lars$lambda, 0), mod.lars$beta, typ="l", xlab="lambda", ylab="coefficients", lty=1)
text(100, 2, col="red", labels="T15")
text(40, -2, col="blue", labels="Ne9")
text(25, 1, col="black", labels="Vx9")
# dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Figures/lasso_ozone_coef.eps",width=6,height=6)
plot(c(mod.lars$lambda, 0), mod.lars$Cp, pch=20, xlab="lambda", ylab="Mallows' Cp")
#dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/TECHNICOLOR/Figures/lasso_ozone_Cp.eps",width=6,height=6)

# Lasso: test error over repeated train/test splits, one value per distinct df
B = 50
napp = round(n*2/3)
err.lars = matrix(NA, B, 10)
R = solve(t(Z)%*%Z)
d = svd(Z)
for (b in 1:B){
  train = sample(1:n, napp)
  test = setdiff(1:n, train)
  mod.f.lars = lars(Z[train,], y[train], type="lasso", intercept=FALSE)
  df.lars = mod.f.lars$df
  coef1 = mod.f.lars$beta
  j = 1
  df.ref = 0
  cnt = 0
  while (j < length(df.lars)){
    if (df.lars[j] > df.ref) {
      cnt = cnt + 1
      yhat.lars = Z[test,] %*% coef1[j,]
      err.lars[b,cnt] = mean((y[test] - yhat.lars)^2)
      df.ref = df.lars[j]
    } else {
      j = j + 1
    }
  }
}

# Baseline: ordinary least squares on the same kind of splits
B = 50
napp = round(n*2/3)
Zdf = data.frame(cbind(Z, y))
colnames(Zdf) = c(colnames(x)[1:(p-2)], "y")
err.lm = matrix(0, B, 1)
for (b in 1:B){
  train = sample(1:n, napp)
  test = setdiff(1:n, train)
  mod = lm(y ~ ., data=Zdf, subset=train)
  yhat = predict(mod, Zdf)[test]
  err.lm[b] = mean((y[test] - yhat)^2)
}

# Lasso vs ridge vs OLS: RMSE against the number of degrees of freedom
plot(1:10, apply(sqrt(err.lars), 2, mean, na.rm=TRUE), pch=20,
     xlab="Nb degrees of freedom", ylab="RMSE",
     ylim=range(apply(sqrt(err), 2, mean), apply(sqrt(err.lars), 2, mean, na.rm=TRUE)),
     xlim=c(1, 9.7))
lines(1:10, apply(sqrt(err.lars), 2, mean, na.rm=TRUE))
points(df[1:12], apply(sqrt(err), 2, mean), pch=18, col="red")
lines(df[1:12], apply(sqrt(err), 2, mean), col="red")
abline(h=mean(sqrt(err.lm)), col="gray", lwd=2)
grid()
legend("topright", c("Lasso","Ridge","LM"), col=c("black","red","gray"), lty=1, pch=c(20,18,NA))
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_Ridge_Lasso_RMSE.eps",width=6,height=6)

boxplot(sqrt(err.lars), at=1:10, boxwex=.2, col="blue", axes=FALSE)
boxplot(sqrt(err), at=rev(df[1:12]), col="red", add=TRUE, boxwex=.2, axes=FALSE,
        xlab="Nb degrees of freedom", ylab="RMSE")
axis(2)
box()
axis(1)
abline(h=mean(sqrt(err.lm)), col="gray", lwd=2)
grid()
legend("topright", c("Lasso","Ridge","LM"), col=c("blue","red","gray"), lty=1, lwd=c(1,1,2))
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_Ridge_Lasso_RMSE.eps",width=8,height=6)

# K-fold CV (8 folds) for the lasso
n = nrow(x)
n.f = n/8
yhat.lars = matrix(NA, length(y), 11)
err.lars = matrix(0, 8, 11)
for (v in 1:8) {
  ii.test = ii[(n.f*(v-1)) + (1:n.f)]
  ii.app = setdiff(1:n, ii.test)
  mod.f.lars = lars(Z[ii.app,], y[ii.app], type="lasso")
  for (j in 1:11){
    yhat.lars[ii.test,j] = Z[ii.test,] %*% t(mod.f.lars$beta)[,j]
    err.lars[v,j] = mean((y[ii.test] - yhat.lars[ii.test,j])^2)
  }
}
plot(0:10, sqrt(apply(err.lars, 2, mean)), typ="l", ylab="CV RMSE (8 folds)", xlab="df")
points(0:10, sqrt(apply(err.lars, 2, mean)), pch=20)

# Leftover comparison code: y.hat, y.hat.ridge and y.hat.lars are not defined
# anywhere in this script, so these lines are kept commented out
# sqrt(sum((y-y.hat)^2)/n)
# sqrt(apply((matrix(y,n,51)-y.hat.ridge)^2,2,sum)/n)
# sqrt(apply((matrix(y,n,dim(y.hat.lars)[2])-y.hat.lars)^2,2,sum)/n)
# plot(sqrt(apply((matrix(y,n,51)-y.hat.ridge)^2,2,sum)/n)[rev(1:51)],pch=20)
# abline(h=sqrt(sum((y-y.hat)^2)/n))
# points(sqrt(apply((matrix(y,n,dim(y.hat.lars)[2])-y.hat.lars)^2,2,sum)/n),pch=20,col="red")

# ================================================
# ANN
library(neuralnet)
n = length(Ozone[,1])
appri = sample(1:n, round(n*2/3))
testi = setdiff(1:n, appri)
mod.lin = lm(maxO3 ~ ., data=Ozone, subset=appri)
st = step(mod.lin)
mod.lin.pr = predict(mod.lin, Ozone[testi,])
err.lin = sqrt(mean((mod.lin.pr - Ozone$maxO3[testi])^2))
mod.lin.pr.st = predict(st, Ozone[testi,])
err.lin.st = sqrt(mean((mod.lin.pr.st - Ozone$maxO3[testi])^2))
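# Not in the original script: err.lin and err.lin.st above come from a single
# random split and are therefore noisy. A minimal sketch of averaging the
# full-model error over repeated splits (the neural-network loop below
# resamples in the same way); the number of repetitions is an arbitrary choice.
err.rep = replicate(20, {
  tr = sample(1:n, round(n*2/3))
  te = setdiff(1:n, tr)
  fit = lm(maxO3 ~ ., data=Ozone, subset=tr)
  sqrt(mean((predict(fit, Ozone[te,]) - Ozone$maxO3[te])^2))
})
c(mean=mean(err.rep), sd=sd(err.rep))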
Oz.st = data.frame(maxO3=Ozone$maxO3, T15=Ozone$T15, Vx9=Ozone$Vx9, Ne9=Ozone$Ne9, maxO3v=Ozone$maxO3v)
# Linear model with second-order interactions on the selected variables
mod.nlin = lm(maxO3 ~ .^2, data=Oz.st, subset=appri)
st.nlin = step(mod.nlin)
mod.nlin.pr.st = predict(st.nlin, Ozone[testi,])
err.nlin.st = sqrt(mean((mod.nlin.pr.st - Ozone$maxO3[testi])^2))

# Neural networks (neuralnet): RMSE as a function of the number of hidden units,
# with and without variable selection, over repeated train/test splits
B = 50
kmax = 10
rmse.ann <- rmse.ann.st <- matrix(NA, B, kmax)
for (b in 1:B){
  appri = sample(1:n, round(n*2/3))
  testi = setdiff(1:n, appri)
  # min-max scaling computed on the training set
  min.o = apply(Ozone[appri,1:11], 2, min)
  max.o = apply(Ozone[appri,1:11], 2, max)
  data.sc = Ozone
  for (j in 1:11){
    data.sc[,j] = (Ozone[,j] - min.o[j])/(max.o[j] - min.o[j])
  }
  ann = list()
  ann.st = list()
  data.app = data.sc[appri,]
  data.test = data.sc[testi,-c(1,12,13)]
  data.test.st = data.sc[testi,c(4,5,8,11)]
  y.test = data.sc[testi,1]
  for (k in 1:kmax){
    # all predictors; keep the best of the random restarts as starting weights
    ann[[k]] = neuralnet(maxO3 ~ T9+T12+T15+Ne9+Ne12+Ne15+Vx9+Vx12+Vx15+maxO3v,
                         data=data.app, hidden=k, rep=1) # rep=10
    wm = which.min(ann[[k]]$result.matrix[1,])
    ann[[k]] = neuralnet(maxO3 ~ T9+T12+T15+Ne9+Ne12+Ne15+Vx9+Vx12+Vx15+maxO3v,
                         data=data.app, hidden=k, startweights=ann[[k]]$weights[[wm[1]]])
    y.p.sc = compute(ann[[k]], data.test)
    y.p = y.p.sc$net.result*(max.o[1] - min.o[1]) + min.o[1]
    rmse.ann[b,k] = sqrt(mean((Ozone$maxO3[testi] - y.p)^2))
    # selected predictors only
    ann.st[[k]] = neuralnet(maxO3 ~ T15+Ne9+Vx9+maxO3v, data=data.app, hidden=k, rep=10)
    wm = which.min(ann.st[[k]]$result.matrix[1,])
    ann.st[[k]] = neuralnet(maxO3 ~ T15+Ne9+Vx9+maxO3v, data=data.app, hidden=k,
                            startweights=ann.st[[k]]$weights[[wm[1]]])
    y.p.sc = compute(ann.st[[k]], data.test.st)
    y.p = y.p.sc$net.result*(max.o[1] - min.o[1]) + min.o[1]
    rmse.ann.st[b,k] = sqrt(mean((Ozone$maxO3[testi] - y.p)^2))
  }
}
# best number of hidden units = column with the smallest average RMSE
rmse.k = apply(rmse.ann, 2, mean)
rmse.k.st = apply(rmse.ann.st, 2, mean)
k.opt = which.min(rmse.k)
k.opt.st = which.min(rmse.k.st)
affich = matrix(c("*** Root mean squared error ***",
                  paste("Linear model:", round(err.lin*100)/100),
                  paste("Linear model with variable selection:", round(err.lin.st*100)/100),
                  paste("Non-linear model with variable selection:", round(err.nlin.st*100)/100),
                  paste("Neural network (", k.opt, "):", round(rmse.k[k.opt]*100)/100),
                  paste("Neural network with variable selection (", k.opt.st, "):", round(rmse.k.st[k.opt.st]*100)/100)),
                6, 1)
write(affich, "", sep="\t")

# A network with two hidden layers (3 and 2 units)
ann32 = neuralnet(maxO3 ~ T9+T12+T15+Ne9+Ne12+Ne15+Vx9+Vx12+Vx15+maxO3v,
                  data=data.app, hidden=c(3,2), rep=10)
y.p.sc = compute(ann32, data.test)
y.p = y.p.sc$net.result*(max.o[1] - min.o[1]) + min.o[1]
rmse.ann32 = sqrt(mean((Ozone$maxO3[testi] - y.p)^2))
plot(ann[[3]])
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_ANN3.eps",width=6,height=6)

## k-nearest-neighbour regression and bagging
# install.packages("FNN")
library(FNN)
plot(Ozone$Vx9, Ozone$maxO3, pch=20, xlab="Vx9", ylab="maxO3")
n = nrow(Ozone)
B = 50
test = matrix(seq(min(Ozone$Vx9), max(Ozone$Vx9), length.out=30), 30, 1)
pred = matrix(0, 30, B)
for (b in 1:B){
  app = sample(1:n, round(2/3*n))
  M = knn.reg(Ozone$Vx9[app], y=Ozone$maxO3[app], test=test, k=7)
  lines(test, M$pred, col="gray")
  pred[,b] = M$pred
}
lines(test, apply(pred, 1, mean), col="red", lwd=2)

B = 50
test = matrix(seq(min(Ozone$Vx9), max(Ozone$Vx9), length.out=30), 30, 1)
pred = matrix(0, 30, B)
for (b in 1:B){
  app = sample(1:n, round(2/3*n))
  M = knn.reg(Ozone$Vx9[app], y=Ozone$maxO3[app], test=test, k=7)
  pred[,b] = M$pred
}
lines(test, apply(pred, 1, mean), col="blue", lwd=2)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_bagging_knn.eps",width=6,height=6)
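# Not in the original script: k = 7 above is fixed by hand. A minimal sketch of
# choosing k by leave-one-out CV, relying on FNN::knn.reg performing LOOCV when
# no test set is supplied; the range of k values is an arbitrary choice.
ks = 1:20
rmse.loo = sapply(ks, function(k) {
  loo = knn.reg(matrix(Ozone$Vx9, ncol=1), y=Ozone$maxO3, k=k)
  sqrt(mean((Ozone$maxO3 - loo$pred)^2))
})
plot(ks, rmse.loo, type="b", pch=20, xlab="k", ylab="LOOCV RMSE")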
# Reduction of variance?
plot(Ozone$Vx9, Ozone$maxO3, pch=20, xlab="Vx9", ylab="maxO3")
B = 30
B.bag = 50
pred.bag = matrix(0, B, 30)
test = matrix(seq(min(Ozone$Vx9), max(Ozone$Vx9), length.out=30), 30, 1)
for (b in 1:B) {
  pred = matrix(0, 30, B.bag)
  for (b.bag in 1:B.bag){
    app = sample(1:n, round(2/3*n))
    M = knn.reg(Ozone$Vx9[app], y=Ozone$maxO3[app], test=test, k=7)
    lines(test, M$pred, col="gray")
    pred[,b.bag] = M$pred
  }
  pred.bag[b,] = apply(pred, 1, mean)
}
q1 = apply(pred.bag, 2, quantile, 1/4)
q2 = apply(pred.bag, 2, quantile, 3/4)
polygon(c(test, rev(test)), c(q1, rev(q2)), col="red", border="red")
legend("topleft", c("knn (7), all", "bagging, interquartile"), col=c("gray","red"), lty=1, lwd=2)
points(Ozone$Vx9, Ozone$maxO3, pch=20)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_bagging_knn.eps",width=6,height=6)

# Bagging regression trees vs a single (deeper) tree
library(FNN)
library(rpart)
n = nrow(Ozone)
B = 100
B.bag = 40
RMSE = matrix(0, B, B.bag)
RMSEall = NULL
for (b in 1:B){
  pred = matrix(0, n - round(2/3*n), B.bag)
  app = sample(1:n, round(2/3*n)); napp = length(app)
  test = setdiff(1:n, app)
  Mall = rpart(maxO3 ~ ., subset=app, data=Ozone, control=rpart.control(maxdepth=15, cp=.1))
  predall = predict(Mall, Ozone[test,])
  RMSEall[b] = sqrt(mean((Ozone$maxO3[test] - predall)^2))
  for (b.bag in 1:B.bag){
    app.bag = sample(app, napp, replace=TRUE)
    M = rpart(maxO3 ~ ., control=rpart.control(maxdepth=2), subset=app.bag, data=Ozone)
    pred[,b.bag] = predict(M, Ozone[test,])
    if (b.bag > 1) {pred.bag = apply(pred[,1:b.bag], 1, mean)} else {pred.bag = pred[,1]}
    RMSE[b,b.bag] = sqrt(mean((Ozone$maxO3[test] - pred.bag)^2))
  }
}
boxplot(RMSE, xlab="Nb of bagging samples", ylab="RMSE")
q1 = apply(RMSE, 2, quantile, probs=1/4)
q2 = apply(RMSE, 2, quantile, probs=3/4)
q1all = quantile(RMSEall, probs=1/4)
q2all = quantile(RMSEall, probs=3/4)
plot(1:B.bag, q1, col="white", ylim=range(c(q1, q2, q1all, q2all)),
     xlab="Nb of bagging samples", ylab="RMSE")
polygon(c(1:B.bag, rev(1:B.bag)), c(q1, rev(q2)), col="red", border="red")
polygon(c(1:B.bag, rev(1:B.bag)), c(rep(q1all, B.bag), rep(q2all, B.bag)), col="gray", border="gray")
legend("topright", c("tree", "bagging"), col=c("gray", "red"), lty=1, lwd=2)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_bagging_tree.eps",width=6,height=6)
pred.bag = apply(pred, 1, mean)
sqrt(mean((Ozone$maxO3[test] - pred.bag)^2))
sqrt(mean((Ozone$maxO3[test] - pred[,1])^2))

# Bagged knn curve overlaid on the current plot
B = 50
test = matrix(seq(min(Ozone$Vx9), max(Ozone$Vx9), length.out=30), 30, 1)
pred = matrix(0, 30, B)
for (b in 1:B){
  app = sample(1:n, round(2/3*n))
  M = knn.reg(Ozone$Vx9[app], y=Ozone$maxO3[app], test=test, k=7)
  pred[,b] = M$pred
}
lines(test, apply(pred, 1, mean), col="blue", lwd=2)
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_tree_knn.eps",width=6,height=6)

## Random forest
# data(airquality)
library(adabag)
library(randomForest)
set.seed(131)
RF = randomForest(maxO3 ~ ., data=Ozone, mtry=3, importance=TRUE)
print(RF)
barplot(RF$importance[,1])
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_RF_importance.eps",width=12,height=6)
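# Not in the original script: for regression forests, randomForest also stores
# the out-of-bag MSE after each tree (RF$mse), which gives a convergence check
# without a separate test set. A minimal sketch based on the RF fit above.
plot(sqrt(RF$mse), type="l", xlab="Number of trees", ylab="OOB RMSE")
varImpPlot(RF)   # same information as the barplot above, with both importance measures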
# Single tree vs random forests with 5, 25 and 100 trees
n = nrow(Ozone)
B = 100
RMSE.tree = matrix(0, B, 1)
RMSE.RF = matrix(0, B, 1)
RMSE.RF1 = matrix(0, B, 1)
RMSE.RF2 = matrix(0, B, 1)
mtry = 10
for (b in 1:B){
  app = sample(1:n, round(2/3*n)); napp = length(app)
  test = setdiff(1:n, app)
  Mall = rpart(maxO3 ~ ., subset=app, data=Ozone, control=rpart.control(maxdepth=15, cp=.1))
  predall = predict(Mall, Ozone[test,])
  RMSE.tree[b] = sqrt(mean((Ozone$maxO3[test] - predall)^2))
  RF = randomForest(maxO3 ~ ., data=Ozone, mtry=mtry, subset=app, ntree=100)
  pred = predict(RF, Ozone[test,])
  RMSE.RF2[b] = sqrt(mean((Ozone$maxO3[test] - pred)^2))
  RF = randomForest(maxO3 ~ ., data=Ozone, mtry=mtry, subset=app, ntree=25)
  pred = predict(RF, Ozone[test,])
  RMSE.RF1[b] = sqrt(mean((Ozone$maxO3[test] - pred)^2))
  RF = randomForest(maxO3 ~ ., data=Ozone, mtry=mtry, subset=app, ntree=5)
  pred = predict(RF, Ozone[test,])
  RMSE.RF[b] = sqrt(mean((Ozone$maxO3[test] - pred)^2))
}
D = data.frame(cbind(RMSE.tree, RMSE.RF, RMSE.RF1, RMSE.RF2))
colnames(D) = c("tree", "forest (5 trees)", "forest (25 trees)", "forest (100 trees)")
boxplot(D, ylab="RMSE")
dev.copy2eps(file="~/Dropbox/ENSEIGNEMENT/MACHINE LEARNING/CONF_DS/Figures/Ozone_RF.eps",width=10,height=6)

# Boosting (gbm)
library(gbm)
n = nrow(Ozone)
boost = gbm.fit(Ozone[,-c(1,2)], Ozone$maxO3, distribution="gaussian", shrinkage=0.01,
                nTrain=round(2/3*n), n.trees=2000)
best.iter <- gbm.perf(boost, method="OOB")
pred = predict(boost, Ozone[test,-c(1,2)], n.trees=best.iter)

# RMSE as a function of the number of boosting iterations, over repeated splits
B = 100
n.trees = seq(1, 500, by=50)
RMSE.boost = matrix(0, B, length(n.trees))
for (b in 1:B){
  app = sample(1:n, round(2/3*n)); napp = length(app)
  test = setdiff(1:n, app)
  boost = gbm.fit(Ozone[app,-c(1,2)], Ozone$maxO3[app], distribution="gaussian",
                  shrinkage=0.05, n.trees=max(n.trees), interaction.depth=2)
  pred = predict(boost, Ozone[test,-c(1,2)], n.trees=n.trees)
  RMSE.boost[b,] = sqrt(apply((pred - matrix(Ozone$maxO3[test], length(test), length(n.trees)))^2, 2, mean))
}
# interquartile band of the RMSE against the number of trees
q1 = apply(RMSE.boost, 2, quantile, probs=1/4)
q2 = apply(RMSE.boost, 2, quantile, probs=3/4)
plot(n.trees, q1, col="white", ylim=range(c(q1, q2)), xlab="Nb of boosting iterations", ylab="RMSE")
polygon(c(n.trees, rev(n.trees)), c(q1, rev(q2)), col="red", border="red")
lines(n.trees, apply(RMSE.boost, 2, median), lwd=2)
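# Not in the original script: gbm also reports the relative influence of each
# predictor, analogous to the random-forest importances above. A minimal sketch,
# assuming the last boost fit from the loop above is still in memory.
summary(boost, n.trees=max(n.trees), plotit=TRUE)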