#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 12 10:11:20 2017

@author: valerie
"""
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from math import ceil
from scipy import linalg

from sklearn.cross_validation import ShuffleSplit
from sklearn.cross_validation import cross_val_score
from sklearn.base import BaseEstimator
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

# data importation
Ozone = pd.read_csv("~/Documents/ENSEIGNEMENT/TECHNICOLOR/Ozone.dat", 
                    delim_whitespace=True, dtype=object)
Ozone['maxO3']=Ozone['maxO3'].astype('float64')
Ozone['maxO3v']=Ozone['maxO3v'].astype('float64')

Ozone['T9']=Ozone['T9'].astype('float64')
Ozone['T12']=Ozone['T12'].astype('float64')
Ozone['T15']=Ozone['T15'].astype('float64')

Ozone['Ne9']=Ozone['Ne9'].astype('float64')
Ozone['Ne12']=Ozone['Ne12'].astype('float64')
Ozone['Ne15']=Ozone['Ne15'].astype('float64')

Ozone['Vx9']=Ozone['Vx9'].astype('float64')
Ozone['Vx12']=Ozone['Vx12'].astype('float64')
Ozone['Vx15']=Ozone['Vx15'].astype('float64')
#
plt.plot(Ozone['maxO3'])

plt.plot(Ozone['maxO3v'],Ozone['maxO3'],'.')

## ============================================================================
## LLR
def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest
    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.
    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.
    """
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iter):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest


yest = lowess(Ozone['maxO3v'],Ozone['maxO3']) 
plt.clf()
plt.plot(Ozone['maxO3v'],Ozone['maxO3'], '.')
plt.plot(Ozone['maxO3v'], yest, label='y pred')
plt.legend()
plt.show()

yest = lowess(Ozone['T9'],Ozone['maxO3']) 
plt.clf()
plt.plot(Ozone['T9'],Ozone['maxO3'], '.')
plt.plot(Ozone['T9'], yest,'.', label='y pred')
plt.legend()
plt.show()

# =============================================================================
# kNN
y = Ozone['maxO3']
X = Ozone.iloc[:,2:12] # keep the numerical values only
X1 = Ozone.iloc[:,np.append(range(3,12,3),11)]
skf = ShuffleSplit(y.shape[0], n_iter=100, test_size=.25, random_state=1)
NN_max = 15
sc_m_dist_L1 = np.zeros(NN_max)
sc_sd_dist_L1 = np.zeros(NN_max)
sc_m_dist_L2 = np.zeros(NN_max)
sc_sd_dist_L2 = np.zeros(NN_max)
sc_m_unif = np.zeros(NN_max)
sc_sd_unif = np.zeros(NN_max)
i = 0
for n_neighbors in range(1,NN_max+1):
    scores_dist = np.sqrt(-cross_val_score(KNeighborsRegressor(n_neighbors=n_neighbors, weights='distance'), 
                                           X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
    scores_unif = np.sqrt(-cross_val_score(KNeighborsRegressor(n_neighbors=n_neighbors, weights='uniform'), 
                                           X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
    sc_m_dist_L1[i] = np.mean(scores_dist)
    sc_sd_dist_L1[i] = np.std(scores_dist)
    scores_dist = np.sqrt(-cross_val_score(
            KNeighborsRegressor(n_neighbors=n_neighbors, weights='distance',metric='euclidean'), 
                               X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
    sc_m_dist_L2[i] = np.mean(scores_dist)
    sc_sd_dist_L2[i] = np.std(scores_dist)
    sc_m_unif[i] = np.mean(scores_unif)
    sc_sd_unif[i] = np.std(scores_unif)
    print([n_neighbors, np.mean(scores_dist), np.std(scores_dist),
           np.mean(scores_unif), np.std(scores_unif)])
    i = i+1
    
plt.plot(range(1,NN_max+1), sc_m_unif, '-')
plt.fill_between(range(1,NN_max+1), sc_m_unif-sc_sd_unif, sc_m_unif+sc_sd_unif, alpha=0.3)
plt.plot(range(1,NN_max+1), sc_m_dist_L2, '-')
plt.fill_between(range(1,NN_max+1), sc_m_dist_L2-sc_sd_dist_L2, 
                 sc_m_dist_L2+sc_sd_dist_L2, alpha=0.3)
#plt.plot(range(1,NN_max+1), sc_m_dist_L1, '-')
#plt.fill_between(range(1,NN_max+1), sc_m_dist_L1-sc_sd_dist_L1, 
#                sc_m_dist_L1+sc_sd_dist_L1, alpha=0.3)

plt.plot(range(1,NN_max+1), sc_m_unif, 'ko-')
plt.plot(range(1,NN_max+1), sc_m_unif-sc_sd_unif,'k--',
         range(1,NN_max+1), sc_m_unif+sc_sd_unif,'k--')
plt.plot(range(1,NN_max+1), sc_m_dist_L2, 'b+-')
plt.plot(range(1,NN_max+1), sc_m_dist_L2-sc_sd_dist_L2,'b--',
         range(1,NN_max+1), sc_m_dist_L2+sc_sd_dist_L2,'b--')
plt.xlabel('k')
plt.ylabel('RMSE')
plt.grid()
#plt.fill_between(range(1,NN_max+1), sc_m_dist_L2-sc_sd_dist_L2, 
 #                sc_m_dist_L2+sc_sd_dist_L2, alpha=0.3)
plt.legend(['uniform','dist L2'])
plt.savefig('Ozone_kNN.eps', format='eps', dpi=300)
#knn = neighbors.KNeighborsRegressor(n_neighbors, weights=weights)
#y_ = knn.fit(X, y).predict(T)

## ============================================================================
## Regression Trees
X = Ozone.iloc[:,2:12]

max_depth=15
sc_m_dt = np.zeros(max_depth)
sc_sd_dt = np.zeros(max_depth)

i = 0
for depth in range(1,max_depth+1):
    scores = np.sqrt(-cross_val_score(DecisionTreeRegressor(max_depth=depth), 
                         X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
    print([depth, np.mean(scores), np.std(scores)])
    sc_m_dt[i] = np.mean(scores)
    sc_sd_dt[i] = np.std(scores)
    i = i+1

plt.plot(range(1,max_depth+1), sc_m_dt, 'ko-')
plt.xlabel('Tree depth')
plt.ylabel('RMSE')
plt.plot(range(1,max_depth+1), sc_m_dt-sc_sd_dt,'k--',
         range(1,max_depth+1), sc_m_dt+sc_sd_dt,'k--')
plt.savefig('Ozone_DT.eps', format='eps', dpi=300)


from sklearn.tree import export_graphviz
import os

Tr = DecisionTreeRegressor(max_depth=depth)
Tr_fit = Tr.fit(X,y) 
export_graphviz(Tr_fit)
os.system('dot -Tpng tree.dot -o tree.png')

## Linear model
scores = np.sqrt(-cross_val_score(LinearRegression(), 
                         X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
plt.plot([1,15],[np.mean(scores),np.mean(scores)],'r')
plt.plot(range(1,max_depth+1), sc_m_dt, 'ko-')
plt.plot(range(1,NN_max+1), sc_m_dist_L2, 'b+-')
plt.xlabel('Tree depth/kNN')
plt.ylabel('RMSE')
plt.legend(['Linear','kNN','Tree'])
#plt.savefig('Ozone_compar1.eps', format='eps', dpi=300)

# Subset selection
import itertools
import time
import statsmodels.api as sm

def processSubset(feature_set):
# Fit model on feature_set and calculate RSS
    model = sm.OLS(y,X[list(feature_set)])
    regr = model.fit()
    RSS = ((regr.predict(X[list(feature_set)]) - y) ** 2).sum()
    return {'models':regr, 'RSS':RSS}

def getBest(k):
    tic = time.time()
    results = []
    for combo in itertools.combinations(X.columns, k):
        results.append(processSubset(combo)) # Wrap everything up in a nice dataframe 
    # Choose the model with the highest RSS
    best_model = models.loc[models['RSS'].argmin()]
    toc = time.time()
    print("Processed ", models.shape[0], "models on", k, "predictors in", (toc-tic), "seconds.") # Return the best model, along with some other useful information about the model
    return best_model


models = pd.DataFrame(columns=["RSS", "model"])
tic = time.time()
for i in range(1,8):
    models.loc[i] = getBest(i)
toc = time.time()
print("Total elapsed time:", (toc-tic), "seconds.")

## ============================================================================
## ANN
# One hidden layer
max_nb = 15
sc_m_ann = np.zeros(max_nb)
sc_sd_ann = np.zeros(max_nb)
i = 0
for nb_neurons in range(2,max_nb+1):
    scores = np.sqrt(-cross_val_score(MLPRegressor(solver='sgd', alpha=.1,
                              hidden_layer_sizes=(nb_neurons),
                              learning_rate='invscaling',
                              learning_rate_init = .001,
                              max_iter=5000), 
                              X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
    print([nb_neurons, np.mean(scores), np.std(scores)])
    sc_m_ann[i] = np.mean(scores)
    sc_sd_ann[i] = np.std(scores)
    i = i+1
    
max_1 = 15
max_2 = 10
sc_m_ann = np.zeros(100)
sc_sd_ann = np.zeros(100)
i = 0
for nb_1 in range(6,max_1+1):
    for nb_2 in range(2,min(nb_1,max_2)+1):
        scores = np.sqrt(-cross_val_score(MLPRegressor(solver='lbfgs',alpha=1e-5,
                            hidden_layer_sizes=(nb_1,nb_2),
                            learning_rate='invscaling',max_iter=500), 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
        print([nb_1, nb_2, np.mean(scores), np.std(scores)])
        sc_m_ann[i] = np.mean(scores)
        sc_sd_ann[i] = np.std(scores)
        i = i+1
        
        
## SVR
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e3)
svr_poly = SVR(kernel='poly', C=1e3, degree=2)
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)        


## ============================================================================
### Adaboost

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

y = Ozone['maxO3']
X = Ozone.iloc[:,2:12]

# Fit regression model
skf = ShuffleSplit(y.shape[0], n_iter=100, test_size=.25, random_state=1)

regr_1 = DecisionTreeRegressor(max_depth=2)
scores_1 = np.sqrt(-cross_val_score(regr_1, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
regr_10 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2),
                          n_estimators=10, random_state=1)
scores_10 = np.sqrt(-cross_val_score(regr_10, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
regr_50 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2),
                          n_estimators=50, random_state=1)
scores_50 = np.sqrt(-cross_val_score(regr_50, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
regr_100 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2),
                          n_estimators=100, random_state=1)
scores_100 = np.sqrt(-cross_val_score(regr_100, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))

regr_300 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2),
                          n_estimators=300, random_state=1)
scores_300 = np.sqrt(-cross_val_score(regr_300, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
tmp= np.array([scores_1,scores_10,scores_50,scores_100,scores_300])
df = pd.DataFrame(tmp.T,columns=['1','10','50','100','300'] )                 
plt.boxplot(tmp.T,labels=['1','10','50','100','300'])
plt.xlabel('Nb boosting iterations')
plt.ylabel('RMSE')
plt.savefig('Ozone_boosting.eps', format='eps', dpi=300)

regr_1.fit(X, y)
regr_2.fit(X, y)

# Predict
y_1 = regr_1.predict(X)
y_2 = regr_2.predict(X)

# Plot the results
plt.figure()
plt.plot(y, y_1, '.', c="g", label="n_estimators=1")
plt.plot(y, y_2, '.', c="r", label="n_estimators=300")
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
plt.show()

## ============================================================================
# Gradient boosting
from sklearn import ensemble
from sklearn.model_selection import GridSearchCV
param_grid = {'n_estimators': [1e0, 1e1, 5e1, 1e2, 3e2, 1e3],
              'learning_rate': [0.001,0.01,0.1]}
params = {'max_depth': 2, 'min_samples_split': 2, 'loss': 'ls'}
boost = GridSearchCV(ensemble.GradientBoostingRegressor(**params),
                     cv=skf, param_grid=param_grid)
boost.fit(X,y)


params = {'n_estimators': 1, 'max_depth': 2, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
regr_1 = ensemble.GradientBoostingRegressor(**params)
scores_1 = np.sqrt(-cross_val_score(regr_1, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
params = {'n_estimators': 10, 'max_depth': 2, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
regr_10 = ensemble.GradientBoostingRegressor(**params)
scores_10 = np.sqrt(-cross_val_score(regr_10, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
params = {'n_estimators': 50, 'max_depth': 2, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
regr_50 = ensemble.GradientBoostingRegressor(**params)
scores_50 = np.sqrt(-cross_val_score(regr_50, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
params = {'n_estimators': 100, 'max_depth': 2, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
regr_100 = ensemble.GradientBoostingRegressor(**params)
scores_100 = np.sqrt(-cross_val_score(regr_100, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
params = {'n_estimators': 300, 'max_depth': 2, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
regr_300 = ensemble.GradientBoostingRegressor(**params)
scores_300 = np.sqrt(-cross_val_score(regr_300, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
params = {'n_estimators': 1000, 'max_depth': 2, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
regr_1000 = ensemble.GradientBoostingRegressor(**params)
scores_1000 = np.sqrt(-cross_val_score(regr_1000, 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
tmp= np.array([scores_1,scores_10,scores_50,scores_100,scores_300,scores_1000])
df = pd.DataFrame(tmp.T,columns=['1','10','50','100','300','1000'] )                 
plt.boxplot(tmp.T,labels=['1','10','50','100','300','1000'])
plt.xlabel('Nb boosting iterations')
plt.ylabel('RMSE')
plt.savefig('Ozone_gradient_boosting.eps', format='eps', dpi=300)

regr_300.fit(X,y)
feature_importance = regr_300.feature_importances_
# make importances relative to max importance
feature_importance = 100.0 * (feature_importance / feature_importance.max())
sorted_idx = np.argsort(feature_importance)
pos = np.arange(sorted_idx.shape[0]) + .5
plt.subplot(1, 2, 1)
plt.barh(pos, feature_importance[sorted_idx], align='center')
colnames = np.array(['T9','T12','T15','Ne9','Ne12','Ne15','Vx9','Vx12','Vx15','maxO3v'])
plt.yticks(pos, colnames[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('Variable Importance')
#plt.show()
plt.savefig('Ozone_gradient_boosting_importance.eps', format='eps', dpi=300)


## ============================================================================
### Kernel Ridge Regression
from sklearn.model_selection import GridSearchCV

param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3]}
kr = GridSearchCV(KernelRidge(cv=skf, param_grid=param_grid)
kr.fit(X, y)
kr.best_score_

kr.best_params_

scores = np.sqrt(-cross_val_score(KernelRidge(kernel='rbf',alpha=.1,gamma=0.00046), 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
np.mean(scores)

## ============================================================================
### SVR
from sklearn.svm import SVR

param_grid={"C": [1e0, 1e1, 1e2, 1e3],
           "gamma": np.logspace(-3, 2, 5),
            'epsilon':[1e1, 1e0, 1e-1, 1e-2, 1e-3]}
svr = GridSearchCV(SVR(kernel='rbf'), cv=skf,
                   param_grid=param_grid)
svr.fit(X, y)
svr.best_score_
svr.best_params_

scores = np.sqrt(-cross_val_score(SVR(kernel='rbf', gamma=0.001,C=100,epsilon=10), 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
np.mean(scores)


param_grid={"C": [1e0, 1e1, 1e2, 1e3],
            'epsilon':[1e1, 1e0, 1e-1, 1e-2, 1e-3]}
svr = GridSearchCV(SVR(kernel='linear'), cv=skf,
                   param_grid=param_grid)
svr.fit(X, y)
svr.best_score_
svr.best_params_

scores = np.sqrt(-cross_val_score(SVR(kernel='linear', C=1,epsilon=10), 
                       X=X, y=y, scoring='neg_mean_squared_error', cv=skf))
np.mean(scores)