library(keras)
library(fields)    # for image.plot(), used in the visualisations below
# install_keras()  # run once to set up the Python backend

# Data Preparation -----------------------------------------------------

batch_size <- 128
num_classes <- 10
epochs <- 4  # small number of epochs to reduce the computational time

# Input image dimensions
img_rows <- 28
img_cols <- 28

# The data, shuffled and split between train and test sets
mnist <- dataset_mnist()  # load images
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y

# Redefine dimension of train/test inputs
x_train <- array_reshape(x_train, c(nrow(x_train), img_rows, img_cols, 1))
x_test <- array_reshape(x_test, c(nrow(x_test), img_rows, img_cols, 1))
input_shape <- c(img_rows, img_cols, 1)

# Rescale the grayscale pixel values from [0, 255] into the [0, 1] range
x_train <- x_train / 255
x_test <- x_test / 255

cat('x_train shape:', dim(x_train), '\n')
cat(nrow(x_train), 'train samples\n')
cat(nrow(x_test), 'test samples\n')

# Convert class vectors to binary class matrices (one-hot encoding)
y_train <- to_categorical(y_train, num_classes)
y_test <- to_categorical(y_test, num_classes)

# Define the model -----------------------------------------------------------

model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(5, 5), activation = 'relu',
                input_shape = input_shape) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = 'relu') %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_dropout(rate = 0.25) %>%
  layer_flatten() %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = num_classes, activation = 'softmax')

model

# Define the compilation options -----------------------------------------

model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adadelta(),
  metrics = c('accuracy')
)

# Fit the model ----------------------------------------------------------

history <- model %>% fit(
  x_train, y_train,
  epochs = epochs,
  batch_size = batch_size,
  verbose = 1,
  validation_split = 0.2
)

# Compute and print the scores -------------------------------------------

scores <- model %>% evaluate(
  x_test, y_test,
  verbose = 0
)

# Output metrics
cat('Test loss:', scores[[1]], '\n')
cat('Test accuracy:', scores[[2]], '\n')

# Confusion matrix
y_pred <- model %>% predict(x_test)
conf_mat <- table(apply(y_test, 1, which.max), apply(y_pred, 1, which.max))
print("Confusion matrix")
print(conf_mat)
print(paste("Classification error: ",
            round((1 - sum(diag(conf_mat)) / nrow(x_test)) * 100, 2),
            "%", sep = ""))

# Show the filters of the first layer ------------------------------------

weights <- get_weights(model)
dim(weights[[1]])  # 5 x 5 x 1 x 32: one 5x5 kernel per input channel and filter
dev.new()
par(mfrow = c(4, 8), mar = c(1, 1, 1, 1))
for (k in 1:dim(weights[[1]])[4]) {
  image.plot(weights[[1]][, , 1, k], axes = FALSE, zlim = range(weights[[1]]))
  box()
}

# Show output of the first layer ----------------------------------------

layer_name <- model$layers[[1]]$name  # first conv layer; auto-generated names
                                      # ('conv2d', 'conv2d_1', ...) vary by session
intermediate_layer_model <- keras_model(inputs = model$input,
                                        outputs = get_layer(model, layer_name)$output)
img <- x_test[2, , , ]
intermediate_output <- predict(intermediate_layer_model, array(img, c(1, 28, 28, 1)))
# A 5x5 kernel without padding maps a 28x28 image to 24x24 feature maps,
# so the spatial size is read off the output rather than hard-coded
n_out <- dim(intermediate_output)[2]
dev.new()
par(mfrow = c(4, 8), mar = c(1, 1, 1, 1))
for (k in 1:dim(intermediate_output)[4]) {
  image.plot(t(intermediate_output[1, , , k])[, rev(1:n_out)],
             axes = FALSE, zlim = range(intermediate_output))
  box()
}
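
# Plot the training history ----------------------------------------------
# A minimal optional addition, not in the original script: calling plot()
# on the history object returned by fit() draws the per-epoch loss and
# accuracy curves for the training and validation splits.
plot(history)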
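
# Predict a single image (illustrative sketch) ----------------------------
# An optional sketch assuming the model trained above: predict() returns
# the softmax probabilities, and which.max() - 1 maps the most probable
# class back to the digit label (classes run 0-9).
probs <- predict(model, array(x_test[1, , , ], c(1, 28, 28, 1)))
cat('Predicted digit:', which.max(probs) - 1L, '\n')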
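
# Save and reload the model (optional) -------------------------------------
# A minimal sketch using the standard keras-for-R helpers; the file name
# "mnist_cnn.h5" is an arbitrary choice, not from the original script.
save_model_hdf5(model, "mnist_cnn.h5")
# model <- load_model_hdf5("mnist_cnn.h5")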