# Step 1 - Generate Training and Testing Data:
library(tidymodels)
library(janitor)
library(brulee)

# Seed for any random steps in the data pipeline (e.g. optional sampling).
set.seed(888)

# Build the modeling data from ggplot2's diamonds data set:
# standardize column names to UpperCamelCase, keep the response (Price)
# plus four predictors, and convert the ordered-factor predictors to
# integer scores so the neural net can consume them.
# (Insert sample_n(x) into the pipe to work with x random observations.)
DataDiamonds <- diamonds |>
  clean_names("upper_camel") |>
  select(Price, Carat, Clarity, Cut, Color) |>
  mutate(
    Cut     = as.integer(Cut),
    Color   = as.integer(Color),
    Clarity = as.integer(Clarity)
  )

# 70/30 train/test split, stratified on the binned response (5 bins).
set.seed(888)
Split70   <- initial_split(DataDiamonds, prop = 0.7, strata = Price, breaks = 5)
DataTrain <- training(Split70)
DataTest  <- testing(Split70)

# Step 2 - Create a Recipe:
# Preprocessing recipe: model Price from all other columns and
# center/scale every predictor (important for neural-net training).
RecipeDiamonds <- recipe(Price ~ ., data = DataTrain) |>
  step_normalize(all_predictors())

# Step 3 - Create a Model Design:
# If you want to tune penalty as well, set it to penalty=tune()
# and do not forget to provide the candidate values for penalty
# in the parameter grid in Step 5, e.g., penalty=c(0, 0.05, 0.1).
# You can also reduce the value for epochs to execute the tuning faster.
# Single-hidden-layer perceptron (brulee/torch backend), regression mode.
# hidden_units and dropout are left open for tuning; penalty is fixed at 0.
ModelDesignNN <- mlp(
  hidden_units = tune(),
  dropout      = tune(),
  penalty      = 0,
  epochs       = 1000
) |>
  set_engine("brulee") |>
  set_mode("regression")

# Step 4 - Add the Recipe and the Model Design to a Workflow:
# Bundle the model design and the preprocessing recipe into one workflow.
WFModelNN <- workflow() |>
  add_model(ModelDesignNN) |>
  add_recipe(RecipeDiamonds)

# Step 5 - Create a Hyper-Parameter Grid:
#          To try out different hyper-parameter values, change the values
#          that are assigned to the respective hyper-parameters below
# Full factorial grid over the tuned hyper-parameters (3 x 2 = 6 candidates).
ParGridNN <- expand.grid(
  hidden_units = c(10, 20, 50),
  dropout      = c(0, 0.25)
)

# Step 6: - Create Resamples for Cross-Validation:
# Set v=10 to use 10 folds instead of 7.
# Expect computing time to increase by about 40%.
# 7-fold cross-validation resamples, stratified on the response.
set.seed(888)
FoldsForTuningNN <- vfold_cv(DataTrain, v = 7, strata = Price)


# Advanced R Users:
# Uncommenting this might speed up processing time on a multi-core machine.
# library(doParallel)
# ClusterNN <- makePSOCKcluster(parallel::detectCores(logical = FALSE))
# registerDoParallel(ClusterNN)
# When done, release the workers with: stopCluster(ClusterNN)


# Step 7 - Tune the Workflow and Train All Models:
#  (this step requires patience; you might want to run it overnight)
# You can see the progress in the console
# If you execute the line "RunTime=Sys.time()-StartTime" before the
# tuning is finished you can print the run time later on.
# Time the tuning run so the total cost can be reported afterwards.
StartTime <- Sys.time()
set.seed(888)
# Fit and evaluate every grid candidate on every resample,
# collecting rmse, rsq, and mae; verbose = TRUE shows progress.
TuneResultsNN <- tune_grid(
  WFModelNN,
  resamples = FoldsForTuningNN,
  grid      = ParGridNN,
  metrics   = metric_set(rmse, rsq, mae),
  control   = control_grid(verbose = TRUE)
)
RunTime <- Sys.time() - StartTime
print(RunTime)

# Step 8 - Extract the Best Hyper-Parameter(s)
# Pick the hyper-parameter combination with the lowest cross-validated RMSE.
# NOTE: the metric must be passed via the named `metric` argument --
# passing it positionally was deprecated in tune 1.0 and errors in
# current releases of the tune package.
BestHyperParNN <- select_best(TuneResultsNN, metric = "rmse")
print(BestHyperParNN)

# Step 9 - Finalize and Train the Best Workflow Model:
#  (this step requires a little patience;
#   run time possibly a few minutes)
# Plug the winning hyper-parameters into the workflow and refit it
# on the full training set.
set.seed(888)
BestWFModelNN <- finalize_workflow(WFModelNN, BestHyperParNN) |>
  fit(DataTrain)
print(BestWFModelNN)

# Step 10: Assess Prediction Quality Based on the Testing Data:
# Attach predictions (.pred) to the held-out testing data, then compute
# regression metrics against the true Price values.
DataTestWithPredBestModelNN <- augment(BestWFModelNN, DataTest)
metrics(DataTestWithPredBestModelNN, truth = Price,
        estimate = .pred)

# Visualize how validation performance varies across the tuned
# hyper-parameter values.
autoplot(TuneResultsNN)


# Spoiler Alert: Far below is a hyper parameter grid that produces good results
# Keep experimenting, before you read. You might produce a better result.
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
# ParGridNN=expand.grid(hidden_units=c(300, 500, 700, 1100),
#                             dropout=c(0, 0.25, 0.5))
# These settings need a lot of computing time in Step 7 -- depending on
# your computer, between 30 minutes and several hours.
# Step 9 also will take some time