bonski.predict: Predict Where Ski Will Be Good

Documented in evaluate evaluate.Model fit load_model load_ski_nivo load_ski_nivo.Model load_Xy load_Xy.Model Model ordinal_score predict predict_fresh predict_fresh.Model save_model save_model.Model score split_Xy split_Xy.Model

#' Statistical model used to predict ski conditions evaluations
#'
#' # Create model object
#' model <- Model(name)
#'
#' # Fit model if not already fitted
#' load_ski_nivo(model) # optionnal run by instructions bellow if needed
#' load_Xy(model, na_replacement) # optionnal run by instructions bellow if needed
#' split_Xy(model, split) # optionnal run by instructions bellow if needed
#' fit(model, classify, scoring) # optionnal run by instructions bellow if needed
#' score <- score(model, ord_score) # optionnal run by instructions bellow if needed
#'
#' # Run predictions
#' y <- predict_fresh(model) # fit first if model has not been loaded from the filesystem
#'
#' # Save model to the filesystem to be reused without fitting again
#' save_model(model)
#'
#' @param name character. Model name
#'
#' @return Model. object
#'
#' @export
Model <- function(name) {
  obj_path <- get_model_path(name)
  if (file.exists(obj_path)) {
    obj <- load_model(name)
  } else {
    obj <- rlang::env(
      name = name,
      data = ModelData(),
      classify = NULL,
      scoring = NULL,
      trained_model = NULL,
      ord_score = NULL,
      score = NULL,
      evaluation = NULL
    )
    class(obj) <- "Model"
  }
  obj
}

# -------
# METHODS
# -------

pre_save_model <- function(obj, ...) UseMethod("pre_save_model")

pre_save_model.Model <- function(obj) {
  shrink_data(obj$data)
}

#' Save model on the filesystem
#'
#' @param Model obj. Model object
#'
#' @export
save_model <- function(obj, ...) UseMethod("save_model")

#' Save model on the filesystem
#'
#' @param Model obj. Model object
#'
#' @export
save_model.Model <- function(obj) {
  pre_save_model(obj)
  save(obj, file = get_model_path(obj$name))
}

post_load_model <- function(obj, ...) UseMethod("post_load_model")
post_load_model.Model <- function(obj) {
}

#' Set ski_nivo data to be used with the statistical model
#'
#' @param obj Model.
#' @param past_horizon integer. Past horizon to consider in days
#'
#' @export
load_ski_nivo <- function(obj, ...) UseMethod("load_ski_nivo")

#' Set ski_nivo data to be used with the statistical model
#'
#' @param obj Model.
#' @param past_horizon integer. Past horizon to consider in days
#'
#' @export
load_ski_nivo.Model <- function(obj, past_horizon = 9) {
  load_ski_nivo(obj$data, past_horizon)
}

restrict_orientations <- function(obj, ...) UseMethod("restrict_orientations")

restrict_orientations.Model <- function(obj, orientations, ski_nivo = NULL, routes_stations = NULL) {
  restrict_orientations(obj$data, orientations, ski_nivo, routes_stations)
  obj$name <- paste(obj$name, join_orientations(orientations), sep = "-")
}

#' Set Xy data to be used with the statistical model
#'
#' @param obj Model.
#' @param na_replacement character. NA replacement strategy to apply
#'
#' @export
load_Xy <- function(obj, ...) UseMethod("load_Xy")

#' Set Xy data to be used with the statistical model
#'
#' @param obj Model.
#' @param na_replacement character. NA replacement strategy to apply
#'
#' @export
load_Xy.Model <- function(obj, na_replacement = "mean") { # drop gives higher explanation % but largely reduce data set size
  if (is.null(obj$data$ski_nivo)) {
    load_ski_nivo(obj)
  }
  load_Xy(obj$data, na_replacement)
}

#' Split train and test data
#'
#' Full dataset is divided into training ant testing following a split ratio
#'
#' @param obj Model.
#' @param split numeric. Split ratio between train and test data
#'
#' @export
split_Xy <- function(obj, ...) UseMethod("split_Xy")

#' Split train and test data
#'
#' Full dataset is divided into training ant testing following a split ratio
#'
#' @param obj Model.
#' @param split numeric. Split ratio between train and test data
#'
#' @export
split_Xy.Model <- function(obj, split = .75) { # split (0.6 - 0.8) does not really seems to have impact
  if (is.null(obj$data$Xy)) {
    load_Xy(obj)
  }
  split_Xy(obj$data, split)
}

#' Model fitting
#'
#' @param obj Model
#' @param classify logical. use classification or regression?
#' @param scoring character. Scoring method used by the classifier
#'
#' @export
fit <- function(obj, ...) UseMethod("fit")
fit.Model <- function(obj, classify = NULL, scoring = NULL) {
  stop("Should be implemented in a child class")
}

#' Model score
#'
#' If ord_score is TRUE instead of performing an accuracy score (is the predicted class equals to the true one) we take
#' advantage of the fact that the ski evaluation score is an ordinal variable
#'
#' @param obj Model
#' @param ord_score logical. Whether the score should be computed as ordinal
#'
#' @return numeric. score
#'
#' @export
score <- function(obj, ...) UseMethod("score")
score.Model <- function(obj, ord_score = FALSE) {
  stop("Should be implemented in a child class")
}

#' Model predict
#'
#' @param obj Model
#' @param X_pred data.frame. Model input
#'
#' @return list. prediction
#'
#' @export
predict <- function(obj, ...) UseMethod("predict")
predict.Model <- function(obj, X_pred) {
  stop("Should be implemented in a child class")
}

#' Predict using fresh data
#'
#' @param obj Model
#' @param X_fresh data.frame. Already fresh data (optional)
#'
#' @return data.frame
#'
#' @export
predict_fresh <- function(obj, ...) UseMethod("predict_fresh")

#' Predict using fresh data
#'
#' @param obj Model
#' @param X_fresh data.frame. Already fresh data (optional)
#'
#' @return data.frame
#'
#' @export
predict_fresh.Model <- function(obj, X_fresh = NULL) {
  if (is.null(X_fresh)) {
    X_fresh <- fetch_fresh_X(obj$data)
  }
  y_fresh <- predict(obj, dplyr::select(X_fresh, !c(station, date_nivo)))
  attach_Xy(obj$data, dplyr::select(X_fresh, station), y_fresh)
}

#' Evaluate a model
#'
#' Evaluates a model using different parameters
#'
#' @param obj Model. model to evaluate
#' @param na_replacements vector. NA replacement strategies
#' @param splits vector. training - test splits
#' @param classify list. use classification or regression?
#' @param scorings list. fit scoring strategies
#' @param ord_scores list. score as ordinal?
#'
#' @return data.frame. evaluation
#'
#' @export
evaluate <- function(obj, ...) UseMethod("evaluate")

#' Evaluate a model
#'
#' Evaluates a model using different parameters
#'
#' @param obj Model. model to evaluate
#' @param na_replacements vector. NA replacement strategies
#' @param splits vector. training - test splits
#' @param classify list. use classification or regression?
#' @param scorings list. fit scoring strategies
#' @param ord_scores list. score as ordinal?
#'
#' @return data.frame. evaluation
#'
#' @export
evaluate.Model <- function(obj, na_replacements, splits, classify, scorings, ord_scores) {
  evaluation <- data.frame()

  for (i in na_replacements) {
    tryCatch({
      load_Xy(obj, na_replacement = i)

      for (j in splits) {
        tryCatch({
          split_Xy(obj, split = j)

          for (k in classify) {
            for (l in scorings) {
              tryCatch({
                fit(obj, classify = k, scoring = l)

                if (is.null(l)) {
                  l <- NA
                }
                for (m in ord_scores) {
                  tryCatch({
                    score <- score(obj, ord_score = m)
                    evaluation <- rbind(evaluation, data.frame(na_replacement = i, split = j, classify = k, scoring = l,
                                                               ord_score = m, score = score))
                  }, error = function(e) {
                    message("Error in ord_scores loop")
                    message(paste0("model: ", obj$name, ", ord_score; ", m, ", scoring; ", l, ",  classify: ", k,
                                   ", split; ", j, ", na_replacement; ", i))
                    print(e)
                  })
                }

              }, error = function(e) {
                message("Error in classify / scorings loop")
                message(paste0("model: ", obj$name, ", scoring; ", l, ", classify: ", k, ", split; ", j,
                               ", na_replacement; ", i))
                print(e)
              })
            }
          }
        }, error = function(e) {
          message("Error in splits loop")
          message(paste0("model: ", obj$name, ", split; ", j, ", na_replacement; ", i))
          print(e)
        })
      }
    }, error = function(e) {
      message("Error in na_replacements loop")
      message(paste0("model: ", obj$name, ", na_replacement; ", i))
      print(e)
    })
  }

  obj$evaluation <- evaluation
  evaluation
}

# ---------
# FUNCTIONS
# ---------

#' Ordinal scoring function
#'
#' 1. compute the mae between true and predicted values
#' 2. divide it by 5, the maximum value taken by the variable
#' 3. subtract it to 1 so higher is better, as for the accuracy score
#'
#' @param y_true vector.
#' @param y_pred vector.
#'
#' @return numeric. ordinal score
ordinal_score <- function(y_true, y_pred) {
  1 - (mean(abs(as.numeric(y_true) - as.numeric(y_pred)))) / 5
}

get_model_ref <- function(name) {
  paste(name, "model", sep = "-")
}

get_model_path <- function(name) {
  paste(system.file("extdata", package = "bonski.predict"), paste(get_model_ref(name), "rds", sep = "."), sep = "/")
}

#' Load model from the filesystem
#'
#' @param Model character. Model name
#'
#' @export
load_model <- function(name) {
  obj <- readRDS(file = get_model_path(name))
  post_load_model(obj)
  obj
}