R/plots.R

Defines functions ey_colors ey_font_apply plot_hyperparam_tune plot_rmse_boot plot_rsq_boot plot_mae_boot plot_cor_boot plot_all_metrics plot_predicted_actual

#' Returns EY color hex values for use in plotting
#' @param color valid input values are ey_yellow, ey_off_black, ey_gray_01, ey_gray_02,
#' maroon_04, red_04, orange_04, green_04, teal_04, blue_04, purple_04
#' @return hex value of specified color
ey_colors <- function(color){

  colors <- list()
  colors[["ey_yellow"]]    <- "FFE600"
  colors[["ey_off_black"]] <- "2E2E38"
  colors[["ey_gray_01"]]   <- "747480"
  colors[["ey_gray_02"]]   <- "C4C4CD"
  colors[["maroon_04"]]    <- "750E5C"
  colors[["red_04"]]       <- "FF4136"
  colors[["orange_04"]]    <- "FF6D00"
  colors[["green_04"]]     <- "2DB757"
  colors[["teal_04"]]      <- "27ACAA"
  colors[["blue_04"]]      <- "188CE5"
  colors[["purple_04"]]    <- "3D108A"
  
  return(colors[[color]])
}

ey_font_apply <- function(p, font_size = 9){
  p <- p %>%
    plotly::layout(font = list(
      family = "Arial Black",
      size = font_size,
      color = "2E2E38"
    ))
  p
}

#' Plot hyperparameter tuning results
#'
#' @param mdl model training results
#' @param num_digits number of digits to round to
#' @param font_size font size in arial black
#' @return tuning results graph

plot_hyperparam_tune <- function(mdl, num_digits = 3, font_size = 9){
  params_tested <- mdl$params_tested
  params_tested$num <- c(1:nrow(params_tested))
  for(i in 1:ncol(params_tested)){
    if(is.numeric(params_tested[,i])){
      params_tested[,i] <- round(params_tested[,i], num_digits) 
    }
  }
  
  txt <- c()
  cnames <- colnames(params_tested)
  for(i in 1:nrow(params_tested)){
    txt_sub <- c()
    for(x in 1:length(cnames)){
      txt_sub <- c(txt_sub, paste0(cnames[x], ": ", params_tested[i,x]))
    }
    txt <- c(txt, paste(txt_sub, collapse = "\n"))
  }

  plotly::plot_ly(data = params_tested,
                  x    = ~num,
                  y    = ~eval,
                  type = 'scatter',
                  mode = 'markers',
                  text = txt,
                  hoverinfo = txt,
                  marker = list(
                    color = ey_colors("blue_04")
                  )) %>%
    plotly::layout(title = 'Hyperparameter tuning results') %>%
    ey_font_apply(font_size = font_size)
}


#' Plot bootstrapped rmse
#'
#' @param mdl model training results
#' @param n number of bootstrap replicates
#' @param font_size font size
#' @return test data metrics graph
plot_rmse_boot <- function(mdl, n = 10000, font_size = 9){
  df <- data.frame(y     = mdl$test_y,
                   y_hat = mdl$test_yhat)
  set.seed(1234)
  rmse_boot <- c()
  for(i in 1:n){
    samp <- sample(1:nrow(df), nrow(df), replace = T)
    rmse_boot <- c(rmse_boot, yardstick::rmse_vec(df$y[samp], df$y_hat[samp]))
  }
  
  plotly::plot_ly(x = rmse_boot, 
                  type = 'histogram',
                  marker = list(
                    color = ey_colors("blue_04"))) %>%
    plotly::layout(title = "Bootstrap Test Set RMSE",
                   xaxis = list(title = "RMSE"),
                   yaxis = list(title = "Count")) %>%
    ey_font_apply(font_size = font_size)
  
}

#' Plot bootstrapped Rsquare
#'
#' @param mdl model training results
#' @param n number of bootstrap replicates
#' @param font_size font size
#' @return test data metrics graph
plot_rsq_boot <- function(mdl, n = 10000, font_size = 9){
  df <- data.frame(y     = mdl$test_y,
                   y_hat = mdl$test_yhat)
  set.seed(1234)
  rsq_boot <- c()
  for(i in 1:n){
    samp <- sample(1:nrow(df), nrow(df), replace = T)
    rsq_boot <- c(rsq_boot, yardstick::rsq_vec(df$y[samp], df$y_hat[samp]))
  }
  
  plotly::plot_ly(x = rsq_boot, 
                  type = 'histogram',
                  marker = list(
                    color = ey_colors("blue_04"))) %>%
    plotly::layout(title = "Bootstrap Test Set Rsquare",
                   xaxis = list(title = "Rsquare"),
                   yaxis = list(title = "Count")) %>%
    ey_font_apply(font_size = font_size)
  
}

#' Plot bootstrapped mae
#'
#' @param mdl model training results
#' @param n number of bootstrap replicates
#' @param font_size sets font size in arial black
#' @return test data metrics graph
plot_mae_boot <- function(mdl, n = 10000, font_size = 9){
  df <- data.frame(y     = mdl$test_y,
                   y_hat = mdl$test_yhat)
  set.seed(1234)
  mae_boot <- c()
  for(i in 1:n){
    samp <- sample(1:nrow(df), nrow(df), replace = T)
    mae_boot <- c(mae_boot, yardstick::mae_vec(df$y[samp], df$y_hat[samp]))
  }
  
  plotly::plot_ly(x = mae_boot, 
                  type = 'histogram',
                  marker = list(
                    color = ey_colors("blue_04"))) %>%
    plotly::layout(title = "Bootstrap Test Set MAE",
                   xaxis = list(title = "MAE"),
                   yaxis = list(title = "Count")) %>%
    ey_font_apply(font_size = font_size)
  
}

#' Plot bootstrapped spearman rank correlation metrics
#'
#' @param mdl model training results
#' @param n number of bootstrap replicates
#' @param font_size sets font size in arial black
#' @return test data metrics graph
plot_cor_boot <- function(mdl, n = 10000, font_size = 9){
  df <- data.frame(y     = mdl$test_y,
                   y_hat = mdl$test_yhat)
  set.seed(1234)
  cor_boot <- c()
  for(i in 1:n){
    samp <- sample(1:nrow(df), nrow(df), replace = T)
    cor_boot <- c(cor_boot, cor(df$y[samp], df$y_hat[samp], method = 'spearman'))
  }
  
  plotly::plot_ly(x = cor_boot, 
                  type = 'histogram',
                  marker = list(
                    color = ey_colors("blue_04"))) %>%
    plotly::layout(title = "Bootstrap Test Set Spearman Rank Correlation",
                   xaxis = list(title = "Spearman Rank Correlation"),
                   yaxis = list(title = "Count")) %>%
    ey_font_apply(font_size = font_size)
  
}

#' Plot test data bootstrapped rmse, rsquare, mae, and spearman rank correlation
#'
#' @param mdl model training results
#' @param n number of bootstrap replicates
#' @param font_size sets font size in arial black
#' @return test data metrics histograms
plot_all_metrics <- function(mdl, n = 10000, font_size = 9){
  s1 <- plot_rmse_boot(mdl = mdl, n = n)
  s2 <- plot_rsq_boot(mdl = mdl, n = n)
  s3 <- plot_mae_boot(mdl = mdl, n = n)
  s4 <- plot_cor_boot(mdl = mdl, n = n)
  plotly::subplot(s1, s2, s3, s4, nrows = 2, titleX = T, titleY = T) %>%
    plotly::layout(title = "Test Data Bootstrap Goodness of Fit Metrics",
                   showlegend = FALSE) %>%
    ey_font_apply(font_size = font_size)
}

#' Plot test data predicted versus actual scatter plot
#'
#' @param mdl model training results
#' @param font_size sets font size in arial black
#' @return predicted versus actual graph on test data

plot_predicted_actual <- function(mdl, font_size = 9){
  range_min <- min(c(mdl$test_y, mdl$test_yhat))
  range_max <- max(c(mdl$test_y, mdl$test_yhat))
  range_min <- ifelse(range_min <= 0, range_min * 1.1, 0)
  range_max <- ifelse(range_max <= 0, range_max * .9, range_max * 1.1)
  plotly::plot_ly(x = mdl$test_y,
               y = mdl$test_yhat,
               type = 'scatter',
               mode = 'markers',
               marker = list(color = ey_colors("blue_04"))) %>%
    plotly::layout(title = "Actual vs. Predicted Values",
                   xaxis = list(title = "Actual",
                                range = c(range_min, range_max)),
                   yaxis = list(title = "Predicted",
                                range = c(range_min, range_max))) %>%
    ey_font_apply(font_size = font_size)
}
prescient/opinionated_pipelines documentation built on Dec. 10, 2019, 12:15 a.m.