knitr::opts_chunk$set(echo = TRUE)

Packages

#devtools::install_github("dreamRs/apexcharter")
pacman::p_load(tidyverse, apexcharter, mlgraph, recipes, deeplyr)
# ggplot2::theme_set(ggplot2::theme_classic())
# devtools::document()
devtools::load_all()
# devtools::install()
mtcars <- mtcars %>% 
  mutate(
    id = 1:n(),
    amnum = am,
    am = as.factor(am),
    cyl =  as.factor(as.numeric(as.factor(cyl))),
    cyl0num = as.numeric(as.factor(cyl))-1, 
    cyl0 = as.factor(cyl0num)
  )

### PS
rec_linear <- recipes::recipe(hp ~ ., mtcars) %>%
  update_role(id, new_role = "ID")

### automatic?
rec_binary<- recipes::recipe(am ~ ., mtcars) %>%
  update_role(id, amnum, new_role = "ID")

### number of cylinders
rec_multi <- recipes::recipe(cyl ~ ., mtcars) %>%
  update_role(id, cyl0, cyl0num, new_role = "ID") 

Binary

fb <- fit_learner(rec_binary, mtcars, list(), "binary", "rpart")
fb$predict(mtcars)

nn <- eval_classifier(fb$preds, "am", path = ".")

Confusion Matrix

devtools::document()
confusion_df <- get_confusion_df(preds2, ft_winner)

gg_plot_confusion(confusion_df)
hc_plot_confusion(confusion_df)
ax_plot_confusion(confusion_df)

ROC

devtools::load_all()
roc_df <- get_roc_df(preds2, ft_winner)

# roc_df %>%
#   mutate(id = 1:n()) %>%
#   gather(var, value, -.threshold, -actual, -id) %>%
#   sample_n(100) %>%
#   arrange(id) %>%
#   ggplot(aes(.threshold, value, color = var)) +
#   geom_line()
# 

#Metrics::

gg_plot_roc(roc_df)
#gg_plot_roc2(preds$target, preds$prob)
hc_plot_roc(roc_df)
ax_plot_roc(roc_df)
#ax_plot_roc(roc_df)
devtools::load_all()
preds2 <- readRDS("/Volumes/storage/MEGA/projects/mlgraph/data/preds2.rds")
roc_df <- get_roc_df(preds2, ft_winner)

roc_df %>% ax_plot_roc()

preds2 %>% 
  mutate(ft_winner = as.factor(ft_winner)) %>% 
  eval_classifier(ft_winner)

RP-Curve

computes the precision at every unique value of the probability column (in addition to infinity)

rp_df <- get_rp_df(preds2$ft_winner01, preds2$prob)

gg_plot_rp(rp_df)
hc_plot_rp(rp_df)
ax_plot_rp(rp_df)

Probability Density

devtools::document()
dens_df <- get_density_df(fb$preds, am)

gg_plot_density(dens_df)
hc_plot_density(dens_df)
ax_plot_density(dens_df)

Cutoff

get_cutoff_df(preds2, ft_winner01) %>%
  ggplot(aes(thres, value, colour = metric)) +
  geom_line()

get_cutoff_df(preds2, ft_winner01) %>%
  ax_plot_cutoff

Class Error

get_classes_df(preds2, ft_winner) %>%
  ax_plot_classes()

Avergage Performance

d <-  preds2 %>%
  get_avg_df(ft_winner) %>%
  ax_plot_avg()

d

Other

get_gain_df <- function(actual, prob){
  dplyr::tibble(actual, prob) %>%
    dplyr::mutate_at(1, as.factor) %>%
    dplyr::mutate(prob = 1-prob) %>%
    yardstick::gain_curve(actual, prob) 
}

get_lift_df <- function(actual, prob){
  dplyr::tibble(actual, prob) %>%
    dplyr::mutate_at(1, as.factor) %>%
    dplyr::mutate(prob = 1-prob) %>%
    yardstick::lift_curve(actual, prob) 
}

gain_df <- get_gain_df(preds$target, preds$prob)
lift_df <- get_lift_df(preds$target, preds$prob)

gain_df %>%
  ggplot2::ggplot(aes(x = .percent_tested, y = .percent_found)) +
  ggplot2::geom_line() 

lift_df %>%
  ggplot2::ggplot(aes(x = .percent_tested, y = .lift)) +
  ggplot2::geom_line() 

All together

devtools::document()
plot_classifier <- function(li){
  li %>% 
    imap(~{
      do.call(glue::glue("ax_plot_{.y}"), list(.data = .x))
    })
}

evals <- list(
  confusion = "ft_winner",
  classes = "ft_winner",
  roc = "ft_winner",
  cutoff = "ft_winner01",
  density = "ft_winner",
  avg = "ft_winner"
) %>%
  eval_classifier(preds2)

evals %>%
  plot_classifier


get_avg_df(preds2, "ft_winner")
do.call(mlgraph::get_avg_df, list(.data = preds2, actual = "ft_winner"))


systats/mlgraph documentation built on Feb. 25, 2020, 10:37 a.m.