PEDISdata: Preprocessing PEDIS data files

# Die Funktionen in dieser Datei fassen die Ergebnisse der Trainings und Test AUC zusammen.
# Momentan ist nicht vorgesehen, diese Ergebnisse in die Publikation auf zunehmen.

#' Summarise AUC value
#'
#' Create a summary table to compare AUC values of test and train
#' set. The function takes a dataframe with two columns containing AUC values for each fold
#'
#' @param data A dataframe with two columns, Fold and AUC value
#'
#' @return
#'
#' @examples
create_summary_table <- function(data) {
  data %>%
    set_names(sub, pattern = "_auc", replacement = "") %>%
    gather() %>%
    group_by(key) %>%
    summarise_at("value", .funs = list(avg = mean, stdev = sd, ci = formatCI)) %>%
    mutate_if(is.numeric, round, digits = 2) %>%
    set_names(c("Set", "Avg.", "StDev.", "95% CI")) %>%
    knitr::kable() %>%
    kableExtra::kable_styling(full_width = F) %>%
    kableExtra::footnote(general = "AUC Values of the Training- and Test-Sets", general_title = "")
}

#' Format CI
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
formatCI <- function(x) {
  ci <- Hmisc::smean.cl.normal(x, conf.int = .95)
  ci <- round(ci, digits = 2)
  glue::glue("[{ci[2]}, {ci[3]}]")
}

#' Calculate and Print Effect Size
#'
#' @param data
#'
#' @return
#' @export
#'
#' @examples
effSize <- function(data) {

  f <- data %>% pull("key")
  d <- data %>% pull("value")
  eff_size <- effsize::cohen.d(d, f)
  eff_size_ci <- pluck(eff_size, "conf.int") %>% round(digits = 2)
  eff_size_ci <- glue::glue("[{eff_size_ci[1]}, {eff_size_ci[2]}]")
  eff_size <- pluck(eff_size, "estimate") %>% round(digits = 2)

  t_test <- t.test(d ~ f) %>% broom::tidy()
  delta_avg <- t_test %>% pull("estimate")

  ci_low <- t_test %>% pull("conf.low") %>% round(3)
  ci_high <- t_test %>% pull("conf.high") %>% round(3)
  delta_avg_ci <- glue::glue("[{ci_low}, {ci_high}]")

  t_stat <- t_test %>% pluck("statistic") %>% round(3)
  p_value <- t_test %>% pluck("p.value")

  list("delta_avg" = delta_avg,
       "delta_avg_ci" = delta_avg_ci,
       "eff_size" = eff_size,
       "eff_size_ci" = eff_size_ci,
       "t_stat" = t_stat,
       "p_value" = p_value)
}

diff_table <- function(data) {

  differences <- data %>%
    gather %>%
    effSize()

  as_tibble(differences) %>%
    mutate(p_value = scales::pvalue(p_value)) %>%
    # mutate_if(is.numeric, myround, digits = 3) %>%
    gather() %>%
    set_names(c("Metric", "Value")) %>%
    knitr::kable(digits = 3) %>%
    kableExtra::kable_styling(full_width = F) %>%
    kableExtra::footnote(general = "Differences between the Training- and Test-Set AUC values", general_title = "")
}