R/bivar_summaries.R

Defines functions describe_bivar describe_bivar_quant describe_bivar_categ describe_bivar_both

describe_bivar <- function(y, x, max_lvl = 4) {
  ls <- dispatch_bivar(y, x, "describe")
  x <- ls[[1]]
  y <- ls[[2]]
  f <- ls[[3]]
  
  f(y, x, max_lvl = max_lvl)
}

describe_bivar_quant <- function(y, x, max_lvl = 4) {
  pearson <- cor.test(y, x, method = "pearson")
  kendall <- cor.test(y, x, method = "kendall", exact = FALSE)
  spearman <- cor.test(y, x, method = "spearman", exact = FALSE)
  
  result <- tibble::tribble(
    ~correlation, ~value, ~inferior_CI95, ~superior_CI95, ~p_value,
    "pearson", pearson$estimate, pearson$conf.int[[1]], 
               pearson$conf.int[[2]], pearson$p.value,
    "kendall", kendall$estimate, NA, NA, kendall$p.value,
    "spearman", spearman$estimate, NA, NA, spearman$p.value
  )
  
  return(result)
}

describe_bivar_categ <- function(y, x, max_lvl = 4) {
   result <- list()
  
  x <- x %>% 
  as.factor() %>% 
    forcats::fct_infreq()
  y <- y %>% 
    as.factor() %>% 
    forcats::fct_infreq()
  
  tab <- table(y, x, useNA = "ifany") %>% 
    broom::tidy() %>%
    dplyr::rename(Freq = n)
  
  complete <- tab %>% 
    dplyr::filter(!is.na(y), !is.na(x)) %>% 
    dplyr::summarise(n = sum(Freq)) %>% 
    dplyr::pull(n)
    
  
  mode <- tab %>% 
    dplyr::filter(Freq == max(Freq)) %>% 
    dplyr::select(1, 2)
  
  result[["n"]] <- length(x)
  result$levels_y <- length(levels(y))
  result$mode_y <- levels(y)[1]
  result$levels_x <- length(levels(x))
  result$mode_x <- levels(x)[1]
  result$cross_levels <- length(levels(x)) * length(levels(y))
  result$mode <- paste(mode[1, 1], mode[1, 2], sep = " - ")
  result$complete <- complete
  result$prop_complete <- complete / length(x)
  
  data.frame(result)
}

describe_bivar_both <- function(y, x, max_lvl = 4) {
  x <- as.factor(x) 
  if (length(levels(x) > max_lvl)) {
    x <- forcats::fct_lump(x, n = max_lvl - 1, ties.method = "first")
  } 
  
  df <- data.frame(y, x) %>% 
    dplyr::group_by(x) %>% 
    dplyr::do({
      describe(.$y)
    })
  
  return(df)
}
  
AdrienLeGuillou/descriptor documentation built on May 22, 2019, 7:55 p.m.