R/corr_matrix.R

Defines functions corr_matrix

Documented in corr_matrix

#' Cette fonction permet de mesurer la corrélation entre tous les types 
#' de variables (numeric vs numeric, numeric vs nominal, nominal vs nominal). 
#' Ces dernières doivent préalablement être converties au format requis (
#' numeric et character ou facteur). 
#' 
#' @param dat Data.Frame
#' @param plot Return correlation matrix plot ? 
#' @param reactive Return plotly unstead of ggplot graph output
#' @export
#' @examples 
#'   x = as_tibble(mtcars) %>% 
#'     mutate_at(vars(cyl, vs, am, gear, carb), as.factor) %>% 
#'     mutate_at(vars(mpg, disp, hp, drat, wt, qsec), as.numeric) %>% 
#'     corr()

corr_matrix = function(dat, plot = FALSE, reactive = FALSE) {
   # Libraries
   require(dplyr)
   require(purrr)
   require(tibble)
   require(broom)
   require(descr)
   require(corrplot)
   require(heplots)
   require(vcd)
   require(plotly)
   
   # Pre-processing
   dat = dat %>% 
      mutate_if(is.character, as.factor) 
   
   # Numeric vs Numeric
   corr.signif.1 = function(dat) {
      dat %>%
         select_if(is.numeric) %>%
         map(function(x) {
            dat %>%
               select_if(is.numeric) %>%
               map(function(y) {
                  cor.test(x, y)$p.value
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Numeric vs Numeric
   corr.value.1 = function(dat) {
      dat %>%
         select_if(is.numeric) %>%
         map(function(x) {
            dat %>%
               select_if(is.numeric) %>%
               map(function(y) {
                  cor.test(x, y)$estimate
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Numeric vs Nominal
   corr.signif.2 = function(dat) {
      dat %>%
         select_if(is.numeric) %>%
         map(function(x) {
            dat %>%
               select_if(is.factor) %>%
               map(function(y) {
                  tidy(aov(data = dat, x ~ y))$p.value[1]
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Numeric vs Nominal
   corr.value.2 = function(dat) {
      dat %>%
         select_if(is.numeric) %>%
         map(function(x) {
            dat %>%
               select_if(is.factor) %>%
               map(function(y) {
                  etasq(aov(data = dat, x ~ y))[1, 1]
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Nominal vs Numeric
   corr.signif.2bis = function(dat) {
      dat %>%
         select_if(is.factor) %>%
         map(function(x) {
            dat %>%
               select_if(is.numeric) %>%
               map(function(y) {
                  tidy(aov(data = dat, y ~ x))$p.value[1]
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Nominal vs Numeric
   corr.value.2bis = function(dat) {
      dat %>%
         select_if(is.factor) %>%
         map(function(x) {
            dat %>%
               select_if(is.numeric) %>%
               map(function(y) {
                  etasq(aov(data = dat, y ~ x))[1, 1]
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Nominal vs Nominal (Step 1)
   corr.signif.3 = function(dat) {
      dat %>%
         select_if(is.factor) %>%
         map(function(x) {
            dat %>%
               select_if(is.factor) %>%
               map(function(y) {
                  CrossTable(y, x, chisq = TRUE)$CST$p.value
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Nominal vs Nominal (Step 2)
   corr.value.3 = function(dat) {
      dat %>%
         select_if(is.factor) %>%
         map(function(x) {
            dat %>%
               select_if(is.factor) %>%
               map(function(y) {
                  assocstats(xtabs(~ y + x, dat))$cramer
               })
         }) %>%
         do.call("rbind", .) %>%
         as.data.frame() %>%
         rownames_to_column("variable") %>%
         mutate_if(is.list, as.numeric) %>%
         as_tibble()
   }
   
   # Merge all
   mat.signif = corr.signif.1(dat) %>%
      full_join(corr.signif.2(dat)) %>%
      full_join(corr.signif.2bis(dat)) %>%
      full_join(corr.signif.3(dat)) %>% 
      group_by(variable) %>% 
      summarise_all(~ unique(na.omit(.x))) %>% 
      column_to_rownames("variable") %>% 
      select(rownames(.)) 
   
   mat.value = corr.value.1(dat) %>%
      full_join(corr.value.2(dat)) %>%
      full_join(corr.value.2bis(dat)) %>%
      full_join(corr.value.3(dat)) %>%
      mutate_if(is.numeric, abs) %>% 
      group_by(variable) %>% 
      summarise_all(~ unique(na.omit(.x))) %>% 
      column_to_rownames("variable") %>% 
      select(rownames(.)) 
   
   # Merge in list
   corr.matrix = list(
      "p.value" = mat.signif,
      "corr" = mat.value
   )
   
   # Plot it
   if(plot == TRUE & reactive == FALSE) {
      p = corrplot(
         corr = as.matrix(corr.matrix$corr),
         is.corr = FALSE,
         tl.srt = 45,
         tl.col = "black",
         type = "upper",
         p.mat = as.matrix(corr.matrix$p.value),
         sig.level = 0.05
      )
      
      return(p)
   }
   
   if(plot == TRUE & reactive == TRUE) {
      
      trace1 = list(
         type = "heatmap", 
         x = colnames(corr.matrix$corr), 
         y = rownames(corr.matrix$corr), 
         z = as.matrix(corr.matrix$corr)
      )

      data = list(trace1)
      
      p = plot_ly() %>% 
         add_trace(type=trace1$type, x=trace1$x, y=trace1$y, z=trace1$z, colors = "Blues") %>% 
         layout("Matrice de correlation")
      
      return(p)
      
   }
   
   return(corr.matrix)
   # Amélioration : Weighted !!
}
AlexisMayer/toolbox documentation built on Aug. 25, 2020, 3:56 p.m.