#' Cette fonction permet de mesurer la corrélation entre tous les types
#' de variables (numeric vs numeric, numeric vs nominal, nominal vs nominal).
#' Ces dernières doivent préalablement être converties au format requis (
#' numeric et character ou facteur).
#'
#' @param dat Data.Frame
#' @param plot Return correlation matrix plot ?
#' @param reactive Return plotly unstead of ggplot graph output
#' @export
#' @examples
#' x = as_tibble(mtcars) %>%
#' mutate_at(vars(cyl, vs, am, gear, carb), as.factor) %>%
#' mutate_at(vars(mpg, disp, hp, drat, wt, qsec), as.numeric) %>%
#' corr()
corr_matrix = function(dat, plot = FALSE, reactive = FALSE) {
# Libraries
require(dplyr)
require(purrr)
require(tibble)
require(broom)
require(descr)
require(corrplot)
require(heplots)
require(vcd)
require(plotly)
# Pre-processing
dat = dat %>%
mutate_if(is.character, as.factor)
# Numeric vs Numeric
corr.signif.1 = function(dat) {
dat %>%
select_if(is.numeric) %>%
map(function(x) {
dat %>%
select_if(is.numeric) %>%
map(function(y) {
cor.test(x, y)$p.value
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Numeric vs Numeric
corr.value.1 = function(dat) {
dat %>%
select_if(is.numeric) %>%
map(function(x) {
dat %>%
select_if(is.numeric) %>%
map(function(y) {
cor.test(x, y)$estimate
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Numeric vs Nominal
corr.signif.2 = function(dat) {
dat %>%
select_if(is.numeric) %>%
map(function(x) {
dat %>%
select_if(is.factor) %>%
map(function(y) {
tidy(aov(data = dat, x ~ y))$p.value[1]
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Numeric vs Nominal
corr.value.2 = function(dat) {
dat %>%
select_if(is.numeric) %>%
map(function(x) {
dat %>%
select_if(is.factor) %>%
map(function(y) {
etasq(aov(data = dat, x ~ y))[1, 1]
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Nominal vs Numeric
corr.signif.2bis = function(dat) {
dat %>%
select_if(is.factor) %>%
map(function(x) {
dat %>%
select_if(is.numeric) %>%
map(function(y) {
tidy(aov(data = dat, y ~ x))$p.value[1]
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Nominal vs Numeric
corr.value.2bis = function(dat) {
dat %>%
select_if(is.factor) %>%
map(function(x) {
dat %>%
select_if(is.numeric) %>%
map(function(y) {
etasq(aov(data = dat, y ~ x))[1, 1]
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Nominal vs Nominal (Step 1)
corr.signif.3 = function(dat) {
dat %>%
select_if(is.factor) %>%
map(function(x) {
dat %>%
select_if(is.factor) %>%
map(function(y) {
CrossTable(y, x, chisq = TRUE)$CST$p.value
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Nominal vs Nominal (Step 2)
corr.value.3 = function(dat) {
dat %>%
select_if(is.factor) %>%
map(function(x) {
dat %>%
select_if(is.factor) %>%
map(function(y) {
assocstats(xtabs(~ y + x, dat))$cramer
})
}) %>%
do.call("rbind", .) %>%
as.data.frame() %>%
rownames_to_column("variable") %>%
mutate_if(is.list, as.numeric) %>%
as_tibble()
}
# Merge all
mat.signif = corr.signif.1(dat) %>%
full_join(corr.signif.2(dat)) %>%
full_join(corr.signif.2bis(dat)) %>%
full_join(corr.signif.3(dat)) %>%
group_by(variable) %>%
summarise_all(~ unique(na.omit(.x))) %>%
column_to_rownames("variable") %>%
select(rownames(.))
mat.value = corr.value.1(dat) %>%
full_join(corr.value.2(dat)) %>%
full_join(corr.value.2bis(dat)) %>%
full_join(corr.value.3(dat)) %>%
mutate_if(is.numeric, abs) %>%
group_by(variable) %>%
summarise_all(~ unique(na.omit(.x))) %>%
column_to_rownames("variable") %>%
select(rownames(.))
# Merge in list
corr.matrix = list(
"p.value" = mat.signif,
"corr" = mat.value
)
# Plot it
if(plot == TRUE & reactive == FALSE) {
p = corrplot(
corr = as.matrix(corr.matrix$corr),
is.corr = FALSE,
tl.srt = 45,
tl.col = "black",
type = "upper",
p.mat = as.matrix(corr.matrix$p.value),
sig.level = 0.05
)
return(p)
}
if(plot == TRUE & reactive == TRUE) {
trace1 = list(
type = "heatmap",
x = colnames(corr.matrix$corr),
y = rownames(corr.matrix$corr),
z = as.matrix(corr.matrix$corr)
)
data = list(trace1)
p = plot_ly() %>%
add_trace(type=trace1$type, x=trace1$x, y=trace1$y, z=trace1$z, colors = "Blues") %>%
layout("Matrice de correlation")
return(p)
}
return(corr.matrix)
# Amélioration : Weighted !!
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.