Nothing
#' Association (Correlation) between Continuous (numeric) Variables
#'
#' \code{CCassociation} finds correlation between all the variables in data
#' with only numeric columns
#'
#' This function calls \code{cor} function to calculate the correlation values.
#' The difference is that this doesn't take method as parameter, instead it
#' decides the methods itself using normality tests. If the variables satisfy
#' the assumption of Pearson correlation, then pearson correlation is calculated.
#' Otherwise Spearman is calculated. To learn more, check the
#' \code{\link[stats]{cor}}
#'
#' @seealso
#' \code{\link{association}} for association between any type of variables,
#' \code{\link{QQassociation}} for Association between Categorical variables,
#' \code{\link{CQassociation}} for Association between Continuous-Categorical
#' variables
#'
#' @param numtb a data frame with all the numerical columns. This should
#' have at least two columns
#' @param use an optional character string giving a method for computing
#' association in the presence of missing values. This must be (complete or an
#' abbreviation of) one of the strings "everything", "all.obs",
#' "complete.obs", "na.or.complete", or "pairwise.complete.obs". If use is
#' "everything", NAs will propagate conceptually, i.e., a resulting value will
#' be NA whenever one of its contributing observations is NA. If use is
#' "all.obs", then the presence of missing observations will produce an error.
#' If use is "complete.obs" then missing values are handled by case wise
#' deletion (and if there are no complete cases, that gives an error).
#' "na.or.complete" is the same unless there are no complete cases, that gives
#' NA
#' @param normality_test_method method for normality test for a variable.
#' Values can be \code{shapiro}
#' for Shapiro-Wilk test or
#' \code{'anderson'} for 'Anderson-Darling' test of normality or \code{ks} for
#' 'Kolmogorov-Smirnov'
#' @param normality_test_pval significance level for normality tests.
#' Default is 0.05
#' @param method1 method for association between continuous-continuous
#' variables. values can be \code{"auto", "pearson", "kendall", "spearman"}.
#' See details for more information.
#' @param methodMat1 method dataframe like methodMats from the function \code{
#' association}
#' @param methods_used a square data.frame which will store the type of
#' association used between the variables. Dimension will be
#' number of variables * number of variables.
#'
#' @return a list of two tables with number of rows and column equal to number
#' of columns in \code{numtb}:
#' \describe{
#' \item{r}{Table containing correlation values}
#' \item{r_pvalue}{Table containing p-value for the correlation test}
#' }
CCassociation <- function(numtb,
use = "everything",
normality_test_method,
normality_test_pval,
method1 = c("auto", "pearson", "kendall", "spearman"),
methodMat1 = NULL,
methods_used) {
# INTERNAL FUNCTION
CC_ <- function(x, y, varnames, norm_test, met) {
if (met == "auto") {
if (norm_test) {
warning(paste0("Variables '", paste0(varnames, collapse = "' and '"),
"' follows normality assumptions. Doing parameteric test
(Pearson) for these variables."))
return(list(m = "pearson", val = cor.test(x, y, method = "pearson")))
} else {
warning(paste0("Variables '", paste0(varnames, collapse = "' and '"),
"' doesn't follow normality assumptions. Doing
non-parameteric test (Spearman) for these variables."))
return(list(m = "spearman", val = cor.test(x, y, method="spearman")))
}
} else {
if (norm_test) {
warning(paste0("Variables '", paste0(varnames, collapse = "' and '"),
"' follows normality assumptions. Parameteric test
(Pearson) should be preferred for these variables."))
} else {
warning(paste0("Variables '", paste0(varnames, collapse = "' and '"),
"' doesn't follow normality assumptions. Non-parameteric
test (Spearman) should be preferred for these variables"))
}
return(list(m = met, val = cor.test(x, y, method = met)))
}
}
# METHOD FOR THE CORRELATION
method1 <- match.arg(method1)
# IF CHANNEL PAIR LEVEL METHOD IS MISSING THEN USING method1 FOR ALL PAIRS
if (is.null(methodMat1)) {
methodMat1 <- data.frame(matrix(method1, nrow = ncol(numtb),
ncol = ncol(numtb),
dimnames = list(names(numtb), names(numtb))))
}
# NORMALITY TEST FOR ALL THE VARIABLES ---------------------------------------
norm_test_all <- unlist(lapply(1:ncol(numtb), function(x,
numtb,
normality_test_method,
normality_test_pval) {
return(tryCatch(norm_test_fun(numtb[,x],
method = normality_test_method,
pval = normality_test_pval,
names(numtb)[x],
bin = TRUE),
error=function(e){
warning(paste0("Normality test failed for ",
names(numtb)[x])); return(0)
}))
}, numtb, normality_test_method, normality_test_pval))
names(norm_test_all) <- colnames(numtb)
if (is.null(norm_test_all)) {
norm_test_all <- rep(TRUE, ncol(numtb))
names(norm_test_all) <- colnames(numtb)
}
# ----------------------------------------------------------------------------
ncx <- ncol(numtb)
r <- matrix(0, nrow = ncx, ncol = ncx)
rownames(r) <- colnames(numtb)
colnames(r) <- colnames(numtb)
r_pvalue <- matrix(NA, nrow = ncx, ncol = ncx)
rownames(r_pvalue) <- colnames(numtb)
colnames(r_pvalue) <- colnames(numtb)
for (i in seq_len(ncx)) {
for (j in seq_len(i)) {
xname <- names(numtb)[i]
yname <- names(numtb)[j]
met <- methodMat1[xname, yname]
x <- numtb[, i]
y <- numtb[, j]
if (use == "everything") {
if ((sum(is.na(x))+sum(is.na(y))) > 0) {
r[i,j] <- r[j,i] <- NA
r_pvalue[i,j] <- r_pvalue[j,i] <- NA
} else {
norm_test <- all(norm_test_all[c(xname, yname)])
a <- CC_(x, y, c(xname, yname), norm_test,met)
r[i,j] <- r[j, i] <- a$val$estimate
r_pvalue[i,j] <- r_pvalue[j, i] <- a$val$p.value
methods_used[yname, xname] <- methods_used[xname, yname] <- a$m
}
} else {
ok = complete.cases(x, y)
if (sum(ok) == 0){
if (use == "complete.obs") {
stop('While finding association between "', xname, '" and "', yname,
'", all the observations were missing.
Select use = "na.or.complete" for such case.')
} else if (use == "na.or.complete") {
r[i,j] <- r[j,i] <- NA
r_pvalue[i,j] <- r_pvalue[j,i] <- NA
}
} else {
x <- x[ok]
y <- y[ok]
norm_test <- all(norm_test_all[c(xname, yname)])
a <- CC_(x, y, c(xname, yname), norm_test, met)
r[i,j] <- r[j, i] <- a$val$estimate
r_pvalue[i,j] <- r_pvalue[j, i] <- a$val$p.value
methods_used[yname, xname] <- methods_used[xname, yname] <- a$m
}
}
}
}
return(list(r = r, r_pvalue = r_pvalue, methods_used = methods_used))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.