Nothing
#' corx
#'
#' Calculates correlations matrices. Relevant values are stored in a list with methods for easy retrieval and formatting in publication ready tables.
#'
#' @param data data.frame or matrix
#' @param x a vector of rownames. Defaults to all
#' @param y a vector of colnames. If not supplied, y is set to x.
#' @param z a vector of variable names. Control variables to be used in partial correlations - defaults to NULL
#' @param method character. One of "pearson", "spearman", or "kendall"
#' @param round numeric. Number of digits in printing
#' @param stars a numeric vector. This argument defines cut-offs for p-value stars.
#' @param p_adjust character. What adjustment for multiple tests should be used? One of "none" (default), "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", or "fdr"
#' @param remove_lead logical. if TRUE (the default), leading zeros are removed in summaries
#' @param triangle character. one of "lower", "upper" or NULL (the default)
#' @param caption character. table caption. Passed to plots
#' @param note character. Text for a table note
#' @param describe list of named functions. If functions are supplied to describe, new columns will be bound to the 'APA matrix' for each function in the list. Describe also accepts a variety of shortcuts. If describe is set to TRUE, mean and standard deviation are returned for all row variables. Describe can accept a character vector to call the following descriptive functions: c('mean','sd','var','median','iqr','skewness','kurtosis'). These shortcuts are powered by 'tidyselect'. Skewness and kurtosis are calculated using the 'moments' package. All functions retrieved with shortcuts remove missing values.
#' @param grey_nonsig logical. Should non-significant values be grey in output? This argument does nothing if describe is not set to FALSE
#' @param call_only logical. For debugging, if TRUE only the call is returned
#' @details
#' Constructs correlation matrices using 'stats::cor.test' unless z is specified. When z is specified ppcor::ppcor.test is used instead. Character and factor variables are not accepted. To prevent errors, users must first convert all variables to numeric.
#'
#' ## Partial correlations:
#'
#' Supplying the argument z will call ppcor::pcor.test the correlation pair are supplied to arguments x and y. The vector of z given to corx is passed to argument z in pcor.test.
#'
#' ## Missing data:
#'
#' Observations containing missing data required to complete a correlation or partial correlation are automatically removed.
#'
#' ## P-adjust:
#'
#' P-values attained can be adjusted for multiple comparisons by using the 'p_adjust' argument. This calls the function stats::p.adjust. When a matrix is symmetrical, p-values are only adjusted for unique comparisons. When a correlation matrix is not symmetrical, all comparisons are assumed to be unique.
#' @examples
#' cor_mat <- corx(mtcars, x = c(mpg,cyl,disp), y = c(wt,drat,disp,qsec),
#' z = wt, round = 2, stars = c(0.05),
#' caption = "Controlling for weight" ,
#' describe = list("mean" = function(x) mean(x,na.rm=TRUE)))
#' cor_mat
#' coef(cor_mat)
#' cor_mat$p
#' plot(cor_mat)
#' cor_2 <- corx(iris[,-5], describe = c(median, IQR = iqr, kurt = kurtosis),
#' note = "Using shortcuts to select describe functions", triangle = "lower")
#' cor_2
#' @return A list of class 'corx' which includes:
#' * "call" The call which if evaluated reproduces the object
#' * "apa" An 'APA' formatted correlation matrix with significance stars
#' * "r" Raw correlation coefficients
#' * "p" p-values
#' * "n" Pairwise observations
#' * "caption" Object caption
#' * "note" Object note
#' @export
corx <-
function(data,
x = NULL,
y = NULL,
z = NULL,
method = c("pearson", "spearman", "kendall"),
stars = c(0.05,0.01,0.001),
p_adjust = c("none", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"),
round = 2,
remove_lead = TRUE,
triangle = NULL,
caption = NULL,
note = NULL,
describe = FALSE,
grey_nonsig = TRUE,
call_only = FALSE) {
call <- match.call()
env <- environment()
parent_env <- sys.frame(sys.parent())
if(call_only) return(list(call = call, env = env, parent_env = parent_env))
if(nrow(data) < 3){
stop("Can't calculate p-values with fewer than four rows of data.")
}
if(methods::is(data, "matrix")) data <- data.frame(data, check.names = FALSE)
x <- tidyselect::vars_select(colnames(data), {{x}}, .strict = TRUE)
y <- tidyselect::vars_select(colnames(data), {{y}}, .strict = TRUE)
z <- tidyselect::vars_select(colnames(data), {{z}}, .strict = TRUE)
# allow rename within select
data <- rename_if_needed(data, x)
data <- rename_if_needed(data, y)
data <- rename_if_needed(data, z)
if(length(x) > 0) x <- names(x)
if(length(y) > 0) y <- names(y)
if(length(z) > 0) z <- names(z)
if(length(x) == 0){
x <- names(data)
}
if(length(y) == 0){
y <- x
}
if(length(z) == 0){
z <- NULL
}
if(length(z) > 0){ # remove partialled out variable from x and y
x <- x[!x %in% z]
y <- y[!y %in% z]
}
if(length(x) == 0 | length(y) == 0) stop("Can't partial out the entirety of x or y")
# check classes are appropriate
check_classes(data[,unique(c(x,y,z))], c("numeric","integer"), "All classes must be numeric.")
method <- method[1] # take the first method in case more than one supplied
p_adjust <- p_adjust[1]
cors <- cormat_list(
data = data,
x = x,
y = y,
z = z,
method = method,
p_adjust = p_adjust
)
pres_matrix <- apa_matrix(cors$r, #get apa matrix
cors$p,
stars,
round,
remove_lead,
triangle)
# describe function ----------------------------------------------------
# allow shortcuts
all_desc <- list(mean = function(x) mean(x, na.rm=T),
sd = function(x) stats::sd(x, na.rm=T),
var = function(x) stats::var(x, na.rm = T),
median = function(x) stats::median(x, na.rm = T),
iqr = function(x) stats::IQR(x, na.rm = T),
skewness = function(x) moments::skewness(x, na.rm = T),
kurtosis = function(x) moments::kurtosis(x, na.rm =T),
n = function(x) digits(length(stats::na.omit(x)),0)
)
tryCatch({ # allow lists to be sent to tidyselect
describe_name <- tidyselect::vars_select(names(all_desc), {{describe}}, .strict = F)
}, error = function(e) assign("describe_name", c(), envir = env)) # assign empty vec if error
if(length(describe_name) > 0){ # if vars were found
if(length(describe_name) != (length(call$describe) -1)){ # check if all vars were found
describe_name <- tidyselect::vars_select(names(all_desc), {{describe}}, .strict = T)
}
describe <- all_desc[describe_name] # set describe to all_desc
names(describe) <- names(describe_name) # rename as needed
}
if (!identical(describe, F)) { # if describe is selected
if (identical(describe, T)) { # if it is equal to true
describe = list( # define default describe functions
"M" = function(x)
mean(x, na.rm = TRUE),
"SD" = function(x)
stats::sd(x, na.rm = TRUE)
)
}
data <- data.frame(data)
x <- make.names(x)
orig_names <- colnames(pres_matrix)
pres_matrix <- data.frame(pres_matrix)
for (i in seq_along(describe)) { # then apply describe function to data
safe_round <- function(x, round){
if(methods::is(x, "numeric")){
return(digits(x, round))
}
x
}
pres_matrix[[names(describe)[i]]] <-
unlist(lapply(seq_along(x), function(var) {
val <- describe[[i]](data[, x[[var]]])
safe_round(val, round)
}))
}
pres_matrix <- as.matrix(pres_matrix)
colnames(pres_matrix)[seq_along(orig_names)] <- orig_names
}
if(!is.null(triangle)){ # if triangle change names -------
nums <- seq_along(rownames(pres_matrix))
rownames(pres_matrix) <- paste0(nums,". ", rownames(pres_matrix))
colnames(pres_matrix)[1:length(nums)] <- nums
pres_matrix <- pres_matrix[,-length(nums)]
}
# add in note --------------------------------------------
if(is.null(note)){
note <- lapply(seq_along(stars), function(s){
temp_stars <- paste(rep("*",s), collapse = "")
paste0(temp_stars, " p < ", stars[s])
})
note <- paste(note, collapse = "; ")
}
c_matrix <- list(
call = call,
apa = pres_matrix,
r = cors$r,
p = cors$p,
n = cors$n,
caption = caption,
note = note
)
class(c_matrix) <- "corx"
attr(c_matrix, "grey_nonsig") <- grey_nonsig
attr(c_matrix, "stars") <- stars
attr(c_matrix, "round") <- round
attr(c_matrix, "describe") <- describe
c_matrix
}
#' partial_n_matrix
#'
#' Calculate complete observations for a crosstab + a third variable
#' @param data data.frame or matrix
#' @param x rownames
#' @param y colnames
#' @param z partial variable vector
partial_n_matrix <- function(data, x, y, z){
mx <- matrix(nrow = length(x), ncol = length(y))
rownames(mx) <- x
colnames(mx) <- y
for(row in rownames(mx)){
for(col in colnames(mx)){
mx[row,col] <- sum(stats::complete.cases(data[,c(row, col, z)]))
}
}
as.matrix(mx)
}
#' apa matrix
#'
#' Creates an apa matrix
#' @param r_matrix correlation coefficient matrix
#' @param p_matrix p-value matrix
#' @param stars a vector of pvalue stars
#' @param round How many digits to round to?
#' @param remove_lead a logical. Should leading zeros be removed?
#' @param triangle can select lower upper or NULL
apa_matrix <- function(r_matrix,
p_matrix,
stars,
round,
remove_lead,
triangle) {
f_matrix <- r_matrix
f_matrix[] <- digits(f_matrix , round)
row_names <- matrix(rownames(r_matrix), nrow(r_matrix), ncol = ncol(r_matrix))
col_names <- matrix(colnames(r_matrix), nrow = nrow(r_matrix), ncol = ncol(r_matrix), byrow = T)
f_matrix[row_names == col_names] <- " - "
#diag(f_matrix)
if (!is.null(triangle)) {
if (triangle == "lower") {
f_matrix[upper.tri(r_matrix)] <- ""
p_matrix[upper.tri(p_matrix)] <- 1
}
if (triangle == "upper") {
f_matrix[lower.tri(r_matrix)] <- ""
p_matrix[lower.tri(p_matrix)] <- 1
}
}
s_matrix <- star_matrix(p_matrix, stars = stars)
s_matrix[row_names == col_names] <- ""
f_matrix[] <- paste0(f_matrix, s_matrix)
if (remove_lead)
f_matrix[] <-
gsub("0\\.", ".", f_matrix) #remove leading zeros if requested
return(f_matrix)
}
#' print.corx
#' @param x object
#' @param ... extra arguments
#' @export
print.corx <- function(x,...){
apa <- x$apa
text <- utils::capture.output(print(apa, quote = F, right = T))
width <- max(nchar(text))
header <- text[1]
grey <- attr(x, "grey_nonsig")
star_call <- attr(x, "stars")
if(length(star_call) > 0 & grey & identical(attr(x, "describe"), F)){# make nonsig grey
if(attr(x, "round") != 0 ){ # if no decimal places change regex
patt <- "(-)?[0-9]?\\.[0-9]{1,}(?![\\*0-9])" # possible negative, then a possible 0-9 character, then a decimal
}else{ # then more 0-9 characters (at least one), but not followed by any number of stars!
patt <- "-?[0-1](?![\\*\\.0-9]{1,})" # different pattern for round = 0 (even though no one will ever use that setting)
}
gr <- gregexpr(patt,text, perl = T) # get match locations
mat <- regmatches(text,gr)
regmatches(text,gr) <- lapply(mat, function(x) crayon::silver(x)) # replace with silver text
}
text <- gsub("\\bNA\\b", crayon::red("NA"), text) # make NAs red
text <- gsub("\\*", crayon::yellow("*"),text) # make stars yelloe
text <- gsub("\\ - ", crayon::silver(" - "),text) # make dashes silver
text <- text[-1] # remove header
bar <- paste(rep(crayon::silver("-"), width),collapse = "") # create a bar same length as table
temp_note <- paste("Note.",x$note) # get note ready
final_text <- paste(c(
crayon::blue(utils::capture.output(x$call)), # call
"", # then an empty line
x$caption, # table caption
bar, # a bar
header, # a header
bar, # a bar
text, # table contents
bar, # final bar
temp_note, # the note
""
),
collapse = "\n") # all separated with line breaks
cat(final_text)
}
#' @export
coef.corx <- function(object, ...) object$r # coef returns r matrix
#' digits
#'
#' Consistent rounding for strings
#' @param x number to round
#' @param n number of digits
digits <- function(x, n = 2) {
x <- round(x, n)
x[] <- sapply(x, function(i) {
ifelse(!is.na(i), trimws(format(round(as.numeric(as.character(i)), n), nsmall = n)),NA)
})
x
}
#' plot.corx
#' @param x a corx object
#' @param ... other arguments to ggcorrplot::ggcorrplot
#' @export
plot.corx <- function(x, ...){
call <- match.call()
elip <- list(...)
tri <- x$call$triangle
if(is.null(tri)) tri <- "full"
if(!is.null(call$type)) tri <- call$type
caption <- x$call$caption
if(is.null(caption)) caption <- ""
if(!is.null(call$title)) caption <- call$title
elip[['title']] <- caption # elip is designed
elip[['type']] <- tri # To be a call
elip[['corr']] <- x$r # I'm setting arguments which will be used in do.call
elip[['p.mat']] <- x$p # We include the p.matrix for signifance rules in ggcorrplot
do.call(ggcorrplot::ggcorrplot, elip)
}
#' @export
summary.corx <- function(object,... , digits, quantile.type){
name_mat <- colnames(object$apa)
obj <- data.frame(object$apa)
names(obj) <- name_mat
obj
}
#' @export
as.data.frame.corx <- function(x,...){
name_mat <- colnames(x$apa)
obj <- data.frame(x$apa)
names(obj) <- name_mat
obj
}
#' check_classes
#'
#' check all classes are as expected
#' @param data the data object
#' @param ok_classes a vector of allowed classes
#' @param stop_message a character string provided to users if error triggers.
#' @param stop should the variable stop, or create a warning?
check_classes <- function(data, ok_classes, stop_message, stop = TRUE) {
classes <- lapply(data, class)
class_ok <- sapply(classes, function(x) any(ok_classes %in% x))
bad_cols <- names(data)[!class_ok]
bad_index <- which(names(data) %in% bad_cols)
bad_classes <- lapply(classes[!class_ok], function(x) paste(abbreviate(x,3), collapse = ","))
script <- paste(glue::glue("[{bad_index}] '{bad_cols}' <{bad_classes}>"), collapse = ", ")
if (!all(class_ok)) {
if(stop){
stop(stop_message," ", script, ".", call. = F)
}else{
warning(stop_message," ", script, ".", call. = F)
}
}
}
#' star_matrix
#'
#' Replaces p-values with stars
#' @param m matrix of p-values
#' @param stars a vector of p-value thresholds to replace with stars
star_matrix <- function(m, stars) {
get_stars <- function(p, stars) {
if (is.na(p))
p <- 1
n_stars <- sum(p < stars)
paste(rep("*", n_stars), collapse = "")
}
s_matrix <- m
s_matrix[] <- sapply(m, function(p)
get_stars(p, stars = stars))
s_matrix
}
#' rename if needed
#'
#' Renames columns
#' @param data data object
#' @param x a character vector. If named, columns will be renamed
rename_if_needed <- function(data, x) {
rename_vars <- x[names(x) != x]
for (i in seq_along(rename_vars)) {
if (names(x)[i] != x[i]) {
colnames(data)[colnames(data) == x[i]] <- names(rename_vars[rename_vars == x[i]])
}
}
data
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.