#' Calculate association between components and given covariates
#'
#' Check the association between sample covariates and
#' independent/principal components.
#'
#' @param input_list ICA or PCA object generated by \code{runICA()} or
#' \code{runPCA()}.
#' @param covars A dataframe of covariates with each row for a sample
#' and each column for a covariate.
#' @param col_names Column names of \code{covars} to
#' be used for association testing. Default is to test every input covariate.
#' @param cor_threshold Bonferroni threshold that is going to be used for
#' identifying significant associations.
#' @return The output will be the \code{input_list} with additional entries
#' \code{comp_cov, covars, covar_threshold, covar_pvals}.
#' \code{comp_cov} will contain a list with entries for each component
#' showing the associated covariate and p-value.
#' \code{covars} will contain a copy of sample_info to keep all
#' the data in one place.
#' \code{covar_threshold} will record the threshold
#' used to call significant associations.
#' \code{covar_pvals} will contain the matrix of p-values
#' for all covariate and component pairs.
#' @examples
#' data(expr_data, sample_info)
#'
#' pca_result <- runPCA(expr_data)
#'
#' pca_result <- covarAssociationCheck(pca_result, covars = sample_info)
#' @export
covarAssociationCheck <- function(input_list = NULL,
covars = NULL,
col_names = NULL,
cor_threshold = 0.05) {
if (is.null(input_list)) {
stop("Input is missing, please specify an ICA or PCA object")
}
if (class(input_list) == "ICAobject") {
comp_coeff_mx <- input_list$A
} else if (class(input_list) == "PCAobject") {
comp_coeff_mx <- t(input_list$x)
}
message("Checking dimensions and column/row names \n")
if (!(nrow(covars) == ncol(comp_coeff_mx))) {
stop("[Error] Different number of samples in <covars> and <input_list>")
}
matching.names <-
sum(rownames(covars) %in% colnames(comp_coeff_mx))
if (!(matching.names == nrow(covars))) {
stop(
"Sample names do not match between <input_list> and <covars>"
)
}
if(!is.null(col_names)){
covars <- covars[,col_names]
}
# filter factors with only 1 level
n_unique <-
apply(covars, 2, function (x)
length(table(x, useNA = 'no')))
use_info <- n_unique > 1
message(sum(!use_info),
" columns excluded from <covars>, due to uniqueness issues.")
covars <- covars[, use_info, drop = FALSE]
message("- Checking associations between components and covariates \n")
# Anova analysis for covariates vs ICA weights (A matrix)
covar_pvals <-
ICcovarAssociationCheck(comp_coeff_mx, covars)
if (class(input_list) == "ICAobject") {
rownames(covar_pvals) <- paste0("IC", seq_len(nrow(covar_pvals)))
} else if (class(input_list) == "PCAobject") {
rownames(covar_pvals) <- paste0("PC", seq_len(nrow(covar_pvals)))
}
bon_sig <-
cor_threshold / (nrow(covar_pvals) * ncol(covar_pvals))
comp_cov <- list()
for (i in seq_len(nrow(covar_pvals))) {
comp_name <- rownames(covar_pvals)[i]
comp_cov[[comp_name]] <-
covar_pvals[i, covar_pvals[i, ] < bon_sig, drop = FALSE]
}
input_list$covar_pvals <- covar_pvals
input_list$comp_cov <- comp_cov
input_list$covars <- covars
input_list$covar_threshold <- bon_sig
attr(input_list, 'covar_cor') <- "yes"
return(input_list)
}
ICcovarAssociationCheck <- function(input.A, info.input) {
n.components <- dim(input.A)[1]
covar.names <- colnames(info.input)
n.covariates <- length(covar.names)
pval.mx <- matrix(NA, nrow = n.components, ncol = n.covariates)
colnames(pval.mx) <- covar.names
covar.df <-
data.frame(info.input[colnames(input.A), covar.names])
for (i in seq_len(dim(input.A)[1])) {
for (j in seq_len(length(covar.names))) {
analysis.df <- data.frame("IC" = input.A[i, ],
"Covar" = covar.df[, j])
anova.fit <-
summary(stats::aov(IC ~ Covar, data = analysis.df))
p.val <- anova.fit[[1]]$`Pr(>F)`[1]
if(!is.null(p.val)){
pval.mx[i, j] <- p.val
} else {
pval.mx[i, j] <- NA
}
}
}
return(pval.mx)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.