R/covariateAssociationCheck.R

Defines functions covarAssociationCheck ICcovarAssociationCheck

Documented in covarAssociationCheck

#' Calculate association between components and given covariates
#'
#' Check the association between sample covariates and
#' independent/principal components.
#'
#' @param input_list ICA or PCA object generated by \code{runICA()} or
#'                   \code{runPCA()}.
#' @param covars A dataframe of covariates with each row for a sample
#'               and each column for a covariate.
#' @param col_names Column names of \code{covars} to
#'        be used for association testing. Default is to test every input covariate.
#' @param cor_threshold Bonferroni threshold that is going to be used for
#'                      identifying significant associations.
#' @return The output will be the \code{input_list} with additional entries
#'         \code{comp_cov, covars, covar_threshold, covar_pvals}.
#'         \code{comp_cov} will contain a list with entries for each component
#'                         showing the associated covariate and p-value.
#'         \code{covars} will contain a copy of sample_info to keep all
#'                       the data in one place.
#'         \code{covar_threshold} will record the threshold
#'                                used to call significant associations.
#'         \code{covar_pvals} will contain the matrix of p-values
#'                            for all covariate and component pairs.
#' @examples
#' data(expr_data, sample_info)
#'
#' pca_result <- runPCA(expr_data)
#'
#' pca_result <- covarAssociationCheck(pca_result, covars = sample_info)
#' @export
covarAssociationCheck <- function(input_list = NULL,
                                    covars = NULL,
                                    col_names = NULL,
                                    cor_threshold = 0.05) {
    if (is.null(input_list)) {
        stop("Input is missing, please specify an ICA or PCA object")
    }


    if (class(input_list) == "ICAobject") {
        comp_coeff_mx <- input_list$A
    } else if (class(input_list) == "PCAobject") {
        comp_coeff_mx <- t(input_list$x)
    }

    message("Checking dimensions and column/row names \n")

    if (!(nrow(covars) == ncol(comp_coeff_mx))) {
        stop("[Error] Different number of samples in <covars> and <input_list>")
    }

    matching.names <-
        sum(rownames(covars) %in% colnames(comp_coeff_mx))

    if (!(matching.names == nrow(covars))) {
        stop(
            "Sample names do not match between <input_list> and <covars>"
        )
    }

    if(!is.null(col_names)){
        covars <- covars[,col_names]
    }
    # filter factors with only 1 level
    n_unique <-
        apply(covars, 2, function (x)
            length(table(x, useNA = 'no')))
    use_info <- n_unique > 1

    message(sum(!use_info),
            " columns excluded from <covars>, due to uniqueness issues.")

    covars <- covars[, use_info, drop = FALSE]

    message("- Checking associations between components and covariates \n")
    # Anova analysis for covariates vs ICA weights (A matrix)
    covar_pvals <-
        ICcovarAssociationCheck(comp_coeff_mx, covars)

    if (class(input_list) == "ICAobject") {
        rownames(covar_pvals) <- paste0("IC", seq_len(nrow(covar_pvals)))
    } else if (class(input_list) == "PCAobject") {
        rownames(covar_pvals) <- paste0("PC", seq_len(nrow(covar_pvals)))
    }

    bon_sig <-
        cor_threshold / (nrow(covar_pvals) * ncol(covar_pvals))
    comp_cov <- list()

    for (i in seq_len(nrow(covar_pvals))) {
        comp_name <- rownames(covar_pvals)[i]
        comp_cov[[comp_name]] <-
            covar_pvals[i, covar_pvals[i, ] < bon_sig, drop = FALSE]

    }

    input_list$covar_pvals <- covar_pvals
    input_list$comp_cov <- comp_cov
    input_list$covars <- covars
    input_list$covar_threshold <- bon_sig
    attr(input_list, 'covar_cor') <- "yes"

    return(input_list)
}

ICcovarAssociationCheck <- function(input.A, info.input) {
    n.components <- dim(input.A)[1]
    covar.names <- colnames(info.input)
    n.covariates <- length(covar.names)

    pval.mx <- matrix(NA, nrow = n.components, ncol = n.covariates)
    colnames(pval.mx) <- covar.names

    covar.df <-
        data.frame(info.input[colnames(input.A), covar.names])

    for (i in seq_len(dim(input.A)[1])) {
        for (j in seq_len(length(covar.names))) {
            analysis.df <- data.frame("IC" = input.A[i, ],
                                      "Covar" = covar.df[, j])
            anova.fit <-
                summary(stats::aov(IC ~ Covar, data = analysis.df))
            p.val <- anova.fit[[1]]$`Pr(>F)`[1]
            if(!is.null(p.val)){
                pval.mx[i, j] <- p.val
            } else {
                pval.mx[i, j] <- NA
            }
        }
    }

    return(pval.mx)
}
jinhyunju/picaplot documentation built on May 19, 2019, 10:35 a.m.