R/heterogeneity.R

Defines functions calc.het.feat calc.het

Documented in calc.het calc.het.feat

#' Calculate Age-related Expression and Heterogeneity Changes for Gene
#'
#' @param exprs a numeric vector with the expression values, ordered the same as age vector
#' @param age a numeric vector, where the names correspond to samples (the same as colnames of the given matrix).
#' @param modex expression change calculation method. 'linear' or 'loess', defaults to 'linear'
#' @param het_change_met heterogeneity change calculation method. 'LR', for 'linear regression', or and correlation method accepted by cor.test() function.
#'
#' @return a list with i) summary results with the effect size and p value for expression level and heterogeneity changes, ii) residuals from expr~age model, which corresponds to unexplained variance, iii) expression values passed into the function.
#' @export
#' @examples
#' myexp <- rnorm(n = 20, mean = sample(1:3, 1), sd = sample(c(1, 3), 1))
#' agevec <- sample(20:80,20)
#' het_result <- calc.het.feat(myexp, agevec)
#' head(het_result$summary) # summary statistics
#' het_result$residuals # heterogeneity values: residuals
#' het_result$expression  # input expression values
#' all(myexp == het_result$expression)
#'
calc.het.feat <- function(exprs, age, modex = 'linear',
                          het_change_met = 'spearman') {
    if ( modex == 'linear') {
        lmx <- lm(exprs ~ age)
        expr_change <- unname(lmx$coef[2])
        expr_change.p <- unname(summary(lmx)$coef[2, 4])
    } else if ( modex == 'loess') {
        lmx <- loess(exprs ~ age)
        expr_change <- NA
        expr_change.p <- NA
    }
    resids <- lmx$resid
    if ( het_change_met == "LR") {
        lmxx <- lm(abs(resids) ~ age)
        het_change <- unname(lmxx$coefficients[2])
        het_change.p <- unname((summary(lmxx)$coef)[2,4])
    } else {
        co_het <- cor.test(abs(resids), age, method = het_change_met)
        het_change <- unname(co_het$est)
        het_change.p <- unname(co_het$p.val)
    }
    res <- list(summary = c(level_change = expr_change,
                            level_change.p = expr_change.p,
                            heterogeneity_change = het_change,
                            heterogeneity_change.p = het_change.p))
    res$residuals <- resids
    res$expression <- exprs
    return(res)
}

#' Calculate Age-related Expression and Heterogeneity Changes
#'
#' @param exprmat a numeric matrix with the expression values, where columns are the samples and rows are probesets, transcripts, or genes.
#' @param age a numeric vector, where the names correspond to samples (the same as colnames of the given matrix).
#' @param age_in type of the age vector, allowed values are days or years. defaults to 'days'
#' @param age_to final format of age vector. allowed values are 'years', 'days', 'pw-N', and 'lg-N', where N is any number. 'pw' means power. e.g. pw-0.5 means sqrt(age), and 'lg' means log, e.g. lg-2 means log2(age).defaults to 'pw-0.25'
#' @param batch_corr batch correction strategy. available values are i) NC: No Correction, ii) QN: Quantile Normalization, iii) LR: Linear regression (requires covariates), iv) SVA: surrogate variable analysis, v) LR+QN: Linear regression followed by quantile normalization, and vi) SVA+QN: SVA followed by quantile normalization. Defaults to 'NC'.
#' @param modex expression change calculation method. 'linear' or 'loess', defaults to 'linear'
#' @param tr_log2 logical to set log2 transforming expression matrix. defaults to TRUE.
#' @param sc_features logical to set whether to scale features. defaults to TRUE.
#' @param covariates a list of covarietes where each element is a vector with sampleIDs as names
#' @param het_change_met heterogeneity change calculation method. 'LR', for 'linear regression', or and correlation method accepted by cor.test() function.
#' @param padj_met method for multiple test correction. value is passed to 'p.adjust' function. defaults to 'fdr'.
#'
#' @return a list object with summary results including expression level and heterogeneity changes, input values, intermediate values, and heterogeneity matrix.
#'
#' @importFrom tibble as.tibble
#' @importFrom dplyr select
#' @importFrom dplyr everything
#'
#' @export
#' @examples
#' sampnames = paste('sample',1:20,sep='')
#' myexp <- sapply( 1:20, function(i){ rnorm(n = 10000, mean = sample(1:3, 1), sd = sample(c(1, 3), 1)) })
#' rownames(myexp) = paste('gene', 1:10000, sep = '')
#' colnames(myexp) = sampnames
#' agevec <- sample(20:80,20)
#' names(agevec) = sampnames
#' het_result <- calc.het(myexp, agevec, age_in = 'years', tr_log2 = F)
#' head(het_result$sampleID)
#' het_result$input_expr[1:5,1:5] # expression values used as input
#' head(het_result$input_age) # input ages
#' head(het_result$usedAge) # ages used in calculations - transformation is applied using 'age_to' parameter
#' het_result$resid_mat[1:5,1:5] # heterogeneity values (residual matrix)
#' head(het_result$feature_result) # summary statistics

calc.het <- function(exprmat, age, age_in = 'days',
                     age_to = 'pw-0.25',
                     batch_corr = "NC",
                     modex = "linear",
                     tr_log2 = T, sc_features = T,
                     covariates = NA,
                     het_change_met = "spearman",
                     padj_met = "fdr") {
    retval <- list()
    snms <- intersect(colnames(exprmat), names(age))
    retval$sampleID <- snms
    exprmat <- exprmat[, snms]
    retval$input_expr <- exprmat
    age <- age[snms]
    if ( !all(is.na(covariates)) ) { covariates = lapply(covariates,
                                                    function(cov) cov[snms] )}
    retval$input_age <- age
    age <- transform_age(age, type = age_in, to = age_to)
    retval$usedAge <- age
    exprmat <- exprmat[complete.cases(exprmat),]
    if (tr_log2 == T) {
        exprmat <- trans_log2(exprmat)
    }
    if (batch_corr == "NC") {
        exprmat <- exprmat
    } else if (batch_corr == "QN") {
        exprmat <- cfr_quantileNorm(exprmat)
    } else if (batch_corr == "LR") {
        LR_res <- cfr_linReg(exprmat, covariates)
        exprmat <- LR_res$correctedExp
        retval$LR_res <- LR_res
    } else if (batch_corr == "LR+QN") {
        LR_res <- cfr_linReg(exprmat, covariates)
        exprmat <- LR_res$correctedExp
        exprmat <- cfr_quantileNorm(exprmat)
        retval$LR_res <- LR_res
    } else if (batch_corr == "SVA") {
        sva_res <- cfr_sva(exprmat, age, covariates)
        exprmat <- sva_res$correctedExp
        retval$sva_res <- sva_res
    } else if (batch_corr == "SVA+QN") {
        sva_res <- cfr_sva(exprmat, age, covariates)
        exprmat <- sva_res$correctedExp
        exprmat <- cfr_quantileNorm(exprmat)
        retval$sva_res <- sva_res
    }
    exprmat <- feature_scale(exprmat)
    exprmat <- exprmat[complete.cases(exprmat),]
    resx <- apply(exprmat, 1, function(expx) {
        calc.het.feat(expx, age, modex = modex,
                      het_change_met = het_change_met)
    })
    feature_het <- tibble::as.tibble(t(sapply(resx, function(x) x$summary)))
    feature_het$level_change.p.adj <- p.adjust(feature_het$level_change.p,
                                               method = padj_met)
    feature_het$heterogeneity_change.p.adj <- p.adjust(feature_het$heterogeneity_change.p,
                                              method = padj_met)
    feature_het$feature <- rownames(exprmat)
    feature_het <- dplyr::select(feature_het, 'feature', dplyr::everything())
    retval$residMat <- t(sapply(resx,function(x)x$resid))
    retval$usedMat <- t(sapply(resx,function(x)x$expr))
    retval$feature_result <- feature_het
    return(retval)
}
mdonertas/hetAge documentation built on Jan. 2, 2020, 12:53 a.m.