Nothing
#' Differential expression analysis based on natural cubic spline regression models for time-course data
#'
#' This function is a modified version of \code{\link[splineTimeR]{splineDiffExprs}} function
#' that allows data input as log-transformed counts taking into account the voom-weights and 75th quantile as library size.
#' The function compares time dependent behaviour of genes in two different groups.
#' Applying empirical Bayes moderate F-statistic on differences in coefficients of
#' fitted natural cubic spline regression models, differentially expressed in time genes are determined.
#' The function is a wrapper of other R-functions to simplify differential expression analysis of time-course data.
#' @inheritParams splineTimeR::splineDiffExprs
#' @param voom_method logical value TRUE or FALSE. TRUE when using log-transformed counts, voom-weight, and 75th quantile library size
#'
#' @return a list of 4 components
#' \item{fit}{output of \code{\link[limma]{lmFit}} function.}
#' \item{pv}{a vector of p-values of the test for line effect}
#' \item{qv}{a vector of q-values corresponding to the \code{pv} above.}
#' \item{diffExprs}{A data.frame with rows defining names/IDs of differentially expressed genes}
#' @importFrom rlang .data
#' @import splineTimeR
#' @importFrom Biobase fData
#' @importFrom Biobase pData
#' @importFrom Biobase ExpressionSet
#' @examples
#' library(rmRNAseq)
#' data(design)
#' data(covset)
#' data("resSymm")
#' EE <- 40
#' DE <- 10
#' n_idx <- sample(nrow(resSymm$ori.res$v), size = EE + DE)
#' v <- resSymm$ori.res$v[n_idx,]
#' newlm <- resSymm$ori.res$newlm[n_idx,]
#' BetaMat <- data.matrix(newlm[grep("fixed.", names(newlm))])
#' BetaMat[1:EE, 2] <- 0
#' BetaMat[(EE+1):(EE+DE), 2] <- BetaMat[(EE+1):(EE+DE), 2]*4
#' Sigma2Vec <- newlm$s2_shrunken
#' RhoVec <- data.matrix(newlm[grep("rho.", names(newlm))])
#' WeightMat <- v$weights
#' lib.size <- v$targets$lib.size
#' nrep <- 1
#' Subject <- covset$ear
#' Time <- covset$time
#' counts <- rmRNAseq:::sc_Symm(BetaMat, Sigma2Vec, RhoVec, WeightMat,
#' lib.size, design, Subject, Time,nrep)
#' inData <- counts
#' colnames(inData) <- paste(covset$line, Subject, covset$timef,sep="_")
#' design <- rmRNAseq::design
#' design1 <- data.frame(row.names=colnames(inData),
#' "SampleName"=colnames(inData),
#' "Time"=covset$time,"Treatment"=covset$line)
#' phenoData <- new("AnnotatedDataFrame",data=design1)
#' data <- Biobase::ExpressionSet(assayData=as.matrix(inData),phenoData=phenoData)
#' diffExprs <- rmRNAseq:::my_splineDiffExprs(eSetObject = data, df = 3,cutoff.adj.pVal = 1,
#' reference = "L",intercept = TRUE, voom_method = FALSE)
#'rmRNAseq:::pauc_out(diffExprs$pv, EE , DE)
#'diffExprs2 <- rmRNAseq:::my_splineDiffExprs(eSetObject = data, df = 3,cutoff.adj.pVal = 1,
#'reference = "L",intercept = TRUE, voom_method = TRUE)
#'rmRNAseq:::pauc_out(diffExprs2$pv, EE , DE)
my_splineDiffExprs <- function (eSetObject, df, cutoff.adj.pVal = 1, reference, intercept = TRUE, voom_method = FALSE)
{
if (!is(eSetObject, "ExpressionSet"))
stop("eSetObject must be of class ExpressionSet")
if (!(("SampleName" %in% names(Biobase::pData(eSetObject))) & ("Time" %in%
names(Biobase::pData(eSetObject))) & ("Treatment" %in% names(Biobase::pData(eSetObject)))))
stop("eSetObject has to include SampleName, Time and Treatment columns in phenotypic data")
if (!(is(cutoff.adj.pVal, "numeric") & (cutoff.adj.pVal >=
0) & (cutoff.adj.pVal <= 1)))
stop("cutoff.adj.pVal must be numeric between 0 and 1")
if (!(is(df, "numeric") & (df%%1 == 0) & (df > 0)))
stop("df must be integer > 0")
if (!(reference %in% levels(factor(Biobase::pData(eSetObject)$Treatment))))
stop("define valid reference group")
if (!is(intercept, "logical"))
stop("intercept must be boolean data type")
b_ <- splines::ns(Biobase::pData(eSetObject)$Time, df = df)
d_ <- factor(Biobase::pData(eSetObject)$Treatment, levels = c(reference,
setdiff(levels(factor(Biobase::pData(eSetObject)$Treatment)),
reference)))
design <- model.matrix(~d_ * b_)
# add voom method
if (voom_method){
eSetObject_voom <- limma::voom(counts = exprs(eSetObject), design = design,
lib.size = apply(exprs(eSetObject), 2, stats::quantile,
0.75))
fit <- limma::lmFit(eSetObject_voom, design)
}else fit <- limma::lmFit(eSetObject, design)
fit_eBayes <- limma::eBayes(fit)
if (ncol(Biobase::fData(eSetObject)) == 0) {
Biobase::fData(eSetObject) <- data.frame(rownames(exprs(eSetObject)))
colnames(Biobase::fData(eSetObject)) <- "row_IDs"
}
if (intercept) {
fit_coeff_ref <- fit$coefficient[, c(1, 3:(df + 2))]
colnames(fit_coeff_ref)[1] <- "b_0"
Biobase::fData(eSetObject) <- cbind(Biobase::fData(eSetObject), fit_coeff_ref)
topTable_output <- limma::topTable(fit_eBayes, coef = c(2, (df +
3):(2 * df + 2)), sort.by = "none", number = Inf,
genelist = Biobase::fData(eSetObject))
colnames(topTable_output)[(ncol(topTable_output) - 4 -
df):(ncol(topTable_output) - 4)] <- paste("d_", 0:df,
sep = "")
}else {
fit_coeff_ref <- fit$coefficient[, c(1, 3:(df + 2), 2)]
colnames(fit_coeff_ref)[1] <- "b_0"
Biobase::fData(eSetObject) <- cbind(Biobase::fData(eSetObject), fit_coeff_ref)
topTable_output <- limma::topTable(fit_eBayes, coef = c((df +
3):(2 * df + 2)), sort.by = "none", number = Inf,
genelist = fData(eSetObject))
colnames(topTable_output)[(ncol(topTable_output) - 4 -
df):(ncol(topTable_output) - 4)] <- paste("d_", 0:df,
sep = "")
}
diffExprs <- topTable_output[topTable_output$adj.P.Val <=
cutoff.adj.pVal, ]
# diffExprs <- diffExprs[order(diffExprs$adj.P.Val), ] # no ordering the genes
cat("-------------------------------------------------",
"\n")
cat(paste("Differential analysis done for df = ", df, " and adj.P.Val <= ",
cutoff.adj.pVal, sep = ""), "\n")
cat("Number of differentially expressed genes: ", nrow(diffExprs),
"\n")
pv = topTable_output$P.Value
qv <- jabes.q(pv)
out <- list(fit = fit, pv = pv, qv = qv, diffExprs = diffExprs)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.