##' Main Function for Individual Study DE: microarray & RNAseq
##' The \code{Indi.DE.Analysis} is a function to perform individual association
##' analysis between gene expression and the response/phenoype of interest (can
##' be either group, continuous or survival).
##' @title Main Function for Individual Study DE: microarray & RNAseq.
##' @param data is a list of K elements, where K is the number of studies, each
##' element is a microarray or RNAseq expression matrix with G rows and N
##' columns, where G is number of matched genes and N is the sample size.
##' @param clin.data is a list of K elements, each element includes is a
##' clinical data frame with N rows and p columns, where N is the sample size
##' and p is the number of clinical variables (main response included).
##' @param data.type is a character indicating the data type of the elements
##' in \code{data}, must be "continuous" or "discrete".
##' @param resp.type is a character indicating the response type of the
##' \code{response} variable selected, must be one of "twoclass", "multiclass",
##' "continuous" and "survival".
##' @param response is one column name of \code{clin.data}, indicating the
##' phenotype of interest. For survival, two column names have to be specified,
##' the first is the survival time and the second is the censoring status.
##' @param covariate are the clinical covariates you wish to adjust for in the
##' DE analysis, can be a vector of column names or NULL.
##' @param ind.method is a character vector to specify the method used to test
##' if there is association between the gene expression and outcome variable.
##' must be one of "limma", "sam" for "continuous" data type and "edgeR",
##' "DESeq2" or "limmaVoom" for "discrete" data type.
##' @param select.group: for two-class comparison only, specify the two groups
##' for comparison when the group factor has more than two levels.
##' @param ref.level: for two-class/multi-class comparison only, specify the
##' reference level of the group factor.
##' @param paired: logical value indicating whether paired design;
##' @param asymptotic: a logical value indicating whether asymptotic
##' distribution should be used. If FALSE, permutation will be performed.
## 'For resp.type = "continuous", "survival" only.
##' @param nperm: the number of permutations. Applicable when \code{asymptotic}
##' is FALSE.
##' @param tail: a character string specifying the alternative hypothesis,
##' must be one of "abs" (default), "low" or "high". For resp.type =
##' "continuous", "survival" only.
##' @param seed: Optional initial seed for random number generator.
##' @return a list with components: \cr
##' \itemize{
##' \item{p:}{ For all types of response, the p-value of the association test for
##' each gene}
##' \item{stat:}{ For "continuous" and "survival" only, the value of test
##' statistic for each ##' gene}
##' \item{bp:}{ For "continuous" and "survival" only, the p-value from
##' \code{nperm:} permutations for each gene. It will be used for the meta
##' analysis by default. It can be NULL if you chose asymptotic results. }
##' \item{log2FC:}{ For "twoclass" only, the log2 fold change for each gene}
##' \item{lfcSE:}{ For "twoclass" only, the standard error of log2 fold change
##' for each gene}
##' }
##' @export
##' @examples
##' data('Leukemia')
##' data('LeukemiaLabel')
##' data <- Leukemia
##' K <- length(data)
##' clin.data <- lapply(label, function(x) {data.frame(x)} )
##' for (k in 1:length(clin.data)){
##' colnames(clin.data[[k]]) <- "label"
##' }
##' select.group <- c('inv(16)','t(15;17)')
##' ref.level <- "inv(16)"
##' data.type <- "continuous"
##' ind.method <- c('limma','limma','limma')
##' resp.type <- "twoclass"
##' paired <- rep(FALSE,length(data))
##' ind.res <- Indi.DE.Analysis(data=data,clin.data= clin.data,
##' data.type=data.type,resp.type = resp.type,
##' response='label',
##' ind.method=ind.method,select.group = select.group,
##' ref.level=ref.level,paired=paired)
##' N <- sapply(data, FUN=function(x) ncol(x))
##' survival.time <- lapply(N,FUN = function(x) round(runif(x,10,2000)))
##' censor.status <- lapply(N,FUN = function(x) sample(c(0,1),x,replace=TRUE) )
##' for (k in 1:length(clin.data)){
##' clin.data[[k]] <- cbind(clin.data[[k]],survival.time[[k]],censor.status[[k]])
##' colnames(clin.data[[k]])[2:3] <- c("survival","censor")
##' }
##' ind.method <- c('logrank','logrank','logrank')
##' resp.type <- "survival"
##' ind.res <- Indi.DE.Analysis(data=data,clin.data= clin.data,
##' data.type=data.type,resp.type = resp.type,
##' response=c("survival","censor"),
##' ind.method=ind.method,asymptotic=TRUE)
Indi.DE.Analysis <- function(data, clin.data, data.type, resp.type,
response,covariate=NULL,ind.method,
select.group=NULL, ref.level=NULL,
paired=NULL,asymptotic=NULL,nperm=NULL,
tail="abs", seed=12345,
... ) {
## Call the packages required
#library(survival)
#library(limma)
#library(samr)
#library(edgeR)
#library(DESeq2)
set.seed(seed)
##Input
##data: list of data matrices, genes on the rows, samples on the columns;
##clin.data: list of clinical data frames, samples on the rows, clinical
##covariates on the columns;
##data.type = c("continuous","discrete");
##resp.type = c("twoclass", "multiclass", "continuous", "survival");
##response: a character (column name of clin.data) indicating the phenotype
##of interest (survival (2 column names));
##ind.method = c("limma","sam","limmaVoom","edgeR","DESeq2","pearsonr",
##"spearmanr","logrank");
##select.group: specify the two group names for comparison;
##ref.level: specify the reference level of the group factor;
##paired: logical indicating whether paired design;
##asymptotic: logical whether asymptotic dist should be used;
##nperm: number of permutations;
##tail = c("low", "high", "abs");
K<-length(data) # number of studies
G<-nrow(data[[1]]) # number of matched genes
response.list <- covariate.list <- vector('list',K)
if(!is.null(covariate)) covariate.list <-NULL
for (k in 1:K) {
response.list[[k]] <- clin.data[[k]][,response]
if(!is.null(covariate)) {
covariate.list[[k]] <- clin.data[[k]][,covariate]
}
}
##check the individual method for the corresponding resp.type and data.type,
##in addition, check the consistency between resp.type and response.
check.indmethod(response.list, resp.type, data.type, ind.method,
select.group, tail, paired)
##verify the correct specification of individual method
ind.method<- match.arg(ind.method,c("limma","sam","limmaVoom","edgeR",
"DESeq2","pearsonr","spearmanr","logrank"),
several.ok=TRUE)
data <-check.exp(data) # check the gene names for expression data
##Association with groups (DE, ANOVA, etc.)
if(resp.type %in% c("twoclass","multiclass") ) {
log2FC<- lfcSE <- pvalue <- NULL #prespecify the output matrix
N<-n<-NULL
for (k in 1:K) {
groupLabel <- as.factor(response.list[[k]])
if(!is.null(select.group)) {
l<- groupLabel[groupLabel %in% select.group]
y<-data[[k]][,groupLabel %in% select.group]
l<- droplevels(l)
if(is.null(covariate.list[[k]])) {
c<- NULL
}
if(!is.null(covariate.list[[k]]) && is.vector(covariate.list[[k]])) {
c <- covariate.list[[k]][groupLabel %in% select.group]
}
if(!is.null(covariate.list[[k]]) && !is.vector(covariate.list[[k]])) {
c <- covariate.list[[k]][groupLabel %in% select.group,]
}
} else{
l<- groupLabel
y<- data[[k]]
}
if(!is.null(ref.level)) {
l <- relevel(l,ref=ref.level)
}
ANOVA<- (resp.type == "multiclass") #indicate whether ANOVA should be performed
ind.res<-switch(ind.method[k],
limma={get.limma(y=y,l=l,c=c,name=name,
ANOVA=ANOVA,tail=tail,paired=paired[k])},
sam={get.sam(y=y,l=l,name=name,seed=seed,
ANOVA=ANOVA,tail=tail,paired=paired[k])},
limmaVoom={get.limmaVoom(y=y,l=l,c=c,name=name,
ANOVA=ANOVA,tail=tail,paired=paired[k])},
edgeR={get.edgeR(y=y,l=l,c=c,name=name,
ANOVA=ANOVA,tail=tail,paired=paired[k])},
DESeq2={get.DESeq2(y=y,l=l,c=c,name=name,
ANOVA=ANOVA,tail=tail,paired=paired[k])})
pvalue<- cbind(pvalue,ind.res$pvalue) #p value
if(!ANOVA) {
if(any(grepl('log2FC',names(ind.res))) ) {
log2FC<- cbind(log2FC,ind.res$log2FC) #log2 fold change
} else {
log2FC<- cbind(log2FC,rep(NA,nrow(y)))
}
if(any(grepl('lfcSE',names(ind.res))) ) {
lfcSE <- cbind(lfcSE,ind.res$lfcSE) #standard error of log2FC
} else {
lfcSE <- cbind(lfcSE,rep(NA,nrow(y)))
}
}
cat("dataset",k,"is done\n")
nns<-get.sample.label.number(l)
N<-c(N,nns$N) #sample size per study
n<-append(n,nns$n) #sample size per label per study
} # end of K study for loop
if(!ANOVA) {
if(is.null(names(data))) {
colnames(log2FC) <-colnames(lfcSE)<-colnames(pvalue)<-
paste("dataset",1:K,sep="") # naming studies
} else {
colnames(log2FC) <-colnames(lfcSE)<-colnames(pvalue)<-names(data)
}
if(is.null(rownames(data[[1]])) ) {
rownames(log2FC) <- rownames(lfcSE)<-rownames(pvalue)<-
paste("gene",1:G,sep="") # naming genes
} else {
rownames(log2FC) <-rownames(lfcSE)<-rownames(pvalue)<-rownames(data[[1]])
}
all.res<-list(log2FC=log2FC,lfcSE=lfcSE,p=pvalue)
} else {
all.res<-list(p=pvalue)
}
attr(all.res$p,"nperstudy")<-N
attr(all.res$p,"nperlabelperstudy")<-n
attr(all.res$p,"data.type")<- data.type
attr(all.res$p,"individual.analysis")<-ind.method
attr(all.res$p,"response.type")<- resp.type
attr(all.res$p,"alter.hypothesis")<- tail
return(all.res)
} #end of twoclass, multiclass comparison
if(resp.type == c("continuous") ) {
P<-stat<-P.perm<-NULL #prespecify the output matrix
N<- NULL
for (k in 1:K) {
y<- data[[k]]
r<- response.list[[k]]
ind.res<-switch(ind.method[k],
pearsonr={get.p.pearsonr(y,r,asymptotic=asymptotic,
nperm=nperm,tail=tail)},
spearmanr={get.p.spearmanr(y,r,asymptotic= asymptotic,
nperm=nperm,tail=tail)})
P<- cbind(P,(ind.res$obs)[,2]) #p value
stat<-cbind(stat,(ind.res$obs[,1])) # observed stats
if (!is.null(nperm)) {
P.perm<-cbind(P.perm,c(ind.res$perm)) #p value from permutation
}
cat("dataset",k,"is done\n")
N<-c(N,ncol(y))
} # end of K study for loop
if(is.null(names(data))) {
colnames(stat)<-colnames(P)<-paste("dataset",1:K,sep="") # naming studies
} else {
colnames(stat)<-colnames(P)<-names(data)
}
if (!is.null(nperm)) {
colnames(P.perm)<-paste("dataset",1:K,sep="") # naming studies
}
all.res<-list(stat=stat,p=P,bp=P.perm)
attr(all.res$p,"nperstudy")<-N
attr(all.res$p,"data.type")<- data.type
attr(all.res$p,"individual.analysis")<-ind.method
attr(all.res$p,"response.type")<- resp.type
attr(all.res$p,"alter.hypothesis")<- tail
return(all.res)
} #end of association with continuous phenotype
if(resp.type == c("survival") ) {
P<-stat<-P.perm<-NULL #prespecify the output matrix
N<- NULL
for (k in 1:K) {
y<- data[[k]]
r<- response.list[[k]]
ind.res<- get.p.logrank(y,r[,1], r[,2],asymptotic= asymptotic,
nperm=nperm,tail=tail)
P<- cbind(P,(ind.res$obs)[,2]) #p value
stat<-cbind(stat,(ind.res$obs[,1])) # observed stats
if (!is.null(nperm)) {
P.perm<-cbind(P.perm,c(ind.res$perm)) #p value from permutation
}
cat("dataset",k,"is done\n")
N<-c(N,ncol(y))
} # end of K study for loop
if(is.null(names(data))) {
colnames(stat)<-colnames(P)<-paste("dataset",1:K,sep="") #naming studies
} else {
colnames(stat)<-colnames(P)<-names(data)
}
if (!is.null(nperm)) {
colnames(P.perm)<-paste("dataset",1:K,sep="") # naming studies
}
all.res<-list(stat=stat,p=P,bp=P.perm)
attr(all.res$p,"nperstudy")<-N
attr(all.res$p,"data.type")<- data.type
attr(all.res$p,"individual.analysis")<-ind.method
attr(all.res$p,"response.type")<- resp.type
attr(all.res$p,"alter.hypothesis")<- tail
return(all.res)
} #end of association with survival
} # end of Indi.DE.Analysis function
## Output
## twoclass: pvalue, log2FC and its SE (if available)
## multiclass: pvalue
## continuous, survival: stat, p, bp (perm.p)
get.sample.label.number<-function(lbl) {
N<-length(lbl) #sample size per study
n<- table(lbl) #sample size per label per study
return(list(N=N,n=n))
}
get.limma<-function(y,l,c,name,ANOVA,tail, paired){
## y: intensity matrix, l: group label, c: clinical data, name: group name
if(!ANOVA) {
if(is.null(c)) {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
design <-model.matrix(~ l + subject)
} else{
design <-model.matrix(~ l) # design matrix
}
} else {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
lc <- data.frame(l=l,c=c,s=subject)
design <-model.matrix(~ l + c + s,data=lc)
} else{
lc <- data.frame(l=l,c=c)
design <-model.matrix(~ l + c,data=lc) # design matrix
}
}
#log2FC <- rowMeans(y[,l==name[1]])-rowMeans(y[,l==name[2]])
fit <-lmFit(y, design)
ebFit<-eBayes(fit)
out.table <- topTable(ebFit,coef=2, number=Inf, sort.by='none')
log2FC <- out.table$logFC
lfcSE <- sqrt(ebFit$s2.post) * fit$stdev.unscaled[,2]
#stat <- out.table$t
p <- as.numeric(out.table$P.Value)
dir <- sign(log2FC)
if (tail=="high") {
pvalue <- mapply(function(x,y) if(x==1){
return(y/2) } else{
return(1-y/2)
}, x=dir, y=p)
}
if (tail=="low") {
pvalue <- mapply(function(x,y) if(x==1){
return(1-y/2) } else{
return(y/2)
}, x=dir, y=p)
}
if (tail=="abs") pvalue <- p
#pvalue <-out.table$P.Value
limma.out <- list(log2FC,lfcSE,pvalue)
names(limma.out) <- c('log2FC','lfcSE','pvalue')
} else {
design <-model.matrix(~ -1+l) # design matrix
colnames(design)[1:nlevels(l)] = make.names(levels(l))
combination <- apply(combn(make.names(levels(l)),2),2,function(x) {
paste(x,collapse='-')
} )
cont.matrix<-makeContrasts(contrasts=combination,levels = colnames(design))
fit <-lmFit(y, design)
fit2 <-contrasts.fit(fit, contrasts=cont.matrix)
ebFit<-eBayes(fit2)
pvalue <- ebFit$F.p.value
limma.out <- list(pvalue)
names(limma.out) <- c('pvalue')
}
return(limma.out)
}
get.sam<-function(y,l,name, seed, ANOVA, tail, paired) {
## y: intensity matrix, l: group label, name: group name
options(verbose=F)
if(!ANOVA) {
#log2FC <- rowMeans(y[,l==name[1]])-rowMeans(y[,l==name[2]])
## prepare the sam data
sam.data<-list(x=y,y=l, geneid= 1:nrow(y), genenames=rownames(y),logged2=T)
## run the sam permutation test
samr.obj<-samr(sam.data, resp.type=ifelse(paired,
"Two class paired","Two class unpaired"), nperms=100,
random.seed=seed)
#samr.obj<-samr(sam.data, resp.type="Two class unpaired", nperms=100,
# random.seed=seed)
p<- as.numeric(samr.pvalues.from.perms(samr.obj$tt, samr.obj$ttstar))
log2FC <- as.numeric(log2(samr.obj$foldchange))
dir <- sign(log2FC)
if (tail=="high") {
pvalue <- mapply(function(x,y) if(x==1){
return(y/2) } else{
return(1-y/2)
}, x=dir, y=p)
}
if (tail=="low") {
pvalue <- mapply(function(x,y) if(x==1){
return(1-y/2) } else{
return(y/2)
}, x=dir, y=p)
}
if (tail=="abs") pvalue <- p
sam.out <- list(log2FC,pvalue)
names(sam.out) <- c('log2FC','pvalue')
} else {
sam.data<-list(x=y,y=l, geneid= 1:nrow(y), genenames=rownames(y),logged2=T)
samr.obj<-samr(sam.data,resp.type="Multiclass", nperms=100,random.seed=seed)
pvalue<- as.numeric(samr.pvalues.from.perms(samr.obj$tt, samr.obj$ttstar))
sam.out <- list(pvalue)
names(sam.out) <- c('pvalue')
}
options(verbose=T)
return(sam.out)
}
get.limmaVoom <- function(y,l,c,name, ANOVA, tail, paired) {
## y: count matrix, l: group label, c: clinical data, name: group name
dge <- DGEList(counts=y)
dge <- calcNormFactors(dge)
if(!ANOVA){
if(is.null(c)) {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
design <-model.matrix(~ l + subject)
} else{
design <-model.matrix(~ l) # design matrix
}
} else {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
lc <- data.frame(l=l,c=c,s=subject)
design <-model.matrix(~ l + c + s,data=lc)
}else{
lc <- data.frame(l=l,c=c)
design <-model.matrix(~ l + c,data=lc) # design matrix
}
}
v <- voom(dge,design,plot=FALSE,normalize="quantile") # voom normalization
fit <-lmFit(v, design)
ebFit<-eBayes(fit)
out.table <- topTable(ebFit,coef=2, number=Inf, sort.by='none')
log2FC <- out.table$logFC
lfcSE <- sqrt(ebFit$var.prior)
#stat <- out.table$t
p <- as.numeric(out.table$P.Value)
dir <- sign(log2FC)
if (tail=="high") {
pvalue <- mapply(function(x,y) if(x==1){
return(y/2) } else{
return(1-y/2)
}, x=dir, y=p)
}
if (tail=="low") {
pvalue <- mapply(function(x,y) if(x==1){
return(1-y/2) } else{
return(y/2)
}, x=dir, y=p)
}
if (tail=="abs") pvalue <- p
limmaVoom.out <- list(log2FC,lfcSE,pvalue)
names(limmaVoom.out) <- c('log2FC','lfcSE','pvalue')
} else {
design <-model.matrix(~ -1+l)
colnames(design)[1:nlevels(l)] = make.names(levels(l))
combination <- apply(combn(make.names(levels(l)),2),2,function(x) {
paste(x,collapse='-')
} )
cont.matrix<-makeContrasts(contrasts=combination,levels = colnames(design) )
v <- voom(dge,design,plot=FALSE,normalize="quantile") # voom normalization
fit <-lmFit(v, design)
fit2 <-contrasts.fit(fit, contrasts=cont.matrix)
ebFit<-eBayes(fit2)
pvalue <- ebFit$F.p.value
limmaVoom.out <- list(pvalue)
names(limmaVoom.out) <- c('pvalue')
}
return(limmaVoom.out)
}
get.edgeR <- function(y,l,c,name, ANOVA, tail, paired) {
## y: count matrix, l: group label, c: clinical data, name: group name
dat <- DGEList(counts=y)
dat <- calcNormFactors(dat)
dat=estimateGLMCommonDisp(dat) #estimate common dispersion
dat=estimateGLMTrendedDisp(dat) #estiamte trended dispersion
dat=estimateGLMTagwiseDisp(dat) #estimate tagwise dispersion
#dispersion <- dat$tagwise.dispersion
if(!ANOVA){
if(is.null(c)) {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
design <-model.matrix(~ l + subject)
} else{
design <-model.matrix(~ l) # design matrix
}
} else {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
lc <- data.frame(l=l,c=c,s=subject)
design <-model.matrix(~ l + c + s,data=lc)
}else{
lc <- data.frame(l=l,c=c)
design <-model.matrix(~ l + c,data=lc) # design matrix
}
}
fit <- glmFit(dat, design)
lrt <- glmLRT(fit, coef=2)
out.table <- lrt$table
log2FC <- out.table$logFC
p <- as.numeric(out.table$PValue)
dir <- sign(log2FC)
if (tail=="high") {
pvalue <- mapply(function(x,y) if(x==1){
return(y/2) } else{
return(1-y/2)
}, x=dir, y=p)
}
if (tail=="low") {
pvalue <- mapply(function(x,y) if(x==1){
return(1-y/2) } else{
return(y/2)
}, x=dir, y=p)
}
if (tail=="abs") pvalue <- p
edgeR.out <- list(log2FC,pvalue)
names( edgeR.out) <- c('log2FC','pvalue')
} else {
if(is.null(c)) {
design <-model.matrix(~ l) # design matrix
} else{
lc <- data.frame(l=l,c=c)
design <-model.matrix(~ l + c,data=lc) # design matrix
}
fit <- glmFit(dat, design)
lrt <- glmLRT(fit, coef=2:(nlevels(l)))
out.table <- lrt$table
#log2FC <- out.table$logFC
pvalue <- out.table$PValue
edgeR.out <- list(pvalue)
names(edgeR.out) <- c('pvalue')
}
return(edgeR.out)
}
get.DESeq2 <- function(y,l,c,name, ANOVA, tail, paired) {
## y: count matrix, l: group label, c: clinical data, name: group name
#library(SummarizedExperiment)
if(!ANOVA){
if(is.null(c)) {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
design <-model.matrix(~ l + subject) # design matrix
colData <- data.frame(l=l, s=subject)
colnames(colData) <-colnames(design)[-1]
}else{
design <-model.matrix(~ l) # design matrix
colData <- data.frame(l=l)
colnames(colData) <-colnames(design)[-1]
}
} else {
if(paired) {
subject <- as.factor(rep(1:(length(l)/2),2))
lc <- data.frame(l=l,c=c,s=subject)
design <-model.matrix(~ l + c + s,data=lc) # design matrix
colData <- lc
colnames(colData) <-colnames(design)[-1]
} else{
lc <- data.frame(l=l,c=c)
design <-model.matrix(~ l + c,data=lc) # design matrix
colData <- lc
colnames(colData) <-colnames(design)[-1]
}
}
ddsMat <- DESeqDataSetFromMatrix(countData = y,
colData = colData,
design = as.formula(
paste(" ~ ",paste(colnames(colData), collapse=" + ") )
)
)
ddsMat <- DESeq(ddsMat)
res <- results(ddsMat,contrast=c(colnames(colData)[1],levels(l)[2],
levels(l)[1]) )
log2FC <- as.numeric(res$log2FoldChange)
lfcSE <- as.numeric(res$lfcSE)
p <- as.numeric(res$pvalue)
dir <- sign(log2FC)
if (tail=="high") {
pvalue <- mapply(function(x,y) if(x==1){
return(y/2) } else{
return(1-y/2)
}, x=dir, y=p)
}
if (tail=="low") {
pvalue <- mapply(function(x,y) if(x==1){
return(1-y/2) } else{
return(y/2)
}, x=dir, y=p)
}
if (tail=="abs") pvalue <- p
DESeq2.out <- list(log2FC,lfcSE,pvalue)
names(DESeq2.out) <- c('log2FC','lfcSE','pvalue')
} else {
if(is.null(c)) {
design <-model.matrix(~ l)
colData <- data.frame(group=l)
} else{
lc <- data.frame(l=l,c=c)
design <-model.matrix(~ l + c,data=lc)
colData <- lc
colnames(colData) <-colnames(design)[-1]
}
ddsMat <- DESeqDataSetFromMatrix(countData = y,
colData = colData,
design = as.formula(
paste(" ~ ",paste(colnames(colData), collapse=" + ") )
)
)
ddsMat <- DESeq(ddsMat,test='LRT',reduced= as.formula(
paste(" ~ ",paste(colnames(colData)[-c(1:(nlevels(l)-1))], collapse=" + "))
)
)
res <- results(ddsMat)
pvalue <- as.numeric(res$pvalue)
DESeq2.out <- list(pvalue)
names(DESeq2.out) <- c('pvalue')
}
return(DESeq2.out)
}
#--------calculate r statistic (pearson's correlation)for all genes-----#
#---note: l is continuous------------#
cal.pearsonr<-function(y,l) {
stopifnot(length(l)==ncol(y), is.matrix(y))
n<-length(l)
num<-n*y%*%l-rowSums(y)*sum(l)
den<-sqrt(n*rowSums(y^2)-rowSums(y)^2)*sqrt(n*sum(l^2)-sum(l)^2)
r<-num/den
r
}
#-------get p value for pearson correlation r using permutation-------#
get.p.pearsonr<-function(y,l,asymptotic, nperm,tail) {
## y: expression matrix , l: continuous outcome, tail: specify the Ha
rnum<-which(apply(y,1,function(x) !any(is.na(x))))
stat.obs<-p<-q<-rep(NA,nrow(y))
names(stat.obs)<-names(p)<-rownames(y)
stat.obs[rnum]<-cal.pearsonr(y[rnum,],l) #observed stat
if (asymptotic==F) {
stat.perm<-p.perm<-matrix(NA,nrow=nrow(y),ncol=nperm)
rownames(stat.perm)<-rownames(p.perm)<-rownames(y)
stat.perm[rnum,]<-replicate(nperm,cal.pearsonr(y[rnum,],l)) #stat from perm
p[rnum]<-perm.p(stat.obs[rnum],stat.perm[rnum,],tail) #p from perm
#q[rnum]<-p.adjust(p[rnum],method="BH")
p.perm[rnum,]<-emp(stat.perm[rnum,],tail)
res <- list(obs=cbind(stat.obs,p),perm=p.perm)
} else {
n<-length(l)
t<-stat.obs*sqrt((n-2)/(1-stat.obs^2)) #fisher's transformation
if (tail=="low") p[rnum]<-pt(t,n-2)
if (tail=="high") p[rnum]<-1-pt(t,n-2)
if (tail=="abs") p[rnum]<-2*(pmin(pt(t,n-2),1-pt(t,n-2)))
res<-list(obs=cbind(stat.obs,p))
}
return(res)
}
#--------calculate r statistic (spearman's correlation)for all genes------#
#---note: l is continuous------------#
cal.spearmanr<-function(y,l) {
stopifnot(length(l)==ncol(y), is.matrix(y))
n<-length(l)
y<-t(apply(y,1,rank))
l<-rank(l)
num<-n*y%*%l-rowSums(y)*sum(l)
den<-sqrt(n*rowSums(y^2)-rowSums(y)^2)*sqrt(n*sum(l^2)-sum(l)^2)
r<-num/den
r
}
#-------get p value for spearman correlation r using permutation----------#
get.p.spearmanr<-function (y,l,asymptotic, nperm,tail) {
## y: expression matrix , l: continuous outcome, tail: specify the Ha
rnum<-which(apply(y,1,function(x) !any(is.na(x))))
stat.obs<-p<-q<-rep(NA,nrow(y))
names(stat.obs)<-names(p)<-rownames(y)
stat.obs[rnum]<-cal.spearmanr(y[rnum,],l)#observed stat
if (asymptotic==F)
{
stat.perm<-p.perm<-matrix(NA,nrow=nrow(y),ncol=nperm)
rownames(stat.perm)<-rownames(p.perm)<-rownames(y)
stat.perm[rnum,]<-replicate(nperm,cal.spearmanr(y[rnum,],l))# stat from perm
p[rnum]<-perm.p(stat.obs[rnum],stat.perm[rnum,],tail) #p from perm
#q[rnum]<-p.adjust(p[rnum],method="BH")
p.perm[rnum,]<-emp(stat.perm[rnum,],tail)
res <- list(obs=cbind(stat.obs,p),perm=p.perm)
} else {
n<-length(l)
t<-stat.obs*sqrt((n-2)/(1-stat.obs^2)) #fisher's transformation
if (tail=="low") p[rnum]<-pt(t[rnum],n-2)
if (tail=="high") p[rnum]<-1-pt(t[rnum],n-2)
if (tail=="abs") p[rnum]<-2*(pmin(pt(t[rnum],n-2),1-pt(t[rnum],n-2)))
res<-list(obs=cbind(stat.obs,p))
}
return(res)
}
#-------get logrank z statistic-----------------#
cal.logrank<-function(y,time,event)
{
get.stat<-function(x,time,event) {
stat<-summary(coxph(Surv(time,event)~x),method="breslow")$sctest[1]
stat
}
z<-apply(y,1,get.stat,time=time,event=event)
z
}
cal.p.logrank<-function(y,time,event)
{
get.p<-function(x,time,event) {
p<-summary(coxph(Surv(time,event)~x,method="breslow"))$sctest[3]
p
}
p<-apply(y,1,get.p,time=time,event=event)
p
}
#-------get p value for logrank z using permutation-----------------#
get.p.logrank<-function(y,time,event,asymptotic,nperm,tail)
{
rnum<-which(apply(y,1,function(x) !any(is.na(x))))
stat.obs<-p<-pp<-rep(NA,nrow(y))
stat.obs[rnum]<-cal.logrank(y[rnum,],time=time,event=event)
names(stat.obs)<-names(p)<-rownames(y)
if(asymptotic== F)
{
stat.perm<-p.perm<-matrix(NA,nrow=nrow(y),ncol=nperm)
rownames(stat.perm)<-rownames(p.perm)<-rownames(y)
stat.perm[rnum,]<-replicate(nperm,cal.logrank(y[rnum,],time=time,
event=event))
p[rnum]<-perm.p(stat.obs[rnum],stat.perm[rnum,],tail)
#q[rnum]<-p.adjust(p[rnum],method="BH")
p.perm[rnum,]<-emp(stat.perm[rnum,],tail)
res <- list(obs=cbind(stat.obs,p),perm=p.perm)
} else {
p[rnum]<-cal.p.logrank(y[rnum,],time=time,event=event)
if (tail=="low") pp[rnum]<-p
if (tail=="high") pp[rnum]<-1-p
if (tail=="abs") pp[rnum]<-2*(pmin(p,1-p))
res<-list(obs=cbind(stat.obs,p=pp))
}
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.