intSEQ.default <-
function(count.data, condition, nullcondition = NULL,nneighbour=400,lambda1 =ncol(count.data),lambda2 =0.05, meanmeth=c("estimator","local.mean")
,smoothmethod = c("loess","spline","no"),normalize=TRUE, offsets = NULL, weights = NULL, constadj=FALSE,w1=max(1-ncol(count.data)/100, 0)
,w2=max(1-ncol(count.data)/1000, 0),...) {
meanmeth=match.arg(meanmeth)
smoothmeth=match.arg(smoothmethod)
##calculate the normalization factors by "TMM" method
if( normalize){
normfac <- calcNormFactors(count.data,method = "TMM")
libsize=colSums(count.data)
#get normalization factors
norm.fac <-libsize*normfac/(mean(normfac*libsize))
}else{
norm.fac <- rep(1, ncol(count.data))
}
##check the class if condition and nullcondition
if(!(is.vector(condition)|is.data.frame(condition)|is.factor(condition)))
stop("condition must be data frame or vector")
if(!(is.vector(nullcondition)|is.data.frame(nullcondition)|is.null(nullcondition)))
stop("null condition must be NULL, data frame or vector")
if(is.factor(condition)) condition = as.vector(condition)
if(is.vector(condition)) condition = data.frame(condition)
if(is.vector(nullcondition)) nullcondition = data.frame(nullcondition)
## get the design matrix for full model and reduced model
design <- model.matrix(~., data = condition)
if(is.null(nullcondition)){
design0 = model.matrix(~1, data = condition)
}else{
design0 = model.matrix(~., data = nullcondition)
}
##estimate the dispersion using edgeR without shrinking
if(is.null(offsets)) offsets=rep(0, ncol(count.data))
d <- estimateGLMTrendedDisp(count.data, design,offset=log(norm.fac)+offsets, weights = weights)
disper <- estimateGLMTagwiseDisp(count.data, design, prior.df = 0.0 , offset=log(norm.fac)+offsets, weights = weights, dispersion = d)
##get the index of coefficients of insterest used in glmLRT
index = match(data.frame(design), data.frame(design0), nomatch = NA)
coeff = (1:ncol(design))[is.na(index)]
##fit the reduced and full model, using above estimated unshrunken dispersion
fit.full <- glmFit(count.data,design , offset=log(norm.fac), dispersion = disper)
fit.null <- glmFit(count.data,design0 , offset=log(norm.fac), dispersion = disper)
full.lr <- glmLRT(fit.full, coef = coeff)
res = topTags(full.lr,n=nrow(count.data), sort.by = "none")$table
## get the coefficients,
beta=fit.full$unshrunk.coefficients
beta0=fit.null$unshrunk.coefficients
eta=beta%*%t(design)
eta0=beta0%*%t(design0)
eta=t(apply(eta,1, function(x,y) x+y, y=log(norm.fac)))
eta0=t(apply(eta0,1, function(x,y) x+y, y=log(norm.fac)))
mu0=exp(eta0)
mua=exp(eta)
means=rowMeans(count.data)
##get the constant multiplied to the null integrated likelihood and full integrated likelihood
if(constadj){
constvec = dnbinom(x = floor(means), size = 1/disper, mu=means)
}else{
constvec=rep(1, length(means))
}
#remove outliers before fit the smooth line, the dispersions are
#larger than 99% quantile +2* interquartile or extremely small are removed
newdisper=disper
newdisper[disper<=1e-3] <-NA
newdisper[disper > quantile(disper,probs=0.99,na.rm=T)+2* IQR(disper, na.rm = T)] <- NA
keep = !is.na(newdisper)
newdisper <- newdisper[keep]
newmeans <- means[keep]
#fit a smooth line of started log of means versus dispersion
df=data.frame(x=log(newmeans+0.5,base=2),y=newdisper)
smooth.fit <- switch(smoothmeth,
loess = loess(y~x,data=df, span=0.4),
spline = smooth.spline(x=log(newmeans+0.5,base=2),y=newdisper, spar=0.4),
no = NULL)
mdf=data.frame("x"=log(means+0.5,base=2))
fitted.disp <- switch(smoothmeth,
loess = predict(smooth.fit,mdf),
spline = predict(smooth.fit,x=log(means+0.5,base=2))$y,
no = NULL)
###get the integrated likelihood ratio statistics
rk<- rank(means)
lr=numeric(length(disper))
for(i in 1:length(disper)){
if(rk[i] <=nneighbour/2){
index = (rk<=(nneighbour+1))
}else if(rk[i] >= length(disper)-nneighbour/2){
index = (rk >= (length(disper)-nneighbour))
}else{
index = (rk>=(rk[i]-nneighbour/2))&(rk<=(rk[i]+nneighbour/2))
}
data=count.data[i,]
thetas <- disper[index]
if(smoothmeth=="no"){
is.smooth=FALSE
}else{is.smooth=T
fitted.disps=fitted.disp[index]
}
lr[i]<-marginal.LR(thetas = thetas, disp.fit = fitted.disps ,data=data, thet = disper[i]
, fit.thet = fitted.disp[i],lambda1,lambda2 ,meanmeth,is.smooth,m0=mu0[i,],ma=mua[i,],const = constvec[i],w1,w2)
}
##calculate the p value
pval = pchisq(lr,df=length(coeff),lower.tail = F)
##return the results
res$intLR = lr
res$intPValue = pval
res$"FDR" = p.adjust(pval, method = "fdr")
res<- res[ ,c("logFC", "logCPM", "FDR" , "intLR", "intPValue", "LR", "PValue")]
res[is.nan(pval), c("intLR","intPValue")] <- res[is.nan(pval), c("LR", "PValue")]
name <- colnames(res)
name[match(c("LR", "PValue"), name)] <- c("ordinaryLR", "ordinaryPValue")
colnames(res) <- name
par <- cbind(mua, disper)
colnames(par) <- c(paste("mu", 1:ncol(mua)), "disper")
res=list(restable = res, parameters = par, cond = t(as.matrix(condition)) )
class(res) <- "intres"
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.