Nothing
getCountParams <- function(counts, sizeFactors) {
# Divides the counts by sizeFactors and calculates the estimates for
# base means and variances for each gene
data.frame(baseMean = rowMeans(t(t(counts) / sizeFactors)),
baseVar = rowVars(t(t(counts) / sizeFactors)))
}
getCountParamsPooled <-
function(counts, sizeFactors, conditions) {
basecounts <- t(t(counts) / sizeFactors)
replicated_sample <-
conditions %in% names(which(table(conditions) > 1))
df <-
sum(replicated_sample) - length(unique(conditions[replicated_sample]))
data.frame(baseMean = rowMeans(basecounts),
baseVar =
rowSums(sapply(
tapply((seq_len(ncol(
counts
)))[replicated_sample],
factor(conditions[replicated_sample]),
function(cols)
rowSums((
basecounts[,cols] - rowMeans(basecounts[,cols])
) ^ 2)),
identity
)) / df)
}
getSCV <- function(means,
variances, sizeFactors, fitType = c("parametric", "local"),
locfit_extra_args = list(), lp_extra_args = list(), adjustForBias =
TRUE) {
fitType <- match.arg(fitType)
xim <- mean(1 / sizeFactors)
SCVAll <- (variances - xim * means) / means ^ 2
variances <- variances[means > 0]
SCV <- SCVAll[means > 0]
means <- means[means > 0]
if (adjustForBias)
SCV <- adjustScv(SCV, length(sizeFactors))
if (fitType == "local") {
fit <- do.call("locfit", c(
list(variances ~ do.call("lp", c(
list(log(means)), lp_extra_args
)),
family = "gamma"),
locfit_extra_args
))
rm(means)
rm(variances)
if (adjustForBias)
ans <- function(q)
adjustScv(pmax((predict_helper(fit, log(
q
)) - xim * q) / q ^ 2, 1e-8),
length(sizeFactors))
else
ans <- function(q)
pmax((predict_helper(fit, log(q)) - xim * q) / q ^ 2, 1e-8)
# Note: The 'pmax' construct above serves to limit the overdispersion to a minimum
# of 10^-8, which should be indistinguishable from 0 but ensures numerical stability.
} else if (fitType == "parametric") {
ans <- parametricscvFit(means, SCV)
} else
stop("Unknown fitType.")
attr(ans, "fitType") <- fitType
list(SCV = SCVAll, SCVfunc = ans)
}
parametricscvFit <- function(means, disps)
{
coefs <- c(.1, 1)
iter <- 0
while (TRUE) {
residuals <- disps / (coefs[1] + coefs[2] / means)
good <- which((residuals > 1e-4) & (residuals < 15))
fit <- glm(disps[good] ~ I(1 / means[good]),
family = Gamma(link = "identity"), start = coefs)
oldcoefs <- coefs
coefs <- coefficients(fit)
if (!all(coefs > 0))
stop(
"Parametric dispersion fit failed. Try a local fit and/or a pooled estimation. (See '?estimateSCV')"
)
if (sum(log(coefs / oldcoefs) ^ 2) < 1e-6)
break
iter <- iter + 1
if (iter > 10) {
warning("Dispersion fit did not converge.")
break
}
}
names(coefs) <- c("asymptDisp", "extraPois")
ans <- function(q)
coefs[1] + coefs[2] / q
attr(ans, "coefficients") <- coefs
ans
}
predict_helper <- function(fit, x)
{
# A wrapper around predict to avoid the issue that predict.locfit cannot
# propagate NAs and NaNs properly.
res <- rep.int(NA_real_, length(x))
res[is.finite(x)] <- predict(fit, x[is.finite(x)])
res
}
XBSeqTestForMatrices <-
function(countsA, countsB, bgcountsA, bgcountsB, sizeFactorsA, sizeFactorsB,
SCVA, SCVB , method = c('NP', 'MLE'), big_count)
{
method <- match.arg(method, c('NP', 'MLE'))
if (ncol(countsA) < 5 & method == 'MLE')
warning(
'Non-parametric estimation method is recommended for experiments with replicates smaller than 5'
)
kAs <- apply(countsA, 1, sum)
kBs <- apply(countsB, 1, sum)
mus <- rowMeans(cbind(t(t(countsA) / sizeFactorsA),
t(t(countsB) / sizeFactorsB)))
if (method == 'NP') {
signalmuA <- mus * sum(sizeFactorsA)
signalmuB <- mus * sum(sizeFactorsB)
signalVarsA <-
pmax(
mus * sum(sizeFactorsA) + SCVA * mus ^ 2 * sum(sizeFactorsA ^ 2),
mus * sum(sizeFactorsA) * (1 + 1e-8)
)
signalVarsB <-
pmax(
mus * sum(sizeFactorsB) + SCVB * mus ^ 2 * sum(sizeFactorsB ^ 2),
mus * sum(sizeFactorsB) * (1 + 1e-8)
)
sizeA <- signalmuA ^ 2 / (signalVarsA - signalmuA)
sizeB <- signalmuB ^ 2 / (signalVarsB - signalmuB)
}
else if(method == 'MLE'){
musA <- mus * mean(sizeFactorsA)
musB <- mus * mean(sizeFactorsB)
VarsA <-
pmax(
mus * mean(sizeFactorsA) + SCVA * mus ^ 2 * mean(sizeFactorsA ^ 2),
mus * mean(sizeFactorsA) * (1 + 1e-8)
)
VarsB <-
pmax(
mus * mean(sizeFactorsB) + SCVB * mus ^ 2 * mean(sizeFactorsB ^ 2),
mus * mean(sizeFactorsB) * (1 + 1e-8)
)
lambda <- rowMeans(cbind(t(t(bgcountsA) / sizeFactorsA),
t(t(bgcountsB) / sizeFactorsB)))
ParamsA <- c()
cat('estimating parameters using MLE for group one', '\n')
for(i in 1:nrow(countsA)){
temp <- estimation_param_PoissonNB_MLE_NB(
countsA[i,],
musA[i] ^2 / (VarsA[i] - musA[i]),
musA[i]
)
ParamsA <- rbind(ParamsA, as.data.frame(temp))
}
cat('estimating parameters using MLE for group two', '\n')
ParamsB <- c()
for(i in 1:nrow(countsB)){
temp <- estimation_param_PoissonNB_MLE_NB(
countsB[i,],
musB[i] ^2 / (VarsB[i] - musB[i]),
musB[i]
)
ParamsB <- rbind(ParamsB, as.data.frame(temp))
}
# ParamsA <-
# sapply(1:nrow(countsA), function(i)
# estimation_param_PoissonNB_MLE(
# countsA[i,] + bgcountsA[i,],
# bgcountsA[i,],
# musA[i] ^
# 2 / (VarsA[i] - musA[i]),
# (VarsA[i] -
# musA[i]) / musA[i],
# lambdaA[i]
# ))
# ParamsA <- matrix(unlist(ParamsA), ncol = 3, byrow = TRUE)
# ParamsB <-
# sapply(1:nrow(countsB), function(i)
# estimation_param_PoissonNB_MLE(
# countsB[i,] + bgcountsB[i,],
# bgcountsB[i,],
# musB[i] ^
# 2 / (VarsB[i] - musB[i]),
# (VarsB[i] -
# musB[i]) / musB[i],
# lambdaB[i]
# ))
# ParamsB <- matrix(unlist(ParamsB), ncol = 3, byrow = TRUE)
# sizeA <- ParamsA[,1] * sum(sizeFactorsA)
# sizeB <- ParamsB[,1] * sum(sizeFactorsB)
#
signalmuA <- mus * sum(sizeFactorsA)
signalmuB <- mus * sum(sizeFactorsB)
signalVarsA <-
pmax(
mus * sum(sizeFactorsA) + SCVA * mus ^ 2 * sum(sizeFactorsA ^ 2),
mus * sum(sizeFactorsA) * (1 + 1e-8)
)
signalVarsB <-
pmax(
mus * sum(sizeFactorsB) + SCVB * mus ^ 2 * sum(sizeFactorsB ^ 2),
mus * sum(sizeFactorsB) * (1 + 1e-8)
)
sizeA <- signalmuA ^ 2 / (signalVarsA - signalmuA)
sizeB <- signalmuB ^ 2 / (signalVarsB - signalmuB)
# sizeA <- musA ^2 / (VarsA - musA) * sum(sizeFactorsA)
# sizeB <- musB ^2 / (VarsB - musB) * sum(sizeFactorsB)
signalmuA <- ParamsA[,2] * sum(sizeFactorsA)
signalmuB <- ParamsB[,2] * sum(sizeFactorsB)
#signalmuA <- ParamsA[,1] * ParamsA[,2] * sum(sizeFactorsA)
#signalmuB <- ParamsB[,1] * ParamsB[,2] * sum(sizeFactorsB)
}
pvals <- rep(1,nrow(countsA))
names(pvals) <- rownames(countsA)
big <- kAs > big_count & kBs > big_count
if (any(big))
pvals[big] <-
exactTestBetaApprox(
sweep(countsA[big,,drop = FALSE], 2, sizeFactorsA, '/'), sweep(countsB[big,,drop =
FALSE], 2, sizeFactorsB, '/'), sizeA[big], sizeB[big]
)
if (any(!big))
pvals[!big] <- sapply(seq(along = kAs[!big]), function(i) {
if (kAs[!big][i] == 0 & kBs[!big][i] == 0)
return(NA)
# probability of all possible counts sums with the same total count:
ks <- 0:(kAs[!big][i] + kBs[!big][i])
ps <-
dnbinom(ks, mu = signalmuA[!big][i], size = sizeA[!big][i]) *
dnbinom(kAs[!big][i] + kBs[!big][i] - ks, mu = signalmuB[!big][i], size = sizeB[!big][i])
# probabilit y of observed count sums:
pobs <-
dnbinom(kAs[!big][i], mu = signalmuA[!big][i], size = sizeA[!big][i]) *
dnbinom(kBs[!big][i], mu = signalmuB[!big][i], size = sizeB[!big][i])
if (kAs[!big][i] * sum(sizeFactorsB) < kBs[!big][i] * sum(sizeFactorsA))
numer <- ps[1:(kAs[!big][i] + 1)]
else
numer <- ps[(kAs[!big][i] + 1):length(ps)]
min(1, 2 * sum(numer) / sum(ps))
})
return(pvals)
}
exactTestBetaApprox <- function(y1,y2,size1, size2)
# Test for differences in means between two negative binomial
# or Poisson random variables, or between two groups of variables,
# using a beta distribution approximation.
# Test is naturally conditional on total sum.
# Left and right rejection regions have equal probability.
# adopted and revised from source code of edgeR
{
# Convert matrices to vectors
ntags <- NROW(y1)
n1 <- NCOL(y1)
n2 <- NCOL(y2)
if(n1>1) y1 <- rowSums(y1)
if(n2>1) y2 <- rowSums(y2)
# Null fitted values
y <- y1+y2
mu <- y/(n1+n2)
# Compute p-values
pvals <- rep(1,ntags)
all.zero <- y<=0
alpha1 <- n1*mu/(1+n1/size1*mu)
alpha2 <- n2*mu/(1+n2/size2*mu)
med <- rep(0,ntags)
med[!all.zero] <- qbeta(0.5,alpha1[!all.zero],alpha2[!all.zero])
left <- (y1+0.5)/y<med & !all.zero
if(any(left)) {
pvals[left] <- 2*pbeta((y1[left]+0.5)/y[left],alpha1[left],alpha2[left])
}
right <- (y1-0.5)/y>med & !all.zero
if(any(right)) {
pvals[right] <- 2*pbeta((y1[right]-0.5)/y[right],alpha1[right],alpha2[right],lower.tail=FALSE)
}
names(pvals) <- names(y1)
pvals
}
insertRow <- function(existingDF, newrow, r) {
existingDF <- as.data.frame(existingDF)
for (i in 1:length(r)){
if(r[i] < nrow(existingDF) | r[i] == nrow(existingDF))
existingDF[seq(r[i] + 1,nrow(existingDF) + 1),] <-
existingDF[seq(r[i],nrow(existingDF)),]
existingDF[r[i],] <- newrow[i,]
}
existingDF <- as.matrix(existingDF)
storage.mode(existingDF) <- 'integer'
existingDF
}
getSignalVars <- function(counts, bgcounts) {
if (!is.matrix(counts))
counts <- as.matrix(counts)
if (!is.matrix(bgcounts))
bgcounts <- as.matrix(bgcounts)
bgsizeFactors <- estimateSizeFactorsForMatrix(bgcounts)
lambda <- rowMeans(sweep(bgcounts, 2, bgsizeFactors, '/'))
observe_param <-
getCountParams(counts, estimateSizeFactorsForMatrix(counts))
observe_sf <- estimateSizeFactorsForMatrix(counts)
tho <- c()
for (i in 1:nrow(counts))
{
if (lambda[i] == 0) {
temp <- 0
if (length(which(counts[i,] == 0)) >= ncol(counts) / 2)
temp <- NA
}
else {
if (length(which(counts[i,] == 0)) >= ncol(counts) / 2) {
temp <- NA
}
else {
temp2 <- cor(counts[i,] * observe_sf, bgcounts[i,] * bgsizeFactors)
if (is.na(temp2))
temp <- 0
else
temp <- temp2
}
}
tho <- c(tho, temp)
}
fullvar <- observe_param$baseVar
signalvar <- fullvar + lambda - 2 * tho * sqrt(fullvar) * sqrt(lambda)
as.matrix(signalvar)
}
prepareScvBiasCorrectionFits <-
function(maxnrepl = 15, mu = 100000, ngenes = 10000,
true_raw_scv = c(seq(0, 2, length.out =
100)[-1], seq(2, 10, length.out = 20)[-1]))
lapply(2:maxnrepl, function(m) {
est_raw_scv <- sapply(true_raw_scv, function(alpha) {
k <- matrix(rnbinom(ngenes * m, mu = mu, size = 1 / alpha), ncol = m)
k <- k[rowSums(k) > 0,]
mean(rowVars(k) / rowMeans(k) ^ 2)
})
locfit(true_raw_scv ~ lp(est_raw_scv, nn = .2))
})
adjustScv <- function(scv, nsamples) {
stopifnot(nsamples > 1)
if (!exists(("scvBiasCorrectionFits")))
data("scvBiasCorrectionFits")
if (nsamples - 1 > length(scvBiasCorrectionFits))
scv
else
ifelse(scv > .02,
pmax(predict_helper(scvBiasCorrectionFits[[nsamples - 1]], scv), 1e-8 * scv),
scv) # For scv < .02, our fit is too coarse, but no correction seems necessary anyway
}
Loglikhood <- function(counts, bgcounts) {
function(para) {
alpha <- para[1]
beta <- para[2]
lambda <- para[3]
- sum(
ddelap(
counts, alpha = alpha, beta = beta, lambda = lambda, log = TRUE
), dpois(bgcounts, lambda = lambda, log = TRUE)
)
}
}
estimation_param_PoissonNB_MLE <-
function(counts, bgcounts, alpha, beta, lambda) {
if (any(is.na(c(alpha, beta, lambda))))
list(alpha = alpha, beta = beta, lambda = lambda)
else{
mle <-
try(optim(
c(alpha, beta, lambda), Loglikhood(counts, bgcounts),
method = 'L-BFGS-B', lower = c(0,0,0)
), silent = TRUE)
if (class(mle) == 'try-error')
list(alpha = alpha, beta = beta, lambda = lambda)
else{
mle <- optim(
c(alpha, beta, lambda), Loglikhood(counts, bgcounts),
method = 'L-BFGS-B', lower = c(0,0,0)
)
if (mle$convergence > 0 | any(is.na(mle$par)))
list(alpha = alpha, beta = beta, lambda = lambda)
else
list(
alpha = mle$par[1], beta = mle$par[2], lambda = mle$par[3]
)
}
}
}
Loglikhood_NB <- function(counts) {
function(para) {
size <- para[1]
mu <- para[2]
- sum(
dnbinom(
counts, size = size, mu = mu, log = TRUE
)
)
}
}
estimation_param_PoissonNB_MLE_NB <-
function(counts, size, mu) {
if (any(is.na(c(size, mu))))
list(size = size, mu = mu)
else{
mle <-
try(optim(
c(size, mu), Loglikhood_NB(counts),
method = 'L-BFGS-B', lower = c(0,0,0)
), silent = TRUE)
if (class(mle) == 'try-error')
list(size = size, mu = mu)
else{
mle <- optim(
c(size, mu), Loglikhood_NB(counts),
method = 'L-BFGS-B', lower = c(0,0,0)
)
if (mle$convergence > 0 | any(is.na(mle$par)))
list(size = size, mu = mu)
else
list(
size = mle$par[1], mu = mle$par[2]
)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.