# internal function to run the McDavid et al. bimodal likelihood-ratio DE test
#
#' @importFrom stats pchisq
#
DifferentialLRT <- function(x, y, xmin = 0) {
  lrtX <- bimodLikData(x = x, xmin = xmin)
  lrtY <- bimodLikData(x = y, xmin = xmin)
  lrtZ <- bimodLikData(x = c(x, y), xmin = xmin)
  lrt_diff <- 2 * (lrtX + lrtY - lrtZ)
  return(pchisq(q = lrt_diff, df = 3, lower.tail = FALSE))
}
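# Illustrative usage (hypothetical data, not part of the original source):
# x and y are expression values for one gene in two groups of cells.
#   set.seed(1)
#   g1 <- c(rep(0, 20), rnorm(n = 20, mean = 2))   # detected in half the cells
#   g2 <- c(rep(0, 35), rnorm(n = 5, mean = 2))    # detected in few cells
#   DifferentialLRT(x = g1, y = g2)
# a small p-value indicates differential expression between the two groups.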
# internal helper for the McDavid et al. DE test: log-likelihood of the
# zero-inflated ("bimodal") model for a single expression vector
#
#' @importFrom stats sd dnorm
#
bimodLikData <- function(x, xmin = 0) {
  x1 <- x[x <= xmin]
  x2 <- x[x > xmin]
  xal <- MinMax(
    data = length(x = x2) / length(x = x),
    min = 1e-2,
    max = (1 - 1e-2)
  )
  likA <- length(x = x1) * log(x = 1 - xal)
  if (length(x = x2) < 2) {
    mysd <- 1
  } else {
    mysd <- sd(x = x2)
  }
  likB <- length(x = x2) *
    log(x = xal) +
    sum(dnorm(x = x2, mean = mean(x = x2), sd = mysd, log = TRUE))
  return(likA + likB)
}
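# Note (added for documentation): the value returned above is the log-likelihood
# of a two-component model,
#   n_zero * log(1 - alpha) + n_pos * log(alpha) +
#     sum(dnorm(x_pos, mean = mean(x_pos), sd = sd(x_pos), log = TRUE))
# where alpha is the detection rate length(x_pos) / length(x), clamped to
# [0.01, 0.99] by MinMax(), and sd falls back to 1 when fewer than two cells
# express the gene.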
# internal function to run the Tobit DE test across a set of genes
#
#' @importFrom pbapply pblapply
#' @importFrom parallel mclapply
#
TobitDiffExpTest <- function(data1, data2, mygenes, print.bar, NT = 1) {
  # per-gene wrapper around DifferentialTobit: drop NA values, then test
  run.one <- function(x) {
    return(DifferentialTobit(
      x1 = as.numeric(x = data1[x, ])[!is.na(as.numeric(x = data1[x, ]))],
      x2 = as.numeric(x = data2[x, ])[!is.na(as.numeric(x = data2[x, ]))]
    ))
  }
  if (print.bar) {
    p_val <- unlist(x = pblapply(X = mygenes, FUN = run.one, cl = NT))
  } else {
    p_val <- unlist(x = mclapply(X = mygenes, FUN = run.one, mc.cores = NT))
  }
  p_val[is.na(x = p_val)] <- 1
  toRet <- data.frame(p_val, row.names = mygenes)
  return(toRet)
}
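# Illustrative call (hypothetical matrices; row names are gene identifiers,
# added for documentation):
#   m1 <- matrix(runif(20), nrow = 2, dimnames = list(c("geneA", "geneB"), NULL))
#   m2 <- matrix(runif(20), nrow = 2, dimnames = list(c("geneA", "geneB"), NULL))
#   TobitDiffExpTest(data1 = m1, data2 = m2, mygenes = c("geneA", "geneB"),
#                    print.bar = FALSE, NT = 1)
# returns a data.frame of p-values with one row per gene.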
# internal function to run the Tobit DE test for a single gene:
# likelihood-ratio test between a null tobit model and one with a group term
#
#' @importFrom stats pchisq logLik
#
DifferentialTobit <- function(x1, x2, lower = 0, upper = 1) {
  my.df <- data.frame(
    c(x1, x2),
    c(rep(x = 0, length(x = x1)), rep(x = 1, length(x = x2)))
  )
  colnames(x = my.df) <- c("Expression", "Stat")
  # null model: Expression ~ 1
  model.v1 <- TobitFitter(
    x = my.df,
    modelFormulaStr = "Expression~1",
    lower = lower,
    upper = upper
  )
  # alternative model: Expression ~ Stat + 1
  model.v2 <- TobitFitter(
    x = my.df,
    modelFormulaStr = "Expression~Stat+1",
    lower = lower,
    upper = upper
  )
  if (!is.null(x = model.v1) && !is.null(x = model.v2)) {
    p <- pchisq(
      q = 2 * (logLik(object = model.v2) - logLik(object = model.v1)),
      df = 1,
      lower.tail = FALSE
    )
  } else {
    p <- 0
  }
  return(p)
}
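# Note (added for documentation): the p-value above comes from the standard
# likelihood-ratio statistic 2 * (logLik(alternative) - logLik(null)), compared
# against a chi-squared distribution with 1 degree of freedom (the single added
# Stat coefficient).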
# internal helper to fit a single tobit model for the Tobit DE test
# credit to Cole Trapnell for this
#
#' @importFrom stats as.formula
#' @importFrom VGAM vgam
#' @importFrom VGAM tobit
#
TobitFitter <- function(x, modelFormulaStr, lower = 1, upper = Inf) {
  # returns the fitted VGAM tobit model, or NULL if the fit fails
  tryCatch(
    expr = return(suppressWarnings(expr = VGAM::vgam(
      formula = as.formula(object = modelFormulaStr),
      family = VGAM::tobit(Lower = lower, Upper = upper),
      data = x
    ))),
    error = function(e) { NULL }
  )
}
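# Illustrative call (hypothetical data, added for documentation):
#   df <- data.frame(Expression = c(0, 0, 1.2, 2.5, 0, 3.1), Stat = rep(0:1, each = 3))
#   TobitFitter(x = df, modelFormulaStr = "Expression~Stat+1", lower = 0, upper = 1)
# returns a fitted VGAM::vgam tobit model, or NULL if fitting raises an error.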
# internal function to run the beta-binomial test (bbTest) for a single splice junction
#' @importFrom VGAM vglm
#' @importFrom VGAM fitted
#' @importFrom stats pchisq
#' @importFrom data.table setDT
bb_model_test <- function(object, sjgr, sj, cells.1, cells.2, maxit = 10, confounder = NULL) {
  if (is.null(sjgr)) {
    sjgr <- as(row.names(object), "GRanges")
  }
  if (!is.null(confounder)) {
    if (!is.element(confounder, colnames(colData(object)))) {
      stop("confounder must be one of colnames(colData(object))")
    }
  }
  # test the cluster of junctions sharing the start coordinate of sj
  if (sum(with(as.data.frame(sjgr), paste0(seqnames, ":", start)) ==
            with(as.data.frame(as(sj, "GRanges")), paste0(seqnames, ":", start))) > 1) {
    clu <- as.character(sjgr[with(as.data.frame(sjgr), paste0(seqnames, ":", start)) ==
                               with(as.data.frame(as(sj, "GRanges")), paste0(seqnames, ":", start))])
    bdata1 <- data.frame(
      N = colSums(CountSJ(object = object, SJ = clu, cells = cells.1)),
      SJ = as.numeric(CountSJ(object = object, SJ = sj, cells = cells.1)),
      Group = "A",
      Batch = tryCatch(FetchMeta(object = object, variable = confounder, cells = cells.1),
                       error = function(e) 1)
    )
    bdata2 <- data.frame(
      N = colSums(CountSJ(object = object, SJ = clu, cells = cells.2)),
      SJ = as.numeric(CountSJ(object = object, SJ = sj, cells = cells.2)),
      Group = "B",
      Batch = tryCatch(FetchMeta(object = object, variable = confounder, cells = cells.2),
                       error = function(e) 1)
    )
    bdata <- rbind(bdata1, bdata2)
    if (length(unique(bdata$Batch)) == 1) {
      # no confounder: compare an intercept-only model to one with a Group term
      fit0 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ 1, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
      fit1 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ Group, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
    } else {
      # with confounder: both models adjust for Batch, the full model adds Group
      fit0 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ Batch + 1, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
      fit1 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ Batch + Group, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
    }
    pvalue <- tryCatch(
      1 - pchisq(2 * (fit1@criterion$loglikelihood - fit0@criterion$loglikelihood),
                 abs(fit0@df.residual - fit1@df.residual)),
      error = function(e) NA
    )
    beta.model <- tryCatch(VGAM::fitted(fit1), error = function(e) NULL)
    if (!is.null(beta.model)) {
      bdata$Fitted <- beta.model[, 1]
      setDT(bdata)
      EffectSize <- bdata[, mean(Fitted), by = Group][, abs(diff(V1))]
      res1 <- data.frame(EffectSize = EffectSize, p_val = pvalue)
      if (anyNA(res1)) res1 <- NULL
    } else {
      res1 <- NULL
    }
  } else {
    res1 <- NULL
  }
  # repeat the test for the cluster of junctions sharing the end coordinate of sj
  if (sum(with(as.data.frame(sjgr), paste0(seqnames, ":", end)) ==
            with(as.data.frame(as(sj, "GRanges")), paste0(seqnames, ":", end))) > 1) {
    clu <- as.character(sjgr[with(as.data.frame(sjgr), paste0(seqnames, ":", end)) ==
                               with(as.data.frame(as(sj, "GRanges")), paste0(seqnames, ":", end))])
    bdata1 <- data.frame(
      N = colSums(CountSJ(object = object, SJ = clu, cells = cells.1)),
      SJ = as.numeric(CountSJ(object = object, SJ = sj, cells = cells.1)),
      Group = "A",
      Batch = tryCatch(FetchMeta(object = object, variable = confounder, cells = cells.1),
                       error = function(e) 1)
    )
    bdata2 <- data.frame(
      N = colSums(CountSJ(object = object, SJ = clu, cells = cells.2)),
      SJ = as.numeric(CountSJ(object = object, SJ = sj, cells = cells.2)),
      Group = "B",
      Batch = tryCatch(FetchMeta(object = object, variable = confounder, cells = cells.2),
                       error = function(e) 1)
    )
    bdata <- rbind(bdata1, bdata2)
    if (length(unique(bdata$Batch)) == 1) {
      fit0 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ 1, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
      fit1 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ Group, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
    } else {
      fit0 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ Batch + 1, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
      fit1 <- tryCatch(VGAM::vglm(cbind(SJ, N - SJ) ~ Batch + Group, "betabinomial",
                                  data = bdata, trace = FALSE, maxit = maxit),
                       error = function(e) NA)
    }
    pvalue <- tryCatch(
      1 - pchisq(2 * (fit1@criterion$loglikelihood - fit0@criterion$loglikelihood),
                 abs(fit0@df.residual - fit1@df.residual)),
      error = function(e) NA
    )
    beta.model <- tryCatch(VGAM::fitted(fit1), error = function(e) NULL)
    if (!is.null(beta.model)) {
      bdata$Fitted <- beta.model[, 1]
      setDT(bdata)
      EffectSize <- bdata[, mean(Fitted), by = Group][, abs(diff(V1))]
      res2 <- data.frame(EffectSize = EffectSize, p_val = pvalue)
      if (anyNA(res2)) res2 <- NULL
    } else {
      res2 <- NULL
    }
  } else {
    res2 <- NULL
  }
  # keep the result with the smaller p-value; break ties by the larger effect size
  if (!is.null(res1) && !is.null(res2)) {
    if (res1$p_val == res2$p_val) {
      if (res1$EffectSize >= res2$EffectSize) {
        res <- res1
      } else {
        res <- res2
      }
    } else {
      if (res1$p_val < res2$p_val) {
        res <- res1
      } else {
        res <- res2
      }
    }
  } else {
    if (is.null(res1) && is.null(res2)) {
      res <- NA
    } else {
      if (is.null(res1)) {
        res <- res2
      } else {
        res <- res1
      }
    }
  }
  return(res)
}
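# Summary of the test above (added for documentation): for each candidate
# junction cluster, a beta-binomial GLM is fit for the junction count SJ out of
# the cluster total N. The null model (~ 1, or ~ Batch when a confounder is
# given) is compared to the full model that adds Group via a likelihood-ratio
# chi-squared test, and the effect size is the absolute difference in mean
# fitted junction usage between the two groups.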
# internal function to calculate AUC values
AUCMarkerTest <- function(data1, data2, mygenes, print.bar = TRUE) {
  myAUC <- unlist(x = lapply(
    X = mygenes,
    FUN = function(x) {
      tryCatch(DifferentialAUC(
        x = as.numeric(x = data1[x, ])[!is.na(as.numeric(x = data1[x, ]))],
        y = as.numeric(x = data2[x, ])[!is.na(as.numeric(x = data2[x, ]))]
      ), error = function(e) NA)
    }
  ))
  myAUC[is.na(x = myAUC)] <- 0
  if (print.bar) {
    iterate.fxn <- pblapply
  } else {
    iterate.fxn <- lapply
  }
  avg_diff <- unlist(x = iterate.fxn(
    X = mygenes,
    FUN = function(x) {
      return(
        mean(x = as.numeric(x = data1[x, ]), na.rm = TRUE) -
          mean(x = as.numeric(x = data2[x, ]), na.rm = TRUE)
      )
    }
  ))
  toRet <- data.frame(cbind(myAUC, avg_diff), row.names = mygenes)
  toRet <- toRet[rev(x = order(toRet$myAUC)), ]
  return(toRet)
}
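# Note (added for documentation): the returned data frame has one row per gene
# with the classification AUC (myAUC) and the difference in mean expression
# between the two groups (avg_diff), ordered from highest to lowest AUC.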
# internal function to calculate the AUC for a single gene
#' @importFrom ROCR prediction performance
DifferentialAUC <- function(x, y) {
  prediction.use <- prediction(
    predictions = c(x, y),
    labels = c(rep(x = 1, length(x = x)), rep(x = 0, length(x = y))),
    label.ordering = 0:1
  )
  perf.use <- performance(prediction.obj = prediction.use, measure = "auc")
  auc.use <- round(x = perf.use@y.values[[1]], digits = 3)
  return(auc.use)
}
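# Illustrative usage (hypothetical values, added for documentation): cells in x
# are labeled 1 and cells in y are labeled 0, so an AUC near 1 means the gene is
# higher in x and an AUC near 0 means it is higher in y.
#   DifferentialAUC(x = c(3, 4, 5, 6), y = c(0, 0, 1, 2))   # returns 1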
# internal function for variable selection from random forests using OOB error
#' @importFrom varSelRF varSelRF
RandomOOB <- function(x, y) {
  tryCatch(
    as.numeric(varSelRF::varSelRF(
      xdata = matrix(c(x, y)),
      Class = factor(rep(c("x", "y"), c(length(x), length(y)))),
      ntree = 100, ntreeIterat = 100
    )$initialImportances),
    error = function(e) NA
  )
}
# given a UMI count matrix, estimate NB theta parameter for each gene
# and use fit of relationship with mean to assign regularized theta to each gene
#
#' @importFrom stats glm loess poisson
#' @importFrom utils txtProgressBar setTxtProgressBar
#
RegularizedTheta <- function(cm, latent.data, min.theta = 0.01, bin.size = 128) {
  genes.regress <- rownames(x = cm)
  bin.ind <- ceiling(x = 1:length(x = genes.regress) / bin.size)
  max.bin <- max(bin.ind)
  message('Running Poisson regression (to get initial mean), and theta estimation per gene')
  pb <- txtProgressBar(min = 0, max = max.bin, style = 3, file = stderr())
  theta.estimate <- c()
  for (i in 1:max.bin) {
    genes.bin.regress <- genes.regress[bin.ind == i]
    bin.theta.estimate <- unlist(
      x = parallel::mclapply(
        X = genes.bin.regress,
        FUN = function(j) {
          return(as.numeric(x = MASS::theta.ml(
            y = cm[j, ],
            mu = glm(
              formula = cm[j, ] ~ .,
              data = latent.data,
              family = poisson
            )$fitted
          )))
        }
      ),
      use.names = FALSE
    )
    theta.estimate <- c(theta.estimate, bin.theta.estimate)
    setTxtProgressBar(pb = pb, value = i)
  }
  close(con = pb)
  UMI.mean <- apply(X = cm, MARGIN = 1, FUN = mean)
  var.estimate <- UMI.mean + (UMI.mean ^ 2) / theta.estimate
  for (span in c(1/3, 1/2, 3/4, 1)) {
    fit <- loess(
      formula = log10(x = var.estimate) ~ log10(x = UMI.mean),
      span = span
    )
    if (!any(is.na(x = fit$fitted))) {
      message(sprintf(
        'Used loess with span %1.2f to fit mean-variance relationship\n',
        span
      ))
      break
    }
  }
  if (any(is.na(x = fit$fitted))) {
    stop('Problem when fitting NB gene variance in RegularizedTheta - NA values were fitted.')
  }
  theta.fit <- (UMI.mean ^ 2) / ((10 ^ fit$fitted) - UMI.mean)
  names(x = theta.fit) <- genes.regress
  to.fix <- theta.fit <= min.theta | is.infinite(x = theta.fit)
  if (any(to.fix)) {
    message(
      'Fitted theta below ', min.theta, ' for ', sum(to.fix),
      ' genes, setting them to ', min.theta
    )
    theta.fit[to.fix] <- min.theta
  }
  return(theta.fit)
}
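# Note (added for documentation): the regularization above relies on the
# negative binomial mean-variance relation var = mu + mu^2 / theta. Per-gene
# variances implied by the raw theta estimates are smoothed against the mean
# with loess on the log10 scale, and the smoothed variances are converted back
# to theta via theta = mu^2 / (var - mu), with a floor of min.theta.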
# compare two negative binomial regression models
# model one uses only common factors (com.fac)
# model two additionally uses group factor (grp.fac)
#
#' @importFrom stats glm anova coef
#
NBModelComparison <- function(y, theta, latent.data, com.fac, grp.fac) {
  tab <- as.matrix(x = table(y > 0, latent.data[, grp.fac]))
  freqs <- tab['TRUE', ] / apply(X = tab, MARGIN = 2, FUN = sum)
  fit2 <- 0
  fit4 <- 0
  try(
    expr = fit2 <- glm(
      formula = y ~ .,
      data = latent.data[, com.fac, drop = FALSE],
      family = MASS::negative.binomial(theta = theta)
    ),
    silent = TRUE
  )
  try(
    fit4 <- glm(
      formula = y ~ .,
      data = latent.data[, c(com.fac, grp.fac)],
      family = MASS::negative.binomial(theta = theta)
    ),
    silent = TRUE
  )
  if (class(x = fit2)[1] == 'numeric' || class(x = fit4)[1] == 'numeric') {
    message('One of the glm.nb calls failed')
    return(c(rep(x = NA, 5), freqs))
  }
  pval <- anova(fit2, fit4, test = 'Chisq')$'Pr(>Chi)'[2]
  foi <- 2 + length(x = com.fac)
  log2.fc <- log2(x = 1 / exp(x = coef(object = fit4)[foi]))
  ret <- c(
    fit2$deviance,
    fit4$deviance,
    pval,
    coef(object = fit4)[foi],
    log2.fc,
    freqs
  )
  names(x = ret) <- c(
    'dev1',
    'dev2',
    'pval',
    'coef',
    'log2.fc',
    'freq1',
    'freq2'
  )
  return(ret)
}
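# Note (added for documentation): the returned vector contains the deviances of
# the reduced and full models, the chi-squared ANOVA p-value for adding the
# group factor, the group coefficient on the natural-log scale, the
# corresponding log2 fold change (log2(1 / exp(coef)), i.e. with the sign
# flipped), and the fraction of cells with y > 0 in each group.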
#' Apply a ceiling and floor to all values in a matrix
#'
#' @param data Matrix or data frame
#' @param min all values below this min value will be replaced with min
#' @param max all values above this max value will be replaced with max
#' @return Returns the input matrix (or data frame) with all values clipped to the range [min, max]
#' @export
#'
#' @examples
#' mat <- matrix(data = rbinom(n = 25, size = 20, prob = 0.2), nrow = 5)
#' mat
#' MinMax(data = mat, min = 4, max = 5)
#'
MinMax <- function(data, min, max) {
  data2 <- data
  data2[data2 > max] <- max
  data2[data2 < min] <- min
  return(data2)
}