#' Poisson log likelihood
loglik <- function(mu, y)
y * log(mu) - mu - lgamma(y + 1)
#' expit function
expit <- function(x)
1 / (1 + exp(-x))
#' Estimate Bias and Dropout functions for a single cell
#' @export
#' @importFrom scam scam
norm_model <-
function(bulk,
sc,
mu0_cons = 0.5,
update_mu = F,
max_iter = 100,
tol = 1e-20,
verbose = F,
init_lo,
init_hi) {
fit_df <- data.frame(eta = bulk, y = sc)
# initial values
g <- rep(sum(sc < init_lo) / length(sc), nrow(fit_df))
l_h <- exp(loglik(init_hi, fit_df$y) + log(1 - g))
l_mu0 <- exp(loglik(mu0_cons, fit_df$y) + log(g))
p_a <- l_h / (l_mu0 + l_h)
# numerical fixes
# when count is too big
p_a[l_mu0 == 0 & l_h == 0] <- 1 - .Machine$double.neg.eps
p_a[p_a == 0] <- .Machine$double.xmin
# browser()
converge_flag <- F
mu0 <- mu0_cons
for (iter in 1:max_iter) {
# fit bias model
h_model <-
scam(
y ~ s(eta, k = 5, m = 2, bs = "mpi"),
family = poisson(link = "log"),
data = fit_df,
weights = p_a
)
# fit dropout model
g_model <-
scam((1 - p_a) ~ s(eta, k = 5, m = 2, bs = "mpd"),
family = binomial(link = "logit"),
data = fit_df
)
# optional mu0 model
if (update_mu) {
mu0_model <-
glm(
y ~ 1,
family = poisson(link = "log"),
data = fit_df,
weights = 1 - p_a
)
# mu0 <- min(fitted(mu0_model)[1], mu0_cons)
mu0 = fitted(mu0_model)[1]
}
g <- fitted(g_model)
l_h <- exp(loglik(fitted(h_model), fit_df$y) + log(1 - g))
l_mu0 <- exp(loglik(mu0, fit_df$y) + log(g))
p_a_new <- l_h / (l_mu0 + l_h)
p_a_new[(l_mu0 == 0 & l_h == 0)] <- 1 - .Machine$double.neg.eps
p_a_new[p_a_new == 0] <- .Machine$double.xmin
if (mean(abs(p_a_new - p_a)) < tol) {
converge_flag <- T
if (verbose) {
message("Converged")
}
break
}
if (verbose) {
#TODO: check overall loglik here
message(paste0("Iter: ", iter,
", max change in prob: ",
max(abs(p_a_new - p_a)), "."))
# print(p_a_new)
}
p_a <- p_a_new
}
model <-
list(
h_model = h_model,
g_model = g_model,
mu0 = mu0,
p_a = p_a,
converged = converge_flag
)
model
}
#' Evaluate posterior likelihood given a vector of priors
#' @importFrom scam predict.scam
model_loglik_gene <- function(model, eta, y) {
g <- expit(predict(model$g_model, data.frame(eta = eta)))
h_mu <- exp(predict(model$h_model, data.frame(eta = eta)))
l_high <- exp(loglik(h_mu, y))
l_low <- exp(loglik(model$mu0, y))
l <- g * l_low + (1 - g) * l_high
l
}
#' Nearest neighbor imputation of missing
#' @importFrom RANN nn2
#' @export
nn_impute <- function(bulk_mat, sc_mat, k) {
out <- sc_mat
for (cell_id in 1:ncol(sc_mat)) {
# missing in cell
m_idx <- which(is.na(sc_mat[, cell_id]))
if (length(m_idx) == 0) {
next
}
nn_out <-
nn2(t(bulk_mat[-m_idx,]), t(sc_mat[-m_idx, cell_id, drop = F]), k = k)
out[m_idx, cell_id] <-
rowMeans(bulk_mat[m_idx, nn_out$nn.idx[1,]])
}
out
}
#' Make grid sequence of h functions from normalization model
#' @export
#' @importFrom scam predict.scam
make_hseq <- function(norm_models,
seq_len = 20,
seq_max = 12) {
x_seq <- seq(0, seq_max, length = seq_len)
y_seq_mat <- matrix(, length(norm_models), seq_len)
for (j in 1:length(norm_models)) {
y_seq_mat[j,] <-
predict(norm_models[[j]]$h_model, data.frame(eta = x_seq))
}
y_seq_mat
}
#' Make grid sequence of b functions from normalization model
#' @export
#' @importFrom scam predict.scam
make_gseq <- function(norm_models,
seq_len = 20,
seq_max = 12) {
x_seq <- seq(0, seq_max, length = seq_len)
y_seq_mat <- matrix(, length(norm_models), seq_len)
for (j in 1:length(norm_models)) {
y_seq_mat[j,] <-
predict(norm_models[[j]]$g_model, data.frame(eta = x_seq))
}
y_seq_mat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.