#' @param generator_type either "dm" or "nb_resample" (should be equivalent in practice, but this lets us test)
#' @param model_type either "dm" or "nb" which tells the Stan model which method to use (once again should be equivalent but this lets us test numerical and other issues)
generate_data_dirichlet_multinomial_base <- function(G, sums, lambda_raw_prior, theta_given = TRUE, alpha_given = TRUE,
alpha_prior_log_mean = 3, alpha_prior_log_sd = 1,
theta_prior_log_mean = log(5), theta_prior_log_sd = 1, generator_type = "dm",
model_type = "nb") {
N = length(sums)
counts = array(-1, c(N, G))
#lambda_raw = MCMCpack::rdirichlet(1, rep(lambda_raw_prior,G))
lambda_raw = rlnorm(G, 0, lambda_raw_prior)
lambda_raw = lambda_raw / sum(lambda_raw)
theta = rlnorm(1,theta_prior_log_mean, theta_prior_log_sd)
alpha = rlnorm(1, alpha_prior_log_mean, alpha_prior_log_sd)
if(generator_type == "dm") {
single_draw_func <- single_draw_dirichlet_multinom
} else if(generator_type == "nb_resample") {
single_draw_func <- single_draw_nb_fano_resample
} else {
stop("Invalid generator_type")
}
for(n in 1:N) {
counts[n,] = single_draw_func(lambda_raw * sums[n] * alpha, sums[n], theta)
}
data = list(
observed = list(
N = N,
G = G,
counts = counts,
theta_data = if(theta_given) { array(theta, 1) } else {numeric(0)},
theta_given = theta_given,
alpha_data = if(alpha_given) { array(alpha, 1) } else {numeric(0)},
alpha_given = alpha_given,
lambda_raw_prior = lambda_raw_prior,
variant = if(model_type == "dm") { 0 } else if (model_type == "nb") { 1} else stop("Invalid model_type")
),
true = list(lambda_raw = lambda_raw,
theta_param = if(!theta_given) { array(theta, 1) } else {numeric(0)},
alpha_param = if(!alpha_given) { array(alpha, 1) } else {numeric(0)}
)
)
if(theta_given) {
data$observed$theta_prior_log_mean = numeric(0)
data$observed$theta_prior_log_sd = numeric(0)
} else {
data$observed$theta_prior_log_mean = array(theta_prior_log_mean, 1)
data$observed$theta_prior_log_sd = array(theta_prior_log_sd, 1)
}
if(alpha_given) {
data$observed$alpha_prior_log_mean = numeric(0)
data$observed$alpha_prior_log_sd = numeric(0)
} else {
data$observed$alpha_prior_log_mean = array(alpha_prior_log_mean, 1)
data$observed$alpha_prior_log_sd = array(alpha_prior_log_sd, 1)
}
data
}
#'
single_draw_dirichlet_multinom <- function(lambda, k, theta) {
dirichlet_draw = MCMCpack::rdirichlet(1, (1/theta) * lambda)
rmultinom(1, k, dirichlet_draw)
}
single_draw_nb_fano_resample <- function(lambda, k, theta, num_trials = 100) {
G = length(lambda)
for(i in 1:100) {
raw_counts_draw = rnbinom(G, mu = lambda, size = lambda / theta)
if(sum(raw_counts_draw) >= num_trials) {
break;
}
if(i == 5) {
warning("Neded over 5 draws")
}
}
if(sum(raw_counts_draw) < k) {
stop("Could not draw large enough")
}
resample_draw(raw_counts_draw, k)
}
single_draw_nb_resample <- function(lambda, k, phi, num_trials = 100) {
G = length(lambda)
for(i in 1:100) {
raw_counts_draw = rnbinom(G, mu = lambda, size = phi)
if(sum(raw_counts_draw) >= k) {
break;
}
if(i == 5) {
warning("Neded over 5 draws")
}
}
if(sum(raw_counts_draw) < k) {
stop("Could not draw large enough")
}
resample_draw(raw_counts_draw, k)
}
generate_data_nb_multinomial <- function(G, sums, lambda_raw_sigma, phi_given = TRUE, alpha_given = TRUE,
alpha_prior_log_mean = 3, alpha_prior_log_sd = 1,
phi_prior_log_mean = log(5), phi_prior_log_sd = 1) {
N = length(sums)
counts = array(-1, c(N, G))
#lambda_raw = MCMCpack::rdirichlet(1, rep(lambda_raw_prior,G))
lambda_raw = softmax(rnorm_sum_to_zero(G) * lambda_raw_sigma)
phi = rlnorm(1,phi_prior_log_mean, phi_prior_log_sd)
alpha = 1 + rlnorm(1, alpha_prior_log_mean, alpha_prior_log_sd)
for(n in 1:N) {
counts[n,] = single_draw_nb_resample(lambda_raw * sums[n] * alpha, sums[n], phi)
}
data = list(
observed = list(
N = N,
G = G,
counts = counts,
phi_data = if(phi_given) { array(phi, 1) } else {numeric(0)},
phi_given = phi_given,
alpha_data = if(alpha_given) { array(alpha, 1) } else {numeric(0)},
alpha_given = alpha_given,
lambda_raw_sigma = lambda_raw_sigma
),
true = list(lambda_raw = lambda_raw,
phi_param = if(!phi_given) { array(phi, 1) } else {numeric(0)},
alpha_param = if(!alpha_given) { array(alpha, 1) } else {numeric(0)}
)
)
if(phi_given) {
data$observed$phi_prior_log_mean = numeric(0)
data$observed$phi_prior_log_sd = numeric(0)
} else {
data$observed$phi_prior_log_mean = array(phi_prior_log_mean, 1)
data$observed$phi_prior_log_sd = array(phi_prior_log_sd, 1)
}
if(alpha_given) {
data$observed$alpha_prior_log_mean = numeric(0)
data$observed$alpha_prior_log_sd = numeric(0)
} else {
data$observed$alpha_prior_log_mean = array(alpha_prior_log_mean, 1)
data$observed$alpha_prior_log_sd = array(alpha_prior_log_sd, 1)
}
data
}
generate_data_multinomial <- function(G, sums, sigma_prior_sigma = 2) {
N = length(sums)
lambda_sigma <- abs(rnorm(1, 0, sigma_prior_sigma))
lambda <- rnorm_sum_to_zero(G) * lambda_sigma
counts = array(-1, c(N, G))
for(n in 1:N) {
counts[n, ] = rmultinom(1, sums[n], softmax(lambda))
}
data = list(
observed = list(
N = N,
G = G,
counts = counts,
my_prior = c(0,0),
holdout = array(0, N),
exposure = rowSums(counts),
is_prior_asymetric = 0,
generate_quantities = 0,
generate_log_lik = 1
),
true = list(
lambda_sigma = lambda_sigma,
lambda = lambda
)
)
}
generate_data_lognormal_multinomial <- function(G, sums, is_prior_assymetric = FALSE) {
N = length(sums)
lambda_prior <- abs(rnorm(1,0,1))
sigma <- abs(rnorm(1,0,1))
if(!is_prior_assymetric) {
lambda <- rnorm_sum_to_zero(G) * lambda_prior
} else {
gamma_b <- exp(digamma(lambda_prior))
lambda <- log(rgamma(G, lambda_prior, gamma_b)) %>% enforce_soft_sum_to_zero()
}
counts = array(-1, c(N, G))
theta_z = array(-1, c(N, G))
for(n in 1:N) {
#theta_z[n, ] <- rnorm_sum_to_zero(G)
#theta_z[n,] <- rnorm(G, 0, 1)
theta_z[n,] <- rnorm(G, 0, 1) %>% enforce_soft_sum_to_zero()
theta <- lambda + theta_z[n,] * sigma
counts[n, ] = rmultinom(1, sums[n], softmax(lambda))
}
data = list(
observed = list(
N = N,
G = G,
counts = counts,
my_prior = c(0,0),
holdout = array(0, N),
exposure = rowSums(counts),
is_prior_asymetric = if(is_prior_assymetric) {1} else {0},
generate_quantities = 0,
generate_log_lik = 1.
),
true = list(
lambda_prior = lambda_prior,
sigma = sigma,
#theta_z = theta_z,
#lambda = lambda,
theta = theta
)
)
}
generate_data_gamma_multinomial_helper <- function(G, sums, phi_generator) {
N = length(sums)
lambda_sigma = abs(rnorm(1, 0, 2))
lambda = rnorm_sum_to_zero(G) * lambda_sigma
#phi_raw_sigma = rgamma(1, shape = 5, rate = 5)#abs(rnorm(1, 0, 2))
phi <- phi_generator(lambda)
excess = 50 #This should be irrelevant to the model
alpha = (sums * (1 + excess)) / sum(exp(lambda))
counts = array(-1, c(N, G))
for(n in 1:N) {
#counts[n,] = rnbinom(G, mu = lambda, size = phi) %>% resample_draw(sums[n])
counts[n,] = single_draw_nb_resample(alpha[n] * exp(lambda), sums[n], phi)
}
list(
observed = list(
N = N,
G = G,
counts = counts,
holdout = array(0, N),
generate_quantities = 0,
generate_log_lik = 1
),
true = list(
lambda = lambda,
lambda_sigma = lambda_sigma,
#phi_raw_sigma = phi_raw_sigma,
phi = phi
#excess = excess
)
)
}
generate_data_gamma_multinomial <- function(G, sums) {
phi_generator <- function(lambda) {
phi_raw = abs(rnorm(G, 0, 1))
phi = 1/sqrt(phi_raw)
}
generate_data_gamma_multinomial_helper(G, sums, phi_generator)
}
generate_data_gamma_multinomial_2 <- function(G, sums, asymptDisp, extraPois) {
phi_generator <- function(lambda) { asymptDisp + extraPois / exp(lambda) }
result <- generate_data_gamma_multinomial_helper(G, sums, phi_generator)
result$true$phi <- NULL
result$true$asymptDisp <- asymptDisp
result$true$extraPois <- extraPois
result
}
generate_data_lognormal_test <- function(G, N) {
lambda_prior = 1 + abs(rnorm(1, 0, 1))
sigma = abs(rnorm(1, 0, 1))
lambda = rnorm(G, 0, lambda_prior)
theta_z = array(-1, c(N, G))
for(g in 1:G) {
theta_z[, g] <- rnorm_sum_to_zero(N)
}
direct_proportions = array(-1, c(N, G))
for(n in 1:N) {
theta <- lambda + theta_z[n, ] * sigma
direct_proportions[n, ] = rnorm(G, theta, 0.1)
}
data = list(
observed = list(
lambda_prior = lambda_prior,
N = N,
G = G,
direct_proportions = direct_proportions
),
true = list(
sigma = sigma,
theta_z = theta_z,
lambda = lambdax
)
)
}
generate_data_lognormal_simpler_test <- function(G, N) {
sigma = abs(rnorm(1, 0, 1))
theta_z = array(-1, c(N, G))
for(g in 1:G) {
theta_z[, g] <- rnorm_sum_to_zero(N)
#theta_z[, g] <- rnorm(N, 0, 1)
}
direct_proportions = array(-1, c(N, G))
for(n in 1:N) {
theta <- theta_z[n, ] * sigma
direct_proportions[n, ] = rnorm(G, theta, 0.1)
}
data = list(
observed = list(
N = N,
G = G,
direct_proportions = direct_proportions
),
true = list(
sigma = sigma,
theta_z = theta_z
)
)
}
generate_data_negbinomial_deseq2 <- function(G, N, asymptDisp, extraPois) {
my_prior <- c(5, 2)
lambda <- rnorm(G, my_prior[1], my_prior[2])
phi <- asymptDisp + extraPois / exp(lambda)
counts <- array(-1, c(N, G))
for(n in 1:N) {
counts[n,] <- rnbinom(G, mu = exp(lambda), size = phi)
}
list(
observed = list(
N = N,
G = G,
counts = counts,
my_prior = my_prior,
holdout = array(0, N),
generate_quantities = 0,
generate_log_lik = 0,
normalization = array(1, N)
),
true = list(
lambda = lambda,
asymptDisp = asymptDisp,
extraPois = extraPois
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.