knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(message = FALSE) knitr::opts_chunk$set(warning = FALSE) library(ProvidenciaChemo) library(tidyverse) theme_set(theme_classic())
# simulate for power analysis in choice assay: choice_power <- choice %>% filter(!date %in% c("2019_10_19", "2019_10_22", "2019_11_5", "2019_11_6", "2019_11_8"), genotype == "N2") # stan_mod <- rstanarm::stan_glmer(data = choice_power %>% filter(genotype == "N2"), # cbind(N_Test, N_OP50) ~ strain + (1|plateID) + (strain|date), # family = binomial, # cores = 6, # chains = 6, # adapt_delta = 0.99) # use a brms model to estimate unequal variance and the variance structure of the random effects structure library(brms) set.seed(7654) CHAINS <- 4 ITER <- 2000 WARMUP <- 1000 BAYES_SEED <- 1234 options(mc.cores = parallel::detectCores()) # Use all cores brms_uneq <- brm( bf(logit_p ~ strain, sigma ~ strain), data = choice_power, prior = c(set_prior("normal(0, 5)", class = "Intercept"), set_prior("normal(0, 1)", class = "b"), set_prior("cauchy(0, 1)", class = "b", dpar = "sigma")), chains = CHAINS, iter = ITER, warmup = WARMUP, seed = BAYES_SEED, file = "brms_uneq" ) brms_eq <- brm( bf(logit_p ~ strain), data = choice_power, prior = c(set_prior("normal(0, 5)", class = "Intercept"), set_prior("normal(0, 1)", class = "b")), chains = CHAINS, iter = ITER, warmup = WARMUP, seed = BAYES_SEED, file = "brms_eq" ) summary(brms_uneq) summary(brms_eq) model_params <- brms_eq$fit %>% broom::tidy() uneq_model_params <- brms_uneq %>% broom::tidy() plot(brm)
#compare fit values to true data for power simulation: #double check that SD is on the right scale first choice %>% filter(genotype == "N2", !date %in% c("2019_10_19", "2019_10_22", "2019_11_5", "2019_11_6", "2019_11_8")) %>% summarize(sd = sd(logit_p)) model_params[4, 2] Nplates <- 30 sim_data <- expand_grid( plate = factor(1:Nplates), genotype = "N2", strain = factor(c("OP50", "JUb39"), levels = c("OP50", "JUb39")) ) %>% arrange(strain) %>% mutate(logit_p = c( rnorm( Nplates, mean = model_params[1, 2] %>% as.numeric(), # b_intercept, estimat is second column sd = model_params[4, 2] %>% as.numeric() ), # sigma rnorm( Nplates, mean = (model_params[1, 2] + model_params[2, 2]) %>% as.numeric(), # b_intercept, estimat is second column sd = model_params[4, 2] %>% as.numeric() ) )) sim_data %>% ggplot(aes(x = strain, y = logit_p)) + ggbeeswarm::geom_quasirandom(aes(color = "strain")) sim_data %>% ggplot(aes(x = strain, y = logit_p)) + geom_bardots(fillvar = strain, dotvar = strain) + scale_x_discrete(labels = function(strain) str_wrap(strain, width = 10)) + labs(y = "Test bacteria preference (log-odds)") + guides(colour = FALSE) + scale_color_plot(palette = "grey-blue-green", drop = TRUE) + scale_fill_plot(palette = "grey-blue-green", drop = TRUE) ### data looks reasonable, now make a function to replicate and predict power by N in 2-sample t-test: sim_data %$% t.test(logit_p ~ strain)
power_func <- function(Nplates = 30, mean_OP50 = as.numeric(model_params[1, 2]), mean_JUb39 = as.numeric(model_params[1, 2] + model_params[2,2]), sigma = as.numeric(model_params[4, 2]))) { sim_data <- expand_grid( plate = factor(1:Nplates), genotype = "N2", strain = factor(c("OP50", "JUb39"), levels = c("OP50", "JUb39")) ) %>% arrange(strain) %>% mutate(logit_p = c( rnorm( Nplates, mean = mean_OP50, # b_intercept, estimat is second column sd = sigma ), # sigma rnorm( Nplates, mean = mean_JUb39, # b_intercept, estimat is second column sd = sigma ) )) p_value <- sim_data %$% t.test(logit_p ~ strain)$p.value return(p_value) } power_func_uneqVar <- function(Nplates = 30, mean_OP50 = as.numeric(uneq_model_params[1, 2]), mean_JUb39 = as.numeric(uneq_model_params[1, 2] + uneq_model_params[3,2]), sigmaO = exp(as.numeric(uneq_model_params[2, 2])), sigmaJ = exp(as.numeric(uneq_model_params[2, 2] + uneq_model_params[5, 2]))) { sim_data <- expand_grid( plate = factor(1:Nplates), genotype = "N2", strain = factor(c("OP50", "JUb39"), levels = c("OP50", "JUb39")) ) %>% arrange(strain) %>% mutate(logit_p = c( rnorm( Nplates, mean = mean_OP50, # b_intercept, estimat is second column sd = sigmaO ), # sigma rnorm( Nplates, mean = mean_JUb39, # b_intercept, estimat is second column sd = sigmaJ ) )) p_value <- sim_data %$% t.test(logit_p ~ strain)$p.value tibble(p_value = p_value) } simN20 <- replicate(1000, power_func(Nplates = 20)) pN20 <- simN20[simN20 < 0.05] %>% length() simN30 <- replicate(1000, power_func(Nplates = 30)) pN30 <- simN30[simN30 < 0.05] %>% length() simN35 <- replicate(1000, power_func(Nplates = 35)) pN35 <- simN35[simN35 < 0.05] %>% length() simN40 <- replicate(1000, power_func(Nplates = 40)) pN40 <- simN40[simN40 < 0.05] %>% length() simN50 <- replicate(1000, power_func(Nplates = 50)) pN50 <- simN50[simN50 < 0.05] %>% length() tibble(Nplates = c(20,30,35,40,50), power = c(pN20,pN30,pN35,pN40,pN50)/1000) %>% ggplot(aes(x = Nplates, y = power)) + geom_line() purrr::map(c(20,30,35,40,50), function(Nplates = .) { replicate(1000, power_func_uneqVar(Nplates = Nplates), simplify = FALSE) %>% bind_rows() %>% filter(p_value < 0.05) %>% nrow() %>% tibble(power = ./1000, n = Nplates) }) %>% bind_rows()
power_func_ANOVA <- function(Nplates = 30, mean_OP50 = as.numeric(model_params[1, 2]), mean_JUb39 = as.numeric(model_params[1, 2] + model_params[2,2]), mean_Mut1_OP = as.numeric(model_params[1, 2]), mean_Mut1_JU = as.numeric(model_params[1, 2]), mean_Mut2_OP = as.numeric(model_params[1, 2]), mean_Mut2_JU = as.numeric(model_params[1, 2]), sigma = as.numeric(model_params[4, 2])) { means <- c( mean_OP50, mean_JUb39, mean_Mut1_OP, mean_Mut1_JU, mean_Mut2_OP, mean_Mut2_JU ) sim_data <- expand_grid( plate = factor(1:Nplates), genotype = factor(c("N2","Mut1", "Mut2"), levels = c("N2", "Mut1", "Mut2")), strain = factor(c("OP50", "JUb39"), levels = c("OP50", "JUb39")) ) %>% arrange(genotype, strain) %>% mutate(logit_p = purrr::map(means, ~ rnorm(mean = ., n = Nplates, sd = sigma)) %>% unlist()) model_stats <- sim_data %>% lm(data = ., logit_p ~ genotype * strain) %>% summary() %>% broom::tidy() return(tibble(JUbEffect = model_stats$estimate[4], JUb_pval = model_stats$p.value[4], Mut1_effect = model_stats$estimate[5], Mut1_pval = model_stats$p.value[5], Mut2_effect = model_stats$estimate[6], Mut2_pval = model_stats$p.value[6])) } simN20 <- replicate(1000, power_func_ANOVA(Nplates = 20), simplify = FALSE) %>% bind_rows() %>% mutate(Nplates = 20) %>% mutate( main_effect = case_when( JUbEffect > 0 & JUb_pval < 0.05 ~ 1, TRUE ~ 0), Int1_effect = case_when( Mut1_effect < 0 & Mut1_pval < 0.05 ~ 1, TRUE ~ 0), Int2_effect = case_when( Mut2_effect < 0 & Mut2_pval < 0.05 ~ 1, TRUE ~ 0) ) %>% group_by(Nplates, main_effect, Int1_effect, Int2_effect) %>% tally() nReps <- 1000 test_data <- purrr::map(c(20,30,35,40,50,60), function(Nplates = .) { replicate(nReps, power_func_ANOVA(Nplates = Nplates), simplify = FALSE) %>% bind_rows() %>% mutate(Nplates = Nplates) %>% mutate( main_effect = case_when( JUbEffect > 0 & JUb_pval < 0.05 ~ 1, TRUE ~ 0), Int1_effect = case_when( main_effect == 1 & Mut1_effect < 0 & Mut1_pval < 0.05 ~ 1, TRUE ~ 0), Int2_effect = case_when( main_effect == 1 & Mut2_effect < 0 & Mut2_pval < 0.05 ~ 1, TRUE ~ 0), any_interaction = case_when( Int1_effect == 1 | Int2_effect == 1 ~ 1, TRUE ~ 0), both_interactions = case_when( Int1_effect == 1 & Int2_effect == 1 ~ 1, TRUE ~ 0 ) ) %>% group_by(Nplates, main_effect, any_interaction, both_interactions) %>% tally() }) %>% bind_rows() main_effects <- test_data %>% group_by(Nplates, main_effect) %>% summarize(total = sum(n)) interaciton_effects <- test_data %>% group_by(Nplates, any_interaction, both_interactions) %>% summarize(total = sum(n)) test_data_noInt <- purrr::map(c(20,30,35,40,50,60), function(Nplates = .) { replicate(nReps, power_func_ANOVA(Nplates = Nplates, mean_JUb39 = as.numeric(model_params[1, 2] + model_params[2,2]), mean_Mut1_JU = as.numeric(model_params[1, 2] + model_params[2,2]), mean_Mut2_JU = as.numeric(model_params[1, 2] + model_params[2,2])), simplify = FALSE) %>% bind_rows() %>% mutate(Nplates = Nplates) %>% mutate( main_effect = case_when( JUbEffect > 0 & JUb_pval < 0.05 ~ 1, TRUE ~ 0), Int1_effect = case_when( main_effect == 1 & Mut1_effect < 0 & Mut1_pval < 0.05 ~ 1, TRUE ~ 0), Int2_effect = case_when( main_effect == 1 & Mut2_effect < 0 & Mut2_pval < 0.05 ~ 1, TRUE ~ 0), any_interaction = case_when( Int1_effect == 1 | Int2_effect == 1 ~ 1, TRUE ~ 0), both_interactions = case_when( Int1_effect == 1 & Int2_effect == 1 ~ 1, TRUE ~ 0 ) ) %>% group_by(Nplates, main_effect, any_interaction, both_interactions) %>% tally() }) %>% bind_rows() tibble(Nplates = c(20,30,35,40,50), power = c(pN20,pN30,pN35,pN40,pN50)/1000) %>% ggplot(aes(x = Nplates, y = power)) + geom_line()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.