In ku-awdc/eggSim: Simulating Datasets Representing Egg Reduction Rate Data

knitr::opts_chunk$set(echo = TRUE)
library(eggSim)

budget <- 1200
reduction <- 0.2
prevalence <- 0.5

# Decide on variability partitions:
cv_between <- 1.5
cv_within <- 0.75
cv_slide <- 0.25
cv_reduction <- 0
# Overall k is about 0.23 - seems sensible??
combined_k(cv_between, cv_within, cv_slide, cv_reduction)

#cv_between <- 1/0.4^0.5
#cv_within <- 1/2^0.5
#cv_slide <- 0
#cv_reduction <- 0
# Overall k is about 0.23 - seems sensible??
#combined_k(cv_between, cv_within, cv_slide, cv_reduction)

# Simulate data:
simdatagp <- cgpDataSim(R=10^3, N=budget, prevalence=prevalence, reduction=reduction, community_mean=24, cv_between=cv_between, cv_within=cv_within, cv_slide=cv_slide, cv_reduction=cv_reduction, grams=1/24)

gpmeans <- design_means(simdatagp, budget=budget, count=TRUE)

comnrs <- gpmeans %>%
  count(Design, Communities)
ggplot(comnrs, aes(x=Design, y=n, fill=factor(Communities))) +
  geom_col()

ggplot(gpmeans %>% filter(Communities > 0), aes(x=Design, y=ScreenProp)) +
  geom_boxplot()

ggplot(gpmeans %>% filter(Communities > 0), aes(x=Design, y=ArithmeticEfficacy)) +
  geom_hline(yintercept=100*(1-reduction)) +
  geom_boxplot()

ggplot(gpmeans %>% filter(Communities > 0), aes(x=ArithmeticEfficacy, col=Design)) +
  stat_ecdf()

gpmeans %>%
  mutate(Success = 100 * sum(Communities > 0) / n()) %>%
  filter(Communities > 0) %>%
  group_by(Design, Success) %>%
  summarise(MeanN = mean(N), ScreenProp = mean(ScreenProp), ObsPrev = mean(ObsPrev), Mean = mean(ArithmeticEfficacy), Median = median(ArithmeticEfficacy), Variance = var(ArithmeticEfficacy), .groups='drop')

Full simulation

The wrapper function now allows vectorisation of mean and/or reduction. Here are some reasonable parameter estimates as discussed:

# Number of replicates:
R <- 250
# Reduction (either geometric or arithmetic mean, depending on LP vs GP) - can be vectorised:
reduction <- seq(0.05,0.95,by=0.1)  #c(0.01, 0.025, seq(0.05,0.3,by=0.05), 0.4, 0.5)
# Community mean EPG (either geometric or arithmetic mean, depending on LP vs GP):
community_mean <- c(0.15, 0.25, 0.35, 0.5, 1)*24 #c(0.25, 0.5, 1:2, 5)*24

# CV between individuals (vectorised to correspond to community means above):
cv_between <- 1.5
# CV within individuals (day-to-day variation):
cv_within <- 0.75
# CV within individual and day (slide-to-slide variation):
cv_slide <- 0.25
# Variation in efficacy between individuals:
cv_reduction <- 0

# The budget (currently cannot be vectorised but I will add this):
budget <- 1200

To produce the output (takes a while):

results <- eggSim(reduction, budget=budget, community_mean=community_mean, cv_between=cv_between, cv_within=cv_within, cv_slide=cv_slide, cv_reduction=cv_reduction)

## A couple of specific examples:
results %>%
  filter(Target == 95, OverallMean == 6) %>%
  select(Design, ScreenProp, MeanN, Bias, MedianBias, Variance)

results %>%
  filter(Target == 95, OverallMean == 24) %>%
  select(Design, ScreenProp, MeanN, Bias, MedianBias, Variance)

results %>%
  filter(Target == 85, OverallMean == 6) %>%
  select(Design, ScreenProp, MeanN, Bias, MedianBias, Variance)

results %>%
  filter(Target == 85, OverallMean == 24) %>%
  select(Design, ScreenProp, MeanN, Bias, MedianBias, Variance)

ggplot(results %>% filter(!is.na(Bias)), aes(x=Target, y=Bias, ymin=LCI, ymax=UCI, col=Design, fill=Design)) +
  geom_hline(yintercept=0) +
  geom_ribbon(alpha=0.25, lwd=0) +
  geom_line() +
  geom_point() +
  facet_wrap(~ OverallMean, scales='free_y') +
  xlab('True arithmetic mean efficacy') +
  ylab("Mean bias")

ggplot(results %>% filter(!is.na(Bias)), aes(x=Target, y=ReductionRatio, col=factor(OverallMean))) +
  geom_hline(yintercept=1) +
  stat_smooth(formula=y~x, method='lm', se=FALSE) +
  geom_point() +
  facet_wrap(~ Design, scales='free_y') +
  xlab('True arithmetic mean efficacy') +
  ylab("Mean ratio of the reduction to target")

ggplot(results %>% filter(!is.na(Bias)), aes(x=Target, y=MedianBias, col=factor(OverallMean))) +
  geom_hline(yintercept=0) +
  stat_smooth(formula=y~x, method='lm', se=FALSE) +
  geom_point() +
  facet_wrap(~ Design, scales='free_y') +
  xlab('True arithmetic mean efficacy') +
  ylab("Median bias")

Or to look at variance of the estimator:

ggplot(results %>% filter(!is.na(Bias)), aes(x=OverallMean, y=VarianceMeanRatio, col=Design)) +
  #stat_smooth(formula=y~x, method='loess', se=FALSE) +
  geom_line() +
  geom_point() +
  facet_wrap(~ Target, scales='free_y') +
  xlab('Pre-treatment mean') +
  ylab("Variance of the ratio of the reduction to target") +
  scale_y_continuous(trans='log10') +
  scale_x_continuous(trans='log10')

ggplot(results %>% filter(!is.na(Bias)), aes(x=OverallMean, y=Variance, col=Design)) +
  #stat_smooth(formula=y~x, method='loess', se=FALSE) +
  geom_line() +
  geom_point() +
  facet_wrap(~ Target, scales='free_y') +
  xlab('Pre-treatment mean') +
  ylab("Variance of the efficacy estimate") +
  scale_y_continuous(trans='log10') +
  scale_x_continuous(trans='log10')