knitr::opts_chunk$set( collapse = TRUE, fig.width = 8, comment = "#>" ) options(rmarkdown.html_vignette.check_title = FALSE)
# Load dependencies require(tidyverse) require(magrittr) require(rstan) require(bridgesampling) require(loo) require(cowplot) require(ggrepel) require(xlsx) # Load stan package library(progressionEstimation) #pkgbuild::compile_dll() #roxygen2::roxygenize(package.dir = "..") # Define serotype categories and palettes vaccine_types <- list( "PCV7" = c("4","6B","9V","14","18C","19F","23F"), "PCV10" = c("1","5","7F"), "PCV13" = c("6A","3","19A"), "PCV15" = c("22F","33F"), "PCV20" = c("8","10A","11A","12F","15B/C"), "PPV23" = c("2","9N","17F","20") ) vaccine_colours <- c("No PCV" = "blue", "PCV7" = "red", "PCV10" = "orange", "PCV13" = "coral2", "PCV15" = "pink", "PCV20" = "purple", "PPV23" = "black") epi_colors <- c( "carriage" = "skyblue", "disease" = "cornflowerblue" ) model_name_labels <- c("null Poisson", "null negative binomial", "type-specific Poisson", "type-specific negative binomial", "study-adjusted Poisson", "study-adjusted negative binomial", "study-adjusted type-specific Poisson", "study-adjusted type-specific negative binomial" )
The dataset used in this analysis is contained in the S_pneumoniae_infant_serotype
object, which includes data from r S_pneumoniae_infant_serotype %>% dplyr::select(study) %>% dplyr::n_distinct()
studies on r S_pneumoniae_infant_serotype %>% dplyr::select(type) %>% dplyr::n_distinct()
serotypes. The distribution of samples between each study can be plotted, if they are ranked by the mean number of carriage and disease isolates:
``` {r Plot distribution by study}
studies_by_size <- S_pneumoniae_infant_serotype %>% group_by(study) %>% dplyr::mutate(total_count = sum(carriage)+sum(disease)) %>% dplyr::select(study,total_count) %>% dplyr::distinct() %>% dplyr::arrange(desc(total_count)) %>% dplyr::select(study) %>% dplyr::pull()
S_pneumoniae_infant_serotype %<>% dplyr::mutate(study = factor(study, levels = studies_by_size))
full_epi_data_plot <-
ggplot(S_pneumoniae_infant_serotype %>%
tidyr::pivot_longer(c(carriage,disease),
names_to = "Sample type",
values_to = "count"),
aes(x = study,
y = count,
colour = Sample type
,
fill = Sample type
)) +
geom_col() +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
scale_fill_manual(values = epi_colors) +
scale_colour_manual(values = epi_colors)
The total number of observations for each serotype can be similarly plotted: ``` {r Plot distribution by serotype} serotype_by_count <- S_pneumoniae_infant_serotype %>% group_by(type) %>% dplyr::mutate(total_count = sum(carriage)+sum(disease)) %>% dplyr::select(type,total_count) %>% dplyr::distinct() %>% dplyr::arrange(desc(total_count)) %>% dplyr::select(type) %>% dplyr::pull() S_pneumoniae_infant_serotype %<>% dplyr::mutate(type = factor(type, levels = serotype_by_count)) serotype_frequency_plot <- ggplot(S_pneumoniae_infant_serotype %>% tidyr::pivot_longer(c(carriage,disease), names_to = "Sample type", values_to = "count"), aes(x = type, y = count, colour = `Sample type`, fill = `Sample type`)) + geom_col() + xlab("Serotype") + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + scale_fill_manual(values = epi_colors) + scale_colour_manual(values = epi_colors) (combined_full_epi_data_plot <- cowplot::plot_grid(plotlist = list(full_epi_data_plot, serotype_frequency_plot), nrow = 2, ncol = 1, labels = "AUTO") ) ggsave(combined_full_epi_data_plot, file = "full_epi_data_plot.pdf", height = 12, width = 8 )
As many of these serotypes are rare, to facilitate model fitting, we exclude any serotype for which the maximum number of observations is below five:
``` {r Plot distribution by serotype in filtered dataset}
threshold_count <- 5
filtered_S_pneumoniae_infant_serotype <- S_pneumoniae_infant_serotype %>% dplyr::filter(carriage >= threshold_count | disease >= threshold_count)
filtered_S_pneumoniae_infant_serotype %<>% dplyr::mutate(study = factor(study, levels = studies_by_size[studies_by_size %in% unique(filtered_S_pneumoniae_infant_serotype$study)]))
filtered_S_pneumoniae_infant_serotype %<>% dplyr::mutate(type = factor(type, levels = serotype_by_count[serotype_by_count %in% unique(filtered_S_pneumoniae_infant_serotype$type)]))
infant_serotype_invasiveness <- progressionEstimation::process_input_data(filtered_S_pneumoniae_infant_serotype)
filtered_epi_data_plot <-
ggplot(filtered_S_pneumoniae_infant_serotype %>%
tidyr::pivot_longer(c(carriage,disease),
names_to = "Sample type",
values_to = "count"),
aes(x = study,
y = count,
colour = Sample type
,
fill = Sample type
)) +
geom_col() +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
scale_fill_manual(values = epi_colors) +
scale_colour_manual(values = epi_colors)
filtered_serotype_frequency_plot <-
ggplot(filtered_S_pneumoniae_infant_serotype %>%
tidyr::pivot_longer(c(carriage,disease),
names_to = "Sample type",
values_to = "count"),
aes(x = type,
y = count,
colour = Sample type
,
fill = Sample type
)) +
geom_col() +
xlab("Serotype") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
scale_fill_manual(values = epi_colors) +
scale_colour_manual(values = epi_colors)
(combined_filtered_epi_data_plot <- cowplot::plot_grid(plotlist = list(filtered_epi_data_plot, filtered_serotype_frequency_plot), nrow = 2, ncol = 1, labels = "AUTO") )
ggsave(combined_filtered_epi_data_plot, file = "filtered_epi_data_plot.pdf", height = 12, width = 8 )
This filtered dataset comprises `r S_pneumoniae_infant_serotype %>% dplyr::select(type) %>% dplyr::n_distinct()` serotypes. ## Fitting the statistical models The null model, in which each serotype $j$ causes invasive disease at the same rate in each location $i$, can be fitted using a Poisson distribution of disease isolates (**null Poisson** model): $$d_{i,j} \sim Pois(\nu\rho_{i,j}N_{i}t_{i})$$ ``` {r Null Poisson model fit, include = FALSE} n_chains <- 2 n_iter <- 2.5e4 n_core <- 2 null_poisson_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = FALSE, location_adjustment = FALSE, stat_model = "poisson", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
The null model can be fitted using a negative binomial distribution of disease isolates (null negative binomial model):
$$d_{i,j} \sim NegBin(\nu\rho_{i,j}N_{i}t_{i},\phi)$$
``` {r Null negative binomial model fit, include = FALSE}
null_negbin_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = FALSE, location_adjustment = FALSE, stat_model = "negbin", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
The Poisson model can be modified by allowing each serotype to have its own invasiveness, $\nu_{j}$ (**type-specific Poisson** model): $$d_{i,j} \sim Pois(\nu_{j}\rho_{i,j}N_{i}t_{i})$$ ``` {r Type-specific Poisson model fit, include = FALSE} type_specific_poisson_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = TRUE, location_adjustment = FALSE, stat_model = "poisson", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
The negative binomial model can be modified in the same way (type-specific negative binomial model):
$$d_{i,j} \sim NegBin(\nu_{j}\rho_{i,j}N_{i}t_{i},\phi)$$
``` {r Type-specific negative binomial model fit, include = FALSE}
type_specific_negbin_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = TRUE, location_adjustment = FALSE, stat_model = "negbin", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
Alternatively, the Poisson model can be modified by allowing invasiveness to be adjusted between locations by a scale factor, $\gamma_{i}$. In this model, all serotypes still have the same invasiveness (**adjusted Poisson** model): $$d_{i,j} \sim Poisson(\gamma_{i}\nu\rho_{i,j}N_{i}t_{i})$$ ``` {r Adjusted Poisson model fit, include = FALSE} adjusted_poisson_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = FALSE, location_adjustment = TRUE, stat_model = "poisson", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
The negative binomial model can again be modified in the same way (adjusted negative binomial model):
$$d_{i,j} \sim NegBin(\gamma_{i}\nu\rho_{i,j}N_{i}t_{i},\phi)$$
``` {r Adjusted negative binomial model fit, include = FALSE}
adjusted_negbin_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = FALSE, location_adjustment = TRUE, stat_model = "negbin", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
Both modifications can be combined in adjusted type-specific model fits. This can be used with the Poisson model of disease isolate counts (**adjusted type-specific Poisson** model): $$d_{i,j} \sim Poisson(\gamma_{i}\nu_{j}\rho_{i,j}N_{i}t_{i})$$ ``` {r Adjusted type-specific Poisson model fit, include = FALSE} adjusted_type_specific_poisson_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = TRUE, location_adjustment = TRUE, stat_model = "poisson", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
This can also be used with the negative binomial model (adjusted type-specific negative binomial model):
$$d_{i,j} \sim NegBin(\gamma_{i}\nu_{j}\rho_{i,j}N_{i}t_{i},\phi)$$
``` {r Adjusted type-specific negative binomial model fit, include = FALSE}
adjusted_type_specific_negbin_fit <- progressionEstimation::fit_progression_rate_model(infant_serotype_invasiveness, type_specific = TRUE, location_adjustment = TRUE, stat_model = "negbin", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
To check the models have fitted correctly, we can plot the $\hat{R}$ values across MCMCs: ``` {r Plot r hat values} fit_list <- list(null_poisson_fit, null_negbin_fit, type_specific_poisson_fit, type_specific_negbin_fit, adjusted_poisson_fit, adjusted_negbin_fit, adjusted_type_specific_poisson_fit, adjusted_type_specific_negbin_fit) plot_rhat <- function(fit) { rhat_plot <- plot(fit, plotfun = "rhat", binwidth = 0.00005) + theme(axis.text.x = element_text(angle = 90)) return(rhat_plot) } rhat_plots <- lapply(fit_list, plot_rhat) combined_rhat_plots <- cowplot::plot_grid(plotlist = rhat_plots, ncol = 4, nrow = 2, labels = "AUTO") ggsave(combined_rhat_plots, file = "infant_combined_rhat_plots.pdf", width = 8, height = 6)
Convergence can also be checked using trace plots of the log likelihood values:
``` {r Plot log likelihood traces}
plot_lp <- function(fit) { pl <- rstan::traceplot(fit, pars = "lp__") no_leg_pl <- pl + theme(legend.position = "none", axis.text.x = element_text(angle = 90)) return(no_leg_pl) }
trace_plots <- lapply(fit_list, plot_lp)
combined_trace_plots <- cowplot::plot_grid(plotlist = trace_plots, ncol = 4, nrow = 2, labels = "AUTO")
ggsave(combined_trace_plots, file = "infant_combined_trace_plots.pdf", width = 8, height = 6)
## Compare model observations and predictions For each model, the output can be processed, and the observations plotted against each model's predictions. ``` {r Process model outputs and plot predictions, fig.height = 18} model_outputs <- lapply(fit_list, progressionEstimation::process_progression_rate_model_output, filtered_S_pneumoniae_infant_serotype) prediction_plots <- lapply(model_outputs, progressionEstimation::plot_case_carrier_predictions, legend = FALSE, n_label = 0) no_leg_plot <- cowplot::plot_grid(plotlist = prediction_plots, ncol = 1, nrow = 8, labels = "AUTO", vjust = -0.01) + theme(plot.margin = unit(c(0.5,0,0,0), "cm")) prediction_plot_legend <- progressionEstimation::plot_case_carrier_predictions(model_outputs[[1]], just_legend = TRUE) pred_v_obs_plot <- cowplot::plot_grid(no_leg_plot, prediction_plot_legend, ncol = 1, rel_heights = c(1,0.1)) ggsave(pred_v_obs_plot, file = "infant_serotypes_pred_v_obs_plot.pdf", height = 20, width = 10)
The deviation of the predicted and observed values can be simply evaluated using the distribution of absolute deviations:
``` {r Calculate absolute deviations}
obs_pred_df <- dplyr::bind_rows(model_outputs) %>% dplyr::select(model_name,carriage_abs_dev,disease_abs_dev,phi,phi_lower,phi_upper) %>% tidyr::pivot_longer(c(carriage_abs_dev,disease_abs_dev), names_to = "Observation", values_to = "Abs_dev") %>% dplyr::mutate(model_name = factor(model_name, levels = c( "null_poisson", "null_negbin", "type_specific_poisson", "type_specific_negbin", "adjusted_null_poisson", "adjusted_null_negbin", "adjusted_type_specific_poisson", "adjusted_type_specific_negbin" ) ) ) %>% dplyr::mutate(Observation = dplyr::case_when( Observation == "carriage_abs_dev" ~ "Carriage", Observation == "disease_abs_dev" ~ "Disease" ) )
(serotype_abs_dev_plot <- ggplot(obs_pred_df, aes(x = model_name, y = Abs_dev)) + #geom_boxplot(outlier.shape = NA) + geom_point(position = position_jitter(width = 0.25), alpha = 0.25, cex = 0.75, colour = "blue", pch = 4) + geom_violin(fill = NA) + scale_y_continuous(trans="log10") + theme_bw() + facet_grid(Observation ~ .) + scale_x_discrete(labels = model_name_labels) + ylab("Absolute deviation") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) )
ggsave(serotype_abs_dev_plot, file = "serotype_abs_dev_plot.pdf", width = 7, height = 7 )
The fits of the negative binomial can be evaluated through plotting the estimates of the precision parameter, $\phi$. As $\phi$ increases, the variance of the negative binomial distribution decreases, and the negative binomal model becomes more similar to the Poisson model. ``` {r Plot precision parameters for negative binomial models, fig.width = 6} (phi_plot <- ggplot(obs_pred_df %>% dplyr::filter(!is.na(phi)) %>% dplyr::group_by(model_name) %>% dplyr::slice(1) %>% dplyr::ungroup(), aes(x = model_name, y = phi, ymin = phi_lower, ymax = phi_upper)) + geom_point() + geom_errorbar(width = 0.25) + theme_bw() + scale_y_continuous(trans="log10") + ylab(expression(phi)) + xlab("Model name") + scale_x_discrete(labels = model_name_labels[c(2,4,6,8)]) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) ) ggsave(phi_plot, file="serotype_phi_plot.pdf", height = 6, width = 6)
The model likelihoods can be compared using leave-one-out cross-validation:
``` {r Comparison with LOO-CV using full log likelihood}
loo_comparison <- progressionEstimation::compare_model_fits_with_loo(fit_list, use_moments = FALSE, num_threads = n_core)
loo_comparison %>% kableExtra::kable() %>% kableExtra::kable_styling(latex_options = "scale_down")
write.csv(loo_comparison, file = "child_serotype_loo_comparison.csv", quote = FALSE, row.names = FALSE)
The Pareto distribution k values are too high for robust estimates of the ELPD. This reflects many of the points being ``` {r Comparison with LOO-CV using disease distribution log likelihood} disease_loo_comparison <- progressionEstimation::compare_model_fits_with_loo(fit_list, log_lik_param = "disease_log_lik", use_moments = FALSE, num_threads = n_core) disease_loo_comparison %>% kableExtra::kable() %>% kableExtra::kable_styling(latex_options = "scale_down") write.csv(disease_loo_comparison, file = "child_disease_serotype_loo_comparison.csv", quote = FALSE, row.names = FALSE)
As the models can instead be compared with Bayes factors:
``` {r Comparison with BF}
bf_comparison <- progressionEstimation::compare_model_fits_with_bf(fit_list, num_iter = n_iter, num_threads = n_core)
bf_comparison %>% kableExtra::kable() %>% kableExtra::kable_styling(latex_options = "scale_down")
write.csv(bf_comparison, file = "child_disease_serotype_bf_comparison.csv", quote = FALSE, row.names = FALSE)
## Plot serotype invasiveness The best-fitting model can then be applied to the overall dataset. ``` {r Plot invasiveness estimates from best-fitting model} full_infant_serotype_invasiveness <- progressionEstimation::process_input_data( S_pneumoniae_infant_serotype %<>% dplyr::mutate(study = factor(study, levels = studies_by_size) ) ) full_adjusted_type_specific_negbin_fit <- progressionEstimation::fit_progression_rate_model(full_infant_serotype_invasiveness, type_specific = TRUE, location_adjustment = TRUE, stat_model = "negbin", num_chains = n_chains, num_iter = n_iter, num_cores = n_core) rhat_plot <- plot(full_adjusted_type_specific_negbin_fit, plotfun = "rhat", binwidth = 0.00005) trace_plot <- rstan::traceplot(full_adjusted_type_specific_negbin_fit, pars = "lp__") + theme(axis.text.x = element_text(angle = 90)) best_fitting_model_validation_plot <- cowplot::plot_grid(plotlist = list(rhat_plot,trace_plot), nrow = 1, ncol = 2, labels = "AUTO") ggsave(best_fitting_model_validation_plot, file = "best_fitting_infant_model_validation_plot.pdf", width = 8, height = 6)
``` {r Plot invasiveness across all serotypes}
mixedrank = function(x) order(gtools::mixedorder(as.character(x)))
best_fitting_output <- progressionEstimation::process_progression_rate_model_output(full_adjusted_type_specific_negbin_fit, S_pneumoniae_infant_serotype) %>% dplyr::mutate("classification" = dplyr::case_when( type %in% vaccine_types[["PCV7"]] ~ "PCV7", type %in% vaccine_types[["PCV10"]] ~ "PCV10", type %in% vaccine_types[["PCV13"]] ~ "PCV13", type %in% vaccine_types[["PCV15"]] ~ "PCV15", type %in% vaccine_types[["PCV20"]] ~ "PCV20", type %in% vaccine_types[["PPV23"]] ~ "PPV23", TRUE ~ "No PCV" ) ) %>% dplyr::mutate(classification = factor(classification, levels = c("PCV7", "PCV10", "PCV13", "PCV15", "PCV20", "PPV23", "No PCV")))
serotype_levels <- best_fitting_output %>% dplyr::select(type,classification) %>% dplyr::distinct() %>% group_by(classification) %>% dplyr::arrange(mixedrank(type), .by_group = TRUE) %>% ungroup() %>% dplyr::select(type) %>% dplyr::pull()
best_fitting_output %<>% dplyr::mutate(type = factor(type, levels = serotype_levels))
(invasiveness_plot <- progressionEstimation::plot_progression_rates(best_fitting_output, unit_time = "year", type_name = "Serotype", colour_col = "classification", colour_palette = vaccine_colours, use_sample_size = TRUE))
ggsave(invasiveness_plot, file = "full_serotype_invasiveness_plot.pdf", height = 8, width = 8)
write.csv(best_fitting_output, file = "study_adjusted_type_specific_negbin_serotype_fit.csv", quote = F, row.names = F)
write.csv(best_fitting_output %>% dplyr::select(type,nu,nu_lower,nu_upper) %>% dplyr::rename(invasiveness = nu) %>% dplyr::rename(invasiveness_lower = nu_lower) %>% dplyr::rename(invasiveness_upper = nu_upper) %>% dplyr::distinct(), file = "Dataset_S1.csv", quote = F, row.names = F)
## Plot study-specific scale factors ``` {r Plot study-specific scale factor estimates from best-fitting model} (scale_factor_plot <- progressionEstimation::plot_study_scale_factors(best_fitting_output)) ggsave(scale_factor_plot, file = "serotype_scale_factor_plot.png", width = 8, height = 6)
If PCVs provide further protection against disease, relative to carriage, then their invasiveness may reduce post-PCV. Therefore we can treat them as different serotypes pre- and post-PCV.
``` {r Adjust invasiveness post-PCV}
pv_S_pneumoniae_infant_serotype <- S_pneumoniae_infant_serotype %>% dplyr::mutate(type = dplyr::case_when( type %in% vaccine_types[["PCV7"]] & grepl("post.PCV", study) ~ paste0(type,"_pv"), type == "6A" & grepl("post.PCV", study) ~ paste0(type,"_pv"), type %in% vaccine_types[["PCV10"]] & grepl("post.PCV10", study) ~ paste0(type,"_pv"), type %in% vaccine_types[["PCV10"]] & grepl("post.PCV13", study) ~ paste0(type,"_pv"), type %in% vaccine_types[["PCV13"]] & grepl("post.PCV13", study) ~ paste0(type,"_pv"), type == "6C" & grepl("post.PCV13", study) ~ paste0(type,"_pv"), TRUE ~ as.character(type) ) ) %>% dplyr::mutate(type = factor(type))
pv_S_pneumoniae_infant_serotype %<>% dplyr::mutate(study = factor(study, levels = studies_by_size))
pv_full_infant_serotype_invasiveness <- progressionEstimation::process_input_data(pv_S_pneumoniae_infant_serotype)
pv_full_adjusted_type_specific_negbin_fit <- progressionEstimation::fit_progression_rate_model(pv_full_infant_serotype_invasiveness, type_specific = TRUE, location_adjustment = TRUE, stat_model = "negbin", num_chains = n_chains, num_iter = n_iter, num_cores = n_core)
rhat_plot <- plot(pv_full_adjusted_type_specific_negbin_fit, plotfun = "rhat", binwidth = 0.00005)
trace_plot <- rstan::traceplot(pv_full_adjusted_type_specific_negbin_fit, pars = "lp__") + theme(axis.text.x = element_text(angle = 90, size = 8))
best_fitting_pv_model_validation_plot <- cowplot::plot_grid(plotlist = list(rhat_plot,trace_plot), nrow = 1, ncol = 2, labels = "AUTO")
ggsave(best_fitting_pv_model_validation_plot, file = "best_fitting_pv_infant_model_validation_plot.pdf", width = 8, height = 6)
``` {r Analyse the pv output} pv_best_fitting_output <- progressionEstimation::process_progression_rate_model_output( pv_full_adjusted_type_specific_negbin_fit, pv_S_pneumoniae_infant_serotype) %>% dplyr::mutate("classification" = dplyr::case_when( gsub("_pv","",type) %in% vaccine_types[["PCV7"]] ~ "PCV7", gsub("_pv","",type) %in% vaccine_types[["PCV10"]] ~ "PCV10", gsub("_pv","",type) %in% vaccine_types[["PCV13"]] ~ "PCV13", gsub("_pv","",type) %in% vaccine_types[["PCV15"]] ~ "PCV15", gsub("_pv","",type) %in% vaccine_types[["PCV20"]] ~ "PCV20", gsub("_pv","",type) %in% vaccine_types[["PPV23"]] ~ "PPV23", TRUE ~ "No PCV" ) ) %>% dplyr::mutate(classification = factor(classification, levels = c("PCV7", "PCV10", "PCV13", "PCV15", "PCV20", "PPV23", "No PCV"))) # Order serotypes pv_serotype_levels <- pv_best_fitting_output %>% dplyr::select(type,classification) %>% dplyr::distinct() %>% group_by(classification) %>% dplyr::arrange(mixedrank(type), .by_group = TRUE) %>% ungroup() %>% dplyr::select(type) %>% dplyr::pull() pv_best_fitting_output %<>% dplyr::mutate(type = factor(type, levels = pv_serotype_levels)) # Plot (pv_invasiveness_plot <- progressionEstimation::plot_progression_rates(pv_best_fitting_output, unit_time = "year", type_name = "Serotype", colour_col = "classification", colour_palette = vaccine_colours, use_sample_size = TRUE)) (joint_invasiveness_plot <- cowplot::plot_grid(plotlist = list(invasiveness_plot + theme(legend.position = "none"), pv_invasiveness_plot), nrow = 2, ncol = 1, rel_heights = c(1,1.2), labels = "AUTO" ) ) ggsave(joint_invasiveness_plot, file = "joint_infant_serotype_plot.pdf", height = 10, width = 9)
``` {r Plot study-specific scale factor estimates from best-fitting model, adjust for vaccine}
(pv_scale_factor_plot <- progressionEstimation::plot_study_scale_factors(pv_best_fitting_output))
## Comparison with odds ratio calculation ``` {r Calculate odds ratios} require(ggpubr) require(metafor) get_serotype_stats <- function(serotype, df) { sero_df <- df %>% tidyr::pivot_longer(c(carriage,disease), names_to = "clinical_manifestation", values_to = "observation") %>% dplyr::group_by(study) %>% dplyr::mutate(matching_type = dplyr::if_else(type == serotype,1,0)) %>% dplyr::mutate(matching_type_disease = sum(observation[clinical_manifestation == "disease" & matching_type == 1])) %>% dplyr::mutate(matching_type_carriage = sum(observation[clinical_manifestation == "carriage" & matching_type == 1])) %>% dplyr::mutate(non_matching_type_disease = sum(observation[clinical_manifestation == "disease" & matching_type == 0])) %>% dplyr::mutate(non_matching_type_carriage = sum(observation[clinical_manifestation == "carriage" & matching_type == 0])) %>% dplyr::ungroup() %>% dplyr::select(study,matching_type_disease,matching_type_carriage,non_matching_type_disease,non_matching_type_carriage) %>% dplyr::distinct() %>% dplyr::mutate(serotype = serotype) %>% dplyr::filter(matching_type_carriage > 0 | matching_type_disease > 0) return(sero_df) } per_serotype_estimate <- function(test_serotype, df, method_type = "FE") { per_serotype_out <- metafor::rma(yi = yi, vi = vi, data = df, slab = study, subset = serotype == test_serotype, method = method_type, control=list(stepadj=0.5, maxiter=10000)) summary_df <- data.frame( "Analysis" = method_type, "Serotype" = test_serotype, "OR" = as.numeric(per_serotype_out$b), "OR_upper" = per_serotype_out$ci.ub, "OR_lower" = per_serotype_out$ci.lb, "tau_squared" = per_serotype_out$tau2, "i_squared" = per_serotype_out$I2, "h_squared" = per_serotype_out$H2 ) return(summary_df) } serotype_or_df <- dplyr::bind_rows(lapply( serotype_levels, get_serotype_stats, S_pneumoniae_infant_serotype) ) serotype_effect_size_df <- metafor::escalc(measure = "OR", ai = matching_type_disease, bi = matching_type_carriage, ci = non_matching_type_disease, di = non_matching_type_carriage, slab = study, data = serotype_or_df) per_serotype_out <- dplyr::bind_rows( lapply( serotype_levels, per_serotype_estimate, serotype_effect_size_df ), lapply( serotype_levels, per_serotype_estimate, serotype_effect_size_df, method_type = "REML" ) ) sample_sizes <- best_fitting_output %>% dplyr::group_by(type) %>% dplyr::mutate(num_samples = sum(carriage+disease)) %>% dplyr::mutate(Sample_count = dplyr::case_when( num_samples < 11 ~ "0-10", num_samples < 51 ~ "11-50", num_samples < 101 ~ "51-100", TRUE ~ ">100" ) ) %>% dplyr::ungroup() %>% dplyr::select(type, Sample_count) %>% dplyr::distinct() %>% dplyr::mutate(Sample_count = factor(Sample_count, levels = c("0-10", "11-50", "51-100", ">100"))) # Combine the invasiveness estimates combined_invasiveness_df <- dplyr::bind_rows( per_serotype_out %>% dplyr::select(Analysis, Serotype, OR, OR_upper, OR_lower) %>% dplyr::rename(Invasiveness = OR) %>% dplyr::rename(Invasiveness_upper = OR_upper) %>% dplyr::rename(Invasiveness_lower = OR_lower), best_fitting_output %>% dplyr::select(model_name, nu, nu_lower, nu_upper, type) %>% dplyr::distinct() %>% dplyr::rename(Analysis = model_name) %>% dplyr::rename(Serotype = type) %>% dplyr::rename(Invasiveness = nu) %>% dplyr::rename(Invasiveness_upper = nu_upper) %>% dplyr::rename(Invasiveness_lower = nu_lower) ) %>% tidyr::pivot_wider(id_cols = Serotype, names_from = Analysis, values_from = c(Invasiveness,Invasiveness_upper,Invasiveness_lower), names_sep = "_") %>% dplyr::mutate("classification" = dplyr::case_when( Serotype %in% vaccine_types[["PCV7"]] ~ "PCV7", Serotype %in% vaccine_types[["PCV10"]] ~ "PCV10", Serotype %in% vaccine_types[["PCV13"]] ~ "PCV13", Serotype %in% vaccine_types[["PCV15"]] ~ "PCV15", Serotype %in% vaccine_types[["PCV20"]] ~ "PCV20", Serotype %in% vaccine_types[["PPV23"]] ~ "PPV23", TRUE ~ "No PCV" ) ) %>% dplyr::left_join(sample_sizes, by = c("Serotype" = "type")) # Plot comparison of invasiveness estimates (fixed_effects_comparison <- ggplot(combined_invasiveness_df, aes(x = Invasiveness_adjusted_type_specific_negbin, xmin = Invasiveness_lower_adjusted_type_specific_negbin, xmax = Invasiveness_upper_adjusted_type_specific_negbin, y = Invasiveness_FE, ymin = Invasiveness_lower_FE, ymax = Invasiveness_upper_FE, colour = classification)) + geom_errorbar(alpha = 0.25) + geom_errorbarh(alpha = 0.25) + geom_point() + scale_x_continuous(trans = "log10") + theme_bw() + scale_color_manual(values = vaccine_colours) + theme(legend.position = "bottom") + ylab("Invasiveness estimated from log odds ratio") + xlab("Invasivess estimate from adjusted type-specific negative binomial (cases per carrier per year)") + facet_wrap(~Sample_count) + stat_cor(aes(x = Invasiveness_adjusted_type_specific_negbin, y = Invasiveness_FE), inherit.aes = FALSE, method = "kendall", cor.coef.name = "tau") + ggrepel::geom_label_repel(data = combined_invasiveness_df %>% dplyr::filter((Invasiveness_adjusted_type_specific_negbin < 0.0005 & Invasiveness_FE > 0) | Serotype == "46"), aes(label = Serotype) ) )
``` {r Plot comparison with mixed effects}
(random_effects_comparison <- ggplot(combined_invasiveness_df, aes(x = Invasiveness_adjusted_type_specific_negbin, xmin = Invasiveness_lower_adjusted_type_specific_negbin, xmax = Invasiveness_upper_adjusted_type_specific_negbin, y = Invasiveness_REML, ymin = Invasiveness_lower_REML, ymax = Invasiveness_upper_REML, colour = classification)) + geom_errorbar(alpha = 0.25) + geom_errorbarh(alpha = 0.25) + geom_point() + scale_x_continuous(trans = "log10") + theme_bw() + scale_color_manual(values = vaccine_colours) + theme(legend.position = "bottom") + ylab("Invasiveness estimated from log odds ratio") + xlab("Invasivess estimate from adjusted type-specific negative binomial (cases per carrier per year)") + facet_wrap(~Sample_count) + stat_cor(aes(x = Invasiveness_adjusted_type_specific_negbin, y = Invasiveness_REML), inherit.aes = FALSE, method = "kendall", cor.coef.name = "tau") + ggrepel::geom_label_repel(data = combined_invasiveness_df %>% dplyr::filter((Invasiveness_adjusted_type_specific_negbin < 0.0005 & Invasiveness_FE > 0) | Serotype == "46"), aes(label = Serotype) ) )
combined_odds_ratio_plot <- cowplot::plot_grid( plotlist = list(fixed_effects_comparison + theme(legend.position = "none"), random_effects_comparison), ncol = 1, nrow = 2, rel_heights = c(1,1.2), labels = "AUTO" )
ggsave(combined_odds_ratio_plot, file = "combined_odds_ratio_plot.pdf", width = 8, height = 12)
This comparison can be extended to location-specific invasiveness factors, which incorporate the uncertainty of the study-specific adjustment factor: ``` {r Comparison with location-specific invasiveness} netherland_pre_pcv_invasiveness_df <- progressionEstimation::get_type_invasiveness_for_study(S_pneumoniae_infant_serotype, full_adjusted_type_specific_negbin_fit, study = "Netherlands.post.PCV10") netherland_pre_pcv_invasiveness_df %<>% dplyr::mutate("classification" = dplyr::case_when( gsub("_pv","",type) %in% vaccine_types[["PCV7"]] ~ "PCV7", gsub("_pv","",type) %in% vaccine_types[["PCV10"]] ~ "PCV10", gsub("_pv","",type) %in% vaccine_types[["PCV13"]] ~ "PCV13", gsub("_pv","",type) %in% vaccine_types[["PCV15"]] ~ "PCV15", gsub("_pv","",type) %in% vaccine_types[["PCV20"]] ~ "PCV20", gsub("_pv","",type) %in% vaccine_types[["PPV23"]] ~ "PPV23", TRUE ~ "No PCV" ) ) %>% dplyr::mutate(classification = factor(classification, levels = c("PCV7", "PCV10", "PCV13", "PCV15", "PCV20", "PPV23", "No PCV"))) # Order serotypes serotype_levels <- netherland_pre_pcv_invasiveness_df %>% dplyr::select(type,classification) %>% dplyr::distinct() %>% group_by(classification) %>% dplyr::arrange(mixedrank(type), .by_group = TRUE) %>% ungroup() %>% dplyr::select(type) %>% dplyr::pull() netherland_pre_pcv_invasiveness_df %<>% dplyr::mutate(type = factor(type, levels = serotype_levels)) # Plot (netherlands_invasiveness_plot <- progressionEstimation::plot_progression_rates(netherland_pre_pcv_invasiveness_df, unit_time = "year", type_name = "Serotype", colour_col = "classification", colour_palette = vaccine_colours, use_sample_size = TRUE))
This subset of serotypes can be compared with the odds ratios estimated using fixed effects:
``` {r Comparison of Netherlands invasiveness with odds ratios calculated with fixed effects}
netherlands_or_comparison_df <- netherland_pre_pcv_invasiveness_df %>% dplyr::rename(Serotype = type) %>% dplyr::rename(Netherlands_invasiveness = nu) %>% dplyr::rename(Netherlands_invasiveness_lower = nu_lower) %>% dplyr::rename(Netherlands_invasiveness_upper = nu_upper) %>% dplyr::select(-classification) %>% dplyr::left_join(combined_invasiveness_df, by = c("Serotype"))
(netherlands_fixed_or_comparison_plot <- ggplot(netherlands_or_comparison_df, aes(x = Netherlands_invasiveness, xmin = Netherlands_invasiveness_lower, xmax = Netherlands_invasiveness_upper, y = Invasiveness_FE, ymin = Invasiveness_lower_FE, ymax = Invasiveness_upper_FE, colour = classification)) + geom_errorbar(alpha = 0.25) + geom_errorbarh(alpha = 0.25) + geom_point() + scale_x_continuous(trans = "log10") + theme_bw() + scale_color_manual(values = vaccine_colours) + theme(legend.position = "bottom") + ylab("Invasiveness estimated from log odds ratio") + xlab("Invasivess estimate from adjusted type-specific negative binomial (cases per carrier per year)") + facet_wrap(~Sample_count) + stat_cor(aes(x = Invasiveness_adjusted_type_specific_negbin, y = Invasiveness_FE), inherit.aes = FALSE, method = "kendall", cor.coef.name = "tau") )
The same comparison can be undertaken against odds ratios calculated with random effects: ``` {r Comparison of Netherlands data with odds ratios calculated with random effects} # Plot comparison of invasiveness estimates (netherlands_random_or_comparison_plot <- ggplot(netherlands_or_comparison_df, aes(x = Netherlands_invasiveness, xmin = Netherlands_invasiveness_lower, xmax = Netherlands_invasiveness_upper, y = Invasiveness_REML, ymin = Invasiveness_lower_REML, ymax = Invasiveness_upper_REML, colour = classification)) + geom_errorbar(alpha = 0.25) + geom_errorbarh(alpha = 0.25) + geom_point() + scale_x_continuous(trans = "log10") + theme_bw() + scale_color_manual(values = vaccine_colours) + theme(legend.position = "bottom") + ylab("Invasiveness estimated from log odds ratio") + xlab("Invasivess estimate from adjusted type-specific negative binomial (cases per carrier per year)") + facet_wrap(~Sample_count) + stat_cor(aes(x = Invasiveness_adjusted_type_specific_negbin, y = Invasiveness_REML), inherit.aes = FALSE, method = "kendall", cor.coef.name = "tau") ) combined_netherlands_or_comparison_plot <- cowplot::plot_grid(plotlist = list(netherlands_fixed_or_comparison_plot, netherlands_random_or_comparison_plot), nrow = 1, ncol = 2, labels = "AUTO") ggsave(combined_netherlands_or_comparison_plot, file = "netherlands_or_comparison.pdf", height = 8, width = 14)
``` {r Carriage duration comparison}
carriage_duration.df <- data.frame( "type" = c("19F","23F","6B","14","21","19B","18C","29","3","4","24F","NT","5","6A","6C"), "carriage_duration" = c(46.9,21,16.2,7.2,1.6,-0.1,-1.9,-4.3,-4.5,-7.2,-8.5,-12.3,-18.6,0,0) )
carriage_duration.df %<>% dplyr::inner_join(pv_best_fitting_output, by = "type") %>% dplyr::select(type,carriage_duration,classification,nu,nu_lower,nu_upper) %>% dplyr::distinct()
(invasiveness_v_duration_plot <- ggplot(carriage_duration.df, aes(x = carriage_duration, y = nu, ymin = nu_lower, ymax = nu_upper, colour = classification)) + geom_point() + geom_errorbar() + scale_colour_manual(values = vaccine_colours) + ggrepel::geom_label_repel(aes(label = type)) + scale_y_continuous(trans = "log10") + ylab(expression(nu)) + xlab("Carriage duration relative to serotype 6A/C") + theme_bw() )
ggsave(invasiveness_v_duration_plot, file = "invasiveness_v_duration.pdf", height = 8, width = 8)
```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.