cleaning all the data sources

makeclass_hcwpre()

Make classes out of each data set

library(latex2exp)
library(patchwork)
library(here)
require(plyr)
require(data.table)
require(tidyverse)
library(devtools)
# Required for this analysis
require(reshape2)
require(foreach)
require(doParallel)
require(bayesplot)
require(coda)
require(ggplot2)
require(viridis)
require(ggpubr)

require(foreach)
require(doParallel)
require(Rcpp)

require(extrafont)
require(showtext)
font_add("Avenir", "/Users/davidhodgson/Documents/personal/fonts/avenir_ff/AvenirLTStd-Book.otf")  
showtext_auto()
theme_set(theme_light())
library(wesanderson)

Load packages and data

Currently using the serosolver

install_github("https://github.com/dchodge/serosolver", ref = "custom_ab")
require(serosolver)

devtools::load_all()
load(here::here("data", "hcwpre_data.RDS"))
get_model_info_hcw_pre(hcwpre)

````

################################################
######## A: Exploratory analysis of dataset ########
################################################
######## Look at titre values (overall)
```r

sero_data <- hcwpre$sero_data
sum_data <- sero_data %>% group_by(sample_time, titre_value) %>% summarise(n = n()) 
weighted_sum_data <- sum_data %>% summarise(w_mean = weighted.mean(titre_value, n, na.rm = TRUE))

plot_data1 <- sero_data %>% group_by(virus_isol_yr, sample_time) %>%
        summarise(titre_value = mean(titre_value)) %>%
        mutate(sample_time = factor(as.character(sample_time), levels = unique(as.character(sample_time)))) %>%
        filter(virus_isol_yr < 2016)

p1 <- plot_data1 %>%
        ggplot() + 
                geom_line(aes(x = virus_isol_yr, y = titre_value, color = sample_time), size = 1) +
                geom_point(aes(x = virus_isol_yr, y = titre_value, fill = sample_time), shape = 21, size = 3) + 
                labs(x = "Year of circulating virus", y = "Titre value", fill = "Time post-vac") + 
                scale_fill_manual(values = c("gray", "red", "pink")) + 
                scale_color_manual(values = c("gray", "red", "pink"))



p2 <- ggplot() +
        geom_point(data = sum_data, aes(x = sample_time, y = titre_value, size = n), fill = "black", alpha = 0.5) +
        geom_line(data = weighted_sum_data, aes(x = sample_time, y = w_mean), color = "red", size = 1) +
        geom_point(data = weighted_sum_data, aes(x = sample_time, y = w_mean), shape = 21, fill = "red", size = 5) + 
        labs(x = "Time post vaccination", y = "Titre value", size = "Number of samples") 

p1 / p2
ggsave(file = here::here("outputs", "hcw_pre", "analysis", "figs", "figA.pdf"))
## Look at titre value change relative to starting titre and year of isolation
sero_data_base <- sero_data %>% 
        group_by(virus_isol_yr, sample_time) %>%
        summarise(titre_mean = mean(titre_value, rm.na = TRUE)) %>%
        pivot_wider(names_from = sample_time, values_from = titre_mean) %>%
        mutate(day30 = (`30` - `0`), day180 = (`180` - `0`) ) %>%
        select(virus_isol_yr, base = `0`, day30, day180) %>%
        pivot_longer(c(day30, day180), names_to = "time", values_to = "abs") %>%
        mutate(time = case_when(time == "day30"~"30", time == "day180"~"180"))

sero_data_base_rel <- sero_data %>% 
        group_by(virus_isol_yr, sample_time) %>%
        summarise(titre_mean = mean(titre_value, rm.na = TRUE)) %>%
         pivot_wider(names_from = sample_time, values_from = titre_mean) %>%
        mutate(day30_change = (`30` - `0`) / `0`, day180_change = (`180` - `0`) / `0`) %>%
        select(virus_isol_yr, base = `0`, day30_change, day180_change) %>%
        pivot_longer(c(day30_change, day180_change), names_to = "time", values_to = "prop") %>%
        mutate(time = case_when(time == "day30_change"~"30", time == "day180_change"~"180"))

p1 <- sero_data_base %>%
        ggplot(aes(x = virus_isol_yr, y = abs, )) +
                geom_line(aes(group = virus_isol_yr)) +
                geom_point(aes(fill = time), shape = 21, size = 3) +
                labs(x = "Year of circulating virus", y = "Change in titre post-vac (abs)", fill = "Days post-vac")

p2 <- sero_data_base %>%
        ggplot(aes(x = base, y = abs, )) +
                geom_line(aes(group = virus_isol_yr)) +
                geom_point(aes(fill = time), shape = 21, size = 3) +
                labs(x = "Start titre value", y = "Change in titre post-vac (abs)", fill = "Days post-vac")


p3 <- sero_data_base_rel %>%
        ggplot(aes(x = virus_isol_yr, y = prop * 100, )) +
                geom_line(aes(group = virus_isol_yr)) +
                geom_point(aes(fill = time), shape = 21, size = 3) +
                labs(x = "Year of circulating virus", y = "Change in titre post-vac (%)", fill = "Days post-vac")

p4 <- sero_data_base_rel %>%
        ggplot(aes(x = base, y = prop * 100, )) +
                geom_line(aes(group = virus_isol_yr)) +
                geom_point(aes(fill = time), shape = 21, size = 3) +
                labs(x = "Start titre value", y = "Change in titre post-vac (%)", fill = "Days post-vac")

(p1 + p2) / (p3 + p4) + plot_layout(guides = "collect")
ggsave(file = here::here("outputs", "hcw_pre", "analysis", "figs", "figB.pdf"))
## Look at impact of vaccination
vac_data <- hcwpre$vac_data
vac_data_clean <- vac_data %>%
         mutate(response = case_when(
                        response == "Yes"~1,
                        response == "Don't know"~0,
                        response == "No"~0)) %>%
        select(pid, vac_yr, response)

vac_data_clean_final <- left_join(
        vac_data_clean %>% group_by(pid) %>% summarise(n = sum(response)),
        vac_data_clean %>% filter(vac_yr == 2015)
) %>% select(pid, n, prev_yr = response)

sero_vac_data <- left_join(vac_data_clean_final, sero_data)
sero_vac_data_complete <- sero_vac_data[complete.cases(sero_vac_data), ]

sero_vac_data_base <- sero_vac_data_complete %>% 
        group_by(virus_isol_yr, sample_time, n, prev_yr) %>%
        summarise(titre_mean = mean(titre_value, rm.na = TRUE)) %>%
         pivot_wider(names_from = sample_time, values_from = titre_mean) %>%
        mutate(day30_change = (`30` - `0`) / `0`, day180_change = (`180` - `0`) / `0`) %>%
        select(virus_isol_yr, n, prev_yr, base = `0`, day30_change, day180_change) %>%
        pivot_longer(c(day30_change, day180_change), names_to = "time", values_to = "prop") %>%
        mutate(time = case_when(time == "day30_change"~"30 day post vac", time == "day180_change"~"180 days post vac")) %>%
        mutate(prev_yr = case_when(prev_yr == 0~"No vaccine previous year", prev_yr == 1~"Vaccine previous year"))

p1 <- sero_vac_data_base %>% filter(is.finite(prop)) %>%
        ggplot(aes(x = virus_isol_yr, y = prop * 100), rm.na = TRUE) +
                geom_point(aes(fill = as.character(virus_isol_yr)), shape = 21, size = 2, alpha = 0.8, position = position_dodge(width = 0.5)) +
                labs(x = "Starting titre", y = "Change in titre post-vac (%)", fill = "Year of circulating virus") + 
                facet_grid(rows = vars(time), cols = vars(prev_yr)) + ylim(0, 350)

p2 <- sero_vac_data_base %>% filter(is.finite(prop)) %>%
        ggplot(aes(x = base, y = prop * 100), rm.na = TRUE) +
                geom_point(aes(fill = as.character(virus_isol_yr)), shape = 21, size = 2, alpha = 0.8, position = position_dodge(width = 0.5)) +
                labs(x = "Starting titre", y = "Change in titre post-vac (%)", fill = "Year of circulating virus") + 
                facet_grid(rows = vars(time), cols = vars(prev_yr)) + ylim(0, 350)

p1 + p2 + plot_layout(guides = "collect")
# Influences short term response 

Exploration A: how titre boost change with time and starting titre

# Plot the hanam data (overall)

sum_data <- all_models[[2]]$create_post$titre_data %>% 
        group_by(sample_time, titre) %>% summarise(n = n()) 
sum_data_mean <- sum_data %>% summarise(w_mean = weighted.mean(titre, n)) 
sum_data %>%
        ggplot(aes(x = sample_time)) + 
                geom_point(aes(y = titre, size = n), alpha = 0.5) + 
                geom_line(data = sum_data_mean, aes(y = w_mean), size = 1, color = "red") + 
                geom_point(data = sum_data_mean, aes(y = w_mean), size = 5, color = "white", fill = "red", shape = 23) +
                labs(y = "Log2 titre value", x =  "Days post vaccination") + 
                scale_y_continuous(breaks= 1:11) + scale_x_continuous(breaks= c(0, 14, 21, 280))

data_virus <- all_models[[1]]$create_post$titre_data %>% group_by(virus, sample_time, titre) %>% summarise(n = n()) %>%
        summarise(w_mean = weighted.mean(titre, n)) %>% filter(sample_time %in% c(0, 14, 280)) %>%
        pivot_wider(names_from = sample_time, values_from = w_mean) %>%
        mutate(day14_change = (`14` - `0`) / `0`, day280_change = (`280` - `0`) / `0`) %>% select(virus, start = `0`, day14_change, day280_change) %>%
        pivot_longer(c(day14_change, day280_change), names_to = "time", values_to = "prop") %>%
        mutate(time = case_when(time == "day14_change"~14, time == "day280_change"~280))

# no trend with starting titre value
library(patchwork)

p0A <- data_virus %>% 
        ggplot(aes(x = virus / 52, y = (prop) * 100, fill = as.character(time))) + 
                geom_line(aes(group = virus)) + 
                geom_point(shape = 21, color = "black", size = 3) + 
                labs(x = "Year of circulating virus", y = "Change in titre post-vac (%)", fill = "Days post-vac")

p0B <- data_virus %>% 
        ggplot(aes(x = start, y = (prop) * 100, fill = as.character(time))) + 
                geom_line(aes(group = virus)) + 
                geom_point(shape = 21, color = "black", size = 3) + 
                labs(x = "Start titre value", y = "Change in titre post-vac (%)", fill = "Days post-vac")


p0A / p0B


````


```r

all_post <- all_models %>% map(~load_mcmc_post(.x, "longterm"))
purrr::map2(all_post, all_models, ~plot_traces(.x, .y))
purrr::map2(all_post, all_models, ~plot_titre_post(.x, .y))
plot_post_compare(all_post, all_models)

mpsrf_df <- purrr::map2(all_post, all_models, ~calc_mpsrf(.x, .y)) %>% bind_rows
save_plot_mpsrf(mpsrf_df, all_models[[1]])

# calcualte and plot the waic
waics <- purrr::map2(all_post, all_models, ~calc_waic(.x, .y))
save_plot_waic(waics)

# get best models
# looks at underlying dynamics
# Look at correlations in models with odd posteriors

ids <- all_models[[1]]$others$study$sero_data %>% pull(pid) %>% unique 
dob <- all_models[[1]]$others$study$part_data %>% unique 
num <- 6
textplt <- dob %>% filter(pid == ids[num]) %>% pull(yob)
all_models[[1]]$others$study$sero_data %>% filter(sample_time == 0, pid == ids[num]) %>%
        ggplot() + 
                geom_point(aes(x = virus_isol_yr, y = titre_value)) +
                geom_text(x = 1970, y = 6, label = textplt)


dchodge/pilot.hcw documentation built on Feb. 2, 2022, 11:29 p.m.