In phuonghtht/PMA_dengue_v2: Lab diagnosis

#commit:
##- using AGE_MIN instead of age mean-> max age is 30 in both HC and KH

rm(list=ls())
# setwd("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/CombinedCode_Maxine")

library(rstan)
util <- new.env()
source('./codes/stan_utility.R', local=util)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

indicate model type here!

input site, data input, no. of steps for years

# Changeable 
model <- "cons" # cons, TV,SS, TVSS
groupAge <- 5 # 1,5 # Grouping Input data by group of ....
step_year <- 1 # non-TV models, input 0
site <- "HC&KH" #KH, HC,HC&KH
dataInput <-"C1D1" # C1D1, C1F1, C1D3, C1F3
#for time-varying model, options are 1,2,3,4,5

# # for loop to run at once
# site_vec <- c("KH","HC","KH&HC")
# dataInput_vec <- c("C1D1","C1F1")
# step_year_vec <- c(1:5)
# 
# for (idx_site_vec in 1:length(site_vec)){
#     for (idx_dataInput_vec in 1: length(dataInput_vec)){
#         for (idx_step_year_vec in 1: length(step_year_vec)){
# 
#             site <- site_vec[idx_site_vec]
#             dataInput <- dataInput_vec[idx_dataInput_vec]
#             step_year <- step_year_vec[idx_step_year_vec]
#             code function ........
#         }
#     }
# }


##Results from different IS and infecting serotype infering models:
#~/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model  # original locations
if ( dataInput == "C1D1"){
    data <- read.csv("./data/Infecting_Models/PredictedIS_B23_567_pred.serotype_C1D1.csv")  
}
if ( dataInput == "C1D3"){
    data <- read.csv("./data/Infecting_Models/PredictedIS_B23_567_pred.serotype_C1D3.csv")  
}
if ( dataInput == "C1F1"){
    data <- read.csv("./data/Infecting_Models/PredictedIS_B23_567_pred.serotype_HomoAssigned_C1F1.csv")  
}
if ( dataInput == "C1F3"){
    data <- read.csv("./data/Infecting_Models/PredictedIS_B23_567_pred.serotype_HomoAssigned_C1F3.csv")  
}

data$YEAR <- as.factor(data$YEAR)
data$predictedIS<- ordered(data$predictedIS,levels=c("Secondary","Primary","Neg"))
data$pred.serotype<- as.factor(data$pred.serotype)

# Extract population data only!
pop<- data[which(!is.na(data$AGE_MIN)&is.na(data$PanbioUnit)),]#cross out acute and ELISA samples
pop$Site <- substr(pop$sampleID,1,2)

# Age grouping
if(groupAge == 1){
  ##Agegroup_ 1year
  pop$AgeGroup <-factor(as.factor(ceiling(pop$AGE_MIN)),labels=c("(1-2]","(2-3]","(3-4]","(4-5]","(5-6]","(6-7]","(7-8]","(8-9]","(9-10]","(10-11]","(11-12]","(12-13]","(13-14]","(14-15]","(15-16]","(16-17]","(17-18]","(18-19]","(19-20]","(20-21]","(21-22]","(22-23]","(23-24]","(24-25]","(25-26]","(26-27]","(27-28]","(28-29]","(29-30]"))# group as  1year 1 group, no individual is less than 1 year old 
}
if(groupAge == 2){
  pop$AgeGroup <-factor(as.factor(ceiling(pop$AGE_MIN/2)),labels=c("(0-2]","(2-4]","(4-6]","(6-8]","(8-10]","(10-12]","(12-14]","(14-16]","(16-18]","(18-20]","(20-22]","(22-24]","(24-26]","(26-28]","(28-30]"))
}
if(groupAge == 3){
  pop$AgeGroup <- ceiling(pop$AGE_MIN/3)
  pop$AgeGroup <- factor(
    pop$AgeGroup,
    labels = c("(0-3]","(3-6]","(6-9]","(9-12]","(12-15]","(15-18]",
               "(18-21]","(21-24]","(24-27]","(27-30]"))
}
if(groupAge == 5){
  pop$AgeGroup <- ceiling(pop$AGE_MIN/5)
  pop$AgeGroup <- factor(
    pop$AgeGroup,
    labels = c("(0-5]","(5-10]","(10-15]","(15-20]","(20-25]","(25-30]"))
}

data filtering and preparation

library(dplyr)
library(tidyr)
# spread(x,y): spread rows of col x into collums, then value of col y will fit in as rows.
#rename(new_name=old_name).

if (site=="HC"){
    p_year=dplyr::filter(pop,Site=="HC")%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(pred.serotype)%>%spread(pred.serotype,n)}
if(site == "KH"){
    p_year=dplyr::filter(pop,Site=="KH")%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(pred.serotype)%>%spread(pred.serotype,n)}
if(site == "KH&HC"|site =="HC&KH"){
    p_year=dplyr::filter(pop)%>%group_by(AgeGroup,YEAR)%>%dplyr::count(pred.serotype)%>%spread(pred.serotype,n)
    p_year$Site <- "HC&KH" # add Site-> same no.columns-> later convenient
    #Rearrange by column index
    p_year <- p_year[,colnames(p_year)[c(1:2,9,3:8)]]
    }

# Sorting "YEAR" in chronological order
p_year<-p_year[order(p_year$YEAR),]

# p_year$total <- apply(p_year[,3:8],1,sum,na.rm=T)
p_year$Primary <-rowSums(p_year[,4:7],na.rm = T)
p_year$total <-rowSums(p_year[,4:9],na.rm = T)

#create function for substr age rather than put age in order, this is useful in case of missing data in a specific age: 
my_substr <- function(s){
  if (nchar(s) == 5){
    t <- substr(s, 4, 4)
  }else{
    if (nchar(s) == 6){
      t <- substr(s, 4, 5)
    }else{
      t <- substr(s, 5, 6)
    }
  }
  return(t)
}

# lower range
my_substr_lower <- function(s){
  if (nchar(s) == 5|nchar(s) == 6){
    t <- substr(s, 2, 2)
  }else{
    t <- substr(s, 2, 3)
  }
  return(t)
}

#
p_year$age<- apply(p_year[,1],1,my_substr)
p_year$age <- as.numeric(p_year$age)
p_year$age_lower<- apply(p_year[,1],1,my_substr_lower)
p_year$age_lower <- as.numeric(p_year$age_lower)
p_year$ageMean <- apply(p_year[,12:13],1,mean)


# Assign NA as 0 because stan does not support NA value:

for (idx in 4:9){
  idx.na <- which(is.na(p_year[[idx]]))
  p_year[[idx]][idx.na] <- 0
}

# setnames(p_year, old=c("AgeGroup","1","2","3","4","Neg","Secondary","total"),new=c("AgeGroup","DV1.13","DV2.13","DV3.13","DV4.13","Neg.13","Sec.13","total.13"))

data formatting for stan

b = length(p_year$age)
n_y = 2017-2013+1+max(as.integer(p_year[which(p_year$YEAR==2013),]$age)) # number of years backward, time period that data is available ## KH: 36 -> 1982, HC: 35 -> 1983

# data input for constant FOI model
if (model == "cons") {
  datapp<- list(a = b,
              datap=c(p_year[1:b, 8],
                      p_year[1:b, 4],
                      p_year[1:b, 5],
                      p_year[1:b, 6],
                      p_year[1:b, 7],
                      p_year[1:b, 9]),
                      veca = as.integer(p_year$age),
                      veca_mean = as.integer(p_year$ageMean))
}

# data input for serotype-specific FOI model
if (model == "SS") {
  datapp<- list(a = b,
              datap=c(p_year[1:b, 8],
                      p_year[1:b, 4],
                      p_year[1:b, 5],
                      p_year[1:b, 6],
                      p_year[1:b, 7],
                      p_year[1:b, 9]),
                      veca=as.integer(p_year$age),
                      veca_mean = as.integer(p_year$ageMean))
}

#
if (model == "TV"|model == "TVSS"){

l_vecj = length(c(rep(1:ceiling(n_y/step_year), each =  step_year))) # ceiling: round up

datapp<- list(a = b,
              step_year = step_year,
              datap=c(p_year[1:b, 8],
                      p_year[1:b, 4],
                      p_year[1:b, 5],
                      p_year[1:b, 6],
                      p_year[1:b, 7],
                      p_year[1:b, 9]),
              vecj=as.integer(c(rep(1:ceiling(n_y/step_year), each =  step_year))),#for index of actual lambda
              l_vecj = length(c(rep(1:ceiling(n_y/step_year), each =  step_year))),# length of vecj for later use
              vecy=as.integer(as.character(p_year$YEAR)),
              veca=as.integer(p_year$age),
              veca_mean = as.integer(p_year$ageMean))

}

Stan codes

if (model == "cons"&groupAge==1){
  FOI_Dengue <-"./codes/FoI/Stan/FOI_constant_MLE.stan"
}
if (model == "cons"&groupAge==5){
  FOI_Dengue <-"./codes/FoI/Stan/FOI_constant_MLE_GroupAge.stan"
}
if (model == "SS"&groupAge==1){
  FOI_Dengue <-"./codes/FoI/Stan/FOI_serotype_specific_MLE.stan"
}
if (model == "SS"&groupAge==5){
  FOI_Dengue <-"./codes/FoI/Stan/FOI_serotype_specific_MLE_GroupAge.stan"
}
if (model == "TV"){
  FOI_Dengue <- "./codes/FoI/Stan/FOI_time varying_MLE.stan"
}
if (model == "TVSS"){
  FOI_Dengue <-"./codes/FoI/Stan/FOI_time varying_serotype specific_MLE.stan"
}

run model and obtain posterior values

# you then run using this: From this output we can quickly assess model convergence by looking at the Rhat values for each parameter. When these are at or near 1, the chains have converged. There are many other diagnostics, but this is an important one for Stan.
set.seed(9)
fit<-stan(file = FOI_Dengue,
          data=datapp, iter=6000,
          chains= 4,
          cores = 4, # parallel::detectCores()
          control = list(adapt_delta=0.8, max_treedepth=10))# default

posterior <- rstan::extract(fit) # specify package rstan rather than tidyr
# str(posterior)
#this summarizes the parameters, gets mean,etc. of parameters and metrics of whether converged or not like Rhat and neff
foisummary <- summary(fit)
foisummary

tab <- data.frame(foisummary$summary)
# # saving fits
#trace previous result here: /home/phuong/pCloudDrive/PMA analysis results/FoI/RDS/FOI
if (model == "cons") {
  saveRDS(fit,paste("./output/FoI/RDS/FOI",model,"groupAge",groupAge,site,dataInput,".rds"))
} else if (model == "SS") {
  saveRDS(fit, paste("./output/FoI/RDS/FOI",model,"groupAge",groupAge,site,dataInput, ".rds"))
} else if (model == "TV") {
  saveRDS(fit, paste("./output/FoI/RDS/FOI",model,"groupAge",groupAge,"step_year",step_year,site,dataInput, ".rds"))
}else {
  saveRDS(fit,paste("./output/FoI/RDS/FOI",model,"groupAge",groupAge,"step_year",step_year,site,dataInput,".rds"))
}

# # plot to see whether estimated parameter is convergence or not: traceplot
# plottedRows <- 1 : nrow(posterior$lambda)
# plottedRows <- which(plottedRows %% 6 == 0)
# plot(posterior$lambda[plottedRows, 4],type="l")

Dianostic

#eval=F: prevent the codes unless doing it manually
util$check_all_diagnostics(fit)

library(purrr)

class(fit)
summary(fit)$summary %>% head()
plot(fit,pars="lambda")# mean lamda 
plot(fit,pars=c("lambda[1,1]",
                "lambda[1,2]",
                 "lambda[1,3]",
                 "lambda[1,4]",
                 "lambda[1,5]",
                 "lambda[1,6]",
                 "lambda[1,7]"))
traceplot(fit,pars="lambda[1,1]")
#change the names in the stanfit object you can use the names method for stanfit objects.
print(names(fit))
names(fit)[1] <- "DENV1_"

#Assessing and Fixing Divergences and Treedepth Problems
rstan::check_divergences(fit)
warmups <- 3000
total_iterations <- 6000
max_treedepth <-  10 # default
n_chains <-  4
n_cores <- 8 # parallel::detectCores()
adapt_delta <- 0.8 # default

mack_diagnostics <- rstan::get_sampler_params(fit) %>% 
  set_names(1:n_chains) %>% 
   map_df(as_data_frame,.id = 'chain') %>% 
  group_by(chain) %>% 
  mutate(iteration = 1:length(chain)) %>% 
  mutate(warmup = iteration <= warmups)

library(ggsci)
mack_diagnostics %>% 
  group_by(warmup, chain) %>% 
  summarise(percent_divergent = mean(divergent__ >0)) %>% 
  ggplot() +
  geom_col(aes(chain, percent_divergent, fill = warmup), position = 'dodge', color = 'black') + 
  scale_y_continuous(labels = scales::percent, name = "% Divergent Runs")  + 
  scale_fill_npg()



mack_diagnostics %>% 
  ggplot(aes(iteration, treedepth__, color = chain)) + 
  geom_line() + 
  geom_hline(aes(yintercept = max_treedepth), color = 'red') + 
  scale_color_locuszoom()

mack_diagnostics %>% 
  ggplot(aes(iteration, stepsize__, color = chain)) + 
  geom_line()  + 
  scale_color_locuszoom()


bh_summary <- summary(fit)$summary %>% 
  as.data.frame() %>% 
  mutate(variable = rownames(.)) %>% 
  select(variable, everything()) %>% 
  as_data_frame()

bh_summary %>% head()
bh_summary %>% 
  ggplot(aes(n_eff)) + 
  geom_histogram() + 
  geom_vline(aes(xintercept = 4000), color = 'red')

bh_summary %>% 
  ggplot(aes(Rhat)) + 
  geom_histogram() + 
  geom_vline(aes(xintercept = 1.1), color = 'red')

bh_summary %>% 
  filter(variable %in% c('lambda[1,1]','lambda[1,2]','lambda[1,3]')) %>% 
  ggplot() + 
  geom_linerange(aes(variable, ymin = `2.5%`,ymax = `97.5%`)) + 
  geom_crossbar(aes(variable, mean, ymin = `25%`, ymax = `75%`), fill= 'grey') + 
  facet_wrap(~variable, scales = 'free')

library(stringr)
rhat <- bh_summary %>% 
  filter(str_detect(variable,'rhat') & !str_detect(variable,'log') & !str_detect(variable,'pp'))

# sal_data <- sal_data %>% 
#   mutate(mean_rhat = rhat$mean,
#          lower = rhat$`2.5%`,
#          upper = rhat$`97.5%`)
# 
# sal_data %>% 
#   ggplot() + 
#   geom_point(aes(ssb, r)) + 
#   geom_line(aes(ssb, mean_rhat)) + 
#   geom_ribbon(aes(ssb, ymin = lower, ymax = upper), alpha = 0.25)

bh_mcmc <- fit %>% 
  rstan::extract()

bh_pars <- bh_mcmc[c('lambda')] 

bh_pars %>% 
  map_df(as_data_frame, .id = 'variable')
  ggplot(aes(value, fill = variable)) + 
  geom_density() + 
  # facet_wrap(~ variable, scales = 'free') + 
  coord_flip() + 
  scale_fill_locuszoom()

plots for posterior values

## constant FOI
if (model == "cons") {
  p <- data.frame(lambda = posterior$lambda)
p$year <- rep("FOI")
# violin plot
lamb <- ggplot(p, aes(x=year,y=lambda)) + geom_violin()+
  ggtitle(paste("FoI",model,"groupAge",groupAge,site,dataInput))+
  coord_cartesian(ylim=c (0.0,0.03))+
  theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold", angle = 0, hjust = 1))+
  # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
  # geom_boxplot(width=0.1)+# add median and quartile
  stat_summary(fun.data=mean_sdl, 
               geom="pointrange", color="red")#Add mean and standard deviation
# # save the figure
ggsave(filename=paste("./output/FoI/Lambda/FOI",model,"groupAge",groupAge,site,dataInput,".png"),plot=lamb)
lamb
}

## serotype-specific FOI
if (model == "SS") {

  p <- data.frame(lambda = as.numeric(),
                DENV = as.character())

for(a in 1:4){
  p_temp <- data.frame(lambda = posterior$lambda[ , a],
                       DENV = paste0('DENV ', a))
  p <- rbind(p, p_temp)
}

# violin plot
lamb = ggplot(p, aes(x=DENV,y=lambda)) + geom_violin()+
  ggtitle(paste("FoI",model,"groupAge",groupAge,site,dataInput))+
  theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"))+
    ylim(0,0.05)+
  # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
  # geom_boxplot(width=0.1)+# add median and quartile
  stat_summary(fun.data=mean_sdl, 
                 geom="pointrange", color="red")#Add mean and standard deviation

ggsave(filename = paste("./output/FoI/Lambda/FOI",model,"groupAge",groupAge,site,dataInput,".png" ),plot=lamb)
lamb

}

## time-varyingFOI
if (model == "TV") {
  #create dataframe for violin plot:
p <- data.frame(lambda = as.numeric(),
                YEAR = as.character()) 


for(a in 1:(l_vecj/step_year)){
  p_temp <- if(step_year ==1){
    data.frame(
      lambda = posterior$lambda[ ,a],
      YEAR = paste0(2018-(step_year*(a-1)+1)))# 1 year
  }else{
    data.frame(
      lambda = posterior$lambda[ ,a],
      YEAR = paste0(2018-(step_year*(a-1)+1),"-",2018-(step_year*(a-1)+1)-(step_year-1)))# period

  }

  p <- rbind(p, p_temp)
}

# violin plot
lamb <- ggplot(p, aes(x=YEAR,y=lambda)) + geom_violin()+
  ggtitle(paste("FoI",model,step_year,"yr(s) groupAge",groupAge,site,dataInput))+
  ylim(-.01,0.2)+
  theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold", angle = 90, hjust = 1))+
  # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
  # geom_boxplot(width=0.1)+# add median and quartile
  stat_summary(fun.data=mean_sdl, 
               geom="pointrange", color="red")#Add mean and standard deviation
ggsave(filename=paste("./output/FoI/Lambda/FOI",model,"groupAge",groupAge,"step_year",step_year,site,dataInput,".png"), plot = lamb)

lamb
}


## time-varying serotype-specific FOI
if (model == "TVSS"){
  #create dataframe for violin plot:

p <- data.frame(lambda = as.numeric(),
                DENV = as.character())

for(a in 1:4){
  p_temp <- data.frame(lambda = posterior$lambda[,a,],
                       DENV = paste0('DENV ', a))
  p <- rbind(p, p_temp)
}


## Grouping less info. yrs >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# #1. renames colnames of dataframe p as year, group less info. yrs

#     if ( step_year == 1)
#     {names(p) <- c(2017:1988,"87-83","DENV")}
#     if ( step_year ==2){
#         names(p) <- c("2017-2016","2015-2014","2013-2012","2011-2010","2009-2008","2007-2006","2005-2004","2003-2002","2001-2000","1999-1998","1997-1996","1995-1994","1993-1992","1991-1990","1989-1988","1987-1986","1985-1983","DENV")}
#     if (step_year ==5){
#         names(p) <- c("2017-2013","2012-2008","2007-2003","2002-1998","1997-1993","1992-1988","1987-1983","DENV")
#     }


# Not grouping the very last years
#1.renames colnames of dataframe p as year, not grouping less info.yrs
if ( step_year == 1)# 35 lambda
{names(p) <- c(2017:1983,"DENV")}
if ( step_year ==2){# 18 lambda
  names(p) <- c("2017-2016","2015-2014","2013-2012","2011-2010","2009-2008","2007-2006","2005-2004","2003-2002","2001-2000","1999-1998","1997-1996","1995-1994","1993-1992","1991-1990","1989-1988","1987-1986","1985-1984","1983","DENV")}
if ( step_year ==3){# 12 lambda
  names(p) <- c("2017-2015","2014-2012","2011-2009","2008-2006","2005-2003","2002-2000","1999-1997","1996-1994","1993-1991","1990-1988","1987-1985","1984-1983","DENV")}
if ( step_year ==4){# 9 lambda
  names(p) <- c("2017-2014","2013-2010","2009-2006","2005-2002","2001-1998","1997-1994","1993-1990","1989-1986","1985-1983","DENV")}
if (step_year ==5){ #7 lambda
  names(p) <- c("2017-2013","2012-2008","2007-2003","2002-1998","1997-1993","1992-1988","1987-1983","DENV")
}


#2. violin plot, not grouping less info. yrs

    DV1 <- p[1:12000,-((l_vecj/step_year)+1)]
    DV2 <- p[(12001:24000),-((l_vecj/step_year)+1)]
    DV3 <- p[24001:36000,-((l_vecj/step_year)+1)]
    DV4 <- p[36001:48000,-((l_vecj/step_year)+1)]


# tranform data frame for ploting

library(reshape2)
DV1 <- reshape2::melt(DV1,id.vars=0) # transformed data frame
DV2 <- reshape2::melt(DV2,id.vars=0)
DV3 <- reshape2::melt(DV3,id.vars=0)
DV4 <- reshape2::melt(DV4,id.vars=0)

# DV1
p1 <- ggplot(DV1,aes(x=variable,y=value)) +
      geom_violin()+
      # geom_boxplot(width=0.1)+
        # ggtitle(paste("posterior lambda _ age varying_serotype specific",p_year$Site[1]))+
        theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
              axis.title.x = element_blank(),
              # axis.text.x = element_text(angle = 90, hjust = 0.5,
              axis.text.x = element_blank())+
        coord_cartesian(ylim=c(-0.01,0.3))+
        ylab("DENV1")+
        # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
        # geom_boxplot(width=0.1)+# add median and quartile
        stat_summary(fun.data=mean_sdl, 
                     geom="pointrange", color="red")#Add mean and standard deviation
#DV2   
p2 <- ggplot(DV2,aes(x=variable,y=value)) + 
      geom_violin()+
      # geom_boxplot(width=0.1)+
        # ggtitle(paste("posterior lambda _ age varying_serotype specific",p_year$Site[1]))+
        theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
              axis.title.x = element_blank(),
              # axis.text.x = element_text(angle = 90, hjust = 0.5,
              axis.text.x = element_blank())+
        coord_cartesian(ylim=c(-0.01,0.3))+
        ylab("DENV2")+
        # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
        # geom_boxplot(width=0.1)+# add median and quartile
        stat_summary(fun.data=mean_sdl, 
                     geom="pointrange", color="red")#Add mean and standard deviation

#DV3
p3 <- ggplot(DV3,aes(x=variable,y=value)) + 
      geom_violin()+
      # geom_boxplot(width=0.1)+
        # ggtitle(paste("posterior lambda _ age varying_serotype specific",p_year$Site[1]))+
        theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
              axis.title.x = element_blank(),
              axis.text.x = element_text(angle = 90,size=6,face = "bold"))+
              # axis.text.x = element_blank())+
        coord_cartesian(ylim=c(-0.01,0.3))+
        ylab("DENV3")+
        # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
        # geom_boxplot(width=0.1)+# add median and quartile
        stat_summary(fun.data=mean_sdl, 
                     geom="pointrange", color="red")#Add mean and standard deviation

#DV4
p4 <- ggplot(DV4,aes(x=variable,y=value)) +
      geom_violin()+
      # geom_boxplot(width=0.1)+
        # ggtitle(paste("posterior lambda _ age varying_serotype specific",p_year$Site[1]))+
        theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"),
              axis.title.x = element_blank(),
              axis.text.x = element_text(angle = 90,size=6,face = "bold"))+
        coord_cartesian(ylim=c(-0.01,0.3))+
        ylab("DENV4")+
        # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean
        # geom_boxplot(width=0.1)+# add median and quartile
        stat_summary(fun.data=mean_sdl, 
                     geom="pointrange", color="red")#Add mean and standard deviation


library(gridExtra)
library(grid)


lamb <- grid.arrange(p1,p2,p3,p4, ncol=2,nrow=2, top =textGrob(paste("FoI",model,step_year,"yr(s) groupAge",groupAge,site,dataInput),gp=gpar(fontsize=20,col = "blue")))

# save the figure
ggsave(filename=paste("./output/FoI/Lambda/FOI",model,"groupAge",groupAge,"step_year",step_year,site,dataInput,".png"), plot = lamb)
}