##Examining waning immunity in US FLU VE NETWORK data
library(psych)
require(ggplot2)
library(splines)
require(stats)
require(lattice)
library(arm)
library(interplot)
require(MatchIt)
require(survival)
require(matrixStats)
library(dplyr)
#input dataset
fluve <- read.csv("//cdc.gov/private/M310/zdn5/Flu VE AMBULATORY Network/Waning 2014/pooled/h3pool_22MAR16.csv", head=TRUE)
#define factors
fluve <- fluve %>% mutate(case.f = factor(case, levels = c(0,1), labels = c("controls", "cases")),
SVAC_ID.f = factor(SVAC_ID14, levels = c(0,1), labels = c("unvaccinated", "vaccinated")),
season.f = factor(season), days = as.numeric(days_flu), days_sq = days_flu*days_flu,
days2 = as.numeric(days_sq))
# fluve$case.f <-factor(fluve$case, levels = c(0,1), labels = c("controls", "cases"))
# fluve$SVAC_ID.f <-factor(fluve$SVAC_ID14, levels = c(0,1), labels = c("unvaccinated", "vaccinated"))
# fluve$season.f <-factor(fluve$season)
#Days since start of flu season
# fluve$days <-as.numeric(fluve$days_flu)
# fluve$days_sq <- fluve$days_flu*fluve$days_flu
# fluve$days2 <-as.numeric(fluve$days_sq)
############ CHECKING BALANCE OF CASES AND CONTROLS ON ONSET DATE #############################
histogram(~fluve$onset_percent|fluve$case.f, main="Pooled Influenza A(H3N2) cases and controls", xlab="Onset date percentile (%)",
col="grey")
########### GENERATING RESTRICTED DATASET WITH MATCHING ON ONSET OF ILLNESS ##############################
set.seed(100)
m.out4 = matchit(case ~ onset_percent, data = fluve, method = "nearest" , ratio = 3)
summary(m.out4)
plot(m.out4, type = "hist")
m.data4 <- match.data(m.out4)
#Onset percentile (variable onset_percent) is illness onset date of the observation defined
#relative to percentile of distribution of site-, season- and subtype-specific case onset dates
histogram(~fluve_new$onset_percent|fluve$case.f, main="Pooled Influenza A(H3N2) cases and controls (restricted)", xlab="Onset date percentile (%)",
col="grey")
#exporting matchded dataset from matchit
write.csv(m.data4, file = "//cdc.gov/private/M310/zdn5/Flu VE AMBULATORY Network/Waning 2014/pooled/h3pool_nearest3def1.csv")
#inputing matched dataset from matchit
fluve_new <- read.csv("//cdc.gov/private/M310/zdn5/Flu VE AMBULATORY Network/Waning 2014/pooled/h3pool_nearest3def1.csv",head=TRUE)
#################### CASES AND VACCINATION STATUS ######################################
nrow(fluve)
nrow(fluve_new)
table2 <- table(fluve_new$SVAC_ID14, fluve_new$case); table2
#######################################################
# MULTIVARIATE LOGISTIC REGRESSION
# Model with dichotomous vaccination only (no term for time since vaccination)
ve.logr1<- glm(case ~ agele18 + age50to64 + agege65
+ SVAC_ID14
+ any_hr_py + health_not_excel
+ spe2 + spe3
+ onset0to20 + onset20to40 + onset60to80 + onset80to100
+ UM + UP + SW + GH
+ female + BLACK + priorvx
+ seas12 + seas13 + seas14
+ days + days2, family=binomial("logit"), data=fluve_new)
exp(cbind(OR = coef(ve.logr1), confint(ve.logr1)))
########### DISTRIBUTIONS OF VACCINE INTERVAL FOR DEFINING SPLINES ############################
vacc.all <-subset(fluve_new, SVAC_ID14==1)
quantile(vacc.all$vaccint, probs = seq(0, 1, .1), na.rm = TRUE)
quantile(vacc.all$vaccint, probs = seq(.9, 1, .01), na.rm = TRUE)
qus <- quantile(vacc.all$vaccint, probs = c(0.10,0.98,0.99,0.80,0.20,0.50))
names(qus) <- c('qu0','qu1','qu2','qu3','qu4','qu5')
# qu0 <-quantile(vacc.all$vaccint, probs = 0.10);qu0
# qu1 <-quantile(vacc.all$vaccint, probs = 0.98);qu1
# qu2 <-quantile(vacc.all$vaccint, probs = 0.99);qu2
# qu3 <-quantile(vacc.all$vaccint, probs = 0.80);qu3
# qu4 <-quantile(vacc.all$vaccint, probs = 0.20);qu4
# qu5 <-quantile(vacc.all$vaccint, probs = 0.50);qu5
#Spline model - days from vaccination to onset modeled as spline
#Specification of spline knots based on comparison of model fit
#for numerous alternative specifications (not shown here)
ve.logr2<- glm(case ~ agele18 + age50to64 + agege65
+ SVAC_ID14 + ns(vaccint, knots=c(qu1, qu2))
+ any_hr_py + health_not_excel
+ spe2 + spe3
+ onset0to20 + onset20to40 + onset60to80 + onset80to100
+ UM + UP + SW + GH
+ female + BLACK + priorvx
+ seas12 + seas13 + seas14
+ days + days2, family=binomial("logit"), data=fluve_new)
#exp(cbind(OR = coef(ve.logr2), confint(ve.logr2)))
summary(ve.logr2)
aOR.avg <- exp(ve.logr1$coefficient[5])
aVE.avg <- (1 - aOR.avg)*100; aVE.avg
LH1 <- logLik(ve.logr1);LH1
LH2 <- logLik(ve.logr2);LH2
delta <- 2*(LH2[1] - LH1[1]);delta
#p.val <- pchisq(delta, 4, lower.tail=FALSE);p.val
#p.val <- pchisq(delta, 3, lower.tail=FALSE);p.val
##############################GRAPH#################################
#For VE graph
newdata2 = data.frame(agele18=0, age50to64=0, agege65=0, any_hr_py=0, health_not_excel=0, spe2=0, spe3=0,
onset0to20=0, onset20to40=0, onset60to80=0, onset80to100=0, days=0, days2=0,
UM=0, UP=0, SW=0, GH=0, female=0, BLACK=0, priorvx=0, seas12=0, seas13=0, seas14=0,
vaccint=seq(14,180,2), SVAC_ID14=1)
yhat=predict(ve.logr2, newdata2, type="response")
unvacc <- ve.logr2$coefficient[1]
unvacc.1 <- (exp(unvacc))/(1+exp(unvacc))
b <- rep((unvacc.1), times=84)
aOR <- (yhat/(1-yhat))/(b/(1-b))
aVE <- (1-aOR)*100
plot(newdata2$vaccint, aVE, ylim=c(-50,100), cex.main=0.95, cex.lab = 0.75, cex.sub=0.8, main="VE against influenza A(H3N2) \n NAT CU SPLINE (2 knots, 98th and 99th%) \n n=11,200",
type="l", xlab="days between vaccination & onset", ylab="adjusted VE (%)")
max(aVE)
min(aVE)
##Bootstrapped confidence intervals
myfun <- function(){
srows <- sample(1:nrow(fluve_new),nrow(fluve_new),TRUE)
glm.out <- glm(case ~ agele18 + age50to64 + agege65
+ SVAC_ID14 + ns(vaccint, knots=c(qu1, qu2))
+ any_hr_py + health_not_excel
+ spe2 + spe3
+ onset0to20 + onset20to40 + onset60to80 + onset80to100
+ UM + UP + SW + GH
+ female + BLACK + priorvx
+ seas12 + seas13 + seas14
+ days + days2, family=binomial("logit"), data=fluve_new[srows,])
yhat <- predict(glm.out, newdata = newdata2, "response")
unvacc <- ve.logr2$coefficient[1]
unvacc.1 <- (exp(unvacc))/(1+exp(unvacc))
b <- rep((unvacc.1), times=84)
aOR <- (yhat/(1-yhat))/(b/(1-b))
aVE <- (1-aOR)*100
return(aVE)
}
set.seed(5)
bootdist1 <- replicate(1000, myfun())
mean(bootdist2)
q <- rowQuantiles(bootdist1, probs=c(0.025, 0.975));q
ci.df <- as.data.frame(q)
ci.df$rownumber = 1:dim(ci.df)[1]
ci.df$vaccint <- 2*(ci.df$rownumber - 1) + 14
names(ci.df)[names(ci.df) == '2.5%'] <- 'ci.lo'
names(ci.df)[names(ci.df) == '97.5%'] <- 'ci.hi'
attach(ci.df)
# Apply loess smoothing using the default span value of 0.8.
y.loess <- loess(y ~ x, span=0.8, data.frame(x=vaccint, y=ci.lo))
y2.loess <-loess(y2 ~ x, span=0.8, data.frame(x=vaccint, y2=ci.hi))
# Compute loess smoothed values for all points along the curve
y.predict <- predict(y.loess, data.frame(x=vaccint))
y2.predict <- predict(y2.loess, data.frame(x=vaccint))
tiff(file = "Fig 2a H3 VE.tiff", width = 4600, height = 3200, units = "px", res = 800)
par(bty = 'l')
plot(newdata2$vaccint, aVE, ylim=c(-50,100), lwd=2, cex.main=0.80, cex.lab = 0.75, cex.sub=0.7,
main="Vaccine effectiveness against influenza A(H3N2) (n=11,200) \n p=0.004",
sub="", type="l", xlab="days between vaccination and onset",
ylab="adjusted VE (%)")
lines(vaccint, y2.predict, type="l", col="black", lty=3)
lines(vaccint,y.predict, type="l", col="black", lty=3)
#legend("topleft", inset=0.03, legend=c("VE", "95% CI"),
#col=c("black", "black"), lty=1:3, cex=0.65, box.lty=0)
legend("bottomleft", inset=0.02, legend=c("VE", "95% CI"),
col=c("black", "black"), lty=1:3, cex=0.65, box.lty=0)
dev.off()
detach(ci.df)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.