#######################################################
### CODE TO ANALYZE CHMI AB DATA FOLLOWING THE SAP ###
#######################################################
# Start: 2017-06-16
rm(list=ls())
library(Hmisc)
library(plyr)
library(compareGroups)
manual.theme <- theme(panel.background = element_rect(fill = "white",
colour = NA), panel.border = element_rect(fill = NA,
colour = "grey20"), panel.grid.major = element_line(colour = "grey92"),
panel.grid.minor = element_line(colour = "grey92",
size = 0.25), strip.background = element_rect(fill = "grey85",
colour = "grey20"), legend.key = element_rect(fill = "white",
colour = NA), complete = TRUE, plot.title = element_text(hjust = 0.5))
fcn.pvalue.display<-function(p.vec, show.p = FALSE){
p.vec.plot<-as.character(rep(NA,length(p.vec)))
if(any((p.vec >= .01 & p.vec <= .04) | p.vec >= .06)==TRUE) {
p.vec.plot<-as.character(round(p.vec,2))
p.vec.plot[nchar(as.character(p.vec.plot))==3 & p.vec.plot > 0.1]<-paste(as.character(p.vec.plot)[nchar(as.character(p.vec.plot))==3 & p.vec.plot > 0.1],"0",sep="")
}
p.vec.plot<-ifelse ((p.vec >= 0.001 & p.vec < 0.01) | (p.vec > 0.04 & p.vec < 0.06), as.character(round(p.vec,3)),p.vec.plot)
if(show.p){
p.vec.plot[p.vec < 0.001]<-c("P<0.001")
p.vec.plot[p.vec >= 0.001]<-c(paste0("P=", p.vec.plot))
} else {
p.vec.plot[p.vec < 0.001]<-c("<0.001")
}
return(p.vec.plot)
}
localpath <- "/Users/hsanz/Dropbox (M067)/m067_team/working_areas/MALIMMUNO Team Support/3. CHMI study Antibodies"
############################################################################################################################################
## load("/Users/hsanz/Dropbox (M067)/m067_team/working_areas/MALIMMUNO Team Support/3. CHMI study Antibodies/Data/1. All isotypes data 20171107/All_data_20171218.Rdata")
load('~/datasets_ori/v_17.12.18_chmi/v_17.12.18_chmi_dat.RData')
### SOME RECODES
#################
dat <- chmi_ab_l
names(dat) <- tolower(names(dat))
dim(dat)
head(dat)
xxx <- unique(dat[,c("original_id", "dataset", "immune_status", "status")])
with(xxx, table(status, dataset))
with(xxx, table(immune_status, dataset))
## Some recodes
dat$timepoint <- as.character(dat$timepoint)
dat$timepoint <- with(dat, ifelse(dataset=="T2" & timepoint=="DM", "D11", timepoint))
dat$hb <- as.character(dat$hb)
dat$hb <- with(dat, ifelse(dataset=="L1", "", hb))
dat$cvac_dose <- with(dat, ifelse(dataset=="T2", cvac_dose, NA))
dat$hb_status <- NULL
dat$hb_status_benjamin <- NULL
dat$pre_patent_period <- as.character(dat$pre_patent_period)
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-005" & dataset=="T1", "9.5",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-018" & dataset=="T1", "7",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-039" & dataset=="T1", "11",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-040" & dataset=="T1", "8",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-041" & dataset=="T1", "9",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-043" & dataset=="T1", "7.5",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-044" & dataset=="T1", "10",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-045" & dataset=="T1", "11.5",pre_patent_period))
dat$pre_patent_period <- with(dat, ifelse(original_id=="T1-048" & dataset=="T1", "12.5",pre_patent_period))
dat$gender <- ""
dat$gender <- with(dat, ifelse(original_id=="T1-005" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-018" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-039" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-040" & dataset=="T1", "F",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-041" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-043" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-044" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-045" & dataset=="T1", "M",gender))
dat$gender <- with(dat, ifelse(original_id=="T1-048" & dataset=="T1", "F",gender))
datx <- dat
datx$log10_mfi <- NULL
datx$batch <- NULL
datx$hb <- NULL
datx$cvac_dose <- NULL
datx$pre_patent_period <- with(datx, ifelse(is.na(pre_patent_period), "NA", pre_patent_period))
datx$pcr.pos <- with(datx, ifelse(is.na(pcr.pos), 999, pcr.pos))
datx <- datx[, c("original_id", "timepoint","ig","study_number",
"pre_patent_period", "dataset", "antigen","mfi","pcr.pos",
"immune_status", "status", "gender", "malaria")]
datx <- aggregate(mfi ~ original_id + timepoint + ig +
+ study_number + malaria
+ pre_patent_period + dataset + antigen + pcr.pos + immune_status
+ status + gender, FUN = mean, data=datx)
xxx <- as.data.frame(with(datx[datx$antigen=="CSP",], table(original_id,ig,study_number, timepoint)))
xxx[xxx$Freq>1,]
dat <- datx
dat$log10_mfi <- log10(dat$mfi)
#### PREPARE DATASET FOR ANALYSIS
##################################
tmp.dat <- subset(dat, status!="Naive" | dataset!="T2")
tmp.dat <- subset(tmp.dat, antigen%nin%c("BSA", "agal"))
tmp.dat$status <- with(tmp.dat, ifelse(status=="Naive", "Naive",
ifelse(status=="PfSPZ_CVac_vaccinated", "Vaccinated",
ifelse(status=="Semi_immune", "Semi-immune","XXX"))))
tmp.dat$status <- factor(tmp.dat$status, levels=c("Naive", "Vaccinated", "Semi-immune"))
lantigen <- sort(unique(tmp.dat$antigen))
with(dat, table(status, dataset))
with(tmp.dat, table(status, dataset))
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
########################################
########################################
########################################
### AIM 1 Analysis
########################################
########################################
########################################
## Differences in MFIs before challenge
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text){
ff.pval <- function(isotype, lantigen){
ans <- ldply(lantigen, function(x){
ff.dat <- dataset[dataset$ig==isotype & dataset$antigen==x,]
ff.ans <- try(oneway.test(log10_mfi ~ status, data=ff.dat)$p.value, silent=TRUE)
if(class(ff.ans)!="try-error") ans <- data.frame( isotype=isotype, antigen=x, raw.pvalue=ff.ans)
if(class(ff.ans)=="try-error") ans <- data.frame( isotype=isotype, antigen=x, raw.pvalue=NA)
return(ans)})
return(ans)
}
pvals <- ff.pval(isotype, lantigen)
pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")
pvals$adj.pvalue <- sapply(pvals$adj.pvalue, function(x) {
if(!is.na(x)) ans <- fcn.pvalue.display(x,show.p=TRUE)
if(is.na(x)) ans <- "--"
return(ans)
})
#pvals$adj.pvalue.text <- pvals$adj.pvalue
ymax <- max(dataset[dataset$ig==isotype, "log10_mfi"], na.rm=TRUE) * 1.2
q <- ggplot(dataset[dataset$ig==isotype, ], aes(status, log10_mfi))
q <- q + geom_boxplot(outlier.size = NA) + geom_jitter(width = 0.2, height = 0, color="black", alpha=0.25)
q <- q + facet_wrap( ~ antigen) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))
q <- q + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + xlab("Status") + ylab("log10(MFI)")
q <- q + ggtitle(isotype) + geom_text(data=pvals, aes(x=x.text, y=y.text, label=adj.pvalue), size=3)
q <- q + ylim(y.text - (abs(y.text)*0.85), ymax)
ans <- list(q = q, pvals = pvals)
return(ans)
}
igg <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG", lantigen, 2, 2.2)
ig1 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG1", lantigen, 2, 0.5)
ig2 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG2", lantigen, 2, 1)
ig3 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG3", lantigen, 2, 0.5)
ig4 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG4", lantigen, 2, -2)
igm <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgM", lantigen, 2, 1.5)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "before_challenge_all.pdf"), width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
###########################
## Stratify by dataset ##
##########################
## Differences in MFIs before challenge
ff.figure <- function(f.data, isotype, lantigen){
q <- ggplot(f.data[f.data$ig==isotype, ], aes(status, log10_mfi, color=dataset))
q <- q + geom_boxplot(outlier.size = NA) + geom_jitter(width = 0.2, height = 0, alpha=0.25)
q <- q + facet_wrap( ~ antigen) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))
q <- q + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + xlab("Status") + ylab("log10(MFI)")
q <- q + ggtitle(isotype)
ans <- list(q = q)
return(ans)
}
igg <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG", lantigen)
ig1 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG1", lantigen)
ig2 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG2", lantigen)
ig3 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG3", lantigen)
ig4 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG4", lantigen)
igm <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgM", lantigen)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "before_challenge_dataset.pdf"),
width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
#############################################################
# After challenge within timepoint: only three timepoints
#############################################################
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text){
ff.pval <- function(isotype, lantigen, timepointx){
ans <- ldply(lantigen, function(x){
ff.dat <- dataset[dataset$ig==isotype & dataset$timepoint2c==timepointx & dataset$antigen==x,]
ff.ans <- try(oneway.test(log10_mfi ~ status, data=ff.dat)$p.value, silent=TRUE)
if(class(ff.ans)!="try-error") ans <- data.frame( isotype=isotype, timepoint2c=timepointx, antigen=x, raw.pvalue=ff.ans)
if(class(ff.ans)=="try-error") ans <- data.frame( isotype=isotype, timepoint2c=timepointx,antigen=x, raw.pvalue=NA)
return(ans)})
return(ans)
}
pvals <- ldply(c("D7", "D11-D13", "D28") ,function(x) ff.pval(isotype, lantigen, x))
pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")
pvals$adj.pvalue <- sapply(pvals$adj.pvalue, function(x) {
if(!is.na(x)) ans <- fcn.pvalue.display(x,show.p=TRUE)
if(is.na(x)) ans <- "--"
return(ans)
})
pvals$adj.pvalue.text <- pvals$adj.pvalue
pvals$log10_mfi <- y.text
pvals$status <- NA
ymax <- max(dataset[dataset$ig==isotype, "log10_mfi"], na.rm=TRUE) * 1.2
q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status))
q <- q + geom_boxplot(outlier.size = NA)
q <- q + geom_point(position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0), alpha=0.25)
q <- q + facet_wrap(~antigen) + scale_color_discrete(name = "Status",
breaks=c("Naive", "Vaccinated", "Semi-immune"),
labels=c("Naive", "Vaccinated", "Semi-immune"))
q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
q <- q + xlab("Timepoint") + ylab("log10(MFI)")
q <- q + geom_text(data=pvals, aes(x=timepoint2c, y=log10_mfi, label=adj.pvalue.text), size=3)
q <- q + ylim(y.text - (abs(y.text)*0.85), ymax)
ans <- list(q = q, pvals = pvals)
return(ans)
}
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("D7", "D11-D13", "D28"))
igg <- ff.figure(dataset, "IgG", lantigen, NA, 1.5)
ig1 <- ff.figure(dataset, "IgG1", lantigen, 2, 0.5)
ig2 <- ff.figure(dataset, "IgG2", lantigen, 2, 1)
ig3 <- ff.figure(dataset, "IgG3", lantigen, 2, 0.5)
ig4 <- ff.figure(dataset, "IgG4", lantigen, 2, -2)
igm <- ff.figure(dataset, "IgM", lantigen, 2, 1.5)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_timepoint_all.pdf"),
width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
#############################################################
# After challenge within timepoint: only four timepoints
#############################################################
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text){
ff.pval <- function(isotype, lantigen, timepointx){
ans <- ldply(lantigen, function(x){
ff.dat <- dataset[dataset$ig==isotype & dataset$timepoint2c==timepointx & dataset$antigen==x,]
ff.ans <- try(oneway.test(log10_mfi ~ status, data=ff.dat)$p.value, silent=TRUE)
if(class(ff.ans)!="try-error") ans <- data.frame( isotype=isotype, timepoint2c=timepointx, antigen=x, raw.pvalue=ff.ans)
if(class(ff.ans)=="try-error") ans <- data.frame( isotype=isotype, timepoint2c=timepointx,antigen=x, raw.pvalue=NA)
return(ans)})
return(ans)
}
pvals <- ldply(c("C-1","D7", "D11-D13", "D28") ,function(x) ff.pval(isotype, lantigen, x))
pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")
pvals$adj.pvalue <- sapply(pvals$adj.pvalue, function(x) {
if(!is.na(x)) ans <- fcn.pvalue.display(x,show.p=TRUE)
if(is.na(x)) ans <- "--"
return(ans)
})
pvals$adj.pvalue.text <- pvals$adj.pvalue
pvals$log10_mfi <- y.text
pvals$status <- NA
ymax <- max(dataset[dataset$ig==isotype, "log10_mfi"], na.rm=TRUE) * 1.2
q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status))
q <- q + geom_boxplot(outlier.size = NA)
q <- q + geom_point(position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0), alpha=0.25)
q <- q + facet_wrap(~antigen) + scale_color_discrete(name = "Status",
breaks=c("Naive", "Vaccinated", "Semi-immune"),
labels=c("Naive", "Vaccinated", "Semi-immune"))
q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
q <- q + xlab("Timepoint") + ylab("log10(MFI)")
q <- q + geom_text(data=pvals, aes(x=timepoint2c, y=log10_mfi, label=adj.pvalue.text), size=3)
q <- q + ylim(y.text - (abs(y.text)*0.85), ymax)
ans <- list(q = q, pvals = pvals)
return(ans)
}
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("C-1","D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("C-1","D7", "D11-D13", "D28"))
igg <- ff.figure(dataset, "IgG", lantigen, NA, 1.5)
ig1 <- ff.figure(dataset, "IgG1", lantigen, 2, 0.5)
ig2 <- ff.figure(dataset, "IgG2", lantigen, 2, 1)
ig3 <- ff.figure(dataset, "IgG3", lantigen, 2, 0.5)
ig4 <- ff.figure(dataset, "IgG4", lantigen, 2, -2)
igm <- ff.figure(dataset, "IgM", lantigen, 2, 1.5)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_timepoint_all_v02.pdf"),
width = 14, height = 9)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
######################################################
# After challenge within Status: three timepoints
######################################################
ff.figure <- function(dataset, isotype, lantigen){
summ.group <- ddply(dataset[dataset$ig==isotype, ], .(antigen,timepoint2c, status), summarise, med = median(log10_mfi))
summ.group$original_id <- NA
q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status, group=original_id))
q <- q + geom_line(alpha=0.2)
# q <- q + geom_point(alpha=0.2)
q <- q + facet_wrap(~antigen) + scale_color_discrete(name = "Status",
breaks=c("Naive", "Vaccinated", "Semi-immune"),
labels=c("Naive", "Vaccinated", "Semi-immune"))
q <- q + geom_point(data=summ.group, aes(timepoint2c, med, color=status), size=2)
q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
q <- q + xlab("Timepoint") + ylab("log10(MFI)")
ans <- list(q = q)
return(ans)
}
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("D7", "D11-D13", "D28"))
igg <- ff.figure(dataset, "IgG", lantigen)
ig1 <- ff.figure(dataset, "IgG1", lantigen)
ig2 <- ff.figure(dataset, "IgG2", lantigen)
ig3 <- ff.figure(dataset, "IgG3", lantigen)
ig4 <- ff.figure(dataset, "IgG4", lantigen)
igm <- ff.figure(dataset, "IgM", lantigen)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_status_all.pdf"),
width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
######################################################
# After challenge within Status: four timepoints
######################################################
ff.figure <- function(dataset, isotype, lantigen){
summ.group <- plyr::ddply(dataset[dataset$ig==isotype, ], .(antigen,timepoint2c, status), summarise, med = median(log10_mfi))
summ.group$original_id <- NA
q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status, group=original_id))
q <- q + geom_line(alpha=0.2)
# q <- q + geom_point(alpha=0.2)
q <- q + facet_wrap(~antigen) + scale_color_discrete(name = "Status",
breaks=c("Naive", "Vaccinated", "Semi-immune"),
labels=c("Naive", "Vaccinated", "Semi-immune"))
q <- q + geom_point(data=summ.group, aes(timepoint2c, med, color=status), size=2)
q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
q <- q + xlab("Timepoint") + ylab("log10(MFI)")
ans <- list(q = q)
return(ans)
}
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("C-1","D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("C-1","D7", "D11-D13", "D28"))
igg <- ff.figure(dataset, "IgG", lantigen)
ig1 <- ff.figure(dataset, "IgG1", lantigen)
ig2 <- ff.figure(dataset, "IgG2", lantigen)
ig3 <- ff.figure(dataset, "IgG3", lantigen)
ig4 <- ff.figure(dataset, "IgG4", lantigen)
igm <- ff.figure(dataset, "IgM", lantigen)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_status_all_v02.pdf"),
width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
######################################################
# After challenge within Status: five timepoints
######################################################
ff.figure <- function(dataset, isotype, lantigen){
summ.group <- plyr::ddply(dataset[dataset$ig==isotype, ], .(antigen,timepoint2c, status), summarise, med = median(log10_mfi))
summ.group$original_id <- NA
q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status, group=original_id))
q <- q + geom_line(alpha=0.2)
# q <- q + geom_point(alpha=0.2)
q <- q + facet_wrap(~antigen) + scale_color_discrete(name = "Status",
breaks=c("Naive", "Vaccinated", "Semi-immune"),
labels=c("Naive", "Vaccinated", "Semi-immune"))
q <- q + geom_point(data=summ.group, aes(timepoint2c, med, color=status), size=2)
q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
q <- q + xlab("Timepoint") + ylab("log10(MFI)")
ans <- list(q = q)
return(ans)
}
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("C-1","D7", "D11-D13", "D28", "D84"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("C-1","D7", "D11-D13", "D28", "D84"))
igg <- ff.figure(dataset, "IgG", lantigen)
ig1 <- ff.figure(dataset, "IgG1", lantigen)
ig2 <- ff.figure(dataset, "IgG2", lantigen)
ig3 <- ff.figure(dataset, "IgG3", lantigen)
ig4 <- ff.figure(dataset, "IgG4", lantigen)
igm <- ff.figure(dataset, "IgM", lantigen)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_status_all_v03.pdf"),
width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
## P for trend test
#######################
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("D7", "D11-D13", "D28"))
ff <- function(f.data){
ans <- createTable(compareGroups(timepoint2c ~ log10_mfi, data=f.data, method=2), show.p.trend = TRUE)
ans <- as.numeric(ans$descr[, "p.trend"])
return(ans)
}
summ.ans <- ddply(dataset, .(ig, antigen, status), ff)
names(summ.ans)[4] <- "raw.pvalue"
ligs <- c("IgG", "IgG1", "IgG2", "IgG3", "IgG4", "IgM")
adj.pvalue <- unlist(lapply(ligs, function(x) p.adjust(summ.ans[summ.ans$ig==x, "raw.pvalue"], method="BH")))
adj.pvalue <- sapply(adj.pvalue, function(x){
if(!is.na(x)) ans <- fcn.pvalue.display(x)
if(is.na(x)) ans <- NA
return(ans)
})
summ.ans$adj.pvalue <- adj.pvalue
summ.ans.wide <- reshape(summ.ans, timevar = "ig", idvar = c("antigen", "status"),
v.names = c("raw.pvalue", "adj.pvalue"),
direction="wide")
write.csv(summ.ans.wide,
file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "pvalue_after_challenge_within_status_all.csv"),
row.names = FALSE, na = "--")
########################
## FOLD CHANGE
########################
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28", "C-1"),]
# compute fold change
dataset.wide <- dataset
dataset.wide$timepoint <- NULL
dataset.wide$log10_mfi <- NULL
dataset.wide <- dataset.wide[, c("original_id", "ig", "study_number","antigen", "mfi", "timepoint2c", "status")]
dataset.wide <- reshape(data = dataset.wide, direction="wide",
idvar = c("original_id", "ig", "antigen", "study_number", "status"),
v.names = c("mfi") ,
timevar = "timepoint2c")
dataset.wide$mfi.D7 <- dataset.wide[, "mfi.D7"] / dataset.wide[, "mfi.C-1"]
dataset.wide[, "mfi.D11-D13"] <- dataset.wide[, "mfi.D11-D13"] / dataset.wide[, "mfi.C-1"]
dataset.wide$mfi.D28 <- dataset.wide[, "mfi.D28"] / dataset.wide[, "mfi.C-1"]
dataset.ratio <- reshape::melt(data=dataset.wide,
id.vars=c("original_id", "ig", "antigen", "study_number", "status"))
dataset.ratio <- subset(dataset.ratio, variable!="mfi.C-1")
dataset.ratio$timepoint2c <- gsub(pattern = "mfi.", replacement = "", x = dataset.ratio$variable, fixed=TRUE)
### Miquel VÃ zquez-Santiago
dataset.ratio <- dataset.ratio %>%
mutate(dataset = factor(
ifelse(str_detect(original_id, 'L1'), 'L1',
ifelse(str_detect(original_id, 'T1'), 'T1',
ifelse(str_detect(original_id, 'T2'), 'T2', 'NA'))),
levels = c('L1', 'T1', 'T2')))
# function.to.plot
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text){
ff.pval <- function(isotype, lantigen, timepointx){
ans <- ldply(lantigen, function(x){
ff.dat <- dataset[dataset$ig==isotype & dataset$timepoint2c==timepointx & dataset$antigen==x,]
ff.ans <- try(oneway.test(value ~ status, data=ff.dat)$p.value, silent=TRUE)
if(class(ff.ans)!="try-error") ans <- data.frame( isotype=isotype, timepoint2c=timepointx, antigen=x, raw.pvalue=ff.ans)
if(class(ff.ans)=="try-error") ans <- data.frame( isotype=isotype, timepoint2c=timepointx,antigen=x, raw.pvalue=NA)
return(ans)})
return(ans)
}
pvals <- ldply(c("D7", "D11-D13", "D28") ,function(x) ff.pval(isotype, lantigen, x))
pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")
pvals$adj.pvalue <- sapply(pvals$adj.pvalue, function(x) {
if(!is.na(x)) ans <- fcn.pvalue.display(x,show.p=TRUE)
if(is.na(x)) ans <- "--"
return(ans)
})
pvals$adj.pvalue.text <- pvals$adj.pvalue
pvals$value <- y.text
pvals$status <- NA
ymax <- max(dataset[dataset$ig==isotype, "value"], na.rm=TRUE) * 1.2
q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, value, color=status))
q <- q + geom_boxplot(outlier.size = NA) + scale_y_log10(breaks=c(0.01,1,100,1000), labels=c(0.01,1,100,1000))
q <- q + geom_point(position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0), alpha=0.25)
q <- q + facet_wrap(~antigen) + scale_color_discrete(name = "Status",
breaks=c("Naive", "Vaccinated", "Semi-immune"),
labels=c("Naive", "Vaccinated", "Semi-immune"))
q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
q <- q + xlab("Timepoint") + ylab("MFI ratio (Timepoint/C-1)")
q <- q + geom_text(data=pvals, aes(x=timepoint2c, y=value, label=adj.pvalue.text), size=3)
# q <- q + ylim(log10(y.text) - (abs(log10(y.text))*0.85), log10(ymax))
ans <- list(q = q, pvals = pvals)
return(ans)
}
dataset.ratio$timepoint2c <- factor(dataset.ratio$timepoint2c, levels =c("D7", "D11-D13", "D28"))
igg <- ff.figure(dataset.ratio, "IgG", lantigen, 2, 0.1)
ig1 <- ff.figure(dataset.ratio, "IgG1", lantigen, 2, 0.001)
ig2 <- ff.figure(dataset.ratio, "IgG2", lantigen, 2, 0.01)
ig3 <- ff.figure(dataset.ratio, "IgG3", lantigen, 2, 0.01)
ig4 <- ff.figure(dataset.ratio, "IgG4", lantigen, 2, 0.001)
igm <- ff.figure(dataset.ratio, "IgM", lantigen, 2, 0.01)
pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_timepoint_all_fold_change.pdf"),
width = 12, height = 8)
igg$q
ig1$q
ig2$q
ig3$q
ig4$q
igm$q
dev.off()
###########################
### Mixed models: FOLD CHANGE
###########################
library(nlme)
library(lmerTest)
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28", "C-1"),]
dataset$newtimepoint <- with(dataset, ifelse(timepoint2c=="C-1",0,
ifelse(timepoint2c=="D7",7,
ifelse(timepoint2c=="D11-D13",12,
ifelse(timepoint2c=="D28",28,NA)))))
with(dataset, table(ig))
with(dataset, table(antigen))
with(dataset, table(status))
with(dataset, table(dataset))
with(dataset, table(newtimepoint))
head(dataset)
#
# # test
# aux.data <- dataset[dataset$status=="Naive",]
# aux.data$status <- as.character(aux.data$status)
# x1 <- aux.data
# x1$status <- "vaccinated"
# x1$log10_mfi <- x1$log10_mfi + 0.01
# x2 <- aux.data
# x2$status <- "semmiimmune"
# x2$log10_mfi <- x2$log10_mfi + 0.02
#
# x3 <- rbind(aux.data, x1, x2)
# x3 <- rbind(x3, x3, x3,x3,x3,x3)
#
#
# x3$dummy <- rnorm(n = nrow(x3), mean=0, sd=1)
#
# mod <- lmer(log10_mfi ~ newtimepoint*status + (newtimepoint|original_id), data=aux.data[aux.data$newtimepoint%nin%c(28), ])
# summary(mod)
#
# # mod0 <- lm(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ])
# # mod1 <- lm(log10_mfi ~ 1, data=x3[x3$newtimepoint%nin%c(28), ])
# # summary(mod0)
# # anova(mod1, mod0, test="F")
# # oneway.test(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ],var.equal = TRUE)
# # car::linearHypothesis(mod0, rbind(b1=c(0,1,0), b2=c(0,0,1)))
# # car::linearHypothesis(mod0, "1*statussemmiimmune - 1*statusvaccinated = 0")
#
#
# mod0 <- lm(log10_mfi ~ status + dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# mod1 <- lm(log10_mfi ~ 1 + dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# summary(mod0)
# anova(mod1, mod0, test="F")
# oneway.test(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ],var.equal = TRUE)
#
#
# mod0 <- lm(log10_mfi ~ status + dummy + status:dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# mod1 <- lm(log10_mfi ~ 1 + dummy + status:dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# anova(mod1, mod0, test="F")
# oneway.test(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0), b2=c(0,0,1,0)))
#
#
# mod0 <- lm(log10_mfi ~ status + newtimepoint + status:newtimepoint, data=x3[x3$newtimepoint%nin%c(28), ])
# summary(mod0)
# summary(glht(mod0, linfct = c("statussemmiimmune = 0", "statusvaccinated = 0")))
#
#
#
# xxx <- x3[complete.cases(x3),]
# mod0 <- lm(log10_mfi ~ status + newtimepoint + status:newtimepoint, data=xxx)
# summary(mod0)
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(0), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,0,0), b2=c(0,0,1,0,0,0)))
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(12), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,12,0), b2=c(0,0,1,0,0,12)))
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(28), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,0,0), b2=c(0,0,1,0,0,0),b3=c(0,0,0,0,28,0), b4=c(0,0,0,0,0,28)))
#
# xxx <- x3[complete.cases(x3),]
# xxx <- xxx[xxx$newtimepoint%in%c(0,12,28),]
# mod0 <- lm(log10_mfi ~ status + as.factor(newtimepoint) + status:as.factor(newtimepoint), data=xxx)
# summary(mod0)
#
# mod0 <- lm(log10_mfi ~ status + (newtimepoint) + status:(newtimepoint), data=xxx)
# summary(mod0)
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(0), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,rep(0,3)), b2=c(0,0,1,rep(0,3))))
#
# car::linearHypothesis(mod0, rbind(b1=c(0,1,rep(0,7)), b2=c(0,0,1,rep(0,6))))
#
#
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(12), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,12,0), b2=c(0,0,1,0,0,12)))
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(28), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,0,0), b2=c(0,0,1,0,0,0),b3=c(0,0,0,0,28,0), b4=c(0,0,0,0,0,28)))
#
#
#
#
### All models and isotype
ff1 <- function(model){
# Slope Naive
a <- summary(glht(model,linfct=rbind(b1=c(0,1,0,0,0,0))))
a1.est <- round(a$test$coefficients, 3)
a1.pvalue <- as.numeric(a$test$pvalues)
# Slope Vaccinated
a <- summary(glht(model,linfct=rbind(b1=c(0,1,0,0,1,0))))
a2.est <- round(a$test$coefficients, 3)
a2.pvalue <- as.numeric(a$test$pvalues)
a2.pvalue.int <- summary(model)$coefficients[5,5]
# Slope Semi-Immune
a <- summary(glht(model,linfct=rbind(b1=c(0,1,0,0,0,1))))
a3.est <- round(a$test$coefficients, 3)
a3.pvalue <- as.numeric(a$test$pvalues)
a3.pvalue.int <- summary(model)$coefficients[6,5]
# Summary
ans <- data.frame(a1.est, a1.pvalue, a2.est, a2.pvalue, a3.est, a3.pvalue,
a2.pvalue.int, a3.pvalue.int)
return(ans)
}
ff2 <- function(model){
# Slope Naive
a <- summary(glht(model,linfct=rbind(b1=c(0,1,0,0))))
a1.est <- round(a$test$coefficients, 3)
a1.pvalue <- as.numeric(a$test$pvalues)
# Slope Semi-Immune
a <- summary(glht(model,linfct=rbind(b1=c(0,1,0,1))))
a2.est <- round(a$test$coefficients, 3)
a2.pvalue <- as.numeric(a$test$pvalues)
a2.pvalue.int <- summary(model)$coefficients[4,5]
# Summary
ans <- data.frame(a1.est, a1.pvalue, a2.est, a2.pvalue,
a2.pvalue.int)
return(ans)
}
ans.dat.two <- list()
ans.dat.three <- list()
zz <- 0
for(i in c("IgG", "IgG1", "IgG2", "IgG3", "IgG4", "IgM") ){
z <- 0
ans1 <- list()
ans2 <- list()
for(j in lantigen){
z <- z + 1
aux.data <- subset(dataset, ig==i & antigen==j) # INCLUDE BASELINE. THINK BASELINE
if(nrow(aux.data)!=0){
# Two timepoints D7 vs D11-D13 (three status)
mod <- lmer(log10_mfi ~ newtimepoint*status + (newtimepoint|original_id), data=aux.data[aux.data$newtimepoint%nin%c(28), ])
ans <- ff1(mod)
ans$isotype <- i
ans$antigen <- j
ans$model <- "two.timepoints"
ans1[[z]] <- ans
# Three timepoints D7 vs D11-D13 (two status)
mod <- lmer(log10_mfi ~ newtimepoint*status + (newtimepoint | original_id), data=aux.data[aux.data$status%nin%"Vaccinated", ])
ans <- try(ff2(mod), silent = TRUE)
if(inherits(ans, "try-error")){
ans <- data.frame(a1.est=NA, a1.pvalue=NA, a2.est=NA,
a2.pvalue=NA, a2.pvalue.int=NA)}
ans$isotype <- i
ans$antigen <- j
ans$model <- "three.timepoints"
ans2[[z]] <- ans
}
}
zz <- zz + 1
ans1 <- ldply(ans1)
ans1[, c(2,4,6,7,8)] <- sapply(c(2,4,6,7,8), function(x) fcn.pvalue.display(p.adjust(ans1[,x], method="BH")))
ans1 <- ans1[,c(9,10, 1:8)]
ans.dat.two[[zz]] <- ans1
ans2 <- ldply(ans2)
ans2[, c(2,4,5)] <- sapply(c(2,4,5), function(x) fcn.pvalue.display(p.adjust(ans2[,x], method="BH")))
ans2 <- ans2[,c(6,7, 1:5)]
ans.dat.three[[zz]] <- ans2
}
ans.dat.two.summ <- ans.dat.two
ans.dat.three.summ <- ans.dat.three
#################################
### Export models to word
#################################
library(ReporteRs)
#########################################################
## Create Tables in Word describing
#########################################################
mydoc = docx(file.path(localpath, "Results", "Paper", "aim1", "mfi", "mixed_models.docx"))
xvector <- c("IgG", "IgG1", "IgG2", "IgG3", "IgG4","IgGM")
z <- 0
for(i in 1:6){
z <- z + 1
my_text = pot(paste0("Table ", z, ". Mixed models (log10MFI response models) by antigen fitting the three status and two timepoints, D7 and D11-D13 (including C-1 information) for) ", xvector[i] , " isotype. Coefficient and adjusted p value is shown (reference: Naive)."))
mydoc = addParagraph( mydoc, value = my_text, stylename = "Normal")
mydoc = addParagraph( mydoc, value = "", stylename = "Normal")
ftab = FlexTable(ans.dat.two.summ[[i]] ,add.rownames = FALSE, header.columns=FALSE,
body.par.props = parCenter(),
header.par.props = parCenter(),
body.text.props = textProperties( font.size = 8))
ftab = addHeaderRow( ftab, text.properties = textProperties( font.size = 8, font.weight='bold'),
value = c("", "","Slope Naive","Slope Vaccinated", "Slope Semi-immune", "Int. Vaccinated", "Int. Semi-immune"),
colspan = c(1,1,2,2,2,1,1), par.properties = parCenter())
ftab = addHeaderRow( ftab, text.properties = textProperties( font.size = 8, font.weight='bold'),
value = c("Isotype", "Antigen",rep(c("Coef", "p value"),3), "pvalue", "pvalue"),
colspan = rep(1,10), par.properties = parCenter())
mydoc = addFlexTable( mydoc, ftab, par.properties = parCenter())
z <- z + 1
mydoc <- addPageBreak(mydoc)
my_text = pot(paste0("Table ", z,". Mixed models (log10MFI response models) by antigen fitting the two status (Naive and Semi-immune) and three timepoints, D7, D11-13 and D28 (including C-1 information) for ", xvector[i], "isotype. Coefficient and raw p value is shown (reference: Naive)."))
mydoc = addParagraph( mydoc, value = my_text, stylename = "Normal")
mydoc = addParagraph( mydoc, value = "", stylename = "Normal")
ftab = FlexTable(ans.dat.three.summ[[i]] ,add.rownames = FALSE, header.columns=FALSE,
body.par.props = parCenter(),
header.par.props = parCenter(),
body.text.props = textProperties( font.size = 8))
ftab = addHeaderRow( ftab, text.properties = textProperties( font.size = 8, font.weight='bold'),
value = c("", "","Slope Naive", "Slope Semi-immune", "Int. Semi-immune"),
colspan = c(1,1,2,2,1), par.properties = parCenter())
ftab = addHeaderRow( ftab, text.properties = textProperties( font.size = 8, font.weight='bold'),
value = c("Isotype", "Antigen",rep(c("Coef", "p value"),2), "pvalue"),
colspan = rep(1,7), par.properties = parCenter())
mydoc = addFlexTable( mydoc, ftab, par.properties = parCenter())
mydoc <- addPageBreak(mydoc)
}
writeDoc( mydoc, file = file.path(localpath, "Results", "Paper", "aim1", "mfi", "mixed_models.docx"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.