# projects/01_aim1/r_old/v_18.12.04_chmi_aim_01.R

#######################################################
### CODE TO ANALYZE CHMI AB DATA FOLLOWING THE SAP ###
#######################################################

# Start: 2017-06-16
rm(list=ls())
library(Hmisc)
library(plyr)
library(compareGroups)

# Complete plotting theme: white panel and legend keys, dark-grey panel border,
# light-grey grid lines, grey facet strips and a horizontally centred title.
manual.theme <- theme(
  panel.background = element_rect(fill = "white", colour = NA),
  panel.border = element_rect(fill = NA, colour = "grey20"),
  panel.grid.major = element_line(colour = "grey92"),
  panel.grid.minor = element_line(colour = "grey92", size = 0.25),
  strip.background = element_rect(fill = "grey85", colour = "grey20"),
  legend.key = element_rect(fill = "white", colour = NA),
  complete = TRUE,
  plot.title = element_text(hjust = 0.5)
)

# Format p-values as display strings.
#
# Arguments:
#   p.vec   numeric vector of p-values (NA allowed).
#   show.p  if TRUE, prefix each label with "P=" (or use "P<0.001").
#
# Returns a character vector of the same length as `p.vec`:
#   p < 0.001                      -> "<0.001"
#   0.001 <= p < 0.01              -> rounded to 3 decimals
#   0.01 <= p <= 0.04, p >= 0.06   -> rounded to 2 decimals
#   0.04 < p < 0.06                -> rounded to 3 decimals
#   NA                             -> NA
fcn.pvalue.display <- function(p.vec, show.p = FALSE) {

  p.vec.plot <- rep(NA_character_, length(p.vec))

  # which() drops NA comparisons, so missing p-values simply stay NA.
  two.dec <- which((p.vec >= .01 & p.vec <= .04) | p.vec >= .06)
  if (length(two.dec) > 0) {
    txt <- as.character(round(p.vec[two.dec], 2))
    # Pad one-decimal strings such as "0.5" to "0.50". The comparison is a
    # string comparison, so "0.1" itself is left unpadded.
    # NOTE(review): confirm whether "0.1" was meant to become "0.10" as well.
    pad <- nchar(txt) == 3 & txt > "0.1"
    txt[pad] <- paste0(txt[pad], "0")
    p.vec.plot[two.dec] <- txt
  }

  three.dec <- which((p.vec >= 0.001 & p.vec < 0.01) | (p.vec > 0.04 & p.vec < 0.06))
  p.vec.plot[three.dec] <- as.character(round(p.vec[three.dec], 3))

  tiny <- which(p.vec < 0.001)
  if (show.p) {
    # Index both sides with the same positions: assigning the full-length
    # paste0() result into a subset misaligns the labels for vector input.
    shown <- which(p.vec >= 0.001)
    p.vec.plot[shown] <- paste0("P=", p.vec.plot[shown])
    p.vec.plot[tiny] <- "P<0.001"
  } else {
    p.vec.plot[tiny] <- "<0.001"
  }
  return(p.vec.plot)
}


# Root folder under which the result files of this script are written.
localpath <- "/Users/hsanz/Dropbox (M067)/m067_team/working_areas/MALIMMUNO Team Support/3. CHMI study Antibodies"

############################################################################################################################################
## load("/Users/hsanz/Dropbox (M067)/m067_team/working_areas/MALIMMUNO Team Support/3. CHMI study Antibodies/Data/1. All isotypes data 20171107/All_data_20171218.Rdata")
# The .RData file is expected to provide `chmi_ab_l`, which is used just below.
load(file = '~/datasets_ori/v_17.12.18_chmi/v_17.12.18_chmi_dat.RData')



### SOME RECODES
#################

# Working copy with lower-case column names.
dat <- setNames(chmi_ab_l, tolower(names(chmi_ab_l)))

dim(dat)
head(dat)

# Distinct id / dataset / immune_status / status combinations, cross-tabulated
# against dataset as a quick sanity check.
xxx <- dat[, c("original_id", "dataset", "immune_status", "status")]
xxx <- xxx[!duplicated(xxx), , drop = FALSE]
table(status = xxx$status, dataset = xxx$dataset)
table(immune_status = xxx$immune_status, dataset = xxx$dataset)


## Some recodes
# In dataset T2 the "DM" timepoint is relabelled "D11".
dat$timepoint <- as.character(dat$timepoint)
dat$timepoint <- with(dat, ifelse(dataset=="T2" & timepoint=="DM", "D11", timepoint))

# hb is blanked for L1; cvac_dose is kept only for T2.
dat$hb <- as.character(dat$hb)
dat$hb <- with(dat, ifelse(dataset=="L1", "", hb))
dat$cvac_dose <- with(dat, ifelse(dataset=="T2", cvac_dose, NA))
dat$hb_status <- NULL
dat$hb_status_benjamin <- NULL

# Manual pre-patent period and gender values for nine T1 participants, applied
# from a single lookup table instead of one ifelse() per id and variable.
# Rows are selected with match(), so a missing id or dataset leaves the row
# untouched rather than turning the recoded column into NA.
dat <- local({
  t1.fix <- data.frame(
    original_id = c("T1-005", "T1-018", "T1-039", "T1-040", "T1-041",
                    "T1-043", "T1-044", "T1-045", "T1-048"),
    pre_patent_period = c("9.5", "7", "11", "8", "9",
                          "7.5", "10", "11.5", "12.5"),
    gender = c("M", "M", "M", "F", "M",
               "M", "M", "M", "F"),
    stringsAsFactors = FALSE
  )

  hit <- match(as.character(dat$original_id), t1.fix$original_id)
  sel <- which(!is.na(hit) & dat$dataset == "T1")

  dat$pre_patent_period <- as.character(dat$pre_patent_period)
  dat$pre_patent_period[sel] <- t1.fix$pre_patent_period[hit[sel]]

  # Gender is an empty string for every row except the listed T1 participants.
  dat$gender <- ""
  dat$gender[sel] <- t1.fix$gender[hit[sel]]

  dat
})


# Collapse the data to one mean MFI per combination of the identifying
# variables listed in the aggregate() formula below, then recompute log10(MFI).
datx <- dat
datx$log10_mfi <- NULL
datx$batch <- NULL
datx$hb <- NULL
datx$cvac_dose <- NULL
# Missing values are replaced by sentinels ("NA" as text, 999) before
# aggregating; presumably because the formula interface of aggregate() drops
# rows with a missing value in any variable -- TODO confirm intent.
# NOTE(review): nothing in this block converts the sentinels back, so `dat`
# carries pre_patent_period == "NA" and pcr.pos == 999 from here on.
datx$pre_patent_period <- with(datx, ifelse(is.na(pre_patent_period), "NA", pre_patent_period))
datx$pcr.pos <- with(datx, ifelse(is.na(pcr.pos), 999, pcr.pos))


# Keep only the columns that take part in the aggregation.
datx <- datx[, c("original_id", "timepoint","ig","study_number",
                 "pre_patent_period", "dataset", "antigen","mfi","pcr.pos",
                 "immune_status", "status", "gender", "malaria")]

# Mean MFI within each combination of the right-hand-side variables.
# (The "+" that ends the first line followed by the leading "+" on the next
# one is parsed as a unary plus and does not change the formula terms.)
datx <- aggregate(mfi ~  original_id + timepoint + ig +
                 + study_number + malaria
                 + pre_patent_period + dataset + antigen + pcr.pos + immune_status
                 + status + gender, FUN = mean, data=datx)


# Duplicate check on CSP: lists any id / isotype / study_number / timepoint
# combination that still has more than one row after aggregation.
xxx <- as.data.frame(with(datx[datx$antigen=="CSP",], table(original_id,ig,study_number, timepoint)))
xxx[xxx$Freq>1,]



# From here on `dat` is the aggregated data, with log10(MFI) recomputed from
# the averaged MFI.
dat <- datx
dat$log10_mfi <- log10(dat$mfi)


#### PREPARE DATASET FOR ANALYSIS
##################################
# Drop the naive participants of dataset T2 and the BSA / agal antigens.
tmp.dat <- subset(dat,
                  !(status == "Naive" & dataset == "T2") &
                    !(antigen %in% c("BSA", "agal")))

# Relabel status and fix the level order used in the figures; any value other
# than the three listed ones becomes NA.
tmp.dat$status <- factor(tmp.dat$status,
                         levels = c("Naive", "PfSPZ_CVac_vaccinated", "Semi_immune"),
                         labels = c("Naive", "Vaccinated", "Semi-immune"))

# Antigens kept for analysis, in sorted order.
lantigen <- sort(unique(tmp.dat$antigen))

with(dat, table(status, dataset))
with(tmp.dat, table(status, dataset))

# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled under the
# single label "D11-D13"; every other timepoint keeps its own label.
tmp.dat$timepoint2c <- with(
  tmp.dat,
  ifelse((timepoint == "D13" & dataset == "L1") |
           (timepoint == "DM" & dataset == "T1") |
           (timepoint == "D11" & dataset == "T2"),
         "D11-D13", timepoint)
)




########################################
########################################
########################################
### AIM 1 Analysis
########################################
########################################
########################################

## Differences in MFIs before challenge
# Box plots of log10(MFI) by status for one isotype, one facet per antigen.
#
# Arguments:
#   dataset   data frame with columns ig, antigen, status and log10_mfi.
#   isotype   value of `ig` to plot.
#   lantigen  antigens for which a p-value is computed.
#   x.text, y.text  position of the p-value label inside each facet.
#
# For each antigen the statuses are compared with oneway.test() on log10_mfi;
# the p-values are BH-adjusted across antigens and shown as "P=..." labels
# ("--" when the test could not be computed).
#
# Returns a list with the ggplot object (`q`) and the p-value table (`pvals`).
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text){
      # Raw p-value per antigen; a failing test is recorded as NA.
      ff.pval <- function(isotype, lantigen){
        ldply(lantigen, function(x){
                ff.dat <- dataset[dataset$ig==isotype & dataset$antigen==x,]
                ff.ans <- try(oneway.test(log10_mfi ~ status, data=ff.dat)$p.value, silent=TRUE)
                if (inherits(ff.ans, "try-error")) ff.ans <- NA
                data.frame(isotype=isotype, antigen=x, raw.pvalue=ff.ans)
        })
      }

      pvals <- ff.pval(isotype, lantigen)
      pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")

      # Turn the adjusted p-values into display labels.
      pvals$adj.pvalue <- vapply(pvals$adj.pvalue, function(x) {
        if (is.na(x)) "--" else fcn.pvalue.display(x, show.p=TRUE)
      }, character(1))

      # Upper y limit: 20% above the largest observed value.
      ymax <- max(dataset[dataset$ig==isotype, "log10_mfi"], na.rm=TRUE) * 1.2

      q <- ggplot(dataset[dataset$ig==isotype, ], aes(status, log10_mfi))
      # outlier.shape = NA hides the box-plot outliers; the raw points are
      # drawn by geom_jitter() instead.
      q <- q + geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.2, height = 0, color="black", alpha=0.25)
      q <- q + facet_wrap( ~ antigen) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))
      q <- q + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + xlab("Status") + ylab("log10(MFI)")
      q <- q + ggtitle(isotype) + geom_text(data=pvals, aes(x=x.text, y=y.text,  label=adj.pvalue), size=3)
      # Lower limit sits below y.text so the label stays inside the panel.
      q <- q + ylim(y.text - (abs(y.text)*0.85), ymax)

      list(q = q, pvals = pvals)
}


# One figure per isotype from the pre-challenge (C-1) records; the two trailing
# numbers are the x / y position of the p-value label.
igg <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG", lantigen, 2, 2.2)
ig1 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG1", lantigen, 2, 0.5)
ig2 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG2", lantigen, 2, 1)
ig3 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG3", lantigen, 2, 0.5)
ig4 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG4", lantigen, 2, -2)
igm <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgM", lantigen, 2, 1.5)


pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "before_challenge_all.pdf"), width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()

###########################
## Stratify by dataset   ##
##########################
## Differences in MFIs before challenge
# Box plots of log10(MFI) by status for one isotype, coloured by dataset and
# faceted by antigen.
#
# Arguments:
#   f.data    data frame with columns ig, antigen, status, dataset, log10_mfi.
#   isotype   value of `ig` to plot.
#   lantigen  not used by this version; kept so existing calls keep working.
#
# Returns a list holding the ggplot object (`q`).
ff.figure <- function(f.data, isotype, lantigen){

      q <- ggplot(f.data[f.data$ig==isotype, ], aes(status, log10_mfi, color=dataset))
      # outlier.shape = NA hides the box-plot outliers; the raw points are
      # drawn by geom_jitter() instead.
      q <- q + geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.2, height = 0, alpha=0.25)
      q <- q + facet_wrap( ~ antigen) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))
      q <- q + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + xlab("Status") + ylab("log10(MFI)")
      q <- q + ggtitle(isotype)
      list(q = q)
}



# One dataset-stratified figure per isotype from the pre-challenge (C-1) records.
igg <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG", lantigen)
ig1 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG1", lantigen)
ig2 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG2", lantigen)
ig3 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG3", lantigen)
ig4 <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgG4", lantigen)
igm <- ff.figure(tmp.dat[tmp.dat$timepoint=="C-1",], "IgM", lantigen)



pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "before_challenge_dataset.pdf"),
    width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()


#############################################################
# After challenge within timepoint: only three timepoints
#############################################################
# Box plots of log10(MFI) by timepoint and status for one isotype, one facet
# per antigen, with a between-status p-value printed at each timepoint.
#
# Arguments:
#   dataset     data frame with columns ig, antigen, status, timepoint2c and
#               log10_mfi.
#   isotype     value of `ig` to plot.
#   lantigen    antigens for which p-values are computed.
#   x.text      not used by this version; kept so existing calls keep working.
#   y.text      y position of the p-value labels.
#   timepoints  timepoint2c values that are tested; defaults to the three
#               post-challenge timepoints.
#
# Statuses are compared with oneway.test() for every timepoint / antigen pair;
# the p-values are BH-adjusted over all of those tests together.
#
# Returns a list with the ggplot object (`q`) and the p-value table (`pvals`).
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text,
                      timepoints = c("D7", "D11-D13", "D28")){
      # Raw p-value per antigen at one timepoint; a failing test gives NA.
      ff.pval <- function(isotype, lantigen, timepointx){
        ldply(lantigen, function(x){
                ff.dat <- dataset[dataset$ig==isotype & dataset$timepoint2c==timepointx & dataset$antigen==x,]
                ff.ans <- try(oneway.test(log10_mfi ~ status, data=ff.dat)$p.value, silent=TRUE)
                if (inherits(ff.ans, "try-error")) ff.ans <- NA
                data.frame(isotype=isotype, timepoint2c=timepointx, antigen=x, raw.pvalue=ff.ans)
        })
      }

      pvals <- ldply(timepoints, function(x) ff.pval(isotype, lantigen, x))
      pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")

      # Turn the adjusted p-values into display labels.
      pvals$adj.pvalue <- vapply(pvals$adj.pvalue, function(x) {
        if (is.na(x)) "--" else fcn.pvalue.display(x, show.p=TRUE)
      }, character(1))

      pvals$adj.pvalue.text <-  pvals$adj.pvalue
      pvals$log10_mfi <- y.text
      # The text layer inherits color = status from the plot, so the label
      # table needs a status column as well.
      pvals$status <- NA

      # Upper y limit: 20% above the largest observed value.
      ymax <- max(dataset[dataset$ig==isotype, "log10_mfi"], na.rm=TRUE) * 1.2

      q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status))
      # outlier.shape = NA hides the box-plot outliers; the raw points are
      # drawn by the jitter-dodged point layer instead.
      q <- q + geom_boxplot(outlier.shape = NA)
      q <- q + geom_point(position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0), alpha=0.25)
      q <- q + facet_wrap(~antigen) +  scale_color_discrete(name = "Status",
                                                            breaks=c("Naive", "Vaccinated", "Semi-immune"),
                                                            labels=c("Naive", "Vaccinated", "Semi-immune"))
      q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
      q <- q +  xlab("Timepoint") + ylab("log10(MFI)")
      q <- q  + geom_text(data=pvals, aes(x=timepoint2c, y=log10_mfi,  label=adj.pvalue.text), size=3)
      q <- q + ylim(y.text - (abs(y.text)*0.85), ymax)

      list(q = q, pvals = pvals)
}


# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled as "D11-D13".
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
                                     ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
                                     ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))

# Keep the three post-challenge timepoints, ordered for the x axis.
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("D7", "D11-D13", "D28"))


igg <- ff.figure(dataset, "IgG", lantigen, NA, 1.5)
ig1 <- ff.figure(dataset, "IgG1", lantigen, 2, 0.5)
ig2 <- ff.figure(dataset, "IgG2", lantigen, 2, 1)
ig3 <- ff.figure(dataset, "IgG3", lantigen, 2, 0.5)
ig4 <- ff.figure(dataset, "IgG4", lantigen, 2, -2)
igm <- ff.figure(dataset, "IgM", lantigen, 2, 1.5)


pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_timepoint_all.pdf"),
    width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()




#############################################################
# After challenge within timepoint: only four timepoints
#############################################################
# Box plots of log10(MFI) by timepoint and status for one isotype, one facet
# per antigen, with a between-status p-value printed at each timepoint.
#
# Arguments:
#   dataset     data frame with columns ig, antigen, status, timepoint2c and
#               log10_mfi.
#   isotype     value of `ig` to plot.
#   lantigen    antigens for which p-values are computed.
#   x.text      not used by this version; kept so existing calls keep working.
#   y.text      y position of the p-value labels.
#   timepoints  timepoint2c values that are tested; defaults to C-1 plus the
#               three post-challenge timepoints.
#
# Statuses are compared with oneway.test() for every timepoint / antigen pair;
# the p-values are BH-adjusted over all of those tests together.
#
# Returns a list with the ggplot object (`q`) and the p-value table (`pvals`).
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text,
                      timepoints = c("C-1", "D7", "D11-D13", "D28")){
      # Raw p-value per antigen at one timepoint; a failing test gives NA.
      ff.pval <- function(isotype, lantigen, timepointx){
        ldply(lantigen, function(x){
                ff.dat <- dataset[dataset$ig==isotype & dataset$timepoint2c==timepointx & dataset$antigen==x,]
                ff.ans <- try(oneway.test(log10_mfi ~ status, data=ff.dat)$p.value, silent=TRUE)
                if (inherits(ff.ans, "try-error")) ff.ans <- NA
                data.frame(isotype=isotype, timepoint2c=timepointx, antigen=x, raw.pvalue=ff.ans)
        })
      }

      pvals <- ldply(timepoints, function(x) ff.pval(isotype, lantigen, x))
      pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")

      # Turn the adjusted p-values into display labels.
      pvals$adj.pvalue <- vapply(pvals$adj.pvalue, function(x) {
        if (is.na(x)) "--" else fcn.pvalue.display(x, show.p=TRUE)
      }, character(1))

      pvals$adj.pvalue.text <-  pvals$adj.pvalue
      pvals$log10_mfi <- y.text
      # The text layer inherits color = status from the plot, so the label
      # table needs a status column as well.
      pvals$status <- NA

      # Upper y limit: 20% above the largest observed value.
      ymax <- max(dataset[dataset$ig==isotype, "log10_mfi"], na.rm=TRUE) * 1.2

      q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, log10_mfi, color=status))
      # outlier.shape = NA hides the box-plot outliers; the raw points are
      # drawn by the jitter-dodged point layer instead.
      q <- q + geom_boxplot(outlier.shape = NA)
      q <- q + geom_point(position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0), alpha=0.25)
      q <- q + facet_wrap(~antigen) +  scale_color_discrete(name = "Status",
                                                            breaks=c("Naive", "Vaccinated", "Semi-immune"),
                                                            labels=c("Naive", "Vaccinated", "Semi-immune"))
      q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
      q <- q +  xlab("Timepoint") + ylab("log10(MFI)")
      q <- q  + geom_text(data=pvals, aes(x=timepoint2c, y=log10_mfi,  label=adj.pvalue.text), size=3)
      q <- q + ylim(y.text - (abs(y.text)*0.85), ymax)

      list(q = q, pvals = pvals)
}


# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled as "D11-D13".
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
                                     ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
                                     ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))

# Keep C-1 plus the three post-challenge timepoints, ordered for the x axis.
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("C-1","D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("C-1","D7", "D11-D13", "D28"))


igg <- ff.figure(dataset, "IgG", lantigen, NA, 1.5)
ig1 <- ff.figure(dataset, "IgG1", lantigen, 2, 0.5)
ig2 <- ff.figure(dataset, "IgG2", lantigen, 2, 1)
ig3 <- ff.figure(dataset, "IgG3", lantigen, 2, 0.5)
ig4 <- ff.figure(dataset, "IgG4", lantigen, 2, -2)
igm <- ff.figure(dataset, "IgM", lantigen, 2, 1.5)


pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_timepoint_all_v02.pdf"),
    width = 14, height = 9)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()

######################################################
# After challenge within Status: three timepoints
######################################################
# Individual log10(MFI) trajectories over timepoint2c for one isotype, one
# facet per antigen, with the per-status median overlaid as larger points.
# `lantigen` is accepted but not used. Returns a list holding the plot (`q`).
ff.figure <- function(dataset, isotype, lantigen) {
  iso.dat <- dataset[dataset$ig == isotype, ]

  # Median per antigen / timepoint / status. The original_id column is added
  # because the median layer inherits group = original_id from the plot.
  medians <- ddply(iso.dat, .(antigen, timepoint2c, status), summarise,
                   med = median(log10_mfi))
  medians$original_id <- NA

  trajectories <- ggplot(iso.dat, aes(timepoint2c, log10_mfi, color = status,
                                      group = original_id)) +
    geom_line(alpha = 0.2) +
    facet_wrap(~antigen) +
    scale_color_discrete(name = "Status",
                         breaks = c("Naive", "Vaccinated", "Semi-immune"),
                         labels = c("Naive", "Vaccinated", "Semi-immune")) +
    geom_point(data = medians, aes(timepoint2c, med, color = status), size = 2) +
    theme_bw() +
    ggtitle(isotype) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab("Timepoint") +
    ylab("log10(MFI)")

  list(q = trajectories)
}


# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled as "D11-D13".
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
                                     ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
                                     ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))

# Keep the three post-challenge timepoints, ordered for the x axis.
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("D7", "D11-D13", "D28"))


igg <- ff.figure(dataset, "IgG", lantigen)
ig1 <- ff.figure(dataset, "IgG1", lantigen)
ig2 <- ff.figure(dataset, "IgG2", lantigen)
ig3 <- ff.figure(dataset, "IgG3", lantigen)
ig4 <- ff.figure(dataset, "IgG4", lantigen)
igm <- ff.figure(dataset, "IgM", lantigen)

pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_status_all.pdf"),
    width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()



######################################################
# After challenge within Status: four timepoints
######################################################
# Individual log10(MFI) trajectories over timepoint2c for one isotype, one
# facet per antigen, with the per-status median overlaid as larger points.
# `lantigen` is accepted but not used. Returns a list holding the plot (`q`).
ff.figure <- function(dataset, isotype, lantigen) {
  iso.dat <- dataset[dataset$ig == isotype, ]

  # Median per antigen / timepoint / status. The original_id column is added
  # because the median layer inherits group = original_id from the plot.
  medians <- plyr::ddply(iso.dat, .(antigen, timepoint2c, status), summarise,
                         med = median(log10_mfi))
  medians$original_id <- NA

  trajectories <- ggplot(iso.dat, aes(timepoint2c, log10_mfi, color = status,
                                      group = original_id)) +
    geom_line(alpha = 0.2) +
    facet_wrap(~antigen) +
    scale_color_discrete(name = "Status",
                         breaks = c("Naive", "Vaccinated", "Semi-immune"),
                         labels = c("Naive", "Vaccinated", "Semi-immune")) +
    geom_point(data = medians, aes(timepoint2c, med, color = status), size = 2) +
    theme_bw() +
    ggtitle(isotype) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab("Timepoint") +
    ylab("log10(MFI)")

  list(q = trajectories)
}


# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled as "D11-D13".
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
                                     ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
                                     ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))

# Keep C-1 plus the three post-challenge timepoints, ordered for the x axis.
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("C-1","D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("C-1","D7", "D11-D13", "D28"))


igg <- ff.figure(dataset, "IgG", lantigen)
ig1 <- ff.figure(dataset, "IgG1", lantigen)
ig2 <- ff.figure(dataset, "IgG2", lantigen)
ig3 <- ff.figure(dataset, "IgG3", lantigen)
ig4 <- ff.figure(dataset, "IgG4", lantigen)
igm <- ff.figure(dataset, "IgM", lantigen)

pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_status_all_v02.pdf"),
    width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()



######################################################
# After challenge within Status: five timepoints
######################################################
# Individual log10(MFI) trajectories over timepoint2c for one isotype, one
# facet per antigen, with the per-status median overlaid as larger points.
# `lantigen` is accepted but not used. Returns a list holding the plot (`q`).
ff.figure <- function(dataset, isotype, lantigen) {
  iso.dat <- dataset[dataset$ig == isotype, ]

  # Median per antigen / timepoint / status. The original_id column is added
  # because the median layer inherits group = original_id from the plot.
  medians <- plyr::ddply(iso.dat, .(antigen, timepoint2c, status), summarise,
                         med = median(log10_mfi))
  medians$original_id <- NA

  trajectories <- ggplot(iso.dat, aes(timepoint2c, log10_mfi, color = status,
                                      group = original_id)) +
    geom_line(alpha = 0.2) +
    facet_wrap(~antigen) +
    scale_color_discrete(name = "Status",
                         breaks = c("Naive", "Vaccinated", "Semi-immune"),
                         labels = c("Naive", "Vaccinated", "Semi-immune")) +
    geom_point(data = medians, aes(timepoint2c, med, color = status), size = 2) +
    theme_bw() +
    ggtitle(isotype) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab("Timepoint") +
    ylab("log10(MFI)")

  list(q = trajectories)
}


# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled as "D11-D13".
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
                                     ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
                                     ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))

# Keep five timepoints (C-1 through D84), ordered for the x axis.
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("C-1","D7", "D11-D13", "D28", "D84"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("C-1","D7", "D11-D13", "D28", "D84"))


igg <- ff.figure(dataset, "IgG", lantigen)
ig1 <- ff.figure(dataset, "IgG1", lantigen)
ig2 <- ff.figure(dataset, "IgG2", lantigen)
ig3 <- ff.figure(dataset, "IgG3", lantigen)
ig4 <- ff.figure(dataset, "IgG4", lantigen)
igm <- ff.figure(dataset, "IgM", lantigen)

pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_status_all_v03.pdf"),
    width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()

## P for trend test
#######################
# Harmonised timepoint: D13 (L1), DM (T1) and D11 (T2) are pooled as "D11-D13".
tmp.dat$timepoint2c <- with(tmp.dat, ifelse(timepoint=="D13" & dataset=="L1", "D11-D13",
                                     ifelse(timepoint=="DM" & dataset=="T1", "D11-D13",
                                     ifelse(timepoint=="D11" & dataset=="T2", "D11-D13",timepoint))))

dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28"),]
dataset$timepoint2c <- factor(dataset$timepoint2c, levels =c("D7", "D11-D13", "D28"))


# p-for-trend of log10_mfi across the ordered timepoints for one subset of the
# data, taken from the "p.trend" column of the compareGroups table.
ff <- function(f.data){
  ans <- createTable(compareGroups(timepoint2c ~ log10_mfi, data=f.data, method=2), show.p.trend = TRUE)
  ans <- as.numeric(ans$descr[, "p.trend"])
  return(ans)
}


# One raw p-value per isotype / antigen / status.
summ.ans <- ddply(dataset, .(ig, antigen, status), ff)
names(summ.ans)[4]  <- "raw.pvalue"
ligs <- c("IgG", "IgG1", "IgG2", "IgG3", "IgG4",  "IgM")

# BH adjustment within each isotype. ave() writes each adjusted value back to
# the row it came from, so the result does not depend on summ.ans being sorted
# in the same order as `ligs`, nor on every isotype being listed there.
adj.pvalue <- ave(summ.ans$raw.pvalue, summ.ans$ig,
                  FUN = function(p) p.adjust(p, method = "BH"))
adj.pvalue <- vapply(adj.pvalue, function(x){
  if (is.na(x)) NA_character_ else fcn.pvalue.display(x)
}, character(1))

summ.ans$adj.pvalue <- adj.pvalue

# One row per antigen / status, one pair of p-value columns per isotype.
summ.ans.wide <- reshape(summ.ans, timevar = "ig", idvar = c("antigen", "status"),
                         v.names = c("raw.pvalue", "adj.pvalue"),
                         direction="wide")

write.csv(summ.ans.wide,
          file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "pvalue_after_challenge_within_status_all.csv"),
          row.names = FALSE, na = "--")




########################
## FOLD CHANGE
########################
# Records at C-1 and the three post-challenge timepoints.
dataset <- tmp.dat[tmp.dat$timepoint2c%in%c("D7", "D11-D13", "D28", "C-1"),]


# compute fold change
# Wide layout: one row per participant / isotype / antigen, one mfi.<timepoint>
# column per timepoint.
dataset.wide <- dataset[, c("original_id", "ig", "study_number","antigen", "mfi", "timepoint2c", "status")]
dataset.wide <- reshape(data = dataset.wide, direction="wide",
                    idvar = c("original_id", "ig",  "antigen", "study_number", "status"),
                    v.names = c("mfi") ,
                    timevar = "timepoint2c")

# Replace each post-challenge MFI by its ratio to the C-1 value.
dataset.wide$mfi.D7 <- dataset.wide[, "mfi.D7"] / dataset.wide[, "mfi.C-1"]
dataset.wide[, "mfi.D11-D13"] <- dataset.wide[, "mfi.D11-D13"] / dataset.wide[, "mfi.C-1"]
dataset.wide$mfi.D28 <- dataset.wide[, "mfi.D28"] / dataset.wide[, "mfi.C-1"]

# Back to long layout; the C-1 column itself is dropped and the "mfi." prefix
# is stripped to recover the timepoint label.
dataset.ratio <- reshape::melt(data=dataset.wide,
                               id.vars=c("original_id", "ig",  "antigen", "study_number", "status"))

dataset.ratio <- subset(dataset.ratio, variable!="mfi.C-1")
dataset.ratio$timepoint2c <- gsub(pattern = "mfi.", replacement = "", x = dataset.ratio$variable, fixed=TRUE)


### Miquel Vàzquez-Santiago
# Dataset label recovered from the participant id. Written in base R because
# the script never attaches the packages providing %>%, mutate() or
# str_detect(). Ids matching none of the three codes end up as NA, since "NA"
# is not one of the factor levels.
dataset.ratio$dataset <- factor(
  with(dataset.ratio,
       ifelse(grepl("L1", original_id), "L1",
       ifelse(grepl("T1", original_id), "T1",
       ifelse(grepl("T2", original_id), "T2", "NA")))),
  levels = c("L1", "T1", "T2"))


# function.to.plot
# Box plots of the MFI ratio (timepoint / C-1) by timepoint and status for one
# isotype, on a log10 y axis, one facet per antigen, with a between-status
# p-value printed at each timepoint.
#
# Arguments:
#   dataset     data frame with columns ig, antigen, status, timepoint2c and
#               value (the ratio).
#   isotype     value of `ig` to plot.
#   lantigen    antigens for which p-values are computed.
#   x.text      not used by this version; kept so existing calls keep working.
#   y.text      y position of the p-value labels (on the ratio scale).
#   timepoints  timepoint2c values that are tested; defaults to the three
#               post-challenge timepoints.
#
# Statuses are compared with oneway.test() on the untransformed ratio for every
# timepoint / antigen pair; p-values are BH-adjusted over all of those tests.
#
# Returns a list with the ggplot object (`q`) and the p-value table (`pvals`).
ff.figure <- function(dataset, isotype, lantigen, x.text, y.text,
                      timepoints = c("D7", "D11-D13", "D28")){
      # Raw p-value per antigen at one timepoint; a failing test gives NA.
      ff.pval <- function(isotype, lantigen, timepointx){
        ldply(lantigen, function(x){
                ff.dat <- dataset[dataset$ig==isotype & dataset$timepoint2c==timepointx & dataset$antigen==x,]
                ff.ans <- try(oneway.test(value ~ status, data=ff.dat)$p.value, silent=TRUE)
                if (inherits(ff.ans, "try-error")) ff.ans <- NA
                data.frame(isotype=isotype, timepoint2c=timepointx, antigen=x, raw.pvalue=ff.ans)
        })
      }

      pvals <- ldply(timepoints, function(x) ff.pval(isotype, lantigen, x))
      pvals$adj.pvalue <- p.adjust(pvals$raw.pvalue, method = "BH")

      # Turn the adjusted p-values into display labels.
      pvals$adj.pvalue <- vapply(pvals$adj.pvalue, function(x) {
        if (is.na(x)) "--" else fcn.pvalue.display(x, show.p=TRUE)
      }, character(1))

      pvals$adj.pvalue.text <-  pvals$adj.pvalue
      pvals$value <- y.text
      # The text layer inherits color = status from the plot, so the label
      # table needs a status column as well.
      pvals$status <- NA

      q <- ggplot(dataset[dataset$ig==isotype, ], aes(timepoint2c, value, color=status))
      # outlier.shape = NA hides the box-plot outliers; the raw points are
      # drawn by the jitter-dodged point layer instead.
      q <- q + geom_boxplot(outlier.shape = NA) + scale_y_log10(breaks=c(0.01,1,100,1000), labels=c(0.01,1,100,1000))
      q <- q + geom_point(position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0), alpha=0.25)
      q <- q + facet_wrap(~antigen) +  scale_color_discrete(name = "Status",
                                                            breaks=c("Naive", "Vaccinated", "Semi-immune"),
                                                            labels=c("Naive", "Vaccinated", "Semi-immune"))
      q <- q + theme_bw() + ggtitle(isotype) + theme(plot.title = element_text(hjust = 0.5))
      q <- q +  xlab("Timepoint") + ylab("MFI ratio (Timepoint/C-1)")
      q <- q  + geom_text(data=pvals, aes(x=timepoint2c, y=value,  label=adj.pvalue.text), size=3)
      # No explicit y limits are set here; the range is left to scale_y_log10().

      list(q = q, pvals = pvals)
}

# Order the timepoints for the x axis.
dataset.ratio$timepoint2c <- factor(dataset.ratio$timepoint2c, levels =c("D7", "D11-D13", "D28"))


# One fold-change figure per isotype; the last number is the y position of the
# p-value labels on the ratio scale.
igg <- ff.figure(dataset.ratio, "IgG", lantigen, 2, 0.1)
ig1 <- ff.figure(dataset.ratio, "IgG1", lantigen, 2, 0.001)
ig2 <- ff.figure(dataset.ratio, "IgG2", lantigen, 2, 0.01)
ig3 <- ff.figure(dataset.ratio, "IgG3", lantigen, 2, 0.01)
ig4 <- ff.figure(dataset.ratio, "IgG4", lantigen, 2, 0.001)
igm <- ff.figure(dataset.ratio, "IgM", lantigen, 2, 0.01)

pdf(file=file.path(localpath, "Results", "Paper", "aim1", "mfi", "after_challenge_within_timepoint_all_fold_change.pdf"),
    width = 12, height = 8)
# Print explicitly: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
invisible(lapply(list(igg, ig1, ig2, ig3, ig4, igm), function(fig) print(fig$q)))
dev.off()



###########################
### Mixed models: FOLD CHANGE
###########################
library(nlme)
library(lmerTest)


# Records at C-1 and the three post-challenge timepoints.
dataset <- tmp.dat[tmp.dat$timepoint2c %in% c("D7", "D11-D13", "D28", "C-1"), ]

# Numeric time axis: C-1 -> 0, D7 -> 7, D11-D13 -> 12, D28 -> 28.
dataset$newtimepoint <- c(0, 7, 12, 28)[
  match(dataset$timepoint2c, c("C-1", "D7", "D11-D13", "D28"))
]


# Frequency checks on the analysis variables.
table(ig = dataset$ig)
table(antigen = dataset$antigen)
table(status = dataset$status)
table(dataset = dataset$dataset)
table(newtimepoint = dataset$newtimepoint)
head(dataset)
#
# # test
# aux.data <- dataset[dataset$status=="Naive",]
# aux.data$status <- as.character(aux.data$status)
# x1 <- aux.data
# x1$status <- "vaccinated"
# x1$log10_mfi <- x1$log10_mfi + 0.01
# x2 <- aux.data
# x2$status <- "semmiimmune"
# x2$log10_mfi <- x2$log10_mfi + 0.02
#
# x3 <- rbind(aux.data, x1, x2)
# x3 <- rbind(x3, x3, x3,x3,x3,x3)
#
#
# x3$dummy <- rnorm(n = nrow(x3), mean=0, sd=1)
#
# mod <- lmer(log10_mfi ~ newtimepoint*status + (newtimepoint|original_id), data=aux.data[aux.data$newtimepoint%nin%c(28), ])
# summary(mod)
#
# # mod0 <- lm(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ])
# # mod1 <- lm(log10_mfi ~ 1, data=x3[x3$newtimepoint%nin%c(28), ])
# # summary(mod0)
# # anova(mod1, mod0, test="F")
# # oneway.test(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ],var.equal = TRUE)
# # car::linearHypothesis(mod0, rbind(b1=c(0,1,0), b2=c(0,0,1)))
# # car::linearHypothesis(mod0, "1*statussemmiimmune - 1*statusvaccinated = 0")
#
#
# mod0 <- lm(log10_mfi ~ status + dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# mod1 <- lm(log10_mfi ~ 1 + dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# summary(mod0)
# anova(mod1, mod0, test="F")
# oneway.test(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ],var.equal = TRUE)
#
#
# mod0 <- lm(log10_mfi ~ status + dummy + status:dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# mod1 <- lm(log10_mfi ~ 1 + dummy + status:dummy, data=x3[x3$newtimepoint%nin%c(28), ])
# anova(mod1, mod0, test="F")
# oneway.test(log10_mfi ~ status, data=x3[x3$newtimepoint%nin%c(28), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0), b2=c(0,0,1,0)))
#
#
# mod0 <- lm(log10_mfi ~ status + newtimepoint + status:newtimepoint, data=x3[x3$newtimepoint%nin%c(28), ])
# summary(mod0)
# summary(glht(mod0, linfct = c("statussemmiimmune = 0", "statusvaccinated = 0")))
#
#
#
# xxx <- x3[complete.cases(x3),]
# mod0 <- lm(log10_mfi ~ status + newtimepoint + status:newtimepoint, data=xxx)
# summary(mod0)
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(0), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,0,0), b2=c(0,0,1,0,0,0)))
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(12), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,12,0), b2=c(0,0,1,0,0,12)))
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(28), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,0,0), b2=c(0,0,1,0,0,0),b3=c(0,0,0,0,28,0), b4=c(0,0,0,0,0,28)))
#
# xxx <- x3[complete.cases(x3),]
# xxx <- xxx[xxx$newtimepoint%in%c(0,12,28),]
# mod0 <- lm(log10_mfi ~ status + as.factor(newtimepoint) + status:as.factor(newtimepoint), data=xxx)
# summary(mod0)
#
# mod0 <- lm(log10_mfi ~ status + (newtimepoint) + status:(newtimepoint), data=xxx)
# summary(mod0)
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(0), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,rep(0,3)), b2=c(0,0,1,rep(0,3))))
#
# car::linearHypothesis(mod0, rbind(b1=c(0,1,rep(0,7)), b2=c(0,0,1,rep(0,6))))
#
#
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(12), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,12,0), b2=c(0,0,1,0,0,12)))
#
# oneway.test(log10_mfi ~ status, data=xxx[xxx$newtimepoint%in%c(28), ],var.equal = TRUE)
# car::linearHypothesis(mod0, rbind(b1=c(0,1,0,0,0,0), b2=c(0,0,1,0,0,0),b3=c(0,0,0,0,28,0), b4=c(0,0,0,0,0,28)))
#
#
#
#


### All models and isotype
# Summarise per-status time slopes for a model with six fixed-effect
# coefficients (the callers fit `newtimepoint * status` with three statuses).
#
# Each slope is a glht() linear combination of the fixed effects: weight 1 on
# coefficient 2, plus weight 1 on coefficient 5 or 6 for the two
# non-reference groups.
#
# NOTE(review): calling coefficient 5 "Vaccinated" and coefficient 6
#   "Semi-immune" assumes that ordering of the status levels -- confirm
#   against names(fixef(model)).
# NOTE(review): column 5 of the summary coefficient table is presumably the
#   p-value column -- confirm for the summary method in use.
#
# Args:
#   model: fitted model accepted by glht() and summary().
# Returns:
#   data.frame with columns a1.est, a1.pvalue, a2.est, a2.pvalue, a3.est,
#   a3.pvalue (estimates rounded to 3 decimals), a2.pvalue.int and
#   a3.pvalue.int (entries [5, 5] and [6, 5] of the coefficient table).
ff1 <- function(model){

  # Evaluate one linear combination of the fixed effects. The row label "b1"
  # is kept because it is carried through into the names of the estimate.
  slope.test <- function(weights){
    fit <- summary(glht(model, linfct = rbind(b1 = weights)))$test
    list(est = round(fit$coefficients, 3),
         pvalue = as.numeric(fit$pvalues))
  }

  naive      <- slope.test(c(0, 1, 0, 0, 0, 0))
  vaccinated <- slope.test(c(0, 1, 0, 0, 1, 0))
  semi       <- slope.test(c(0, 1, 0, 0, 0, 1))

  # Interaction terms are read straight from the model summary by position.
  coef.table <- summary(model)$coefficients

  data.frame(a1.est = naive$est,
             a1.pvalue = naive$pvalue,
             a2.est = vaccinated$est,
             a2.pvalue = vaccinated$pvalue,
             a3.est = semi$est,
             a3.pvalue = semi$pvalue,
             a2.pvalue.int = coef.table[5, 5],
             a3.pvalue.int = coef.table[6, 5])
}

# Summarise per-status time slopes for a model with four fixed-effect
# coefficients (the caller fits `newtimepoint * status` after dropping the
# "Vaccinated" rows).
#
# The reference slope puts weight 1 on coefficient 2; the second slope adds
# weight 1 on coefficient 4.
#
# NOTE(review): column 5 of the summary coefficient table is presumably the
#   p-value column -- confirm for the summary method in use.
#
# Args:
#   model: fitted model accepted by glht() and summary().
# Returns:
#   data.frame with columns a1.est, a1.pvalue, a2.est, a2.pvalue (estimates
#   rounded to 3 decimals) and a2.pvalue.int (entry [4, 5] of the
#   coefficient table).
ff2 <- function(model){

  # Evaluate one linear combination of the fixed effects. The row label "b1"
  # is kept because it is carried through into the names of the estimate.
  slope.test <- function(weights){
    fit <- summary(glht(model, linfct = rbind(b1 = weights)))$test
    list(est = round(fit$coefficients, 3),
         pvalue = as.numeric(fit$pvalues))
  }

  naive <- slope.test(c(0, 1, 0, 0))
  semi  <- slope.test(c(0, 1, 0, 1))

  # Interaction term is read straight from the model summary by position.
  interaction.p <- summary(model)$coefficients[4, 5]

  data.frame(a1.est = naive$est,
             a1.pvalue = naive$pvalue,
             a2.est = semi$est,
             a2.pvalue = semi$pvalue,
             a2.pvalue.int = interaction.p)
}


# Fit two random-slope mixed models per isotype/antigen combination and
# collect one formatted summary table per isotype:
#   * ans.dat.two   -- all statuses, every timepoint except 28   (ff1 summary)
#   * ans.dat.three -- every status except "Vaccinated", all timepoints
#                      (ff2 summary)
# Within each isotype the p-value columns are BH-adjusted across antigens and
# then turned into display strings with fcn.pvalue.display().
ans.dat.two <- list()
ans.dat.three <- list()

# BH adjustment followed by display formatting of one p-value column.
bh.display <- function(p) fcn.pvalue.display(p.adjust(p, method = "BH"))

zz <- 0
for(i in c("IgG", "IgG1", "IgG2", "IgG3", "IgG4", "IgM") ){
  z <- 0
  ans1 <- list()
  ans2 <- list()

  for(j in lantigen){
    z <- z + 1

    # INCLUDE BASELINE. THINK BASELINE
    aux.data <- subset(dataset, ig==i & antigen==j)

    # Nothing measured for this isotype/antigen pair: leave the slot empty.
    if(nrow(aux.data)==0){
      next
    }

    # Model 1: three statuses, timepoint 28 excluded.
    data.two <- aux.data[aux.data$newtimepoint%nin%c(28), ]
    mod.two <- lmer(log10_mfi ~ newtimepoint*status + (newtimepoint|original_id),
                    data=data.two)
    res.two <- ff1(mod.two)
    res.two[["isotype"]] <- i
    res.two[["antigen"]] <- j
    res.two[["model"]] <- "two.timepoints"
    ans1[[z]] <- res.two

    # Model 2: "Vaccinated" excluded, all timepoints kept. If the summary
    # step fails, the row is filled with NA instead of stopping the loop.
    data.three <- aux.data[aux.data$status%nin%"Vaccinated", ]
    mod.three <- lmer(log10_mfi ~ newtimepoint*status + (newtimepoint | original_id),
                      data=data.three)
    res.three <- try(ff2(mod.three), silent = TRUE)
    if(inherits(res.three, "try-error")){
      res.three <- data.frame(a1.est=NA, a1.pvalue=NA, a2.est=NA,
                              a2.pvalue=NA, a2.pvalue.int=NA)
    }
    res.three[["isotype"]] <- i
    res.three[["antigen"]] <- j
    res.three[["model"]] <- "three.timepoints"
    ans2[[z]] <- res.three
  }

  zz <- zz + 1

  # ff1 layout: p-values sit in columns 2, 4, 6, 7, 8; isotype/antigen in 9, 10.
  ans1 <- ldply(ans1)
  ans1[c(2,4,6,7,8)] <- lapply(ans1[c(2,4,6,7,8)], bh.display)
  ans1 <- ans1[, c(9,10, 1:8)]
  ans.dat.two[[zz]] <- ans1

  # ff2 layout: p-values sit in columns 2, 4, 5; isotype/antigen in 6, 7.
  ans2 <- ldply(ans2)
  ans2[c(2,4,5)] <- lapply(ans2[c(2,4,5)], bh.display)
  ans2 <- ans2[, c(6,7, 1:5)]
  ans.dat.three[[zz]] <- ans2
}


ans.dat.two.summ <- ans.dat.two
ans.dat.three.summ <- ans.dat.three


#################################
### Export models to word
#################################
library(ReporteRs)

#########################################################
## Create Word tables describing the mixed-model results
#########################################################
# Write the per-isotype mixed-model tables to a Word document.
# For each isotype two tables are added, each followed by a page break:
#   1. ans.dat.two.summ[[i]]   -- three statuses (10 columns)
#   2. ans.dat.three.summ[[i]] -- Naive vs Semi-immune (7 columns)
# Both tables carry BH-adjusted p-values, so both captions say "adjusted".
# NOTE(review): the first argument of docx() may be the document title rather
#   than a file path -- confirm against the ReporteRs documentation.
mydoc <- docx(file.path(localpath, "Results", "Paper", "aim1", "mfi", "mixed_models.docx"))

# Isotype labels used in the captions; same order as the model-fitting loop.
xvector <- c("IgG", "IgG1", "IgG2", "IgG3", "IgG4", "IgM")

# Shared formatting for the header rows.
header.text <- textProperties(font.size = 8, font.weight = 'bold')

z <- 0
for(i in seq_along(xvector)){

  ## Table: three statuses, two timepoints
  z <- z + 1
  my_text <- pot(paste0("Table ", z, ". Mixed models (log10MFI response models) by antigen fitting the three status and two timepoints, D7 and D11-D13 (including C-1 information) for ", xvector[i], " isotype. Coefficient and adjusted p value is shown (reference: Naive)."))
  mydoc <- addParagraph(mydoc, value = my_text, stylename = "Normal")
  mydoc <- addParagraph(mydoc, value = "", stylename = "Normal")

  ftab <- FlexTable(ans.dat.two.summ[[i]], add.rownames = FALSE, header.columns = FALSE,
                    body.par.props = parCenter(),
                    header.par.props = parCenter(),
                    body.text.props = textProperties(font.size = 8))

  # Grouping header: column spans must add up to the 10 table columns.
  ftab <- addHeaderRow(ftab, text.properties = header.text,
                       value = c("", "", "Slope Naive", "Slope Vaccinated", "Slope Semi-immune", "Int. Vaccinated", "Int. Semi-immune"),
                       colspan = c(1,1,2,2,2,1,1), par.properties = parCenter())

  ftab <- addHeaderRow(ftab, text.properties = header.text,
                       value = c("Isotype", "Antigen", rep(c("Coef", "p value"), 3), "pvalue", "pvalue"),
                       colspan = rep(1,10), par.properties = parCenter())
  mydoc <- addFlexTable(mydoc, ftab, par.properties = parCenter())

  ## Table: two statuses, three timepoints
  z <- z + 1
  mydoc <- addPageBreak(mydoc)
  my_text <- pot(paste0("Table ", z, ". Mixed models (log10MFI response models) by antigen fitting the two status (Naive and Semi-immune) and three timepoints, D7, D11-13 and D28 (including C-1 information) for ", xvector[i], " isotype. Coefficient and adjusted p value is shown (reference: Naive)."))
  mydoc <- addParagraph(mydoc, value = my_text, stylename = "Normal")
  mydoc <- addParagraph(mydoc, value = "", stylename = "Normal")

  ftab <- FlexTable(ans.dat.three.summ[[i]], add.rownames = FALSE, header.columns = FALSE,
                    body.par.props = parCenter(),
                    header.par.props = parCenter(),
                    body.text.props = textProperties(font.size = 8))

  # Grouping header: column spans must add up to the 7 table columns.
  ftab <- addHeaderRow(ftab, text.properties = header.text,
                       value = c("", "", "Slope Naive", "Slope Semi-immune", "Int. Semi-immune"),
                       colspan = c(1,1,2,2,1), par.properties = parCenter())

  ftab <- addHeaderRow(ftab, text.properties = header.text,
                       value = c("Isotype", "Antigen", rep(c("Coef", "p value"), 2), "pvalue"),
                       colspan = rep(1,7), par.properties = parCenter())
  mydoc <- addFlexTable(mydoc, ftab, par.properties = parCenter())
  mydoc <- addPageBreak(mydoc)

}


writeDoc(mydoc, file = file.path(localpath, "Results", "Paper", "aim1", "mfi", "mixed_models.docx"))
# mvazquezs/chmitools documentation built on May 1, 2020, 2:06 a.m.