knitr::opts_chunk$set(echo = FALSE,message=FALSE,warning=FALSE)
library(plyr)
library(ggplot2)
library(knitr)

## Read in the latest titer data
dat <- read.csv("~/Documents/Fluscape/fluscape/trunk/data/HI_titers_paired.csv")
viruses <- read.csv("~/Documents/Fluscape/FluScapeOwn/H3N2_codes.csv")

## Reorder viruses with what is there
colnames(viruses)[1] <- "Virus"
dat <- merge(dat,viruses[,c("Virus","no_duplicated_codes")], by = "Virus")
colnames(dat)[1] <- "OldVirus"
colnames(dat)[15] <- "Virus"

viruses <- viruses[viruses$no_duplicated_codes %in% unique(dat$Virus),]
tmpViruses <- unique(viruses[,c("year","no_duplicated_codes")])
tmpViruses <- tmpViruses[order(tmpViruses$no_duplicated_codes),]
real_order <- order(tmpViruses$year)
dat$Virus <- factor(dat$Virus,levels(dat$Virus)[real_order])

parV4 <- read.csv("~/Documents/Fluscape/fluscape/trunk/data/Participants_V4.csv")
parV1 <- read.csv("~/Documents/Fluscape/fluscape/trunk/data/Participants_V1.csv")
parV2 <- read.csv("~/Documents/Fluscape/fluscape/trunk/data/Participants_V2.csv")
parV3 <- read.csv("~/Documents/Fluscape/fluscape/trunk/data/Participants_V3.csv")


parV4$Participant_ID <- apply(parV4[,1:3],1,
                                     function(x){
                                       sprintf("L%02dH%02dP%02d",as.numeric(x[3]),as.numeric(x[2]),as.numeric(x[1]))
                                       })
parV1$Participant_ID <- apply(parV1[,1:3],1,
                                     function(x){
                                       sprintf("L%02dH%02dP%02d",as.numeric(x[3]),as.numeric(x[2]),as.numeric(x[1]))
                                       })
parV2$Participant_ID <- apply(parV2[,1:3],1,
                                     function(x){
                                       sprintf("L%02dH%02dP%02d",as.numeric(x[3]),as.numeric(x[2]),as.numeric(x[1]))
                                       })
parV3$Participant_ID <- apply(parV3[,1:3],1,
                                     function(x){
                                       sprintf("L%02dH%02dP%02d",as.numeric(x[3]),as.numeric(x[2]),as.numeric(x[1]))
                                       })
parV1 <- parV1[,c("Participant_ID","PART_SAMPLE_TIME")]
parV1$Visit <- "V1"
colnames(parV1) <- c("Participant_ID","sample","Visit")
parV2 <- parV2[,c("Participant_ID","PART_SAMPLE_TIME")]
parV2$Visit <- "V2"
colnames(parV2) <- c("Participant_ID","sample","Visit")
parV3 <- parV3[,c("Participant_ID","PART_SAMPLE_TIME")]
parV3$Visit <- "V3"
colnames(parV3) <- c("Participant_ID","sample","Visit")
parV4A <- parV4[,c("Participant_ID","PART_BIRTH_YEAR","PART_BIRTH_MONTH")]
parV4B <- parV4[,c("Participant_ID","PART_SAMPLE_TIME")]
parV4B$Visit <- "V4"
colnames(parV4B) <- c("Participant_ID","sample","Visit")

dat <- merge(dat,parV4A,by="Participant_ID")

dat$birth_date <- paste0(dat$PART_BIRTH_YEAR,"-",dat$PART_BIRTH_MONTH,"-01")
dat$birth_date <- as.Date(dat$birth_date)

datV1 <- merge(dat[dat$Visit=="V1",],parV1,by=c("Participant_ID","Visit"),all = TRUE)
datV2 <- merge(dat[dat$Visit=="V2",],parV2,by=c("Participant_ID","Visit"),all=TRUE)
datV3 <- merge(dat[dat$Visit=="V3",],parV3,by=c("Participant_ID","Visit"),all = TRUE)
datV4 <- merge(dat[dat$Visit=="V4",],parV4B,by=c("Participant_ID","Visit"),all = TRUE)
dat <- rbind(datV1,datV2,datV3,datV4)
dat$sample <- as.Date(dat$sample)
dat <- dat[,c("Participant_ID","Visit","Virus","HI_Titer","birth_date","sample")]



####################################
## Do some data cleaning here
dat[dat$Visit == "v1","Visit"] <- "V1"
dat <- dat[dat$Visit != "20",]
dat[dat$HI_Titer == 1 & !is.na(dat$HI_Titer),"HI_Titer"] <- 10
######################################

## Convert to log scale
dat[dat$HI_Titer == 0 & !is.na(dat$HI_Titer),"HI_Titer"] <- 5
dat$logTiter <- log2(dat$HI_Titer/5)
##

Aim

A quick report to summarise the available HI titer data from Fluscape. We are using the file "HI_titers_paired.csv", which gives titers from visits 1-4 for the H3N2 strains listed in Fonville et al. 2014. These are:

kable(viruses, caption="Summary of the viruses with available titers")

Example individual

minx <- as.Date(min(dat$sample[!is.na(dat$sample)]),origin="2000-01-01")
maxx <- as.Date(max(dat$sample[!is.na(dat$sample)]),origin="2000-01-01")

indiv1 <- ggplot(dat[dat$Participant_ID==unique(dat$Participant_ID)[2],]) + 
  geom_line(aes(x=sample,y=logTiter)) + facet_wrap(~Virus) + scale_x_date(limits=c(minx,maxx))
indiv2 <- ggplot(dat[dat$Participant_ID==unique(dat$Participant_ID)[15],]) + 
  geom_line(aes(x=sample,y=logTiter)) + facet_wrap(~Virus) + scale_x_date(limits=c(minx,maxx))
indiv3 <- ggplot(dat[dat$Participant_ID==unique(dat$Participant_ID)[28],]) + 
  geom_line(aes(x=sample,y=logTiter)) + facet_wrap(~Virus) + scale_x_date(limits=c(minx,maxx))

cowplot::plot_grid(indiv1,indiv2,indiv3,ncol=1)

Some useful stats

######################################
## Useful stats
######################################
## Number by visit
visit_ns <- ddply(dat[,c("Visit","Participant_ID")], "Visit",function(x) length(unique(x$Participant_ID)))
colnames(visit_ns) <- c("Visit","Unique_Participants")

visit_key <- c("V1","V2","V3","V4")
visits <- ddply(dat[,c("Visit","Participant_ID")], "Participant_ID", function(x){
  as.numeric(visit_key %in% unique(x$Visit))})
visits$total <- rowSums(visits[,2:5])
visit_summary <- count(visits$total)
print(visit_summary)

kable(visit_ns,caption="Summary of number of visits")
kable(count(visits[,2:5]),caption="Number of participants for each combination of visits")
## How many visits each individual participated in
HI_counts <- ddply(dat[,c("Visit","Virus","logTiter")], c("Visit","Virus"), count)
tab1 <- ddply(HI_counts, "Virus", function(x) reshape2::dcast(x,Visit~logTiter))
kable(tab1, caption="Summary of number of samples by virus, visit and titer")
## How many visits each individual participated in
HI_counts2 <- ddply(dat[,c("Virus","Visit")], c("Virus","Visit"), count)
kable(HI_counts2, caption="Summary of number of samples by virus and visit ")

Distribution of titers by visit and virus, frequencies normalised

HI_counts <- ddply(dat[,c("Visit","Virus","logTiter")], c("Visit","Virus"), count)
p1 <- ggplot(dat) + 
  geom_histogram(aes(x=logTiter,y=..density..),binwidth=1) + 
  facet_grid(Virus~Visit) + 
  theme_bw() + 
  scale_x_continuous(limits = c(-0.5,8),breaks=seq(0,8,by=1),labels=seq(0,8,by=1)) +
  scale_y_continuous(limits=c(0,0.75))
p1

Distribution of titers by visit and virus, frequencies not normalised

p2 <- ggplot(dat) + 
  geom_histogram(aes(x=logTiter),binwidth=1) + 
  facet_grid(Virus~Visit) + 
  theme_bw() + 
  scale_x_continuous(limits = c(-0.5,8),breaks=seq(0,8,by=1),labels=seq(0,8,by=1)) 
p2


jameshay218/serosim2 documentation built on Nov. 29, 2017, 6:21 p.m.