knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE, cache = params$cache, cache.path = params$report_cache_dir, cache.lazy = FALSE )
suppressPackageStartupMessages({ library(package = "eulerr") library(package = "knitr") library(package = "Biobase") library(package = "RColorBrewer") library(package = "dplyr") library(package = "ggbeeswarm") library(package = "pvca") # dleelab/pvca library(package = "lme4") library(package = "cluster") library(package = "ComplexHeatmap") # Bioc Install library(package = "patchwork") library(package = "stringr") library(package = "RAPToR") library(package = "ggplot2") library(package = "org.Hs.eg.db") })
data_dir <- params$data_dir extra_data_dir <- params$extra_data_dir if (!dir.exists(extra_data_dir)) dir.create(extra_data_dir) date_prefix <- params$date_prefix
These figures only look at the "Young" cohort that is defined as 18 to 50 years old and the "Extended Old" cohort of 50 to 90 years old. The "Old" cohort (not used here) was 60 to 90 years old.
Full codebase for pre-processing can be found in the ImmuneSignatures2 package on github. Here, we use the resulting data to reproduce figures from the manuscript.
noRespGEFile <- file.path(data_dir, paste0(date_prefix,"extendedOld_norm_batchCorrectedFromYoung_eset.rds")) old.NoResp <- readRDS(file = noRespGEFile) noRespGEFile <- file.path(data_dir, paste0(date_prefix,"young_norm_eset.rds")) young.NoResp <- readRDS(file = noRespGEFile) withRespGEFile <- file.path(data_dir, paste0(date_prefix,"young_norm_withResponse_eset.rds")) young.WithResp <- readRDS(file = withRespGEFile) all.noResp <- readRDS(file.path(data_dir, paste0(date_prefix, "all_norm_eset.rds"))) all.noResp$featureSetVendor <- ifelse(all.noResp$featureSetVendor == "NA", "RNAseq", all.noResp$featureSetVendor) all.noNorm <- readRDS(file.path(data_dir, paste0(date_prefix, "all_noNorm_eset.rds"))) all.noNorm$featureSetVendor <- ifelse(all.noNorm$featureSetVendor == "NA", "RNAseq", all.noNorm$featureSetVendor) all.immdata <- readRDS(file.path(data_dir, paste0(date_prefix, "all_immdata_with_response.rds"))) all_noNorm_withResponse <- readRDS(file.path(data_dir, paste0(date_prefix, "all_noNorm_withResponse_eset.rds"))) immdata <- readRDS(file.path(data_dir, paste0(date_prefix, "immdata_all.rds"))) # Set seed for consistent results set.seed(123)
# Fig 1 set3colors <- brewer.pal("Set3", n = 12) colors.fig1a <- c(set3colors[c(2, 11, 3, 12, 8)], "black", set3colors[c(7, 6, 5, 1, 4, 9, 10)]) # Other Figures getPalette <- colorRampPalette(c(brewer.pal(name = "Set3", n = 10), brewer.pal(name = "Dark2", n = 4))) vaccines <- unique(paste0(young.NoResp$pathogen, " (", young.NoResp$vaccine_type, ")")) vaccine2color <- getPalette(n = length(vaccines)) %>% setNames(nm = sort(vaccines))
Systems vaccinology datasets from existing HIPC studies as well as published systems vaccinology papers and databases were submitted to the ImmPort database. ImmuneSpace captures these datasets to create a combined compendium dataset. Quality control assessments of these data include array quality checks for microarray studies, batch correction, imputations for missing age and sex/y-chromosome presence information, and normalization per study. The combined virtual study includes transcriptional profiles and antibody response measurements from 1405 participants across 53 cohorts, profiling the response to 24 different vaccines.
Demographic data included sex, race, vaccine, and number of participants.
getPdata <- function(eset){ pd <- as_tibble(pData(eset)) pd$age_reported <- as.numeric(pd$age_reported) return(pd) } pdata_young <- getPdata(young.NoResp) pdata_old <- getPdata(old.NoResp) pdata_df <- getPdata(all.noResp) # Remove malaria #pdata_no_malaria <- filter(pdata_df, study_accession != "SDY1293") # Filter to unique subjects swr = function(string, nwrap=13) { paste(strwrap(string, width=nwrap), collapse="\n") } swr = Vectorize(swr) pdata_df_uniqueSubjects <- distinct(pdata_df, participant_id, y_chrom_present, race, pathogen, vaccine_type) # Create new column for pathogen and vaccine pdata_df_uniqueSubjects <- mutate(pdata_df_uniqueSubjects, pathogen_vacc = paste0(pathogen, "\n(", vaccine_type, ")")) pdata_df_uniqueSubjects$pathogen_vacc = swr(pdata_df_uniqueSubjects$pathogen_vacc) getCounts <- function(pd, colnm, prefix){ counts_df <- arrange(dplyr::count(pd, .data[[colnm]], pathogen_vacc), n) counts_df$pathogen_total <- sapply(1:nrow(counts_df), function(i){ return(sum(filter(counts_df, pathogen_vacc == counts_df$pathogen_vacc[i])$n)) }) counts_df$pathogen_vacc <- paste0(counts_df$pathogen_vacc, "\nn = ", counts_df$pathogen_total) counts_df$n <- counts_df$n/counts_df$pathogen_total colnames(counts_df) <- c("category", "pathogen_vacc", "freq") counts_df$category <- paste0(prefix, counts_df$category) return(counts_df) } sex_counts_df <- getCounts(pdata_df_uniqueSubjects, "y_chrom_present", "YCHROM_") race_counts_df <- getCounts(pdata_df_uniqueSubjects, "race", "RACE_") total_counts <- rbind(sex_counts_df, race_counts_df) names(colors.fig1a) <- c(unique(total_counts$pathogen_vacc)[c(2, 11, 3, 12, 8)], "Malaria (Recombinant protein)", unique(total_counts$pathogen_vacc)[c(7, 6, 5, 1, 4, 9, 10)]) p <- ggplot(total_counts) + geom_bar(aes(x = freq, y = category, fill = pathogen_vacc), color = "black", stat = "identity", lwd = .2) + # geom_hline(yintercept = 7.5, color = "grey", lwd = .4) + facet_wrap(vars(pathogen_vacc), nrow = 2) + scale_y_discrete(limits = rev(c("YCHROM_TRUE", "YCHROM_FALSE", "RACE_White", "RACE_Black or\nAfrican American", "RACE_Asian", "RACE_American Indian or\nAlaska Native", "RACE_Other", "RACE_Not Specified", "RACE_Unknown")), labels = rev(c("Male", "Female", "White", "Black or\nAfrican American", "Asian", "American Indian or\nAlaska Native", "Other", "Not Specified", "Unknown"))) + scale_x_continuous(breaks = c(0, .2, .4, .6, .8, 1), labels = c(0, 20, 40, 60, 80, 100)) + scale_fill_manual(values = colors.fig1a) + guides(fill = FALSE) + xlab("Distribution (%)") + ylab(NULL) + theme_classic() + # theme(axis.text.x = element_text(angle = 45, size=12))+ theme ( axis.text.y = element_text(hjust = 1, size=18), axis.title= element_text (size=20), axis.text.x = element_text(angle = 45, hjust = 1, size=18), strip.text = element_text(size=18, face="bold")) p
Sample quality assessments of gene expression datasets using Array Quality metrics. arrayQualityMetrics package was employed to assess quality of microarray datasets by checking the following criteria:
1. absolute mean difference between arrays to check the probe and median intensity across all arrays
2. Kolmogorov-Smirnov statistics to check the signal intensity distribution of arrays, comparing each probe versus distribution of test statistics for all other probes
3. Hoeffding's D-statistics for arrays.
Arrays will be excluded if they fail all three criteria above.
Quality control code is available in the ImmuneSignatures2 package on github.
Principal component analysis (Top) and Principal Variation Component Analysis (PVCA) of baseline expression data per study before (B) and after (C) batch correction.
Code used to perform normalization is available in the ImmuneSignatures2 package.
## Helper functions for PCA getPCAs <- function (eset, maxpc = 3) { if (!is.matrix(eset)) { ex <- exprs(eset) out <- pData(eset) } else { out <- NULL ex <- eset } pca.res <- prcomp(t(ex)) x <- pca.res$x[, 1:maxpc] colnames(x) <- paste("PC", 1:maxpc, sep = ".") varex = round(100 * summary(pca.res)$importance[2, ]) db <- cbind.data.frame(x) if (!is.null(out)) db <- cbind(db, out) return(list(db = as.data.frame(db), varex = varex)) } plotPCA<-function(pca.db,title=paste0('PCA'), color.var='study2', var.shape='pathogen', var.size='months2' ){ p1 <- ggplot(mutate(as.data.frame(pca.db$db), months=round((study_time_collected/28),1), months2=ifelse(months<0, -1, months)+2), aes_string(x='PC.1', y='PC.2', color=color.var, shape=var.shape)) + geom_jitter() + theme_bw() + scale_color_manual(values=getPalette(length(unique(pca.db$db[[color.var]])))) + labs(x=paste('PC-1 (',pca.db$varex[1],'%)',sep=''), y=paste('PC-2 (',pca.db$varex[2],'%)',sep=''), title=title) return(p1) } impute.na<-function(x){x[is.na(x)]<-mean(x,na.rm=T); return(x)} ## Helper functions for PVCA getEigen<-function(eset){ theDataMatrix <- exprs(eset) dataRowN <- nrow(theDataMatrix) dataColN <- ncol(theDataMatrix) theDataMatrixCentered <- matrix(data = 0, nrow = dataRowN, ncol = dataColN) theDataMatrixCentered_transposed <- apply(theDataMatrix, 1, scale, center = TRUE, scale = FALSE) theDataMatrixCentered <- t(theDataMatrixCentered_transposed) theDataCor <- cor(theDataMatrixCentered) eigenData <- eigen(theDataCor) return(eigenData) } pvcaBatchAssess.MSF2<-function(eset, eigenData=NULL, batch.factors, threshold, include.inter="all") { require(lme4) theDataMatrix <- exprs(eset) dataRowN <- nrow(theDataMatrix) dataColN <- ncol(theDataMatrix) if (is.null(eigenData)){ theDataMatrixCentered <- matrix(data = 0, nrow = dataRowN, ncol = dataColN) theDataMatrixCentered_transposed <- apply(theDataMatrix, 1, scale, center = TRUE, scale = FALSE) theDataMatrixCentered <- t(theDataMatrixCentered_transposed) theDataCor <- cor(theDataMatrixCentered) eigenData <- eigen(theDataCor) } eigenValues <- eigenData$values ev_n <- length(eigenValues) eigenVectorsMatrix <- eigenData$vectors eigenValuesSum <- sum(eigenValues) percents_PCs <- eigenValues/eigenValuesSum expInfo <- pData(eset)[, batch.factors] exp_design <- as.data.frame(expInfo) expDesignRowN <- nrow(exp_design) expDesignColN <- ncol(exp_design) my_counter_2 <- 0 my_sum_2 <- 1 for (i in ev_n:1) { my_sum_2 = my_sum_2 - percents_PCs[i] if ((my_sum_2) <= threshold) { my_counter_2 = my_counter_2 + 1 } } if (my_counter_2 < 3) { pc_n = 3 } else { pc_n = my_counter_2 } pc_data_matrix <- matrix(data = 0, nrow = (expDesignRowN * pc_n), ncol = 1) mycounter = 0 for (i in 1:pc_n) { for (j in 1:expDesignRowN) { mycounter <- mycounter + 1 pc_data_matrix[mycounter, 1] = eigenVectorsMatrix[j, i] } } AAA <- exp_design[rep(1:expDesignRowN, pc_n), ] Data <- cbind(AAA, pc_data_matrix) variables <- c(colnames(exp_design)) for (i in 1:length(variables)) { Data$variables[i] <- as.factor(Data$variables[i]) } op <- options(warn = (-1)) model.func <- c() index <- 1 for (i in 1:length(variables)) { mod = paste("(1|", variables[i], ")", sep = "") model.func[index] = mod index = index + 1 } for (i in 1:(length(variables) - 1)) { for (j in (i + 1):length(variables)) { mod = paste("(1|", variables[i], ":", variables[j], ")", sep = "") model.func[index] = mod index = index + 1 } } if (include.inter!="all"){ i.delete.RE <- setdiff(grep(":", model.func), grep(include.inter, model.func)) delete.RE <- model.func[i.delete.RE] model.func <- setdiff(model.func, delete.RE) } effects_n = length(model.func) + 1 randomEffectsMatrix <- matrix(data = 0, nrow = pc_n, ncol = effects_n) function.mods <- paste(model.func, collapse = " + ") for (i in 1:pc_n) { y = (((i - 1) * expDesignRowN) + 1) funct <- paste("pc_data_matrix", function.mods, sep = " ~ ") Rm1ML <- lmer(funct, Data[y:(((i - 1) * expDesignRowN) + expDesignRowN), ], REML = TRUE, verbose = FALSE, na.action = na.omit) randomEffects <- Rm1ML randomEffectsMatrix[i, ] <- c(unlist(VarCorr(Rm1ML)), resid = sigma(Rm1ML)^2) } effectsNames <- c(names(getME(Rm1ML, "cnms")), "resid") randomEffectsMatrixStdze <- matrix(data = 0, nrow = pc_n, ncol = effects_n) for (i in 1:pc_n) { mySum = sum(randomEffectsMatrix[i, ]) for (j in 1:effects_n) { randomEffectsMatrixStdze[i, j] = randomEffectsMatrix[i, j]/mySum } } randomEffectsMatrixWtProp <- matrix(data = 0, nrow = pc_n, ncol = effects_n) for (i in 1:pc_n) { weight = eigenValues[i]/eigenValuesSum for (j in 1:effects_n) { randomEffectsMatrixWtProp[i, j] = randomEffectsMatrixStdze[i, j] * weight } } randomEffectsSums <- matrix(data = 0, nrow = 1, ncol = effects_n) randomEffectsSums <- colSums(randomEffectsMatrixWtProp) totalSum <- sum(randomEffectsSums) randomEffectsMatrixWtAveProp <- matrix(data = 0, nrow = 1, ncol = effects_n) for (j in 1:effects_n) { randomEffectsMatrixWtAveProp[j] = randomEffectsSums[j]/totalSum } return(list(dat = randomEffectsMatrixWtAveProp, label = effectsNames)) } plotPVCA <- function(pvcaObj, cex.percentage = 1, ht = 4, wd = 5, title = fname, race.vars=NULL, outputDir = NULL) { labels <- gsub(":", " x ", pvcaObj$label) labels <- gsub("_imputed", " ", labels) db <- data.frame(perc = t(pvcaObj$dat), labels = factor(labels, levels = labels)) if (!is.null(race.vars)) { db<-rbind(db, data.frame(perc=sum(subset(db, labels%in%race.vars)$perc), labels='race')) %>% subset(!(labels%in%race.vars)) %>% arrange(1*(labels=='resid'), perc) } p <- ggplot(db, aes(x = reorder(labels, perc), y = perc)) + geom_bar(stat = "identity", fill = "blue") + theme_bw() + scale_y_continuous(limits = c(0, 1.1)) + geom_text(size=8,aes(x = labels, y = perc + 0.05, label = paste0(as.character(round(100 * perc, 1)), "%"))) + labs(x = "Effects", y = "Weighted average proportion variance", title = paste("PVCA ", title)) + scale_x_discrete(labels = function(x) str_wrap(x, width = 8)) return(p) }
# ----- PCA ----- all.noNorm$age_group <- ifelse(all.noNorm$age_imputed < 50, "young", "old") eset_noNorm_baseline <- all.noNorm[, all.noNorm$study_time_collected == 0] features.pca.0 <- featureNames(eset_noNorm_baseline)[ which( (rowMeans(is.na(exprs(eset_noNorm_baseline)))==0) & (apply(exprs(eset_noNorm_baseline),1,sd)>0) )] pca_noNorm_cacheFile <- file.path(extra_data_dir, "pca_noNorm.rds") if (file.exists(pca_noNorm_cacheFile)) { pca_noNorm_baseline <- readRDS(pca_noNorm_cacheFile) } else { pca_noNorm_baseline <- getPCAs(eset_noNorm_baseline[features.pca.0,]) saveRDS(pca_noNorm_baseline, pca_noNorm_cacheFile) } pcaPlot_noNorm <-plotPCA(pca_noNorm_baseline, title='Baseline Expression', color.var='featureSetName2', var.shape=NULL, var.size=1) + scale_shape_manual(values=c(12,20,21:23,10,11,13:19))+ theme_classic() + theme(title=element_text (size=18), axis.text=element_text(size=16), axis.text.x=element_text(size=16), legend.position = "right", legend.title = element_text(size=16), legend.text = element_text(size=12), axis.title=element_text(size=14,face="bold")) pcaPlot_noNorm
# ----- PVCA ----- batch.vars <- c('cell_type', 'featureSetVendor', 'featureSetName2', 'study_accession', 'y_chrom_present', 'age_group') pvca_noNorm_cacheFile <- file.path(extra_data_dir, "pvca_noNorm_withAge.rds") if (!file.exists(pvca_noNorm_cacheFile)) { eigen_noNorm <- getEigen(eset_noNorm_baseline[features.pca.0,]) pvca_noNorm <- pvcaBatchAssess.MSF2(eset_noNorm_baseline[features.pca.0,], eigenData = eigen_noNorm, batch.factors = batch.vars, include.inter = "none", threshold = 0.8) save(eigen_noNorm, pvca_noNorm, file = pvca_noNorm_cacheFile) } else { load(pvca_noNorm_cacheFile) } pvcaPlot_noNorm <- plotPVCA(pvca_noNorm, title = "Baseline", ht=3, wd=7.5, race.vars = NULL, outputDir = outputDir) + theme(axis.text.x = element_text(angle = 20, hjust = 1))+ theme_classic() + theme(title=element_blank(), axis.text=element_text(size=20,color = "black"), axis.text.x=element_text(size=20, angle=45, hjust=1,color = "black"), legend.position = "right", legend.title = element_text(size=16), legend.text = element_text(size=18), axis.title=element_text(size=20,face="bold")) pvcaPlot_noNorm
# ----- PCA ----- all.noResp$age_group <- ifelse(all.noResp$age_imputed < 50, "young", "old") eset_norm_baseline <- all.noResp[, all.noResp$study_time_collected == 0] features.pca.n0 <- featureNames(eset_norm_baseline)[ which( (rowMeans(is.na(exprs(eset_norm_baseline)))==0) & (apply(exprs(eset_norm_baseline),1,sd)>0) )] pca_norm_cacheFile <- file.path(extra_data_dir, "pca_norm.rds") if (file.exists(pca_norm_cacheFile)) { pca_norm_baseline <- readRDS(pca_norm_cacheFile) } else { pca_norm_baseline <- getPCAs(eset_norm_baseline[features.pca.n0, ]) saveRDS(pca_norm_baseline, pca_norm_cacheFile) } pcaPlot_norm <-plotPCA(pca_norm_baseline, title='Baseline post-batch correction', color.var='featureSetName2', var.shape=NULL, var.size=1) + scale_shape_manual(values=c(12,20,21:23,10,11,13:19)) + theme_classic() + theme(title=element_text (size=18), axis.text=element_text(size=16), axis.text.x=element_text(size=16), legend.position = "right", legend.title = element_text(size=16), legend.text = element_text(size=12), axis.title=element_text(size=14,face="bold")) pcaPlot_norm
# ----- PVCA ----- pvca_norm_cacheFile <- file.path(extra_data_dir, "pvca_norm_withAge.rds") if (!file.exists(pvca_norm_cacheFile)) { eigen_norm <- getEigen(eset_norm_baseline[features.pca.n0,]) pvca_norm <- pvcaBatchAssess.MSF2(eset_norm_baseline[features.pca.n0,], eigenData = eigen_norm, batch.factors = batch.vars, include.inter = "none", threshold = 0.8) save(eigen_norm, pvca_norm, file = pvca_norm_cacheFile) } else { load(pvca_norm_cacheFile) } pvcaPlot_norm <- plotPVCA(pvca_norm, title = "Baseline post-batch correction", ht=3, wd=6.2, race.vars = NULL, outputDir = outputDir) + theme(axis.text.x = element_text(size=22, angle = 45, hjust = 1, color = "black",))+ theme_classic() + theme(title=element_blank(), axis.text=element_text(size=16,color="black"), axis.text.x=element_text(size=20, hjust=1, angle=45, color="black"), legend.position = "bottom", legend.title = element_text(size=16), legend.text = element_text(size=16), axis.title=element_text(size=20,face="bold")) pvcaPlot_norm
Biological sex imputation based on expression of Y-chromosome genes. We used 13 Y-chromosome-associated genes to cluster samples into 2 groups assuming biological male or female.
Source code for this calculation is available in the ImmuneSignatures2 package.
# Matrices chosen for plotting plot_matrices <- c("SDY180_WholeBlood_Grp1Fluzone_Geo", "SDY180_WholeBlood_Grp2Pneunomax23_Geo") all.noNorm.noResponse.exprs.noNa <- na.omit(exprs(all.noNorm)) gene2chr <- rbind(as.data.frame(org.Hs.egSYMBOL), as.data.frame(org.Hs.egALIAS2EG) %>% dplyr::rename("symbol" = "alias_symbol")) %>% dplyr::left_join(., as.data.frame(org.Hs.egCHR), by = "gene_id") y_genes <- gene2chr %>% dplyr::filter(symbol %in% rownames(all.noNorm.noResponse.exprs.noNa), chromosome == "Y") non_y_genes.symbol <- setdiff(rownames(all.noNorm.noResponse.exprs.noNa), y_genes$symbol) plot_df <- pData(all.noResp) %>% dplyr::filter(matrix %in% plot_matrices) %>% dplyr::group_by(participant_id, time_post_last_vax) %>% tidyr::nest() %>% dplyr::ungroup() %>% dplyr::mutate( y_ratio = purrr::map_dbl( .x = data, ~mean(all.noNorm.noResponse.exprs.noNa[y_genes$symbol, .x$uid]) / mean(all.noNorm.noResponse.exprs.noNa[non_y_genes.symbol, .x$uid]) )) %>% tidyr::unnest(data) # Plot figure ------------------------------------------------------------------ p <- ggplot(plot_df, aes(x = time_post_last_vax, y = y_ratio, group = participant_id)) + labs(x = "Time since last vaccination (days)", y = "Average Y / non-Y gene expression") + geom_line(alpha = 1, aes(color = y_chrom_present)) + geom_point(alpha = .5, aes(color = y_chrom_present)) + scale_color_manual(values = c("TRUE" = "navy", "FALSE" = "salmon")) + theme_classic() + theme(axis.text=element_text(size=16), legend.position = "bottom", legend.title = element_text(size=16), legend.text = element_text(size=16), axis.title=element_text(size=16,face="bold")) + theme(plot.title = element_text(size = 18, face = "bold"), strip.text = element_text(size=16, face="bold")) + facet_wrap(vars(study_accession, matrix), ncol = 2) p
Age imputation for studies without reported ages for young adults. Several studies (SDY1260, SDY1264, SDY1293, SDY1294, SDY1364, SDY1370, SDY1373, SDY984) were missing participant age. We performed age imputation for transcriptomic profiles of participants without reported ages using the RAPToR R package. Virtual studies were split into young (age < 50, E) and older (age >= 50, F) for two separate predictive models.
## RAPToR Helper Function get_raptor_results <- function(eset) { raptor_function <- "X ~ s(age_reported, bs = 'cr') + matrix" raptor_eset <- eset[, eset$time_post_last_vax == 0] p <- pData(raptor_eset) X <- exprs(raptor_eset) # Choose which samples to use for reference dataset: studies_without_ages <- c("SDY1260", "SDY1264","SDY1293","SDY1294","SDY1364","SDY1370","SDY1373","SDY984") test_sample_indices <- which(p$study_accession %in% studies_without_ages) ref_sample_indices <- which(!p$study_accession %in% studies_without_ages) # Make test sets: p_test <- p[test_sample_indices,] p <- p[ref_sample_indices,] X_test <- X[,test_sample_indices] X <- X[,ref_sample_indices] # Number of components pca <- stats::prcomp(X, rank = 20) nc <- sum(summary(pca)$importance[3, ] < .999) + 1 # Number of significant components # Build Model: funct <- raptor_function m <- ge_im(X = X, p = p, formula = funct, nc = nc) # Build interpolation data n.inter = 500 ndat <- data.frame( age_reported = seq(min(p$age_reported), max(p$age_reported), l = n.inter), arm_accession = sample(unique(p$arm_accession), n.inter, replace = TRUE), y_chrom_present = sample(c(TRUE, FALSE), n.inter, replace = TRUE), matrix = sample(unique(p$matrix),n.inter,replace = TRUE), study_accession = sample(unique(p$study_accession),n.inter,replace = TRUE), race = sample(unique(p$race), n.inter, replace = TRUE), ethnicity = sample(unique(p$ethnicity), n.inter, replace = TRUE), cohort = sample(unique(p$cohort),n.inter,replace = TRUE) ) # get interpolated GE matrix, as a reference r_X <- list(interpGE = stats::predict(m, ndat), time.series = ndat$age_reported) # test ae_X <- ae(eset[, eset$time_post_last_vax == 0], r_X$interpGE, r_X$time.series) return(ae_X) }
# Get RAPToR age estimates for young and old age groups set.seed(123) raptor_young_file <- file.path(extra_data_dir, "raptor_young_results.rds") if (file.exists(raptor_young_file)) { raptor_results_young <- readRDS(raptor_young_file) } else { raptor_results_young <- get_raptor_results(young.NoResp) saveRDS(raptor_results_young, raptor_young_file) } raptor_old_file <- file.path(extra_data_dir, "raptor_old_results.rds") if (file.exists(raptor_old_file)) { raptor_results_old <- readRDS(raptor_old_file) } else { raptor_results_old <- get_raptor_results(old.NoResp) saveRDS(raptor_results_old, raptor_old_file) }
# Use RAPToR age estimate for studies without age_reported: # SDY1260, SDY1264, SDY1293, SDY1294, SDY1364, SDY1370, SDY1373, SDY984 studies_missing_age <- c("SDY1260", "SDY1264","SDY1293", "SDY1294","SDY1364","SDY1370","SDY1373","SDY984") young_raptor_age <- raptor_results_young$age.estimates[, 1] names(young_raptor_age) <- young.NoResp[, match(names(young_raptor_age), young.NoResp$uid)]$participant_id young.NoResp$raptor_age <- young_raptor_age[young.NoResp$participant_id] young.NoResp$raptor_age <- ifelse(young.NoResp$study_accession %in% studies_missing_age, young.NoResp$raptor_age, young.NoResp$age_imputed) young.WithResp$raptor_age <- young_raptor_age[young.WithResp$participant_id] young.WithResp$raptor_age <- ifelse(young.WithResp$study_accession %in% studies_missing_age, young.NoResp$age_reported, young.NoResp$raptor_age) old_raptor_age <- raptor_results_old$age.estimates[, 1] names(old_raptor_age) <- old.NoResp[, match(names(old_raptor_age), old.NoResp$uid)]$participant_id old.NoResp$raptor_age <- raptor_results_old$age.estimates[,1][old.NoResp$uid] old.NoResp$raptor_age <- ifelse(old.NoResp$study_accession %in% studies_missing_age, old.NoResp$age_reported, old.NoResp$raptor_age)
results_young_tibble <- tibble( actual = young.NoResp$age_reported[match(names(young_raptor_age), young.NoResp$participant_id)], estimates = young_raptor_age, study_accession = young.NoResp$study_accession[match(names(young_raptor_age), young.NoResp$participant_id)] ) # only include participants from studies with age data results_young_tibble <- filter(results_young_tibble, !study_accession %in% studies_missing_age) # Get trend lm_fit_young <- lm(actual ~ estimates, data=results_young_tibble) # Plot: ggplot(results_young_tibble) + geom_point(aes(x = actual, y = estimates)) + geom_smooth(aes(x = actual, y = actual), method='lm') + xlab("age_reported") + ylab("estimated age (from RAPToR)") + xlim(18, 50) + ylim(18, 50) + ggtitle(paste0("Young Adults: RAPToR Prediction vs. Actual Age: R^2 = ", round(summary(lm_fit_young)$r.squared, 3)))+ theme_classic ()+ theme(title = element_text (size=16,face="bold"), axis.text=element_text(size=16), legend.position = "bottom", legend.title = element_text(size=16), legend.text = element_text(size=16), axis.title=element_text(size=16,face="bold"))
Age imputation for studies without reported ages for older adults.
results_old_tibble <- tibble( actual = old.NoResp$age_reported[match(rownames(raptor_results_old$age.estimates), old.NoResp$uid)], estimates = raptor_results_old$age.estimates[, 1], study_accession = old.NoResp$study_accession[match(rownames(raptor_results_old$age.estimates), old.NoResp$uid)] ) # only include participants from studies with age data results_old_tibble <- filter(results_old_tibble, !study_accession %in% studies_missing_age) lm_fit_old <- lm(actual ~ estimates, data=results_old_tibble) # Plot: ggplot(results_old_tibble) + geom_point(aes(x = actual, y = estimates)) + geom_smooth(aes(x = actual, y = actual), method='lm') + xlab("age_reported") + ylab("estimated age (from RAPToR)") + xlim(50, 90) + ylim(50, 90) + ggtitle(paste0("Older Adults: RAPToR Prediction vs. Actual Age: R^2 = ", round(summary(lm_fit_old)$r.squared, 3)))+ theme_classic ()+ theme(title = element_text (size=16,face="bold"), axis.text=element_text(size=16), legend.position = "bottom", legend.title = element_text(size=16), legend.text = element_text(size=16), axis.title=element_text(size=16,face="bold"))
Number of samples for each data type, including transcriptomics (TX), hemagglutination inhibition assay (HAI), neutralizing antibody assay (NAB), and ELISA assays (ELISA).
Bar plot depicting the number of samples at each time point. The colors within each bar indicate the breakdown for each unique combination of pathogen and vaccine type. Day -7 and day 0 correspond to times pre-vaccination.
# any sampling data after 20 days coded as >20 df.fig1b <- pData(young.NoResp) %>% dplyr::select(uid, study_time_collected, pathogen, vaccine_type) %>% arrange(study_time_collected) %>% mutate(study_time_collected = signif(study_time_collected, digits = 3), study_time_collected.factor = ifelse(test = study_time_collected > 20, yes = ">20", no = study_time_collected), study_time_collected.factor = factor(study_time_collected.factor, levels = unique(study_time_collected.factor)), vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>% group_by(study_time_collected.factor, vaccine) %>% summarize(n = n()) Fig4b <- ggplot(data = df.fig1b, mapping = aes(x = study_time_collected.factor, y = n)) + geom_bar(stat = "identity", mapping = aes(fill = vaccine)) + theme_classic () + labs(x = "Days post-vaccination", y = "Number of samples") + scale_y_continuous(limits = c(0, 800), breaks = seq(from = 0, to = 800, by = 100)) + scale_fill_manual(name = "Pathogen (Vaccine type)", values = vaccine2color) Fig4b + guides(fill = guide_legend(ncol = 2)) + theme_classic() + theme ( axis.text.y = element_text(hjust = 1, size=20), axis.title= element_text (size=20), axis.text.x = element_text(angle = 45, hjust = 1, size=18),legend.pos= "bottom",legend.text = element_text(size=18), legend.title =element_text(size=20))
Box plot depicting the participants' age distribution for each unique combination of pathogen and vaccine type.
# Set seed for consistent results set.seed(123) # Combined pdata with raptor results pdata_df <- pdata_df <- rbind(pData(young.NoResp)[, !grepl("age_group", names(pData(young.NoResp)))], pData(old.NoResp)) D0pdata <- pdata_df %>% dplyr::filter (time_post_last_vax == 0) %>% dplyr::filter(vaccine != "Saline" ) %>% dplyr::filter (age_imputed !=0) D0pdata$vaccine=dplyr::recode(D0pdata$vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS") D0pdata$vaccine=dplyr::recode(D0pdata$vaccine, "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM") D0pdata$vaccine =dplyr::recode(D0pdata$vaccine, "TIV(2011)"="TIV (2011)") D0pdata$vaccine =dplyr::recode(D0pdata$vaccine, "TIV(2013)"="TIV (2013)") D0pdata$pathogen_vaccinetype=paste0(D0pdata$pathogen," (",D0pdata$vaccine_type,")") D0pdata$pathogen_vaccinetype=paste0(D0pdata$pathogen," (",D0pdata$vaccine_type,")") D0pdatafig <- ggplot(D0pdata, aes(x=pathogen_vaccinetype, y=raptor_age)) + geom_boxplot(outlier.shape=NA) + theme_classic() + #scale_shape_manual(values=1:nlevels(vaccine)) + theme(axis.text.x = element_text(size=18, angle=45, hjust=1)) + theme(axis.text.y = element_text(size = 18, hjust = 1)) + scale_y_continuous(breaks = pretty(D0pdata$raptor_age, n = 10)) + ylab("Age") + xlab ("Pathogen (Vaccine Type)")+ #ggtitle("HIPC Signatures 2 Summary: Comparative Meta-analysis Study Metadata") + theme(plot.title = element_text(face="bold", size=18)) + theme(axis.title = element_text(face="bold", size=18)) + scale_fill_manual(values=getPalette (length(unique(D0pdata$vaccine)))) + #scale_color_viridis_d(option="plasma", direction=-1) + geom_jitter(aes(color=vaccine, shape=D0pdata$y_chrom_present), size=3, alpha=0.5, position=position_jitter(width=.20, height=.1))+ theme (legend.title = element_text(size=16, face="bold"), legend.text = element_text(size = 14), legend.key.height = unit(0.02, units = "npc"), legend.key.width = unit(0.015, units = "npc")) + scale_shape_discrete(name="biological sex\n(Y chrom presence)", labels = c("Female", "Male"))+ theme(legend.key=element_blank(), legend.key.size=unit(10,"point")) + theme (legend.position = "bottom") + scale_x_discrete(labels = c("Ebola\n(Recombinant\nViral Vector)", "Hepatitis A/B\n(Inactivated/Recombinant protein)", "HIV\n(Recombinant\nViral Vector)", "Influenza\n(Inactivated)", "Influenza\n(Live virus)", "Malaria\n(Recombinant\nProtein)", "Meningococcus\n(Conjugate)", "Meningococcus\n(Polysaccharide)","Pneumococcus\n(Polysaccharide)", "Smallpox\n(Live virus)", "Tuberculosis\n(Recombinant\nViral Vector)","Varicella Zoster\n(Live virus)", "Yellow Fever\n(Live virus)")) D0pdatafig + guides(col = guide_legend(ncol = 3))
Each area-proportional Euler diagram represents the total number of participants with corresponding data types.
IS2_all_GE_metaData <- pData(all.noNorm) hai <- immdata$hai nab <- immdata$neut_ab_titer elisa <- immdata$elisa hai_pt <- length (unique (hai$participant_id)) nab_pt <- length (unique(nab$participant_id)) elisa_pt <- length (unique(elisa$participant_id)) TX_pt <-length (unique(IS2_all_GE_metaData$participant_id)) nab_hai <-length(intersect(nab$participant_id,hai$participant_id)) nab_elisa <-length(intersect(nab$participant_id,elisa$participant_id)) nab_TX <-length(intersect(nab$participant_id,IS2_all_GE_metaData$participant_id)) hai_elisa<-length(intersect(hai$participant_id,elisa$participant_id)) hai_TX <-length(intersect(hai$participant_id,IS2_all_GE_metaData$participant_id)) elisa_TX<-length(intersect(elisa$participant_id,IS2_all_GE_metaData$participant_id)) nab_hai_elisa <- length(intersect(intersect(nab$participant_id,hai$participant_id), elisa$participant_id)) nab_hai_TX <- length(intersect(intersect(nab$participant_id,hai$participant_id), IS2_all_GE_metaData$participant_id)) nab_elisa_TX <- length(intersect(intersect(nab$participant_id, elisa$participant_id), IS2_all_GE_metaData$participant_id)) hai_elisa_TX <- length(intersect(intersect(hai$participant_id,elisa$participant_id), IS2_all_GE_metaData$participant_id)) nab_hai_elisa_TX <- length(intersect(intersect(intersect(nab$participant_id,hai$participant_id),elisa$participant_id), IS2_all_GE_metaData$participant_id)) hai$pathogen_vaccinetype=paste0(hai$pathogen," (",hai$vaccine_type,")") elisa$pathogen_vaccinetype=paste0(elisa$pathogen," (",elisa$vaccine_type,")") nab$pathogen_vaccinetype=paste0(nab$pathogen," (",nab$vaccine_type,")") TXeuler <- c("nab" = nab_pt, "hai"=hai_pt , "elisa" = elisa_pt, "transcriptomics" = TX_pt,"nab&hai" = nab_hai, "nab&elisa" = nab_elisa, "nab&transcriptomics" = nab_TX, "hai&elisa" =hai_elisa, "hai&transcriptomics" = hai_TX, "elisa&transcriptomics"=elisa_TX,"nab&hai&elisa" = nab_hai_elisa, "nab&hai&transcriptomics" = nab_hai_TX, "hai&elisa&transcriptomics" = hai_elisa_TX, "nab&hai&elisa&transcriptomics" = nab_hai_elisa_TX)
plot(euler(TXeuler, shape = "circle"), quantities = list(cex = 1.5))
mutate_cond <- function(.data, condition, ..., envir = parent.frame()) { condition <- eval(substitute(condition), .data, envir) .data[condition, ] <- .data[condition, ] %>% mutate(...) .data } hai1 = hai %>% # filter(complete.cases(.)) %>% mutate(value_reported = as.numeric(value_reported)) %>% mutate(value_preferred = as.numeric(value_preferred)) %>% mutate(study_time_collected = as.factor(study_time_collected)) %>% mutate_cond (study_accession=="SDY1276", value_reported = as.numeric (2^value_reported), value_reported) %>% mutate_cond (study_accession=="SDY1276", value_preferred = as.numeric (2^value_preferred), value_preferred) %>% mutate(vaccine = recode(vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS", "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM", "TIV(2011)"="TIV (2011)", "TIV(2013)"="TIV (2013)")) nab1 = nab %>% #filter(complete.cases(.)) %>% mutate(value_reported = as.numeric(value_reported)) %>% mutate(value_preferred = as.numeric(value_preferred)) %>% mutate(study_time_collected = as.factor(study_time_collected)) %>% mutate_cond (study_accession=="SDY1276", value_reported = as.numeric (2^value_reported), value_reported) %>% mutate_cond (study_accession=="SDY1276", value_preferred = as.numeric (2^value_preferred), value_preferred) %>% mutate(vaccine = recode(vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS", "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM", "TIV(2011)"="TIV (2011)", "TIV(2013)"="TIV (2013)")) nab2 = nab %>% #filter(complete.cases(.)) %>% mutate(value_reported = as.numeric(value_reported)) %>% mutate(value_preferred = as.numeric(value_preferred)) %>% mutate(study_time_collected = as.factor(study_time_collected)) %>% # mutate_cond (vaccine=="Pneumovax23", value_preferred = as.numeric (2^value_preferred), value_preferred) %>% # mutate_cond (vaccine=="Pneumovax23", value_reported = as.numeric (2^value_reported), value_reported) %>% mutate_cond (study_accession=="SDY1276", value_reported = as.numeric (2^value_reported), value_reported) %>% mutate_cond (study_accession=="SDY1276", value_preferred = as.numeric (2^value_preferred), value_preferred) %>% mutate(vaccine = recode(vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS", "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM", "TIV(2011)"="TIV (2011)", "TIV(2013)"="TIV (2013)")) #%>% # mutate(study_time_collected = as.factor(study_time_collected)) %>% # mutate_cond (vaccine=="Pneumovax23", value_reported = as.numeric #(2^value_reported), value_reported) %>% # mutate_cond (vaccine=="Pneumovax23", value_preferred = as.numeric #(2^value_preferred), value_preferred) elisa = elisa %>% # filter(complete.cases(.)) %>% mutate(value_reported = as.numeric(value_reported)) %>% mutate(value_preferred = as.numeric(value_preferred)) %>% mutate(study_time_collected = as.factor(study_time_collected)) %>% mutate_cond (study_accession=="SDY1260", value_reported = as.numeric (2^value_reported), value_reported) %>% mutate_cond (study_accession=="SDY1260", value_preferred = as.numeric (2^value_preferred), value_preferred) %>% mutate_cond (study_accession=="SDY269", value_reported = as.numeric (value_reported*1000), value_reported) %>% mutate_cond (study_accession=="SDY269", value_preferred = as.numeric (value_preferred*1000), value_preferred)
The longitudinal trajectory (summarized as a loess curve) of hemagglutinin inhibition assay (HAI) measurements (in log2 scale) by influenza vaccine type and year.
# df <- bind_rows(hai1, df.2, .id = "id_col") hai1$study_time_collected <- factor(hai1$study_time_collected, levels=sort(as.numeric(as.character (unique(hai1$study_time_collected))))) haiplot <- ggplot(data = hai1, mapping = aes(x = study_time_collected, y = log2(value_preferred))) + scale_y_continuous(trans='log2')+ ylab ("log2(value_preferred)") + theme_classic ()+ geom_smooth (method="loess", se=TRUE, aes(color=vaccine, group=vaccine)) + facet_wrap (~vaccine, scale="free_x", nrow=1)+ theme(legend.position = "none") + theme(text = element_text(size=30)) #, axis.text.x = element_text(angle=45, hjust=1, size=14)) # facet_grid(facet =~pathogen_vaccinetype, scale = "free_x") haiplot+ theme (axis.text.y = element_text(hjust = 1, size=16), axis.title= element_text (size=20), axis.text.x = element_text(angle = 45, hjust = 1, size=16),legend.pos= "none",legend.text = element_text(size=18), legend.title =element_text(size=20), strip.text = element_text(size=16, face="bold"))+ ylab ("HAI titer (log2)")+ ggtitle("Hemagglutination Inhibition Antibody Trajectory") + theme(plot.title = element_text(size=20,hjust = 0.5, face="bold")) + scale_x_discrete(breaks = c(-7,-3,0,3,7,14,15,24,25,28,30,32,35,36,37,41,43,180))+ theme(panel.spacing.x = unit(1, "lines"))
The longitudinal trajectory of neutralizing antibody titers (NAB) titers (in log2 scale) for influenza, meningococcus, pneumococcus and yellow fever vaccines.
swr = function(string, nwrap=13) { paste(strwrap(string, width=nwrap), collapse="\n") } swr = Vectorize(swr) nab2$pathogen_vaccinetype = swr(nab2$pathogen_vaccinetype) nab2$vaccine <- recode(nab2$vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS", "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM", "TIV(2011)"="TIV (2011)", "TIV(2013)"="TIV (2013)") nab2$study_time_collected <- factor(nab2$study_time_collected, levels=sort(as.numeric(as.character (unique(nab2$study_time_collected))))) nabplot <-ggplot(data = nab2, mapping = aes(x = study_time_collected, y = log2(value_preferred))) + #geom_line(mapping = aes(color = study_accession, group = participant_id), alpha=0.3) + geom_smooth (method="loess", se=TRUE, aes(color=vaccine, group=vaccine)) + # geom_smooth (method="loess", se=TRUE, aes(group=1)) + scale_y_continuous(trans="log2")+ #facet_grid(rows = vars(value_pref), facet = ~vaccine, scale = "free_x") + facet_wrap (~pathogen_vaccinetype, scale="free_x", nrow=1)+ theme_classic() + theme(legend.position = "bottom") nabplot+ theme ( text = element_text(size=28),axis.text.y = element_text(hjust = 1, size=20), axis.title= element_text (size=20), axis.text.x = element_text(angle = 45, hjust = 1, size=22),legend.pos= "bottom",legend.text = element_text(size=18), legend.title =element_text(size=20), strip.text = element_text(size=20, face="bold") )+ ylab ("NAb titer (log2)")+ ggtitle("Neutralizing Antibody Titers Trajectory") + theme(plot.title = element_text(size=24,hjust = 0.5, face="bold"))
Neutralizing antibody titers were plotted for each unique combination of targeted pathogen and vaccine type to compare each participants' post-vaccination (day 28-30) values versus baseline (day 0). The violin plo shows the variation in magnitude for each unique combination of targeted pathogen and vaccine type.
nabFC_28vs0 <- nab2 %>% filter(study_time_collected %in% c(0, 28) & !is.na(value_preferred)) %>% group_by(participant_id, age_imputed, study_time_collected, vaccine, pathogen_vaccinetype, study_accession, virus) %>% mutate (value_preferred=as.numeric (value_preferred)) %>% dplyr::summarize (nabval= mean (value_preferred, na.rm=TRUE)) %>% ungroup () %>% filter(complete.cases (.)) %>% tidyr::spread (study_time_collected, nabval) %>% mutate (nabFC_28vs0= log2((`28`+0.025)/(`0`+0.025))) #VIOLIN PLOTS nabFC_vaccinelfc <- ggplot(nabFC_28vs0, aes(pathogen_vaccinetype, nabFC_28vs0, fill=pathogen_vaccinetype), outlier.size = 0) + geom_violin(trim=FALSE) + geom_boxplot(width=0.1, fill="white") + #scale_y_continuous(trans="log2")+ facet_grid(~pathogen_vaccinetype, scales = "free") + ylab("Log2 Fold Change (Day 28 vs Day 0))") + #geom_point(aes (colour=factor (study_accession)), pch=20, size=2, position=position_jitterdodge(dodge.width=0.5)) + geom_smooth(method = "loess", se=TRUE, aes(group=1)) + theme(plot.title = element_text(size = 20, face = "bold", hjust=0))+ theme(text = element_text(size=14), axis.title.x = element_text(size = 14)) + theme (axis.text=element_text(size=14))+ labs(color='Study Accession') + scale_fill_brewer(palette="RdBu") + theme_classic() nabFC_vaccinelfc+ theme(legend.position = "none")+ theme ( text = element_text(size=14),axis.text.y = element_text(hjust = 1, size=14), axis.title= element_text (size=14), axis.text.x = element_blank(),legend.pos= "none",legend.text = element_text(size=14), legend.title =element_text(size=18), strip.text = element_text(size=14, face="bold") )
library(emmeans) f<-lm(nabFC_28vs0~pathogen_vaccinetype, data=nabFC_28vs0) g <- as.data.frame (emmeans(f,'pairwise'~pathogen_vaccinetype, adjust='none')[[2]] %>% as.data.frame() %>% dplyr::select("contrast", "estimate", "SE", "p.value")) print (g) h <- as.data.frame (anova(f)) print (h)
The correlation plot of influenza studies compares the maximum fold change (MFC) across strains for hemagglutinin inhibition assay (HAI) titers versus neutralizing antibody (NAB) titers. Size is proportional to the number of samples analyzed.
sdyLS <- intersect(immdata$hai$study_accession, immdata$neut_ab_titer$study_accession) haiDF <- immdata$hai %>% filter(study_accession %in% sdyLS & (study_time_collected <= 0 | study_time_collected %in% 28)) %>% mutate(study_time_collected = pmax(study_time_collected, 0), value_preferred = as.numeric(value_preferred)) %>% dplyr::select(participant_id, study_accession, virus, study_time_collected, value_preferred) %>% distinct() %>% tidyr::pivot_wider(names_from = study_time_collected, values_from = value_preferred, values_fn = mean) %>% filter(complete.cases(.)) %>% mutate(FC = `28`/`0`) %>% group_by(participant_id, study_accession) %>% summarize(hai.MFC = max(FC)) nabDF <- immdata$neut_ab_titer %>% filter(study_accession %in% sdyLS & (study_time_collected <= 0 | study_time_collected %in% 28)) %>% mutate(study_time_collected = pmax(study_time_collected, 0), value_preferred = as.numeric(value_preferred)) %>% dplyr::select(participant_id, study_accession, virus, study_time_collected, value_preferred) %>% distinct() %>% tidyr::pivot_wider(names_from = study_time_collected, values_from = value_preferred, values_fn = mean) %>% filter(complete.cases(.)) %>% mutate(FC = `28`/`0`) %>% group_by(participant_id, study_accession) %>% summarize(nab.MFC = max(FC)) combDF <- merge(x = haiDF, y = nabDF, by = c("participant_id", "study_accession")) ggplot(data = combDF, mapping = aes(x = hai.MFC, y = nab.MFC)) + scale_x_log10() + scale_y_log10() + geom_count() + scale_size_area() + theme_classic() + theme (axis.text=element_text(size=18), axis.title=element_text(size=20), legend.text = element_text (size=14), legend.title=element_text(size=16)) cor.test(formula = ~hai.MFC+nab.MFC, data = combDF, method = "spearman") combDF %>% group_by(study_accession) %>% do(rho = cor.test(formula = ~hai.MFC+nab.MFC, data = ., method = "spearman")$estimate, p = cor.test(formula = ~hai.MFC+nab.MFC, data = ., method = "spearman")$p.value) %>% ungroup() %>% mutate(rho = unlist(rho), p = unlist(p))
IS2_eset <- all.noNorm IS2_all_GE_metaData <- pData(IS2_eset) #transform into dataframe #Convert timepoint to numeric IS2_all_GE_metaData$time_post_last_vax <- as.numeric(IS2_all_GE_metaData$time_post_last_vax) IS2_all_GE_metaData$study_time_collected <- as.numeric(IS2_all_GE_metaData$study_time_collected) #Remove saline groups #IS2_all_GE_metaData<-IS2_all_GE_metaData%>% # filter(vaccine !="Saline") #dim (IS2_all_GE_metaData) # table (IS2_all_GE_metaData$time_post_last_vax) studies <- unique(IS2_all_GE_metaData$study) vaccine_type <- unique(IS2_all_GE_metaData$vaccine_type) pathogen <- unique(IS2_all_GE_metaData$pathogen) timepoint <- unique(IS2_all_GE_metaData$study_time_collected) pathogen_type <- unique(IS2_all_GE_metaData$pathogen_type) #Recode IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS") IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM") IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "TIV(2011)"="TIV (2011)") IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "TIV(2013)"="TIV (2013)") x1 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, pathogen, adjuvant,vaccine_type, race))%>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+pathogen+vaccine+vaccine_type+adjuvant,., paste, collapse=",") #aggregate study time collected y1 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, ethnicity))%>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y2 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession,vaccine, biosample_accession))%>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y3 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, cohort,vaccine))%>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y4= IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, arm_accession))%>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y5 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, exposure_material_reported))%>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected c <- merge (x1,y1) d <- merge (y2,y3) e <- merge (y4,y5) f <- merge (c,d) x <- merge (f,e) #Add new merged variables x$pathogen_vaccinetype <- paste0(x$pathogen," (",x$vaccine_type,")") x$sdy_vax <- paste0(x$study_accession,"_",x$vaccine) IS2_all_GE_metaData$sdy_vax <- paste0(IS2_all_GE_metaData$study_accession,"_",IS2_all_GE_metaData$vaccine) #colnames(x)[6] = "study_time_collected" #Manually add Pubmed and GEO Accession IDs x$pubmed_id <- recode(x$study_accession, "SDY1260" = "24336226", "SDY1328" = "26742691", "SDY1370" = "21921208", "SDY984" = "28502771", "SDY1364" = "23844129", "SDY61" = "21743478", "SDY1276" = "21357945", "SDY212" = "23591775", "SDY269" = "21743478", "SDY180" = "23601689", "SDY270" = "21743478", "SDY224" = "23900141", "SDY56" = "26682988", "SDY63" = "25596819", "SDY1119" = "26682988", "SDY404" = "25596819", "SDY400" = "32060136", "SDY520" = "32060136", "SDY640" = "32060136", "SDY67" = "25816015,27031986,27441275,29130882", "SDY1529" = "19047440", "SDY1291" = "23151505", "SDY1325" = "28137280", "SDY80" = "24725414", "SDY1264" = "19029902", "SDY1289" = "19047440", "SDY1294" = "28687661", "SDY1373"= "", "SDY1293"="") x$geo_accession <- recode(x$study_accession, "SDY1260" = "GSE52245", "SDY1328" = "GSE65834", "SDY1370" = "GSE22121", "SDY984" = "GSE79396", "SDY1364" = "GSE40719", "SDY61" = "GSE29617,GSE29614", "SDY1276" = "GSE48024,GSE48018", "SDY212" = "GSE41080", "SDY269" = "GSE29615,GSE29617,GSE29614", "SDY180" = "GSE48762", "SDY270" = "GSE29617,GSE29614", "SDY224" = "GSE45735", "SDY56" = "GSE74817", "SDY63" = "GSE59635", "SDY1119" = "GSE74817", "SDY67" = "", "SDY1529" = "GSE125921,GSE136163", "SDY404" = "GSE59654", "SDY400" = "GSE59743,GSE95584", "SDY520" = "GSE101709", "SDY1368"="GSE86332", "SDY640" = "GSE101710", "SDY1325" = "GSE92884", "SDY80" = "GSE47353", "SDY1264" = "GSE13485", "SDY1289" = "GSE13699", "SDY1294" = "GSE82152", "SDY1291" = "GSE22768", "SDY1293" = "GSE18323", "SDY1373" = "GSE97590") #Calculate number of participants and samples num_samples <- IS2_all_GE_metaData %>% group_by(sdy_vax) %>% tally(name="number_of_samples") num_participants <- plyr::ddply( IS2_all_GE_metaData, ~sdy_vax,summarise, number_of_participants = length(unique(participant_id))) final_x <- Reduce(function(x,y) merge(x = x, y = y, by = "sdy_vax"), list(x, num_participants,num_samples)) finaltable <- dplyr::select(final_x, "pathogen_vaccinetype", "study_accession","number_of_participants", "number_of_samples", "vaccine", "adjuvant","exposure_material_reported", "race", "ethnicity", "pubmed_id","arm_accession","cohort") finaltable1 <- finaltable %>% arrange (pathogen_vaccinetype, study_accession) %>% relocate(study_accession) finaltable1
IS2_all_GE_metaData <- pData(IS2_eset) #transform into dataframe #Convert timepoint to numeric IS2_all_GE_metaData$time_post_last_vax <- as.numeric(IS2_all_GE_metaData$time_post_last_vax) IS2_all_GE_metaData$study_time_collected <- as.numeric(IS2_all_GE_metaData$study_time_collected) #Remove saline groups #IS2_all_GE_metaData<-IS2_all_GE_metaData%>% # filter(vaccine !="Saline") #dim (IS2_all_GE_metaData) #table (IS2_all_GE_metaData$time_post_last_vax) studies <- unique(IS2_all_GE_metaData$study) vaccine_type <- unique(IS2_all_GE_metaData$vaccine_type) pathogen <- unique(IS2_all_GE_metaData$pathogen) timepoint <- unique(IS2_all_GE_metaData$study_time_collected) pathogen_type <- unique(IS2_all_GE_metaData$pathogen_type) #Recode IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS") IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM") IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "TIV(2011)"="TIV (2011)") IS2_all_GE_metaData$vaccine <- recode(IS2_all_GE_metaData$vaccine, "TIV(2013)"="TIV (2013)") #gsm, cell_type, featureSetName, featureSetName2, featureSetVendor x1 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, pathogen,vaccine_type, vaccine, time_post_last_vax, cell_type, featureSetName)) %>% #select desired columns distinct() %>% #remove duplicated rows arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+pathogen+vaccine_type+vaccine+cell_type+featureSetName,., paste, collapse=",") #aggregate study time collected y1 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, gsm)) %>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y2 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, featureSetName2)) %>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y3 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, featureSetVendor)) %>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected y4 <- IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, vaccine, gsm)) %>% #select desired columns distinct() %>% #remove duplicated rows #arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+vaccine,., paste, collapse=",") #aggregate study time collected x2 <- merge (y1,y2) x3 <- merge (y3,y4) x4 <- merge (x2,x3) x <- merge (x1,x4) #Add new merged variables x$pathogen_vaccinetype <- paste0(x$pathogen," (",x$vaccine_type,")") x$sdy_vax <- paste0(x$study_accession,"_",x$vaccine) IS2_all_GE_metaData$sdy_vax <- paste0(IS2_all_GE_metaData$study_accession,"_",IS2_all_GE_metaData$vaccine) #colnames(x)[6] = "study_time_collected" #Manually add Pubmed and GEO Accession IDs x$pubmed_id <- recode(x$study_accession, "SDY1260" = "24336226", "SDY1328" = "26742691", "SDY1370" = "21921208", "SDY984" = "28502771", "SDY1364" = "23844129", "SDY61" = "21743478", "SDY1276" = "21357945", "SDY212" = "23591775", "SDY269" = "21743478", "SDY180" = "23601689", "SDY270" = "21743478", "SDY224" = "23900141", "SDY56" = "26682988", "SDY63" = "25596819", "SDY1119" = "26682988", "SDY404" = "25596819", "SDY400" = "32060136", "SDY520" = "32060136", "SDY640" = "32060136", "SDY67" = "25816015/27031986/27441275/29130882", "SDY1529" = "19047440", "SDY1291" = "23151505", "SDY1325" = "28137280", "SDY80" = "24725414", "SDY1264" = "19029902", "SDY1289" = "19047440", "SDY1294" = "28687661", "SDY1373"= "", "SDY1293"="") x$geo_accession <- recode(x$study_accession, "SDY1260" = "GSE52245", "SDY1328" = "GSE65834", "SDY1370" = "GSE22121", "SDY984" = "GSE79396", "SDY1364" = "GSE40719", "SDY61" = "GSE29617/GSE29614", "SDY1276" = "GSE48024/GSE48018", "SDY212" = "GSE41080", "SDY269" = "GSE29615/GSE29617/GSE29614", "SDY180" = "GSE48762", "SDY270" = "GSE29617/GSE29614", "SDY224" = "GSE45735", "SDY56" = "GSE74817", "SDY63" = "GSE59635", "SDY1119" = "GSE74817", "SDY67" = "", "SDY1529" = "GSE125921/GSE136163", "SDY404" = "GSE59654", "SDY400" = "GSE59743/GSE95584", "SDY520" = "GSE101709", "SDY1368"="GSE86332", "SDY640" = "GSE101710", "SDY1325" = "GSE92884", "SDY80" = "GSE47353", "SDY1264" = "GSE13485", "SDY1289" = "GSE13699", "SDY1294" = "GSE82152", "SDY1291" = "GSE22768", "SDY1293" = "GSE18323", "SDY1373" = "GSE97590") #Calculate number of participants and samples num_samples <- IS2_all_GE_metaData %>% group_by(sdy_vax) %>% tally(name="number_of_samples") num_participants <- plyr::ddply(IS2_all_GE_metaData,~sdy_vax,summarise,number_of_participants=length(unique(participant_id))) final_x <- Reduce(function(x,y) merge(x = x, y = y, by = "sdy_vax"), list(x, num_participants,num_samples)) finaltable <- dplyr::select(final_x, "study_accession","pathogen_vaccinetype", "cell_type", "featureSetName", "featureSetName2","featureSetVendor", "time_post_last_vax","geo_accession","gsm") finaltable1 <- finaltable %>% arrange (pathogen_vaccinetype, study_accession) %>% relocate(study_accession) finaltable1
IS2_eset_withResp <- all_noNorm_withResponse IS2_all_GE_metaData=pData(IS2_eset_withResp) #transform into dataframe IS2_all_GE_metaData$pathogen_vaccinetype=paste0(IS2_all_GE_metaData$pathogen," (",IS2_all_GE_metaData$vaccine_type,")") IS2_all_GE_metaData$vaccine=recode(IS2_all_GE_metaData$vaccine, "Quadrivalent Meningococcal Vaccine plain-polysaccharide conjugate"="MenACWY-PS") IS2_all_GE_metaData$vaccine=recode(IS2_all_GE_metaData$vaccine, "Quadrivalent Meningococcal Vaccine protein-polysaccharide conjugate"="MenACWY-CRM") IS2_all_GE_metaData$vaccine =recode(IS2_all_GE_metaData$vaccine, "TIV(2011)"="TIV (2011)") IS2_all_GE_metaData$vaccine =recode(IS2_all_GE_metaData$vaccine, "TIV(2013)"="TIV (2013)") x= IS2_all_GE_metaData %>% dplyr::select(., c(study_accession, pathogen_vaccinetype, assay, vaccine, time_post_last_vax))%>% #select desired columns distinct() %>% #remove duplicated rows arrange(time_post_last_vax) %>% #arrange by study time (ensures order when aggregating) aggregate(.~study_accession+pathogen_vaccinetype+vaccine+time_post_last_vax,., paste, collapse=",") #aggregate study time collected #Add new merged variables #x$pathogen_vaccinetype=paste0(x$pathogen," (",x$vaccine_type,")") x$sdy_vax=paste0(x$study_accession,"_",x$vaccine) IS2_all_GE_metaData$sdy_vax=paste0(IS2_all_GE_metaData$study_accession,"_",IS2_all_GE_metaData$vaccine) #colnames(x)[6] = "time_post_last_vax_in_days" #Manually add Pubmed and GEO Accession IDs x$pubmed_id = recode(x$study_accession, "SDY1260" = "24336226", "SDY1328" = "26742691", "SDY1370" = "21921208", "SDY984" = "28502771", "SDY1364" = "23844129", "SDY61" = "21743478", "SDY1276" = "21357945", "SDY212" = "23591775", "SDY269" = "21743478", "SDY180" = "23601689", "SDY270" = "21743478", "SDY224" = "23900141", "SDY56" = "26682988", "SDY63" = "25596819", "SDY1119" = "26682988", "SDY404" = "25596819", "SDY400" = "32060136", "SDY520" = "32060136", "SDY640" = "32060136", "SDY67" = "25816015/27031986/27441275/29130882", "SDY1529" = "19047440", "SDY1291" = "23151505", "SDY1325" = "28137280", "SDY80" = "24725414", "SDY1264" = "19029902", "SDY1289" = "19047440", "SDY1294" = "28687661", "SDY1373"= "", "SDY1293"="") x$geo_accession = recode(x$study_accession, "SDY1260" = "GSE52245", "SDY1328" = "GSE65834", "SDY1370" = "GSE22121", "SDY984" = "GSE79396", "SDY1364" = "GSE40719", "SDY61" = "GSE29617/GSE29614", "SDY1276" = "GSE48024/GSE48018", "SDY212" = "GSE41080", "SDY269" = "GSE29615/GSE29617/GSE29614", "SDY180" = "GSE48762", "SDY270" = "GSE29617/GSE29614", "SDY224" = "GSE45735", "SDY56" = "GSE74817", "SDY63" = "GSE59635", "SDY1119" = "GSE74817", "SDY67" = "", "SDY1529" = "GSE125921/GSE136163", "SDY404" = "GSE59654", "SDY400" = "GSE59743/GSE95584", "SDY520" = "GSE101709", "SDY1368"="GSE86332", "SDY640" = "GSE101710", "SDY1325" = "GSE92884", "SDY80" = "GSE47353", "SDY1264" = "GSE13485", "SDY1289" = "GSE13699", "SDY1294" = "GSE82152", "SDY1291" = "GSE22768", "SDY1293" = "GSE18323", "SDY1373" = "GSE97590") #Calculate number of participants and samples num_samples=IS2_all_GE_metaData %>% group_by(sdy_vax) %>% tally(name="number_of_samples") num_participants=plyr::ddply(IS2_all_GE_metaData,~sdy_vax,summarise,number_of_participants=length(unique(participant_id))) num_visit=plyr::ddply (IS2_all_GE_metaData, ~sdy_vax, summarise,number_of_sampling_visits=length (unique (time_post_last_vax))) final_x=Reduce(function(x,y) merge(x = x, y = y, by = "sdy_vax"), list(x, num_participants,num_samples,num_visit)) finaltable=dplyr::select(final_x, "pathogen_vaccinetype","assay", "study_accession","number_of_participants", "number_of_samples", "number_of_sampling_visits") colnames(x)[2] = "immune_response_assay" finaltable1 <- finaltable %>% arrange (pathogen_vaccinetype, study_accession) %>% dplyr::rename ("immune_response_assay"=assay) %>% relocate(study_accession) finaltable1
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.