Overview

The purpose of this vignette is to pull all expression sets from the ImmuneSpace portal (www.immunespace.org) and save them as R objects for later processing.

# Chunk defaults: echo the code and include each chunk in the rendered output.
knitr::opts_chunk$set(echo = TRUE, include = TRUE)
suppressPackageStartupMessages({
  library(ImmuneSignatures2) # vaccine map loaded as `vaccines`
  library(ImmuneSpaceR)
  library(Biobase)
  library(data.table)
})
# Output variables
# Directories and the timestamp are supplied through the document's `params`.
outputDir <- params$outputDir
dataCacheDir <- params$dataCacheDir
# Create missing directories together with any missing parent directories.
# Without `recursive = TRUE`, dir.create() only warns on a nested path and the
# later saveRDS() calls into that directory fail.
if (!dir.exists(outputDir)) dir.create(outputDir, recursive = TRUE)
if (!dir.exists(dataCacheDir)) dir.create(dataCacheDir, recursive = TRUE)
# Prepended to the names of the cached .rds files written further down.
timeStamp <- params$timestamp
# Connection to the "IS2" study; every dataset below is fetched through it.
con <- CreateConnection("IS2", onTest = FALSE)

Load metadata

Load metadata about samples and participants.

# Tables fetched through the connection.
demographics <- con$getDataset("demographics")
geneExpressionFiles <- con$getDataset(
  "gene_expression_files",
  original_view = TRUE
)
featureAnnotationMap <- getTable(
  con, "microarray", "fasMap",
  showHidden = TRUE
)
featureAnnotation <- getTable(
  con, "microarray", "FeatureAnnotationSet",
  showHidden = TRUE
)

# Steps that need a second input are applied first: study, then arm accession
# (from the gene expression file listing), then vaccine fields (from `vaccines`).
sharedMetaData <- addVaccineFields(
  addArmAccession(addStudy(demographics), geneExpressionFiles),
  vaccines
)

# The remaining steps each take only the metadata table, so they are applied
# in sequence, in the listed order, by folding over the list of functions.
sharedMetaData <- Reduce(
  function(metaData, step) step(metaData),
  list(
    filterOutNoVaccineSamples,
    addGeBatchName,
    addIrpBatchName,
    addSDY1325metadata,
    imputeAge
  ),
  sharedMetaData
)

# Save the result under a timestamped name and record it in the dataset
# metadata CSV kept in the cache directory.
sharedMetaDataFile <- file.path(dataCacheDir, paste0(timeStamp, "sharedMetaData.rds"))
saveRDS(sharedMetaData, file = sharedMetaDataFile)
write_data_metadata(
  file.path(dataCacheDir, "dataset_metadata.csv"),
  data_path = sharedMetaDataFile,
  dataset_name = "sharedMetaData.rds"
)

Immune Response Data Retrieval

# Timestamped cache file for the combined immune response data.
immdata_filename <- file.path(dataCacheDir, paste0(timeStamp, "immdata_all.rds"))
assays <- c("hai", "neut_ab_titer", "elisa")

# Build one merged table per assay.
# lapply() is used instead of sapply(): sapply() simplifies its result, and if
# every per-assay table happened to have the same number of columns it would
# collapse them into a list-matrix instead of returning a list of tables.
immdata_all <- lapply(assays, function(assay) {
  dt <- con$getDataset(assay, original_view = TRUE)
  dt <- dt[, -"lsid"]
  if (assay == "elisa") {
    # Append the SDY1370 ELISA rows, which come from `sdy1370_elisa` rather
    # than from the connection.
    dt <- rbind(dt, sdy1370_elisa)
  }
  dt$assay <- assay
  dt <- correctHrs(dt)
  dt <- createUniqueIdColumn(dt)
  # Attach the shared participant metadata; the merged table is the result.
  merge(dt, sharedMetaData, by = c("participant_id", "study_accession", "arm_accession"))
})
# Name each element after its assay (what USE.NAMES = TRUE did before).
names(immdata_all) <- assays

saveRDS(immdata_all, file = immdata_filename)
write_data_metadata(file.path(dataCacheDir, "dataset_metadata.csv"),
                    data_path = immdata_filename,
                    dataset_name = "immdata_all.rds")

Gene Expression Data Retrieval

Gene expression matrices in ImmuneSpace are created on a cohort*cell_type basis and each matrix is quantile normalized and log-transformed separately. The .rds file that contains the list of expressionSets needed for downstream analysis is approximately 1GB and is therefore cached.

# Listing of gene expression matrices held in the connection's cache.
geMatrices <- con$cache$GE_matrices

# Cohorts dropped from the gene expression data:
#   SDY1370 - BCell and TCell, since the others are PBMC / WholeBlood
#   SDY1325 - low-dose and subcutaneous PS, a different vaccination method
#   SDY1364 - intradermal, a different vaccination method
#   SDY180  - Saline cohorts did not receive stimulation
rmCohorts <- "cell|Subcutaneous|LowIntraMuscular|IntraDermal|Saline"
# Keep only the matrices whose name matches none of the patterns above.
geMatrices <- geMatrices[!grepl(rmCohorts, geMatrices$name), ]

# Fetch every remaining matrix and key the resulting list by matrix name.
esetsFile <- file.path(dataCacheDir, paste0(timeStamp, "IS2_esets.rds"))
esets <- setNames(
  lapply(geMatrices$name, function(matrixName) {
    con$getGEMatrix(matrixName, outputType = "normalized", annotation = "latest")
  }),
  geMatrices$name
)

# Cache the list and record it in the dataset metadata CSV.
saveRDS(esets, file = esetsFile)
write_data_metadata(
  file.path(dataCacheDir, "dataset_metadata.csv"),
  data_path = esetsFile,
  dataset_name = "IS2_esets.rds"
)

There are `r nrow(geMatrices)` matrices in the IS2 virtual study.

# Run the package's checks on the extracted expression sets.
results <- testExtractedGEData(esets)

# Stop unless every check came back TRUE. isTRUE() makes an NA result fail with
# the message below instead of the less helpful
# "missing value where TRUE/FALSE needed" raised by `if (NA)`.
if (!isTRUE(all(unlist(results)))) {
  stop("Normalized matrices do not meet dim and NA value expectations")
}


RGLab/ImmuneSignatures2 documentation built on Dec. 9, 2022, 10:51 a.m.