The purpose of this vignette is to pull all ExpressionSets from the ImmuneSpace portal (www.immunespace.org) and save them as R objects for later processing.
# Chunk defaults: echo the code and include chunk output in the rendered document.
knitr::opts_chunk$set(echo = TRUE, include = TRUE)

suppressPackageStartupMessages({
  library(ImmuneSignatures2) # vaccine map loaded as `vaccines`
  library(ImmuneSpaceR)
  library(Biobase)
  library(data.table)
})

# Output variables, taken from the document's `params`
outputDir <- params$outputDir
dataCacheDir <- params$dataCacheDir

# Create both directories together with any missing parent directories.
# Without `recursive = TRUE` a nested cache path is not created (dir.create()
# only warns), and the later saveRDS() calls into that directory then fail.
if (!dir.exists(outputDir)) {
  dir.create(outputDir, recursive = TRUE)
}
if (!dir.exists(dataCacheDir)) {
  dir.create(dataCacheDir, recursive = TRUE)
}

# Prepended to the names of the .rds files written to `dataCacheDir`
timeStamp <- params$timestamp
# Open the ImmuneSpace connection for the "IS2" study. `con` is the handle the
# following chunks use to download datasets and gene expression matrices.
# NOTE(review): onTest = FALSE presumably targets the production server rather
# than the test server - confirm against the ImmuneSpaceR documentation.
con <- CreateConnection(study = "IS2", onTest = FALSE)
Load metadata about samples and participants.
# Participant-level and file-level datasets fetched through the connection.
demographics <- con$getDataset("demographics")
geneExpressionFiles <- con$getDataset(
  "gene_expression_files",
  original_view = TRUE
)

# Tables from the "microarray" schema, requested with hidden columns shown.
featureAnnotationMap <- getTable(
  con,
  "microarray",
  "fasMap",
  showHidden = TRUE
)
featureAnnotation <- getTable(
  con,
  "microarray",
  "FeatureAnnotationSet",
  showHidden = TRUE
)
# Build the participant metadata table shared by all downstream datasets.
# Each helper takes the table produced by the previous step and returns the
# next version, so the order of the calls is kept as is.
sharedMetaData <- addStudy(demographics)
sharedMetaData <- addArmAccession(sharedMetaData, geneExpressionFiles)
sharedMetaData <- addVaccineFields(sharedMetaData, vaccines)
sharedMetaData <- filterOutNoVaccineSamples(sharedMetaData)
sharedMetaData <- addGeBatchName(sharedMetaData)
sharedMetaData <- addIrpBatchName(sharedMetaData)
sharedMetaData <- addSDY1325metadata(sharedMetaData)
sharedMetaData <- imputeAge(sharedMetaData)

# Cache the finished table under a time-stamped file name and register it in
# the dataset metadata CSV kept in the cache directory.
sharedMetaDataFile <- file.path(
  dataCacheDir,
  paste0(timeStamp, "sharedMetaData.rds")
)
saveRDS(object = sharedMetaData, file = sharedMetaDataFile)
write_data_metadata(
  file.path(dataCacheDir, "dataset_metadata.csv"),
  data_path = sharedMetaDataFile,
  dataset_name = "sharedMetaData.rds"
)
immdata_filename <- file.path(
  dataCacheDir,
  paste0(timeStamp, "immdata_all.rds")
)
assays <- c("hai", "neut_ab_titer", "elisa")

# Download each assay, tidy it, and attach the shared participant metadata.
# lapply() is used instead of sapply(): sapply() simplifies its result, so if
# the merged tables all happened to have the same number of columns it would
# return a list-matrix instead of a list with one table per assay.
immdata_all <- lapply(assays, function(assay) {
  dt <- con$getDataset(assay, original_view = TRUE)
  dt <- dt[, -"lsid"]
  if (assay == "elisa") {
    # NOTE(review): `sdy1370_elisa` is not defined in this document; presumably
    # it is data shipped with ImmuneSignatures2 - confirm.
    dt <- rbind(dt, sdy1370_elisa)
  }
  dt$assay <- assay
  dt <- correctHrs(dt)
  dt <- createUniqueIdColumn(dt)
  merge(
    dt,
    sharedMetaData,
    by = c("participant_id", "study_accession", "arm_accession")
  )
})
# Name the list elements by assay, as sapply(USE.NAMES = TRUE) did.
names(immdata_all) <- assays

saveRDS(immdata_all, file = immdata_filename)
write_data_metadata(
  file.path(dataCacheDir, "dataset_metadata.csv"),
  data_path = immdata_filename,
  dataset_name = "immdata_all.rds"
)
Gene expression matrices in ImmuneSpace are created per cohort and cell type, and each matrix is quantile normalized and log-transformed separately. The .rds file that contains the list of ExpressionSets needed for downstream analysis is approximately 1 GB and is therefore cached.
# Table of gene expression matrices held in the connection's cache.
geMatrices <- con$cache$GE_matrices

# Cohorts dropped from the gene expression data, matched on the matrix name:
#   SDY1370 - BCell and TCell cohorts; the others are PBMC / WholeBlood
#   SDY1325 - low-dose and subcutaneous PS cohorts; unrelated vaccine method
#   SDY1364 - intradermal cohort; different vaccination method
#   SDY180  - saline cohorts; did not receive stimulation
rmCohorts <- "cell|Subcutaneous|LowIntraMuscular|IntraDermal|Saline"
geMatrices <- geMatrices[!grepl(rmCohorts, geMatrices$name), ]

# Fetch every remaining matrix as a normalized expression set using the
# latest annotation, and label each list element with its matrix name.
esets <- lapply(geMatrices$name, function(matrixName) {
  con$getGEMatrix(
    matrixName,
    outputType = "normalized",
    annotation = "latest"
  )
})
names(esets) <- geMatrices$name

# Cache the list and register the file in the dataset metadata CSV.
esetsFile <- file.path(dataCacheDir, paste0(timeStamp, "IS2_esets.rds"))
saveRDS(object = esets, file = esetsFile)
write_data_metadata(
  file.path(dataCacheDir, "dataset_metadata.csv"),
  data_path = esetsFile,
  dataset_name = "IS2_esets.rds"
)
There are `r nrow(geMatrices)` matrices in the IS2 virtual study.
# Run the package's checks on the downloaded expression sets and stop the
# vignette if any of them fails.
results <- testExtractedGEData(esets)

# isTRUE() makes an NA outcome count as a failure. With a bare `!all(...)`,
# an NA among the results turns the condition into NA and `if` aborts with
# "missing value where TRUE/FALSE needed" instead of the message below.
if (!isTRUE(all(unlist(results)))) {
  stop("Normalized matrices do not meet dim and NA value expectations")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.