In mrj31/immdonor: Search Immport for Donors

library(flowCore)
library(flowWorkspace)
library(ggcyto)
library(openCyto)
library(MetaCyto)
library(cytoqc)
library(tidyverse)
library(knitr)
library(plotly)
library(kableExtra)
library(gtools)

Immdonor is a GraphQL API of Immport Organ Donor metadata hosted on Hasura Cloud. This API lets you request exactly what data you need from the Immport data model and returns a JSON output which can be converted to a R dataframe!

🔥This is a fast way to link Flow Cytometry Standard (.FCS) files to their metadata and enables more robust analysis with Cytoverse. You will need to have the FCS files saved locally (see quick_fetch.R).

Connect to API

You can connect using your preferred GraphQL R client. Here we use rOpenSci's ghql package

library(ghql)

#By connecting to this API you agree to the Immport.org Terms of Service
con <- GraphqlClient$new(url = "https://resolved-lab-57.hasura.app/v1/graphql")

GET

qry <- Query$new()

studies<- qry$query('studies','{
  shared_data_study {
    study_accession
    actual_enrollment
    minimum_age
    maximum_age
  }
}')
x <- con$exec(qry$queries$studies)

ANSWER

studies<- as.data.frame (jsonlite::fromJSON(x))

studies=studies[mixedorder(studies$data.shared_data_study.study_accession),]
studies=studies %>% select(data.shared_data_study.study_accession,data.shared_data_study.actual_enrollment,data.shared_data_study.minimum_age,data.shared_data_study.maximum_age)
row.names(studies) <- NULL

studies %>% kable(col.names = gsub("data.shared_data_study.", "", names(studies)))

Organ Donor Metadata

The studies listed above feature cytometry data generously donated by organ donors. We can build queries to return exactly what metadata we would like to know about these organ donors.

qry <- Query$new()

files<- qry$query('files','{
  resultFiles {
    filePath
    studyAccession
    subjectAccession
    maxSubjectAge
    gender
    biosampleType
    parameters
    panel_id
    markers

  }
}')

x <- con$exec(qry$queries$files)
files<- as.data.frame (jsonlite::fromJSON(x))

Biosample types

samples<- files %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

plo<- files %>% filter(data.resultFiles.biosampleType == "Thymus"|data.resultFiles.biosampleType == "Bone Marrow") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

slo<- files %>% filter(data.resultFiles.biosampleType == "Spleen"|data.resultFiles.biosampleType == "Lung lymph node"|data.resultFiles.biosampleType == "Inguinal lymph node"|data.resultFiles.biosampleType == "Mesenteric lymph node"| data.resultFiles.biosampleType == "Tonsil") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

mucosal<- files %>% filter(data.resultFiles.biosampleType == "Lung"|data.resultFiles.biosampleType == "Colon"|data.resultFiles.biosampleType == "Ileum"|data.resultFiles.biosampleType == "Jejunum"|data.resultFiles.biosampleType == "PBMC"|data.resultFiles.biosampleType == "Whole blood") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

knitr::kables(
  list(
    knitr::kable(plo, col.names = c('Primary Lymphoid Organs', 'Count'), valign = 't' ),
    knitr::kable(slo, col.names = c('Secondary Lymphoid Organs', 'Count'), valign = 't'),
    knitr::kable(mucosal, col.names = c('Mucosal', 'Count'), valign = 't')
    ))

Panels

Here are the top 5 staining panels with the most biosamples

panels<- files %>% group_by(data.resultFiles.panel_id,data.resultFiles.parameters) %>% count(data.resultFiles.markers)
panels=panels[order(panels$n, decreasing = TRUE),]
top10panels<- panels[1:5,]
top10panels %>% kable(col.names = gsub("data.resultFiles.", "", names(panels)))

T Cell Surface Panel Analysis

fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3")

tpanels<- fcs %>% group_by(data.resultFiles.panel_id,data.resultFiles.parameters) %>% count(data.resultFiles.markers)
tpanels %>% kable(col.names = gsub("data.resultFiles.", "", names(tpanels)))

Link cytoset to metadata

In order to use all of OpenCyto's features like collapseDataForGating we need to annotate the donor metadata into the cytoset's pData.

# Load cytoset 
fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3")
fn<- fcs$data.resultFiles.filePath
cs<- load_cytoset_from_fcs(files = fn)

#Annotate immdonor metadata to pData
p<- pData(cs)
m<- cbind(p,fcs)
pData(cs)<- m

# Harmonize marker names
channels <- colnames(cs)
markers <- as.vector(pData(parameters(cs[[1]]))$desc)
names(markers)<- channels
markernames(cs) <- markers

Compensate and Transform

# Apply file internal compensation
comps <- lapply(cs, function(cf) spillover(cf)$SPILL)
cs_comp <- compensate(cs, comps)

# Transform fluorescent channels with FCSTrans
channels_to_exclude <- c(grep(colnames(cs), pattern="FSC"),
                         grep(colnames(cs), pattern="SSC"),
                         grep(colnames(cs), pattern="Time"))
chnls <- colnames(cs)[-channels_to_exclude]

fcstrans<- FCSTransTransform(transformationId = "defaultFCSTransTransform", channelrange = 2^18, channeldecade = 4.5, range = 4096, cutoff = -111, w = 0.5, rescale = TRUE)
transList <- transformList(chnls, fcstrans)

cs_trans<- transform(cs_comp,transList)

cs<- save_cytoset(cs_trans, "cytosets/tcell)

Build Autogating strategy

Since we annotated the cytoset with each file's age, gender and biosample type we can use the collapseDataForGating feature and groupby biosampleType to improve our autogating.

cs<- load_cytoset(path = "cytosets/tcell")
gs<- GatingSet(cs)

gs_add_gating_method(gs, alias = "nonDebris",
                     pop = "+",
                     parent = "root",
                     dims = "FSC-A",
                     gating_method = "mindensity",
                     gating_args = "min = 20000, max=50000",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType") 

gs_add_gating_method(gs, alias = "singlets",
                     pop = "+",
                     parent = "nonDebris",
                     dims = "FSC-A,FSC-H",
                     gating_method = "singletGate")


gs_add_gating_method(gs, alias = "bcells",
                     pop = "+/-",
                     parent = "singlets",
                     dims = "CD19",
                     gating_method = "mindensity",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType")

gs_add_gating_method(gs, alias = "CD4",
                     pop = "+/-+/-",
                     parent = "CD19-",
                     dims = "CD3,CD4",
                     gating_method = "mindensity",
                     gating_args = "min = 1500, max=2500",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType")

gs_add_gating_method(gs, alias = "Tmem",
                     pop = "+/-+/-",
                     parent = "CD4+",
                     dims = "CCR7,CD45RA",
                     gating_method = "mindensity",
                     gating_args = "min = 1750, max=2500",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType")


lln<- subset(gs, data.resultFiles.biosampleType == "Lung lymph node")
lung<- subset(gs, data.resultFiles.biosampleType == "Lung")
spleen<- subset(gs, data.resultFiles.biosampleType == "Spleen")
blood<- subset(gs, data.resultFiles.biosampleType == "Whole blood")

CD3+CD4 T cells

Lung Lymph node

ggcyto(lln, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")

Lung

ggcyto(lung, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")

Spleen

ggcyto(spleen, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")

CD4 memory subsets

Lung Lymph Node

ggcyto(gs_pop_get_data(lln ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)

Lung

ggcyto(gs_pop_get_data(lung ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)

Spleen

ggcyto(gs_pop_get_data(spleen ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)

Metacyto analysis

work in progress...

fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3")

fcs<- fcs %>%
  dplyr::filter(data.resultFiles.biosampleType == "Lung lymph node"|data.resultFiles.biosampleType == "Lung"|data.resultFiles.biosampleType == "Spleen"|data.resultFiles.biosampleType == "PBMC"|data.resultFiles.biosampleType == "Whole blood"|data.resultFiles.biosampleType == "Inguinal lymph node"|data.resultFiles.biosampleType == "Colon"|data.resultFiles.biosampleType == "Ileum"|data.resultFiles.biosampleType == "Jejunum")

fcs$fcs_files<- fcs$data.resultFiles.filePath
fcs$study_id <- fcs$data.resultFiles.biosampleType

fcs_info<- fcs %>% 
  dplyr::select(fcs_files, study_id)

sample_info <- fcs %>%
  dplyr::select(fcs_files, data.resultFiles.maxSubjectAge,data.resultFiles.biosampleType, data.resultFiles.gender)

preprocessing.batch(inputMeta = fcs_info,
                    assay = "FCM",
                    outpath = "metacyto/panel/preprocess_output",
                    b = 1/150,
                    excludeTransformParameters=c("FSC-A","FSC-W","FSC-H","SSC-A","SSC-W","SSC-H","Time"))

files=list.files("metacyto/panel",pattern="processed_sample",recursive=TRUE,full.names=TRUE)

cs<- load_cytoset(path = "cytosets/tcell")
new<- markernames(cs)
channels_to_exclude <- c(grep(colnames(cs), pattern="FSC"),
                         grep(colnames(cs), pattern="SSC"),
                         grep(colnames(cs), pattern="Time"))
old <- colnames(cs)[-channels_to_exclude]
old<- toupper(old)
oldplus<-paste0(old,"+")
oldminus<-paste0(old,"-")

nameUpdator(oldNames=old, newNames=new, files=files)
nameUpdator(oldNames=oldplus, newNames=new, files=files)
nameUpdator(oldNames=oldminus, newNames=new, files=files)


#define parameters that we don't want to cluster
excludeClusterParameters=c("FSC-A","FSC-W","FSC-H","SSC-A","SSC-W","SSC-H","Time")

cluster_label=autoCluster.batch(preprocessOutputFolder="metacyto/panel/preprocess_output",
                                excludeClusterParameters=excludeClusterParameters,
                                labelQuantile=0.95,
                                minPercent = 0.05,
                                clusterFunction=flowSOM.MC)

searchCluster.batch(preprocessOutputFolder="metacyto/panel/preprocess_output",
                    outpath="metacyto/panel/search_output",
                    clusterLabel=cluster_label)

files=list.files("metacyto/panel/search_output",pattern="cluster_stats_in_each_sample",recursive=TRUE,full.names=TRUE)
fcs_stats=collectData(files,longform=TRUE)


# join the cluster summary statistics with sample information
all_data=inner_join(fcs_stats,sample_info,by="fcs_files")

# See the fraction of what clusters are affected by Gender (while controlling for age)
GA=glmAnalysis(value="value",
               variableOfInterst="data.resultFiles.gender",
               parameter="fraction", 
               otherVariables=c("data.resultFiles.maxSubjectAge"),
               studyID="study_id",label="label",
               data=all_data,CILevel=0.95,ifScale=c(TRUE,FALSE))

GA=GA[order(GA$`Effect_size`),]

GA$`label`=as.character(GA$`label`)
w = which((GA$`label`)>15)

plotGA(GA, size = 12)

Top 20 largest effect size

sig<-GA[order(GA$Effect_size, decreasing = TRUE),]
top20<- sig[1:20,]
top20 %>% kable()

Coming soon:
CyTOF data support
Please contact me with ANY questions, comments or suggestions!!!
Slack: Join the immunespace slack!
iMessage: mrjaffery at icloud dot com
email: mrjaffery at gmail dot com

mrj31/immdonor documentation built on Nov. 1, 2021, 4:29 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mrj31/immdonor
Search Immport for Donors

In mrj31/immdonor: Search Immport for Donors

Connect to API

GET

ANSWER

Organ Donor Metadata

Biosample types

Panels

T Cell Surface Panel Analysis

Link cytoset to metadata

Compensate and Transform

Build Autogating strategy

CD3+CD4 T cells

CD4 memory subsets

Metacyto analysis

R Package Documentation

Browse R Packages

We want your feedback!

mrj31/immdonor Search Immport for Donors

In mrj31/immdonor: Search Immport for Donors

Connect to API

GET

ANSWER

Organ Donor Metadata

Biosample types

Panels

T Cell Surface Panel Analysis

Link cytoset to metadata

Compensate and Transform

Build Autogating strategy

CD3+CD4 T cells

CD4 memory subsets

Metacyto analysis

R Package Documentation

Browse R Packages

We want your feedback!

mrj31/immdonor
Search Immport for Donors