library(flowCore)
library(flowWorkspace)
library(ggcyto)
library(openCyto)
library(MetaCyto)
library(cytoqc)
library(tidyverse)
library(knitr)
library(plotly)
library(kableExtra)
library(gtools)

Immdonor is a GraphQL API of Immport Organ Donor metadata hosted on Hasura Cloud. This API lets you request exactly what data you need from the Immport data model and returns a JSON output which can be converted to a R dataframe!

🔥This is a fast way to link Flow Cytometry Standard (.FCS) files to their metadata and enables more robust analysis with Cytoverse. You will need to have the FCS files saved locally (see quick_fetch.R).

Connect to API

You can connect using your preferred GraphQL R client. Here we use rOpenSci's ghql package

library(ghql)

#By connecting to this API you agree to the Immport.org Terms of Service
con <- GraphqlClient$new(url = "https://resolved-lab-57.hasura.app/v1/graphql")

GET

qry <- Query$new()

studies<- qry$query('studies','{
  shared_data_study {
    study_accession
    actual_enrollment
    minimum_age
    maximum_age
  }
}')
x <- con$exec(qry$queries$studies)

ANSWER

studies<- as.data.frame (jsonlite::fromJSON(x))
studies=studies[mixedorder(studies$data.shared_data_study.study_accession),]
studies=studies %>% select(data.shared_data_study.study_accession,data.shared_data_study.actual_enrollment,data.shared_data_study.minimum_age,data.shared_data_study.maximum_age)
row.names(studies) <- NULL

studies %>% kable(col.names = gsub("data.shared_data_study.", "", names(studies)))

Organ Donor Metadata

The studies listed above feature cytometry data generously donated by organ donors. We can build queries to return exactly what metadata we would like to know about these organ donors.

qry <- Query$new()

files<- qry$query('files','{
  resultFiles {
    filePath
    studyAccession
    subjectAccession
    maxSubjectAge
    gender
    biosampleType
    parameters
    panel_id
    markers

  }
}')

x <- con$exec(qry$queries$files)
files<- as.data.frame (jsonlite::fromJSON(x))

Biosample types

samples<- files %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

plo<- files %>% filter(data.resultFiles.biosampleType == "Thymus"|data.resultFiles.biosampleType == "Bone Marrow") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

slo<- files %>% filter(data.resultFiles.biosampleType == "Spleen"|data.resultFiles.biosampleType == "Lung lymph node"|data.resultFiles.biosampleType == "Inguinal lymph node"|data.resultFiles.biosampleType == "Mesenteric lymph node"| data.resultFiles.biosampleType == "Tonsil") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

mucosal<- files %>% filter(data.resultFiles.biosampleType == "Lung"|data.resultFiles.biosampleType == "Colon"|data.resultFiles.biosampleType == "Ileum"|data.resultFiles.biosampleType == "Jejunum"|data.resultFiles.biosampleType == "PBMC"|data.resultFiles.biosampleType == "Whole blood") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType)

knitr::kables(
  list(
    knitr::kable(plo, col.names = c('Primary Lymphoid Organs', 'Count'), valign = 't' ),
    knitr::kable(slo, col.names = c('Secondary Lymphoid Organs', 'Count'), valign = 't'),
    knitr::kable(mucosal, col.names = c('Mucosal', 'Count'), valign = 't')
    ))

Panels

Here are the top 5 staining panels with the most biosamples

panels<- files %>% group_by(data.resultFiles.panel_id,data.resultFiles.parameters) %>% count(data.resultFiles.markers)
panels=panels[order(panels$n, decreasing = TRUE),]
top10panels<- panels[1:5,]
top10panels %>% kable(col.names = gsub("data.resultFiles.", "", names(panels)))

T Cell Surface Panel Analysis

fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3")

tpanels<- fcs %>% group_by(data.resultFiles.panel_id,data.resultFiles.parameters) %>% count(data.resultFiles.markers)
tpanels %>% kable(col.names = gsub("data.resultFiles.", "", names(tpanels)))

Link cytoset to metadata

In order to use all of OpenCyto's features like collapseDataForGating we need to annotate the donor metadata into the cytoset's pData.

# Load cytoset 
fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3")
fn<- fcs$data.resultFiles.filePath
cs<- load_cytoset_from_fcs(files = fn)

#Annotate immdonor metadata to pData
p<- pData(cs)
m<- cbind(p,fcs)
pData(cs)<- m

# Harmonize marker names
channels <- colnames(cs)
markers <- as.vector(pData(parameters(cs[[1]]))$desc)
names(markers)<- channels
markernames(cs) <- markers

Compensate and Transform

# Apply file internal compensation
comps <- lapply(cs, function(cf) spillover(cf)$SPILL)
cs_comp <- compensate(cs, comps)

# Transform fluorescent channels with FCSTrans
channels_to_exclude <- c(grep(colnames(cs), pattern="FSC"),
                         grep(colnames(cs), pattern="SSC"),
                         grep(colnames(cs), pattern="Time"))
chnls <- colnames(cs)[-channels_to_exclude]

fcstrans<- FCSTransTransform(transformationId = "defaultFCSTransTransform", channelrange = 2^18, channeldecade = 4.5, range = 4096, cutoff = -111, w = 0.5, rescale = TRUE)
transList <- transformList(chnls, fcstrans)

cs_trans<- transform(cs_comp,transList)

cs<- save_cytoset(cs_trans, "cytosets/tcell)

Build Autogating strategy

Since we annotated the cytoset with each file's age, gender and biosample type we can use the collapseDataForGating feature and groupby biosampleType to improve our autogating.

cs<- load_cytoset(path = "cytosets/tcell")
gs<- GatingSet(cs)

gs_add_gating_method(gs, alias = "nonDebris",
                     pop = "+",
                     parent = "root",
                     dims = "FSC-A",
                     gating_method = "mindensity",
                     gating_args = "min = 20000, max=50000",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType") 

gs_add_gating_method(gs, alias = "singlets",
                     pop = "+",
                     parent = "nonDebris",
                     dims = "FSC-A,FSC-H",
                     gating_method = "singletGate")


gs_add_gating_method(gs, alias = "bcells",
                     pop = "+/-",
                     parent = "singlets",
                     dims = "CD19",
                     gating_method = "mindensity",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType")

gs_add_gating_method(gs, alias = "CD4",
                     pop = "+/-+/-",
                     parent = "CD19-",
                     dims = "CD3,CD4",
                     gating_method = "mindensity",
                     gating_args = "min = 1500, max=2500",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType")

gs_add_gating_method(gs, alias = "Tmem",
                     pop = "+/-+/-",
                     parent = "CD4+",
                     dims = "CCR7,CD45RA",
                     gating_method = "mindensity",
                     gating_args = "min = 1750, max=2500",
                     collapseDataForGating = "TRUE",
                     groupBy = "data.resultFiles.biosampleType")


lln<- subset(gs, data.resultFiles.biosampleType == "Lung lymph node")
lung<- subset(gs, data.resultFiles.biosampleType == "Lung")
spleen<- subset(gs, data.resultFiles.biosampleType == "Spleen")
blood<- subset(gs, data.resultFiles.biosampleType == "Whole blood")

CD3+CD4 T cells

Lung Lymph node

ggcyto(lln, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")

Lung

ggcyto(lung, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")

Spleen

ggcyto(spleen, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")

CD4 memory subsets

Lung Lymph Node

ggcyto(gs_pop_get_data(lln ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)

Lung

ggcyto(gs_pop_get_data(lung ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)

Spleen

ggcyto(gs_pop_get_data(spleen ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)

Metacyto analysis

work in progress...

fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3")

fcs<- fcs %>%
  dplyr::filter(data.resultFiles.biosampleType == "Lung lymph node"|data.resultFiles.biosampleType == "Lung"|data.resultFiles.biosampleType == "Spleen"|data.resultFiles.biosampleType == "PBMC"|data.resultFiles.biosampleType == "Whole blood"|data.resultFiles.biosampleType == "Inguinal lymph node"|data.resultFiles.biosampleType == "Colon"|data.resultFiles.biosampleType == "Ileum"|data.resultFiles.biosampleType == "Jejunum")

fcs$fcs_files<- fcs$data.resultFiles.filePath
fcs$study_id <- fcs$data.resultFiles.biosampleType

fcs_info<- fcs %>% 
  dplyr::select(fcs_files, study_id)

sample_info <- fcs %>%
  dplyr::select(fcs_files, data.resultFiles.maxSubjectAge,data.resultFiles.biosampleType, data.resultFiles.gender)

preprocessing.batch(inputMeta = fcs_info,
                    assay = "FCM",
                    outpath = "metacyto/panel/preprocess_output",
                    b = 1/150,
                    excludeTransformParameters=c("FSC-A","FSC-W","FSC-H","SSC-A","SSC-W","SSC-H","Time"))

files=list.files("metacyto/panel",pattern="processed_sample",recursive=TRUE,full.names=TRUE)

cs<- load_cytoset(path = "cytosets/tcell")
new<- markernames(cs)
channels_to_exclude <- c(grep(colnames(cs), pattern="FSC"),
                         grep(colnames(cs), pattern="SSC"),
                         grep(colnames(cs), pattern="Time"))
old <- colnames(cs)[-channels_to_exclude]
old<- toupper(old)
oldplus<-paste0(old,"+")
oldminus<-paste0(old,"-")

nameUpdator(oldNames=old, newNames=new, files=files)
nameUpdator(oldNames=oldplus, newNames=new, files=files)
nameUpdator(oldNames=oldminus, newNames=new, files=files)


#define parameters that we don't want to cluster
excludeClusterParameters=c("FSC-A","FSC-W","FSC-H","SSC-A","SSC-W","SSC-H","Time")

cluster_label=autoCluster.batch(preprocessOutputFolder="metacyto/panel/preprocess_output",
                                excludeClusterParameters=excludeClusterParameters,
                                labelQuantile=0.95,
                                minPercent = 0.05,
                                clusterFunction=flowSOM.MC)

searchCluster.batch(preprocessOutputFolder="metacyto/panel/preprocess_output",
                    outpath="metacyto/panel/search_output",
                    clusterLabel=cluster_label)

files=list.files("metacyto/panel/search_output",pattern="cluster_stats_in_each_sample",recursive=TRUE,full.names=TRUE)
fcs_stats=collectData(files,longform=TRUE)


# join the cluster summary statistics with sample information
all_data=inner_join(fcs_stats,sample_info,by="fcs_files")

# See the fraction of what clusters are affected by Gender (while controlling for age)
GA=glmAnalysis(value="value",
               variableOfInterst="data.resultFiles.gender",
               parameter="fraction", 
               otherVariables=c("data.resultFiles.maxSubjectAge"),
               studyID="study_id",label="label",
               data=all_data,CILevel=0.95,ifScale=c(TRUE,FALSE))

GA=GA[order(GA$`Effect_size`),]

GA$`label`=as.character(GA$`label`)
w = which((GA$`label`)>15)
plotGA(GA, size = 12)

Top 20 largest effect size

sig<-GA[order(GA$Effect_size, decreasing = TRUE),]
top20<- sig[1:20,]
top20 %>% kable()


mrj31/immdonor documentation built on Nov. 1, 2021, 4:29 a.m.