library(flowCore) library(flowWorkspace) library(ggcyto) library(openCyto) library(MetaCyto) library(cytoqc) library(tidyverse) library(knitr) library(plotly) library(kableExtra) library(gtools)
Immdonor is a GraphQL API of Immport Organ Donor metadata hosted on Hasura Cloud. This API lets you request exactly what data you need from the Immport data model and returns a JSON output which can be converted to a R dataframe!
🔥This is a fast way to link Flow Cytometry Standard (.FCS) files to their metadata and enables more robust analysis with Cytoverse. You will need to have the FCS files saved locally (see quick_fetch.R).
You can connect using your preferred GraphQL R client. Here we use rOpenSci's ghql package
library(ghql) #By connecting to this API you agree to the Immport.org Terms of Service con <- GraphqlClient$new(url = "https://resolved-lab-57.hasura.app/v1/graphql")
qry <- Query$new() studies<- qry$query('studies','{ shared_data_study { study_accession actual_enrollment minimum_age maximum_age } }') x <- con$exec(qry$queries$studies)
studies<- as.data.frame (jsonlite::fromJSON(x))
studies=studies[mixedorder(studies$data.shared_data_study.study_accession),] studies=studies %>% select(data.shared_data_study.study_accession,data.shared_data_study.actual_enrollment,data.shared_data_study.minimum_age,data.shared_data_study.maximum_age) row.names(studies) <- NULL studies %>% kable(col.names = gsub("data.shared_data_study.", "", names(studies)))
The studies listed above feature cytometry data generously donated by organ donors. We can build queries to return exactly what metadata we would like to know about these organ donors.
qry <- Query$new() files<- qry$query('files','{ resultFiles { filePath studyAccession subjectAccession maxSubjectAge gender biosampleType parameters panel_id markers } }') x <- con$exec(qry$queries$files) files<- as.data.frame (jsonlite::fromJSON(x))
samples<- files %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType) plo<- files %>% filter(data.resultFiles.biosampleType == "Thymus"|data.resultFiles.biosampleType == "Bone Marrow") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType) slo<- files %>% filter(data.resultFiles.biosampleType == "Spleen"|data.resultFiles.biosampleType == "Lung lymph node"|data.resultFiles.biosampleType == "Inguinal lymph node"|data.resultFiles.biosampleType == "Mesenteric lymph node"| data.resultFiles.biosampleType == "Tonsil") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType) mucosal<- files %>% filter(data.resultFiles.biosampleType == "Lung"|data.resultFiles.biosampleType == "Colon"|data.resultFiles.biosampleType == "Ileum"|data.resultFiles.biosampleType == "Jejunum"|data.resultFiles.biosampleType == "PBMC"|data.resultFiles.biosampleType == "Whole blood") %>% group_by(data.resultFiles.biosampleType) %>% count(data.resultFiles.biosampleType) knitr::kables( list( knitr::kable(plo, col.names = c('Primary Lymphoid Organs', 'Count'), valign = 't' ), knitr::kable(slo, col.names = c('Secondary Lymphoid Organs', 'Count'), valign = 't'), knitr::kable(mucosal, col.names = c('Mucosal', 'Count'), valign = 't') ))
Here are the top 5 staining panels with the most biosamples
panels<- files %>% group_by(data.resultFiles.panel_id,data.resultFiles.parameters) %>% count(data.resultFiles.markers) panels=panels[order(panels$n, decreasing = TRUE),] top10panels<- panels[1:5,] top10panels %>% kable(col.names = gsub("data.resultFiles.", "", names(panels)))
fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3") tpanels<- fcs %>% group_by(data.resultFiles.panel_id,data.resultFiles.parameters) %>% count(data.resultFiles.markers) tpanels %>% kable(col.names = gsub("data.resultFiles.", "", names(tpanels)))
In order to use all of OpenCyto's features like collapseDataForGating
we need to annotate the donor metadata into the cytoset's pData.
# Load cytoset fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3") fn<- fcs$data.resultFiles.filePath cs<- load_cytoset_from_fcs(files = fn) #Annotate immdonor metadata to pData p<- pData(cs) m<- cbind(p,fcs) pData(cs)<- m # Harmonize marker names channels <- colnames(cs) markers <- as.vector(pData(parameters(cs[[1]]))$desc) names(markers)<- channels markernames(cs) <- markers
# Apply file internal compensation comps <- lapply(cs, function(cf) spillover(cf)$SPILL) cs_comp <- compensate(cs, comps) # Transform fluorescent channels with FCSTrans channels_to_exclude <- c(grep(colnames(cs), pattern="FSC"), grep(colnames(cs), pattern="SSC"), grep(colnames(cs), pattern="Time")) chnls <- colnames(cs)[-channels_to_exclude] fcstrans<- FCSTransTransform(transformationId = "defaultFCSTransTransform", channelrange = 2^18, channeldecade = 4.5, range = 4096, cutoff = -111, w = 0.5, rescale = TRUE) transList <- transformList(chnls, fcstrans) cs_trans<- transform(cs_comp,transList) cs<- save_cytoset(cs_trans, "cytosets/tcell)
Since we annotated the cytoset with each file's age, gender and biosample type we can use the collapseDataForGating
feature and groupby
biosampleType to improve our autogating.
cs<- load_cytoset(path = "cytosets/tcell") gs<- GatingSet(cs) gs_add_gating_method(gs, alias = "nonDebris", pop = "+", parent = "root", dims = "FSC-A", gating_method = "mindensity", gating_args = "min = 20000, max=50000", collapseDataForGating = "TRUE", groupBy = "data.resultFiles.biosampleType") gs_add_gating_method(gs, alias = "singlets", pop = "+", parent = "nonDebris", dims = "FSC-A,FSC-H", gating_method = "singletGate") gs_add_gating_method(gs, alias = "bcells", pop = "+/-", parent = "singlets", dims = "CD19", gating_method = "mindensity", collapseDataForGating = "TRUE", groupBy = "data.resultFiles.biosampleType") gs_add_gating_method(gs, alias = "CD4", pop = "+/-+/-", parent = "CD19-", dims = "CD3,CD4", gating_method = "mindensity", gating_args = "min = 1500, max=2500", collapseDataForGating = "TRUE", groupBy = "data.resultFiles.biosampleType") gs_add_gating_method(gs, alias = "Tmem", pop = "+/-+/-", parent = "CD4+", dims = "CCR7,CD45RA", gating_method = "mindensity", gating_args = "min = 1750, max=2500", collapseDataForGating = "TRUE", groupBy = "data.resultFiles.biosampleType") lln<- subset(gs, data.resultFiles.biosampleType == "Lung lymph node") lung<- subset(gs, data.resultFiles.biosampleType == "Lung") spleen<- subset(gs, data.resultFiles.biosampleType == "Spleen") blood<- subset(gs, data.resultFiles.biosampleType == "Whole blood")
Lung Lymph node
ggcyto(lln, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")
Lung
ggcyto(lung, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")
Spleen
ggcyto(spleen, aes(x = "CD4", y = "CD3")) + geom_gate("CD4+") + geom_hex(bins = 128)+ ggcyto_par_set(limits = "instrument")
Lung Lymph Node
ggcyto(gs_pop_get_data(lln ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)
Lung
ggcyto(gs_pop_get_data(lung ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)
Spleen
ggcyto(gs_pop_get_data(spleen ,"CD4+"), aes(x = "CCR7", y = "CD45RA")) + geom_hex(bins = 128)
work in progress...
fcs<- files %>% filter(data.resultFiles.panel_id == "FCM-2"|data.resultFiles.panel_id == "FCM-3") fcs<- fcs %>% dplyr::filter(data.resultFiles.biosampleType == "Lung lymph node"|data.resultFiles.biosampleType == "Lung"|data.resultFiles.biosampleType == "Spleen"|data.resultFiles.biosampleType == "PBMC"|data.resultFiles.biosampleType == "Whole blood"|data.resultFiles.biosampleType == "Inguinal lymph node"|data.resultFiles.biosampleType == "Colon"|data.resultFiles.biosampleType == "Ileum"|data.resultFiles.biosampleType == "Jejunum") fcs$fcs_files<- fcs$data.resultFiles.filePath fcs$study_id <- fcs$data.resultFiles.biosampleType fcs_info<- fcs %>% dplyr::select(fcs_files, study_id) sample_info <- fcs %>% dplyr::select(fcs_files, data.resultFiles.maxSubjectAge,data.resultFiles.biosampleType, data.resultFiles.gender) preprocessing.batch(inputMeta = fcs_info, assay = "FCM", outpath = "metacyto/panel/preprocess_output", b = 1/150, excludeTransformParameters=c("FSC-A","FSC-W","FSC-H","SSC-A","SSC-W","SSC-H","Time")) files=list.files("metacyto/panel",pattern="processed_sample",recursive=TRUE,full.names=TRUE) cs<- load_cytoset(path = "cytosets/tcell") new<- markernames(cs) channels_to_exclude <- c(grep(colnames(cs), pattern="FSC"), grep(colnames(cs), pattern="SSC"), grep(colnames(cs), pattern="Time")) old <- colnames(cs)[-channels_to_exclude] old<- toupper(old) oldplus<-paste0(old,"+") oldminus<-paste0(old,"-") nameUpdator(oldNames=old, newNames=new, files=files) nameUpdator(oldNames=oldplus, newNames=new, files=files) nameUpdator(oldNames=oldminus, newNames=new, files=files) #define parameters that we don't want to cluster excludeClusterParameters=c("FSC-A","FSC-W","FSC-H","SSC-A","SSC-W","SSC-H","Time") cluster_label=autoCluster.batch(preprocessOutputFolder="metacyto/panel/preprocess_output", excludeClusterParameters=excludeClusterParameters, labelQuantile=0.95, minPercent = 0.05, clusterFunction=flowSOM.MC) searchCluster.batch(preprocessOutputFolder="metacyto/panel/preprocess_output", outpath="metacyto/panel/search_output", clusterLabel=cluster_label) files=list.files("metacyto/panel/search_output",pattern="cluster_stats_in_each_sample",recursive=TRUE,full.names=TRUE) fcs_stats=collectData(files,longform=TRUE) # join the cluster summary statistics with sample information all_data=inner_join(fcs_stats,sample_info,by="fcs_files") # See the fraction of what clusters are affected by Gender (while controlling for age) GA=glmAnalysis(value="value", variableOfInterst="data.resultFiles.gender", parameter="fraction", otherVariables=c("data.resultFiles.maxSubjectAge"), studyID="study_id",label="label", data=all_data,CILevel=0.95,ifScale=c(TRUE,FALSE)) GA=GA[order(GA$`Effect_size`),] GA$`label`=as.character(GA$`label`) w = which((GA$`label`)>15)
plotGA(GA, size = 12)
Top 20 largest effect size
sig<-GA[order(GA$Effect_size, decreasing = TRUE),] top20<- sig[1:20,] top20 %>% kable()
CyTOF data support
Please contact me with ANY questions, comments or suggestions!!!
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.