Nothing
#'Platypus V3 VGM to AIRR compatibility function
#'
#'@description Exports AIRR compatible tables supplemented with VDJ and GEX information from the Platypus VGM object and the cellranger output airr_rearrangements.tsv
#' @param VGM Output object of the VDJ_GEX_matrix function generated with VDJ.combine = T, GEX.combine = T (to merge all samples) and integrate.VDJ.to.GEX = T (to integrate VDJ and GEX data)
#'@param VDJ.features.to.append Character vector. Defaults to "none". Can be either "all" or column names of the VGM VDJ matrix (VGM[[1]]) to append to the AIRR compatible table.
#'@param GEX.features.to.append Character vector. Defaults to "none". Can be either "all" or GEX metadata column names or Gene names of the VGM GEX object (VGM[[2]])(passed to Seurat::FetchData()) to append to the AIRR compatible table. For a list of available features run: names(VGM[[2]]@meta.data) and rownames(VGM[[2]])
#'@param airr.rearrangements Source of the airr_rearrangements.tsv file as generated by Cellranger. There are 3 available input options: 1. R list object from Platypus_DB_load_from_disk or Platypus_DB_fetch / 2. List with local paths to airr_rearrangements.tsv / 3. List of airr_rearrangements.tsv loaded in as R objects within the current R enviroment. ! Order of input list must be identical to that of sample_ids in the VGM ! If not provided or set to "none" CIGAR strings in output will be empty.
#' @param airr.integrate Boolean. Defaults to TRUE, whether to integrate output AIRR tables
#' @return A list of length of samples in VGM containing a AIRR-compatible dataframe for each sample if airr.integrate = F or a single dataframe if airr.integrate = T ! Cave the format: VGM object => 1 cell = 1 row; AIRR table 1 cell = as many rows as VDJ and VJ chains available for that cell. GEX cell-level information is attached to all rows containing a chain of that cell.
#' @export
#' @examples
#' \donttest{
#'try({
#'airr.list.out <- PlatypusDB_VGM_to_AIRR(VGM = VGM
#', VDJ.features.to.append = c("VDJ_cdr3s_aa")
#', GEX.features.to.append = c("CTLA4", "TOX"), airr.rearrangements = Data.in)
#'
#'airr.list.out <- PlatypusDB_VGM_to_AIRR(VGM = VGM
#', VDJ.features.to.append = c("VDJ_cdr3s_aa")
#', GEX.features.to.append = c("CTLA4", "TOX"),
#'airr.rearrangements =list("~/path_to/s1/airr.rearrangement.tsv"
#',"~/path_to/s2/airr_rearrangement.tsv"))
#'
#'airr.list.out <- PlatypusDB_VGM_to_AIRR(VGM = VGM
#', VDJ.features.to.append = c("VDJ_cdr3s_aa")
#', GEX.features.to.append = c("CTLA4", "TOX"),
#'airr.rearrangements = list(airr_rearrangements.s1, airr_rearrangements_2))
#'
#'VDJ.out.directory.list <- list()
#'VDJ.out.directory.list[[1]] <- c("~/cellrangerVDJ/s1")
#'VDJ.out.directory.list[[2]] <- c("~/cellrangerVDJ/s2")
#'
#'GEX.out.directory.list <- list()
#'GEX.out.directory.list[[1]] <- c("~/cellrangerGEX/s1")
#'GEX.out.directory.list[[2]] <- c("~/cellrangerGEX/s2")
#'VGM <- VDJ_GEX_matrix(VDJ.out.directory.list = VDJ.out.directory.list,
#'GEX.out.directory.list = GEX.out.directory.list,
#'GEX.integrate = TRUE, VDJ.combine = TRUE, integrate.GEX.to.VDJ = TRUE
#', integrate.VDJ.to.GEX = TRUE,
#'get.VDJ.stats = FALSE, trim.and.align = FALSE)
#'airr.list.out <- PlatypusDB_VGM_to_AIRR(VGM = VGM,
#'VDJ.features.to.append = c("VDJ_sequence_nt_trimmed","VJ_sequence_nt_trimmed"),
#'GEX.features.to.append = c("UMAP_1","UMAP_2","CTLA4", "TOX"),
#'airr.rearrangements = c("~/cellrangerVDJ/s1/airr_rearrangement.tsv"
#',"~/cellrangerVDJ/s2/airr_rearrangement.tsv"))
#' })
#' }
#'
PlatypusDB_VGM_to_AIRR <- function(VGM,
VDJ.features.to.append,
GEX.features.to.append,
airr.rearrangements,
airr.integrate){
#garbage collector to clear some ram
gc()
#grab relevant columns from the VDJ and GEX VGM
if(missing(VDJ.features.to.append)) VDJ.features.to.append <- "none"
if(missing(GEX.features.to.append)) GEX.features.to.append <- "none"
if(missing(airr.integrate)) airr.integrate <- TRUE
if(VDJ.features.to.append[1] != "none"){
if(any(c("barcode", "sample_id","celltype", "VDJ_chain_contig", "VJ_chain_contig") %in% names(VGM[[1]]) == FALSE)){
stop('VGM VDJ matrix does not contain all neccessary columns: "barcode", "sample_id","celltype", "VDJ_chain_contig", "VJ_chain_contig"')
}
if(tolower(VDJ.features.to.append[1]) == "all"){
VDJ.features.to.append <- names(VGM[[1]])[!stringr::str_detect(names(VGM[[1]]), "(VDJ_)|(VJ_)")]
VDJ.features.to.append <- VDJ.features.to.append[!VDJ.features.to.append %in% c("barcode", "sample_id")]
}
to_del <- c(1000) #to avoid an error if all features are found and no entry is added to to_del
for(k in 1:length(VDJ.features.to.append)){
if(!VDJ.features.to.append[k] %in% names(VGM[[1]])){
to_del <- c(to_del, k)
warning(paste0("Column ", VDJ.features.to.append[k], " was not found in VGM VDJ matrix. Skipping this feature..."))
}
}
VDJ.features.to.append <- VDJ.features.to.append[-to_del]
#grab features
VDJ.features.to.append <- c("barcode", "sample_id","celltype", "VDJ_chain_contig", "VJ_chain_contig", VDJ.features.to.append)
VDJ.to.add <- VGM[[1]][,VDJ.features.to.append]
} else {
#grab necessary features
VDJ.features.to.append <- c("barcode", "sample_id","celltype", "VDJ_chain_contig", "VJ_chain_contig")
VDJ.to.add <- VGM[[1]][,VDJ.features.to.append]
}
if(GEX.features.to.append[1] != "none"){
if(tolower(GEX.features.to.append[1]) != "all"){
#first grab gene features
gene.ids <- which(GEX.features.to.append %in% rownames(VGM[[2]]))
if(length(gene.ids) > 0){
GEX.to.add <- SeuratObject::FetchData(VGM[[2]], vars = GEX.features.to.append[gene.ids])
GEX.features.to.append <- GEX.features.to.append[-gene.ids]
} else {
GEX.to.add <- as.data.frame(VGM[[2]]@meta.data[,1])
names(GEX.to.add) <- "orig.ident"
}
} else {
GEX.to.add <- as.data.frame(VGM[[2]]@meta.data[,1])
names(GEX.to.add) <- "orig.ident"
}
if(length(GEX.features.to.append)>0){
if(tolower(GEX.features.to.append[1]) == "all"){
if(tolower(VDJ.features.to.append[1]) == "all"){
GEX.features.to.append <- names(VGM[[2]]@meta.data)[!names(VGM[[2]]@meta.data) %in% VDJ.features.to.append]
GEX.features.to.append <- GEX.features.to.append[GEX.features.to.append != "FB_assignment.y"]
} else {
GEX.features.to.append <- names(VGM[[2]]@meta.data)[!stringr::str_detect(names(VGM[[2]]@meta.data), "(VDJ_)|(VJ_)")]
GEX.features.to.append <- GEX.features.to.append[GEX.features.to.append != "FB_assignment.y"]
}
}
#secondly grab metadata items
metadat.ids <- which(GEX.features.to.append %in% names(VGM[[2]]@meta.data))
if(length(GEX.features.to.append)>0){
GEX.metadata.add <- VGM[[2]]@meta.data[,GEX.features.to.append[metadat.ids]]
GEX.to.add <- cbind(GEX.to.add, GEX.metadata.add)
GEX.features.to.append <- GEX.features.to.append[-metadat.ids]
}
}
if(length(GEX.features.to.append) > 0){ #any remaining inputs that have not been found in genes or metadata columns
warning(paste0("Features ", GEX.features.to.append, " were not found in GEX genes or metadata. Skipping these features..."))
}
#make barcode column for merging
GEX.to.add$barcode <- rownames(GEX.to.add)
} else {
GEX.to.add <- "none"
}
#Figure out input format of airr_rearrangements
#cases:
#1. A list of loaded tables => just use as is (order must be the same as samples in VGM)
#2. A list of data.in from platypus DB (similar to the VGM function) => use code from VGM function to order and make list of tables
#3. A list of local paths to access => load in as list
if(inherits(airr.rearrangements,"list")){
if(inherits(airr.rearrangements[[1]], "data.frame")){ #case 1.
airr.list <- airr.rearrangements
} else if(inherits(airr.rearrangements[[1]],"list")){ #case 2.
#Make sense of input => we need this small parser to accept data loaded locally and downloaded and compiled from the PlatypusDB
airr.list <- list()
for(i in 1:length(airr.rearrangements)){ #First level
if(names(airr.rearrangements[[i]])[1] == "VDJ"){ #check if we are already on a sample level
#add the respective tsv table
airr.list[[length(airr.list) + 1]] <- airr.rearrangements[[i]][[1]][[6]] #[[1]] for VDJ [[6]] for the airr_rearrangements table
} else {
for(j in 1:length(airr.rearrangements[[i]])){ #Second level
if(names(airr.rearrangements[[i]][[j]])[1] == "VDJ"){ #check if we are a sample level
airr.list[[length(airr.list) + 1]] <- airr.rearrangements[[i]][[j]][[1]][[6]]
} else { #Data structure does not match expectations
stop("Provided airr_rearrangement list input datastructure does not match necessary input format")
}
}
}
}
} else if(inherits(airr.rearrangements[[1]],"character")){ #case 3
vdj_load_error <- tryCatch({
airr.list <- list()
for(j in 1:length(airr.rearrangements)){
if(file.exists(airr.rearrangements[[j]])){
airr.list[[j]] <- utils::read.delim(airr.rearrangements[[j]], header = T)
} else {
warning(paste0(" airr_rearrangement.tsv not found for sample ", j, ". AIRR compatibility for this sample will not be available"))
airr.list[[j]] <- "none"
}
}
}, error = function(e){e
message(e)})
if(inherits(vdj_load_error,"error")){
message("Loading airr_rearrangement from disk failed")}
}
} else {
stop("Please provide airr_rearrangement inputs as a list. e.g. for paths list(~/data/project/airr_rearrangement.tsv)")
}
#Do some checks on the Airr dataframes
if(length(airr.list) != length(unique(stringr::str_split(VGM[[1]]$barcode, "_", simplify = T)[,1]))){
stop("List of airr_rearrangement dataframes has a different length than the number of unique barcode prefixes/sample_ids in the provided VGM. Please provide one dataframe per sample_id in the VGM in the same order.")
}
for(j in 1:length(airr.list)){
if(any(!c("cell_id", "clone_id", "sequence_id", "sequence", "v_cigar") %in% names(airr.list[[j]]))){
stop(paste0("Airr_rearrangement dataframe ", j, " does not contain all neccessary input columns. Please verifiy column names or table formatting are unaltered from the cellranger output"))
}
}
#Tests passed => can proceed
message(paste0("Order of samples in VGM is (Barcode prefix: entry in sample_id column): ", paste0(unique(paste0(stringr::str_split(VDJ.to.add$barcode, "_", simplify = T)[,1], ": ", VGM[[1]]$sample_id)), collapse = " / ")))
#rename barcodes in airr and join data
names(airr.list) <- paste0("s", c(1:length(airr.list)))
for(i in 1:length(airr.list)){
airr.list[[i]]$cell_id <- paste0(names(airr.list)[i], "_", airr.list[[i]]$cell_id)
airr.list[[i]]$barcode <- airr.list[[i]]$cell_id
#join VDJ data
#Because the Airr format has one row per chain, a cell is represented in > 1 row. the merge function below appends information of one cell to all rows containing chains from that cell.
airr.list[[i]] <- merge(airr.list[[i]], subset(VDJ.to.add, stringr::str_detect(VDJ.to.add$barcode, names(airr.list)[i])), by = "barcode", all.x = T, all.y = F)
if(inherits(GEX.to.add) != "character"){
#join GEX data
GEX.to.add <- GEX.to.add[!names(GEX.to.add) %in% names(airr.list[[i]])[names(airr.list[[i]]) != "barcode"]] #dont add existing columns
airr.list[[i]] <- merge(airr.list[[i]], subset(GEX.to.add, stringr::str_detect(GEX.to.add$barcode, names(airr.list)[i])), by = "barcode", all.x = T, all.y = F)
}
}
if(airr.integrate) airr.list <- do.call("rbind", airr.list)
return(airr.list)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.