#' Add cell IDs to a h5_list object
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file.
#' @param target the group name for the matrix within the h5_list to use. Default is "matrix", which is used by 10x.
#' @param add_uuid a logical indicating whether or not to add a uuid generated by ids::uuid(). Default is TRUE.
#' @param replace_barcode a logical indicating whether or not the new uuid should replace the barcodes slot. Default is TRUE.
#' @param retain_original_barcode a logical indicating whether to retain the original barcodes if replace_barcode is TRUE. These will be moved to /matrix/original_barcode if TRUE. Default is TRUE.
#' @param add_name a logical indicating whether to add a new cell name. These are adjective_adjective_animal names generated by ids::adjective_animal(). Default is TRUE.
#'
#' @return a modified h5_list object
#' @export
#'
add_cell_ids <- function(h5_list,
target = "matrix",
add_uuid = TRUE,
replace_barcode = TRUE,
retain_original_barcode = TRUE,
add_name = TRUE) {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that(class(target) == "character")
assertthat::assert_that(length(target) == 1)
if(grepl("^/",target)) {
target <- sub("^/","",target)
}
assertthat::assert_that(target %in% names(h5_list))
assertthat::assert_that("barcodes" %in% names(h5_list[[target]]))
assertthat::assert_that(is.logical(add_uuid))
assertthat::assert_that(is.logical(add_name))
if(add_uuid) {
if(replace_barcode) {
if(retain_original_barcode) {
h5_list <- BarMixer::set_list_path(h5_list,
paste0("/",target,"/observations/original_barcodes"),
sub("-1","",h5_list[[target]]$barcodes))
}
# matrix$barcodes is used for column ids by most functions that read 10x HDF5 files.
h5_list[[target]]$barcodes <- ids::uuid(n = length(h5_list[[target]]$barcodes),
drop_hyphens = TRUE,
use_time = TRUE)
h5_list <- BarMixer::set_list_path(h5_list,
paste0("/",target,"/observations/cell_uuid"),
h5_list[[target]]$barcodes)
} else {
h5_list <- BarMixer::set_list_path(h5_list,
paste0("/",target,"/observations/cell_uuid"),
ids::uuid(n = length(h5_list[[target]]$barcodes),
drop_hyphens = TRUE,
use_time = TRUE))
}
}
if(add_name) {
h5_list <- BarMixer::set_list_path(h5_list,
paste0("/",target,"/observations/cell_name"),
ids::adjective_animal(n = length(h5_list[[target]]$barcodes),
n_adjectives = 2,
max_len = 10))
}
h5_list
}
#' Add well metadata
#'
#' Parses a standard well_id to generate chip_id, pool_id, and batch_id.
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file.
#' @param well_id a character object indicating the well name for the input h5. Should match the pattern B000-P1C1W1
#'
#' @return a modified h5_list object
#' @export
#'
add_well_metadata <- function(h5_list,
well_id) {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that("matrix" %in% names(h5_list))
assertthat::assert_that(class(well_id) == "character")
assertthat::assert_that(length(well_id) == 1)
well_id <- sub("-[AHR]P","-P",well_id)
chip_id <- sub("W[0-9]+$","",well_id)
pool_id <- sub("C[0-9]+$","",chip_id)
batch_id <- sub("-P[0-9]+$","",pool_id)
n_cells <- length(h5_list$matrix$barcodes)
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/well_id",
rep(well_id, n_cells))
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/chip_id",
rep(chip_id, n_cells))
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/pool_id",
rep(pool_id, n_cells))
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/batch_id",
rep(batch_id, n_cells))
h5_list
}
#' Add mitochondrial gene UMI counts to an h5_list object
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file.
#'
#' @return a list object with mitochondrial counts stored in h5_list$matrix$observations$mito_umis
#' @export
h5_list_add_mito_umis <- function(h5_list) {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that("matrix" %in% names(h5_list))
chrM_genes <- data.table::fread(system.file("reference/GRCh38_10x_chrM_gene_metadata.csv.gz",
package = "BarMixer"))
reconvert_matrix <- FALSE
if(!"matrix_dgCMatrix" %in% names(h5_list)) {
h5_list <- BarMixer::h5_list_convert_to_dgCMatrix(h5_list,
target = "matrix")
reconvert_matrix <- TRUE
}
common_mito_genes <- intersect(chrM_genes$name, h5_list$matrix$features$name)
if(length(common_mito_genes) == 0) {
stop("No Mitochondrial Genes found in h5_list")
}
common_mito_ids <- rownames(h5_list$matrix_dgCMatrix)[h5_list$matrix$features$name %in% common_mito_genes]
mito_umis <- Matrix::colSums(h5_list$matrix_dgCMatrix[common_mito_ids,])
names(mito_umis) <- NULL
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/n_mito_umis",
mito_umis)
if(reconvert_matrix) {
h5_list <- BarMixer::h5_list_convert_from_dgCMatrix(h5_list,
target = "matrix")
}
h5_list
}
#' Convert the matrix in an h5_list from 10x Genomics data to a sparse matrix
#'
#' This is very useful for subsetting the matrix based on column names.
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file.
#' @param target the group name for the sparse matrix. Default is "matrix", which is used by 10x.
#' @param feature_names the group name in target/features to use for rownames. Must be either "id" or "name". Default is "id".
#'
#' @return a list object
#' @export
#'
h5_list_convert_to_dgCMatrix <- function(h5_list,
target = "matrix",
feature_names = "id") {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that(target %in% names(h5_list))
assertthat::assert_that(feature_names %in% c("id","name"))
target_dgCMatrix <- paste0(target,"_dgCMatrix")
h5_list[[target_dgCMatrix]] <- Matrix::sparseMatrix(x = h5_list[[target]]$data,
i = h5_list[[target]]$indices,
index1 = FALSE,
p = h5_list[[target]]$indptr,
dims = h5_list[[target]]$shape,
dimnames = list(h5_list[[target]]$features[[feature_names]],
h5_list[[target]]$barcodes))
h5_list[[target]]$data <- NULL
h5_list[[target]]$indices <- NULL
h5_list[[target]]$indptr <- NULL
h5_list[[target]]$shape <- NULL
h5_list[[target]]$features$id <- NULL
h5_list[[target]]$barcodes <- NULL
h5_list
}
#' Convert the matrix in an h5_list from a sparse matrix back to its original structure
#'
#' This is very useful for preparing to write back out to an .h5 file.
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file, with a matrix converted by h5_list_convert_to_dgCMatrix().
#' @param target the group name of the original sparse matrix. Default is "matrix", which is used by 10x.
#'
#' @return a list object
#' @export
#'
h5_list_convert_from_dgCMatrix <- function(h5_list,
target = "matrix") {
assertthat::assert_that(class(h5_list) == "list")
target_dgCMatrix <- paste0(target,"_dgCMatrix")
assertthat::assert_that(target_dgCMatrix %in% names(h5_list))
h5_list[[target]]$data <- h5_list[[target_dgCMatrix]]@x
h5_list[[target]]$indices <- h5_list[[target_dgCMatrix]]@i
h5_list[[target]]$indptr <- h5_list[[target_dgCMatrix]]@p
h5_list[[target]]$shape <- dim(h5_list[[target_dgCMatrix]])
h5_list[[target]]$barcodes <- colnames(h5_list[[target_dgCMatrix]])
h5_list[[target]]$features$id <- rownames(h5_list[[target_dgCMatrix]])
h5_list[[target_dgCMatrix]] <- NULL
h5_list
}
#' Add a sparse matrix to an h5_list containing 10x genomics data
#'
#' @param h5_list a list object, usually generated by running rhdf5::h5dump() on a 10x HDF5 file.
#' @param mat a dgCMatrix to add to the h5_list
#' @param target a character object specifying the location within the list to write to.
#'
#' @return a list object with the additional matrix added similar to 10x HDF5 matrix format.
#' @export
#'
h5_list_add_dgCMatrix <- function(h5_list,
mat,
target) {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that(class(mat) == "dgCMatrix")
h5_list[[target]]$data <- mat@x
h5_list[[target]]$indices <- mat@i
h5_list[[target]]$indptr <- mat@p
h5_list[[target]]$shape <- dim(mat)
h5_list[[target]]$barcodes <- colnames(mat)
h5_list[[target]]$features <- list(id = rownames(mat))
h5_list
}
#' Subset a h5_list object based on a set of observations.
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file.
#' @param match_values a vector of values to use for filtering
#' @param match_target a character object specifying which values to use for filtering in the h5_list. If "barcodes", uses matrix/barcodes; Otherwise, looks in matrix/observations for matching values
#' @param sparse_matrices a character vector specifying which list objects to treat as sparse matrices. If these aren't currently dgCMatrices, they will be converted by h5_list_convert_to_dgCMatrix for filtering.
#'
#' @return a list object
#' @export
#'
subset_h5_list_by_observations <- function(h5_list,
match_values,
match_target = "barcodes",
sparse_matrices = "matrix") {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that(is.vector(match_values))
assertthat::assert_that(is.character(match_target))
assertthat::assert_that(length(match_target) == 1)
converted_matrices <- sparse_matrices[paste0(sparse_matrices, "_dgCMatrix") %in% names(h5_list)]
unconverted_matrices <- setdiff(sparse_matrices[sparse_matrices %in% names(h5_list)], converted_matrices)
assertthat::assert_that((length(unconverted_matrices) + length(converted_matrices)) > 0)
if(match_target == "barcodes") {
if("matrix" %in% unconverted_matrices) {
assertthat::assert_that("barcodes" %in% names(h5_list$matrix))
keep <- match(match_values, h5_list$matrix$barcodes)
} else if("matrix" %in% converted_matrices) {
keep <- match(match_values, colnames(h5_list$matrix_dgCMatrix))
}
} else {
assertthat::assert_that("observations" %in% names(h5_list$matrix))
assertthat::assert_that(match_target %in% names(h5_list$matrix$observations))
keep <- match(match_values, h5_list$matrix$observations[[match_target]])
}
if(length(unconverted_matrices) > 0) {
for(unconverted in unconverted_matrices) {
h5_list <- BarMixer::h5_list_convert_to_dgCMatrix(h5_list,
unconverted)
}
}
for(converted in c(unconverted_matrices, converted_matrices)) {
mat_name <- paste0(converted, "_dgCMatrix")
h5_list[[mat_name]] <- h5_list[[mat_name]][, keep]
if("observations" %in% names(h5_list[[converted]])) {
additional_cell_values <- names(h5_list[[converted]]$observations)
for(additional_value in additional_cell_values) {
h5_list[[converted]]$observations[[additional_value]] <- h5_list[[converted]]$observations[[additional_value]][keep]
}
}
}
if(length(unconverted_matrices) > 0) {
for(unconverted in unconverted_matrices) {
h5_list <- BarMixer::h5_list_convert_from_dgCMatrix(h5_list,
unconverted)
}
}
h5_list
}
#' Add cell hashing results to an h5_list.
#'
#' @param h5_list a list object generated by running rhdf5::h5dump() on a 10x HDF5 file.
#' @param hash_category_table a data.frame or data.table with results from HTOparser::categorize_binary_hash_matrix() consisting of 3 columns: cell_barcode, hto_category, and hto_barcode.
#' @param hash_count_matrix (optional) a matrix or dgCMatrix with results from CITE-seq-Count, HTOparser::add_missing_hto_rows(), or HTOparser::barcode_table_to_matrix().
#' @param match_target a character object specifying which values to use for filtering in the h5_list. If "barcodes", uses matrix/barcodes; Otherwise, looks in matrix/observations for matching values
#'
#' @return a list object with hash data added to /hash/ and /matrix/observations/
#' @export
#'
add_h5_list_hash_results <- function(h5_list,
hash_category_table,
hash_count_matrix,
match_target = "barcodes") {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that(sum(class(hash_category_table) %in% c("data.frame","data.table")) > 0)
if(!is.null(hash_count_matrix)) {
assertthat::assert_that(sum(class(hash_count_matrix) %in% c("dgCMatrix","matrix")) > 0)
}
assertthat::assert_that(class(match_target) == "character")
assertthat::assert_that(length(match_target) == 1)
if(match_target == "barcodes") {
h5_list$matrix$barcodes <- sub("-.+","",h5_list$matrix$barcodes)
common_barcodes <- intersect(h5_list$matrix$barcodes,
hash_category_table$cell_barcode)
} else {
common_barcodes <- intersect(h5_list$matrix$observations[[match_target]],
hash_category_table$cell_barcode)
}
# Filter the hash table based on common barcodes
common_hash_table <- hash_category_table[match(common_barcodes,
hash_category_table$cell_barcode),]
# Convert matrix to dgCMatrix for filtering
h5_list <- BarMixer::h5_list_convert_to_dgCMatrix(h5_list,
target = "matrix")
# Filter the h5_list based on common barcodes
h5_list <- BarMixer::subset_h5_list_by_observations(h5_list,
match_values = common_barcodes,
match_target = match_target,
sparse_matrices = "matrix")
if(is.null(hash_count_matrix)) {
# If there's no hash count matrix, we only need to filter matrix as sparse
sparse_matrices <- "matrix"
} else {
# Otherwise, filter the hash count matrix for common barcodes
hash_count_matrix <- hash_count_matrix[, common_barcodes]
colnames(hash_count_matrix) <- colnames(h5_list$matrix_dgCMatrix)
# Convert to sparse for storage
hash_count_matrix <- as(hash_count_matrix, "dgCMatrix")
# Add the hash count matrix to the h5_list
h5_list$hash_dgCMatrix <- hash_count_matrix
# Keep track of hash_count_matrix as sparse for filtering
sparse_matrices <- c("matrix","hash")
}
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/hto_category",
common_hash_table$hto_category)
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/hto_barcode",
common_hash_table$hto_barcode)
if("sample_id" %in% names(common_hash_table)) {
h5_list <- BarMixer::set_list_path(h5_list,
"/matrix/observations/sample_id",
common_hash_table$sample_id)
}
for(sparse_matrix in sparse_matrices) {
h5_list <- BarMixer::h5_list_convert_from_dgCMatrix(h5_list,
target = sparse_matrix)
}
h5_list
}
#' Split a 10x HDF5 file based on HTOparser results
#'
#' @param h5_list a list object generated by h5dump(), which has hash labels obtained from add_h5_list_hash_results().
#'
#' @return A list object with contents of the input h5_file separated by each hto_barcode. Also includes multiplets.
#' @export
#'
split_h5_list_by_hash <- function(h5_list) {
assertthat::assert_that(class(h5_list) == "list")
assertthat::assert_that("hto_category" %in% names(h5_list$matrix$observations))
meta <- BarMixer::h5_list_cell_metadata(h5_list)
# Separate singlets from non-singlets
singlet_meta <- meta[meta$hto_category == "singlet",]
multiplet_meta <- meta[meta$hto_category != "singlet",]
# Make a list to hold output
split_h5_list <- list()
# Get the set of unique barcodes (and multiplet) to use for splitting
hto_barcodes <- c(unique(as.character(singlet_meta$hto_barcode)), "multiplet")
for(barcode in hto_barcodes) {
# Use the multiplet table if multiplet. Otherwise, filter singlet table for hto_barcode
if(barcode == "multiplet") {
hto_meta <- multiplet_meta
} else {
hto_meta <- singlet_meta[singlet_meta$hto_barcode == barcode,]
}
# Subset the h5_list for this barcode using barcode_target and sparse_matrices, defined above in this function
split_h5_list[[barcode]] <- BarMixer::subset_h5_list_by_observations(
h5_list,
match_values = hto_meta$barcodes,
match_target = "barcodes",
sparse_matrices = c("matrix","hash")
)
}
split_h5_list
}
#' Concatenate two h5_list objects.
#'
#' This is suitable for h5_list objects that contain vectors and/or dgCMatrices. It expects that the two lists have the same structure and names.
#'
#' @param x the first list to combine.
#' @param y the second list to append.
#'
#' @return a list object with the same structure as x.
#' @export
#'
cat_h5_list <- function (x, y) {
assertthat::assert_that(is.list(x))
assertthat::assert_that(is.list(y))
assertthat::assert_that(all(names(x) == names(y)))
for (v in seq_along(x)) {
if(class(x[[v]]) == "list") {
# If the target is a list, recurse to the next level
x[[v]] <- BarMixer::cat_h5_list(x[[v]], y[[v]])
} else if(class(x[[v]]) == "dgCMatrix") {
# If it's a dgCMatrix, perform a cbind
x[[v]] <- cbind(x[[v]], y[[v]])
} else {
# If it's a vector, use c to combine.
x[[v]] <- c(as.vector(x[[v]]), as.vector(y[[v]]))
}
}
x
}
#' Reduce a list-of-lists of h5_list objects to a single, concatenated object.
#'
#' @param h5_ll A list-of-lists containing h5_list objects generated by rhdf5::h5dump() or similar.
#' @param sparse_matrices A vector of object names within each list that should be treated as dgCMatrices for combining. Default is "matrix", which is used by 10x.
#'
#' @return An h5_list object with the structure of a single h5_list within the h5_ll object.
#' @export
#'
reduce_h5_list <- function(h5_ll,
sparse_matrices = c("matrix")) {
assertthat::assert_that(is.list(h5_ll))
assertthat::assert_that(all(sparse_matrices %in% names(h5_ll[[1]])))
for(i in seq_along(sparse_matrices)) {
h5_ll <- lapply(h5_ll,
BarMixer::h5_list_convert_to_dgCMatrix,
sparse_matrices[i])
}
out_list <- h5_ll[[1]]
for(i in 2:length(h5_ll)) {
out_list <- BarMixer::cat_h5_list(out_list, h5_ll[[i]])
}
for(i in seq_along(sparse_matrices)) {
# Exception here for "features", which shouldn't be concatenated.
out_list[[sparse_matrices[i]]]$features <- h5_ll[[1]][[sparse_matrices[i]]]$features
out_list <- BarMixer::h5_list_convert_from_dgCMatrix(out_list,
target = sparse_matrices[i])
}
out_list
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.