start_time <- Sys.time() quiet_library <- function(...) { suppressPackageStartupMessages(library(...)) } quiet_library(rhdf5) quiet_library(BarMixer) quiet_library(Matrix) quiet_library(ggplot2) quiet_library(cowplot)
Declaring start
stm("Starting .h5 split") stm(paste0("Using BarMixer v", installed.packages()["BarMixer","Version"]))
in_h5 <- params$in_h5 in_hto <- params$in_hto out_dir <- params$out_dir stm(paste0("IN H5 file : ", in_h5)) stm(paste0("IN HTO results path : ", in_hto)) stm(paste0("OUT Directory : ", out_dir))
print(c( paste0("IN H5 file : ", in_h5), paste0("IN HTO results path : ", in_hto), paste0("OUT Directory : ", out_dir) ))
if(!file.exists(in_h5)) { stm(paste0("ERROR: Cannot find IN H5 file:", in_h5)) stop() } else { stm(paste0("Found H5 file:", in_h5)) in_well <- sub(".h5$","",basename(in_h5)) } in_mat <- list.files(in_hto, pattern = "hto_count_matrix", full.names = TRUE) if(!file.exists(in_mat)) { stm(paste0("ERROR: Cannot find HTO count matrix:", in_mat)) stop() } else { stm(paste0("Found HTO count matrix:", in_mat)) } in_tbl <- list.files(in_hto, pattern = "hto_category_table", full.names = TRUE) if(!file.exists(in_tbl)) { stm(paste0("ERROR: Cannot find HTO category table:", in_tbl)) stop() } else { stm(paste0("Found HTO category table:", in_tbl)) }
stm(paste0("Loading HTO count matrix from ", in_mat)) hash_count_df <- fread(in_mat) hash_count_mat <- as.matrix(hash_count_df[,-1]) rownames(hash_count_mat) <- hash_count_df[[1]] rm(hash_count_df)
stm(paste0("Loading HTO category table from ", in_tbl)) hash_category_table <- fread(in_tbl) hto_barcodes <- unique(hash_category_table$hto_barcode[hash_category_table$hto_category == "singlet"])
output_h5_files <- file.path(out_dir, paste0(in_well, "_", c(hto_barcodes,"multiplet"), ".h5")) output_messages <- paste0("OUT HDF5 : ", output_h5_files) for(i in seq_along(output_messages)) { stm(output_messages[i]) } out_json <- file.path(out_dir, paste0(in_well, "_split_h5_metrics.json")) json_message <- paste0(paste0("OUT JSON : ", out_json)) stm(json_message) print(output_messages) print(json_message)
stm(paste0("Loading HDF5 from ", in_h5)) h5_list <- h5dump(in_h5)
stm("Adding hash data to HDF5 results") h5_list <- add_h5_list_hash_results(h5_list, hash_category_table, hash_count_mat, match_target = "original_barcodes")
stm("Splitting HDF5 based on Hash Category Table") split_h5_list <- split_h5_list_by_hash(h5_list)
for(i in seq_along(split_h5_list)) { out_file <- file.path(out_dir, paste0(in_well, "_", names(split_h5_list)[i],".h5")) stm(paste0("Writing HDF5 to ", out_file)) write_h5_list(split_h5_list[[i]], h5_file = out_file, overwrite = TRUE) h5closeAll() }
Extract metadata for plotting
stm("Generating tables and plots for report") meta <- h5_list_cell_metadata(h5_list) qc_list <- list(report_type = "split_h5_by_hash", report_datetime = as.character(start_time), report_uuid = ids::uuid(use_time = TRUE), package = "BarMixer", package_version = sessionInfo()$otherPkgs$BarMixer$Version, well_id = in_well)
Metadata prep
meta <- as.data.table(meta) if("sample_id" %in% names(meta)) { meta$plot_barcode <- paste0(meta$hto_barcode,"\n",meta$sample_id) } else { meta$sample_id <- "no_sample_id" meta$plot_barcode <- meta$hto_barcode }
hto_order <- c("no_hash","singlet","doublet","multiplet") hto_count_stats <- meta[, .(n_cells = nrow(.SD), frac_cells = round(nrow(.SD)/nrow(meta), 4), total_aligned_reads = sum(.SD$n_reads), frac_all_aligned_reads = round(sum(.SD$n_reads)/sum(meta$n_reads), 4)), by = hto_category] hto_count_stats <- hto_count_stats[match(hto_order,hto_count_stats$hto_category),] qc_list$hto_category_stats <- lapply(1:nrow(hto_count_stats), function(x) { list(n_cells = hto_count_stats$n_cells[x], frac_cells = hto_count_stats$frac_cells[x], total_aligned_reads = hto_count_stats$total_aligned_reads[x], frac_all_aligned_reads = hto_count_stats$frac_all_aligned_reads[x]) }) names(qc_list$hto_category_stats) <- hto_count_stats$hto_category qc_table(hto_count_stats)
hto_count_stats$fill <- c("black", "darkblue", "dodgerblue", "mediumorchid4") category_counts_barplot <- ggplot() + geom_bar(data = hto_count_stats, aes(x = as.factor(1:nrow(hto_count_stats)), y = n_cells, fill = fill), stat = "identity") + geom_text(data = hto_count_stats, aes(x = as.factor(1:nrow(hto_count_stats)), y = n_cells + 0.02 * max(n_cells), label = n_cells), hjust = 0.5, vjust = 0) + scale_fill_identity() + scale_x_discrete("", breaks = 1:nrow(hto_count_stats), labels = hto_count_stats$hto_category) + scale_y_continuous("N Cells", limits = c(0, 4e4)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.3)) + ggtitle("Cells per HTO Category") suppressWarnings( category_counts_barplot )
plot_df <- hto_count_stats plot_df$cells_max <- cumsum(plot_df$frac_cells) plot_df$cells_min <- data.table::shift(plot_df$cells_max, type = "lag", fill = 0) plot_df$reads_max <- cumsum(plot_df$frac_all_aligned_reads) plot_df$reads_min <- data.table::shift(plot_df$reads_max, type = "lag", fill = 0) hto_category_fraction_barplot <- ggplot() + geom_rect(data = plot_df, aes(xmin = reads_min, xmax = reads_max, ymin = 0.6, ymax = 1.4, fill = fill)) + geom_rect(data = plot_df, aes(xmin = cells_min, xmax = cells_max, ymin = 1.6, ymax = 2.4, fill = fill)) + scale_fill_identity() + scale_x_continuous("") + scale_y_continuous("", breaks = c(1, 2), labels = c("Fraction of Reads", "Fraction of Cell Barcodes")) + theme_bw() + ggtitle("Fraction of Cells and Reads per HTO Category") suppressWarnings( hto_category_fraction_barplot )
singlet_meta <- as.data.table(meta[meta$hto_category == "singlet",]) bc_count_stats <- singlet_meta[, .(n_singlets = nrow(.SD), frac_singlets = round(nrow(.SD)/nrow(singlet_meta), 4)), by = list(sample_id,hto_barcode)] qc_list$sample_singlet_stats <- lapply(1:nrow(bc_count_stats), function(x) { list(hto_barcode = bc_count_stats$hto_barcode[x], n_singlets = bc_count_stats$n_singlets[x], frac_singlets = bc_count_stats$frac_singlets[x]) }) names(qc_list$sample_singlet_stats) <- bc_count_stats$sample_id qc_table(bc_count_stats)
bc_to_plot_bc <- data.frame(hto_barcode = unique(singlet_meta$hto_barcode), plot_barcode = unique(singlet_meta$plot_barcode)) bc_count_stats <- bc_count_stats[bc_to_plot_bc, on = "hto_barcode"] ggplot() + geom_bar(data = bc_count_stats, aes(x = as.factor(plot_barcode), y = n_singlets), stat = "identity") + geom_hline(aes(yintercept = median(bc_count_stats$n_singlets)), linetype = "dashed") + scale_x_discrete("") + scale_y_continuous("N Cells", limits = c(0, 4000)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3)) + ggtitle("Cells per HTO Barcode")
hto_read_stats <- meta[, .(q25 = quantile(n_reads, 0.25), median = quantile(n_reads, 0.50), q75 = quantile(n_reads, 0.75)), by = hto_category] hto_read_stats <- hto_read_stats[match(hto_order,hto_read_stats$hto_category),] for(category in hto_read_stats$hto_category) { qc_list$hto_category_stats[[category]]$mapped_reads_per_cell_q25 <- hto_read_stats$q25[hto_read_stats$hto_category == category] qc_list$hto_category_stats[[category]]$mapped_reads_per_cell_median <- hto_read_stats$median[hto_read_stats$hto_category == category] qc_list$hto_category_stats[[category]]$mapped_reads_per_cell_q75 <- hto_read_stats$q75[hto_read_stats$hto_category == category] } qc_table(hto_read_stats)
bc_read_stats <- singlet_meta[, .(q25 = quantile(n_reads, 0.25), median = quantile(n_reads, 0.50), q75 = quantile(n_reads, 0.75)), by = list(sample_id,hto_barcode)] for(sample_id in bc_read_stats$sample_id) { qc_list$sample_singlet_stats[[sample_id]]$mapped_reads_per_cell_q25 <- bc_read_stats$q25[bc_read_stats$sample_id == sample_id] qc_list$sample_singlet_stats[[sample_id]]$mapped_reads_per_cell_median <- bc_read_stats$median[bc_read_stats$sample_id == sample_id] qc_list$sample_singlet_stats[[sample_id]]$mapped_reads_per_cell_q75 <- bc_read_stats$q75[bc_read_stats$sample_id == sample_id] } qc_table(bc_read_stats)
category_reads_violins <- qc_violin_plot(meta, category_x = "hto_category", name_x = "HTO Category", column_y = "n_reads", name_y = "N Reads per Cell", fill = "dodgerblue") barcode_reads_violins <- qc_violin_plot(meta[meta$hto_category == "singlet",], category_x = "plot_barcode", name_x = "HTO Barcode (singlets)", column_y = "n_reads", name_y = "N Reads per Cell", fill = "dodgerblue") + ggtitle("Reads per Cell") reads_violin_list <- list(category_reads_violins, barcode_reads_violins) suppressWarnings( plot_grid(plotlist = reads_violin_list, ncol = 2, rel_widths = c(1, 3), nrow = 1, align = "h") )
hto_umi_stats <- meta[, .(q25 = quantile(n_umis, 0.25), median = quantile(n_umis, 0.50), q75 = quantile(n_umis, 0.75)), by = hto_category] hto_umi_stats <- hto_umi_stats[match(hto_order,hto_umi_stats$hto_category),] for(category in hto_umi_stats$hto_category) { qc_list$hto_category_stats[[category]]$umis_per_cell_q25 <- hto_umi_stats$q25[hto_umi_stats$hto_category == category] qc_list$hto_category_stats[[category]]$umis_per_cell_median <- hto_umi_stats$median[hto_umi_stats$hto_category == category] qc_list$hto_category_stats[[category]]$umis_per_cell_q75 <- hto_umi_stats$q75[hto_umi_stats$hto_category == category] } qc_table(hto_umi_stats)
bc_umi_stats <- singlet_meta[, .(q25 = quantile(n_umis, 0.25), median = quantile(n_umis, 0.50), q75 = quantile(n_umis, 0.75)), by = list(sample_id,hto_barcode)] for(sample_id in bc_umi_stats$sample_id) { qc_list$sample_singlet_stats[[sample_id]]$umis_per_cell_q25 <- bc_umi_stats$q25[bc_umi_stats$sample_id == sample_id] qc_list$sample_singlet_stats[[sample_id]]$umis_per_cell_median <- bc_umi_stats$median[bc_umi_stats$sample_id == sample_id] qc_list$sample_singlet_stats[[sample_id]]$umis_per_cell_q75 <- bc_umi_stats$q75[bc_umi_stats$sample_id == sample_id] } qc_table(bc_umi_stats)
category_umis_violins <- qc_violin_plot(meta, category_x = "hto_category", name_x = "HTO Category", column_y = "n_umis", name_y = "N UMIs per Cell", fill = "purple") barcode_umis_violins <- qc_violin_plot(meta[meta$hto_category == "singlet",], category_x = "plot_barcode", name_x = "HTO Barcode (singlets)", column_y = "n_umis", name_y = "N UMIs per Cell", fill = "purple") + ggtitle("UMIs per Cell") umis_violin_list <- list(category_umis_violins, barcode_umis_violins) suppressWarnings( plot_grid(plotlist = umis_violin_list, ncol = 2, rel_widths = c(1, 3), nrow = 1, align = "h") )
hto_gene_stats <- meta[, .(q25 = quantile(n_genes, 0.25), median = quantile(n_genes, 0.50), q75 = quantile(n_genes, 0.75)), by = hto_category] hto_gene_stats <- hto_gene_stats[match(hto_order,hto_gene_stats$hto_category),] for(category in hto_gene_stats$hto_category) { qc_list$hto_category_stats[[category]]$genes_per_cell_q25 <- hto_gene_stats$q25[hto_gene_stats$hto_category == category] qc_list$hto_category_stats[[category]]$genes_per_cell_median <- hto_gene_stats$median[hto_gene_stats$hto_category == category] qc_list$hto_category_stats[[category]]$genes_per_cell_q75 <- hto_gene_stats$q75[hto_gene_stats$hto_category == category] } qc_table(hto_gene_stats)
bc_gene_stats <- singlet_meta[, .(q25 = quantile(n_genes, 0.25), median = quantile(n_genes, 0.50), q75 = quantile(n_genes, 0.75)), by = list(sample_id,hto_barcode)] for(sample_id in bc_gene_stats$sample_id) { qc_list$sample_singlet_stats[[sample_id]]$genes_per_cell_q25 <- bc_gene_stats$q25[bc_gene_stats$sample_id == sample_id] qc_list$sample_singlet_stats[[sample_id]]$genes_per_cell_median <- bc_gene_stats$median[bc_gene_stats$sample_id == sample_id] qc_list$sample_singlet_stats[[sample_id]]$genes_per_cell_q75 <- bc_gene_stats$q75[bc_gene_stats$sample_id == sample_id] } qc_table(bc_gene_stats)
category_genes_violins <- qc_violin_plot(meta, category_x = "hto_category", name_x = "HTO Category", column_y = "n_genes", name_y = "N Genes per Cell", fill = "orangered") barcode_genes_violins <- qc_violin_plot(meta[meta$hto_category == "singlet",], category_x = "plot_barcode", name_x = "HTO Barcode (singlets)", column_y = "n_genes", name_y = "N Genes per Cell", fill = "orangered") + ggtitle("Genes per Cell") genes_violin_list <- list(category_genes_violins, barcode_genes_violins) suppressWarnings( plot_grid(plotlist = genes_violin_list, ncol = 2, rel_widths = c(1, 3), nrow = 1, align = "h") )
stm(paste0("Writing JSON to ", out_json)) qc_list_json <- jsonlite::toJSON(qc_list, auto_unbox = TRUE, pretty = TRUE) writeLines(qc_list_json, out_json)
sessionInfo()
Total time elapsed
end_time <- Sys.time() diff_time <- end_time - start_time time_message <- paste0("Elapsed Time: ", round(diff_time, 3), " ", units(diff_time)) print(time_message) stm(time_message) stm("H5 split process complete.")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.