start_time <- Sys.time() quiet_library <- function(...) { suppressPackageStartupMessages(library(...)) } quiet_library(rhdf5) quiet_library(BarMixer) quiet_library(Matrix) quiet_library(ggplot2) quiet_library(cowplot) quiet_library(jsonlite)
Declaring start
stm("Starting .h5 metadata analysis") stm(paste0("Using BarMixer v", installed.packages()["BarMixer","Version"]))
in_tenx <- params$in_tenx in_well <- params$in_well in_sample <- params$in_sample out_dir <- params$out_dir # Check Well ID format if(grepl("^[A-Z|-]+[0-9]+-.?P[0-9]C[0-9]W[0-9]$", in_well)) { in_well <- sub("-.P","-P",in_well) well_type <- "aifi" } else { well_type <- "other" } stm(paste0("IN 10x outs/ : ", in_tenx)) stm(paste0("IN Well ID : ", in_well)) stm(paste0("OUT H5 directory : ", out_dir))
print(c( paste0("IN 10x outs/ : ", in_tenx), paste0("IN Well ID : ", in_well), paste0("OUT H5 directory : ", out_dir) ))
in_h5 <- file.path(in_tenx, "filtered_feature_bc_matrix.h5") if(!file.exists(in_h5)) { stm(paste0("ERROR: Cannot find IN H5 file:", in_h5)) stop() } else { stm(paste0("Found matrix file:", in_h5)) } in_mol <- file.path(in_tenx, "molecule_info.h5") if(!file.exists(in_mol)) { stm(paste0("ERROR: Cannot find IN Mol Info H5 file:", in_mol)) stop() } else { stm(paste0("Found mol_info file:", in_mol)) } in_sum <- file.path(in_tenx, "metrics_summary.csv") if(!file.exists(in_sum)) { stm(paste0("ERROR: Cannot find IN Metrics Summary file:", in_sum)) stop() } else { stm(paste0("Found metrics file:", in_sum)) }
if(!dir.exists(out_dir)) { stm(paste0("Creating Output Directory: ",out_dir)) dir.create(out_dir, recursive = TRUE) }
stm(paste0("Loading HDF5 from ", in_h5)) h5_list <- h5dump(in_h5)
out_h5 <- file.path(out_dir, paste0(in_well, ".h5")) out_json <- sub(".h5$","_well_metrics.json",out_h5) stm(paste0("OUT H5 file : ", out_h5)) stm(paste0("OUT JSON file : ", out_json)) print(c( paste0("OUT H5 file : ", out_h5), paste0("OUT JSON file : ", out_json) ))
stm(paste0("Assembling Read Counts per Cell from ", in_mol)) bc <- sub("-1","",h5_list$matrix$barcodes) bc_counts <- data.table(mol_idx = h5read(in_mol, "/barcode_idx"), umi_count = h5read(in_mol, "/count")) bc_sums <- bc_counts[, .(n_reads = sum(umi_count)), by = mol_idx] rm(bc_counts) mol_bc <- h5read(in_mol, "/barcodes") bc_sums$cell_barcode <- mol_bc[bc_sums$mol_idx + 1] rm(mol_bc) bc_sums <- bc_sums[,.(cell_barcode, n_reads)] n_reads <- bc_sums$n_reads[match(bc, bc_sums$cell_barcode)] n_reads[is.na(n_reads)] <- 0 h5_list <- set_list_path(h5_list, "/matrix/observations/n_reads", n_reads)
stm("Computing UMI and Gene Counts per Cell") h5_list <- h5_list_convert_to_dgCMatrix(h5_list, target = "matrix") h5_list <- set_list_path(h5_list, "/matrix/observations/n_umis", unname(Matrix::colSums(h5_list$matrix_dgCMatrix))) h5_list <- set_list_path(h5_list, "/matrix/observations/n_genes", unname(Matrix::colSums(h5_list$matrix_dgCMatrix > 0))) h5_list <- h5_list_convert_from_dgCMatrix(h5_list, target = "matrix")
stm("Adding Cell UUIDs and Names") h5_list <- add_cell_ids(h5_list, add_uuid = TRUE, replace_barcode = TRUE, retain_original_barcode = TRUE, add_name = TRUE)
if(well_type == "aifi") { stm("Adding Batch, Pool, Chip, and Well metadata") h5_list <- add_well_metadata(h5_list, well_id = in_well) } else { h5_list <- set_list_path(h5_list, "/matrix/observations/well_id", rep(in_well, length(h5_list$matrix$barcodes))) }
stm("Adding chrM count metadata") h5_list <- h5_list_add_mito_umis(h5_list)
well_metrics <- read_tenx_metrics(in_sum) well_metrics <- as.list(well_metrics) h5_list <- set_list_path(h5_list, "/well", well_metrics) h5_list <- set_list_path(h5_list, "/well/well_id", in_well)
stm(paste0("Writing HDF5 to ", out_h5)) write_h5_list(h5_list, h5_file = out_h5, overwrite = TRUE) h5closeAll()
Extract metadata for plotting
stm("Generating tables and plots for report") meta <- h5_list_cell_metadata(h5_list) qc_list <- list(report_type = "add_tenx_rna_metadata", report_datetime = as.character(start_time), report_uuid = ids::uuid(use_time = TRUE), package = "BarMixer", package_version = sessionInfo()$otherPkgs$H5weaver$Version, well_id = in_well)
qc_well_metrics <- data.frame(tenx_metric = names(well_metrics), value = as.vector(unlist(well_metrics)), stringsAsFactors = FALSE) qc_well_metrics$value[qc_well_metrics$tenx_metric == "number_of_reads"] <- as.numeric(qc_well_metrics$value[qc_well_metrics$tenx_metric == "number_of_reads"] / 1e6) qc_well_metrics$tenx_metric[qc_well_metrics$tenx_metric == "number_of_reads"] <- "millions_of_reads" qc_list$well_metrics <- split(qc_well_metrics$value, 1:nrow(qc_well_metrics)) names(qc_list$well_metrics) <- qc_well_metrics$tenx_metric qc_table(qc_well_metrics)
overview_stats <- data.frame(statistic = c("N Mapped Reads", "N UMIs", "N Mito. UMIs", "N Genes", "Reads per UMI", "UMIs per Gene", "Reads per Gene"), q_25 = c(quantile(meta$n_reads, 0.25), quantile(meta$n_umis, 0.25), quantile(meta$n_mito_umis, 0.25), quantile(meta$n_genes, 0.25), quantile(meta$n_reads/meta$n_umis, 0.25, na.rm = TRUE), quantile(meta$n_umis/meta$n_genes, 0.25, na.rm = TRUE), quantile(meta$n_reads/meta$n_genes, 0.25, na.rm = TRUE)), median = c(quantile(meta$n_reads, 0.5), quantile(meta$n_umis, 0.50), quantile(meta$n_mito_umis, 0.50), quantile(meta$n_genes, 0.50), quantile(meta$n_reads/meta$n_umis, 0.50, na.rm = TRUE), quantile(meta$n_umis/meta$n_genes, 0.50, na.rm = TRUE), quantile(meta$n_reads/meta$n_genes, 0.50, na.rm = TRUE)), q_75 = c(quantile(meta$n_reads, 0.75), quantile(meta$n_umis, 0.75), quantile(meta$n_mito_umis, 0.25), quantile(meta$n_genes, 0.75), quantile(meta$n_reads/meta$n_umis, 0.75, na.rm = TRUE), quantile(meta$n_umis/meta$n_genes, 0.75, na.rm = TRUE), quantile(meta$n_reads/meta$n_genes, 0.75, na.rm = TRUE))) overview_stats$q_25 <- round(overview_stats$q_25, 2) overview_stats$median <- round(overview_stats$median, 2) overview_stats$q_75 <- round(overview_stats$q_75, 2) qc_list$well_count_stats <- lapply(1:nrow(overview_stats), function(x) { list(q_25 = overview_stats$q_25[x], median = overview_stats$median[x], q_75 = overview_stats$q_75[x]) }) names(qc_list$well_count_stats) <- tolower(gsub("[\\. ]+", "_", overview_stats$statistic)) qc_table(overview_stats)
reads_plot <- qc_hist_plot(meta, column = "n_reads", name_x = "N Reads per Cell", fill = "dodgerblue", target = 2e4) + ggtitle("Alignment/Mapping Distributions") umis_plot <- qc_hist_plot(meta, column = "n_umis", name_x = "N UMIs per Cell", fill = "purple", target = 8e3) genes_plot <- qc_hist_plot(meta, column = "n_genes", name_x = "N Genes per Cell", fill = "orangered", target = 2e3) histogram_list <- list(reads_plot, umis_plot, genes_plot) suppressWarnings( plot_grid(plotlist = histogram_list, ncol = 1, nrow = 3) )
umi_reads_scatter <- qc_scatter_plot(meta, column_x = "n_reads", name_x = "N Reads per Cell", column_y = "n_umis", name_y = "N UMIs per Cell", color = "darkblue") + ggtitle("UMIs vs Reads") suppressWarnings( umi_reads_scatter )
genes_umis_scatter <- qc_scatter_plot(meta, column_x = "n_umis", name_x = "N UMIs per Cell", column_y = "n_genes", name_y = "N Genes per Cell", color = "red") + ggtitle("Genes vs UMIs") suppressWarnings( genes_umis_scatter )
genes_reads_scatter <- qc_scatter_plot(meta, column_x = "n_reads", name_x = "N Reads per Cell", column_y = "n_genes", name_y = "N Genes per Cell", color = "darkgreen") + ggtitle("Genes vs Reads") suppressWarnings( genes_reads_scatter )
stm(paste0("Writing JSON to ", out_json)) qc_list_json <- jsonlite::toJSON(qc_list, auto_unbox = TRUE, pretty = TRUE) writeLines(qc_list_json, out_json)
sessionInfo()
Total time elapsed
end_time <- Sys.time() diff_time <- end_time - start_time time_message <- paste0("Elapsed Time: ", round(diff_time, 3), " ", units(diff_time)) print(time_message) stm(time_message) stm("H5 metadata process complete.")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.