start_time <- Sys.time() quiet_library <- function(...) { suppressPackageStartupMessages(library(...)) } quiet_library(BarMixer) quiet_library(ggplot2) quiet_library(cowplot)
Declaring start
stm("Starting HTO count processing") stm(paste0("Using BarMixer v", installed.packages()["BarMixer","Version"]))
in_file <- params$in_file in_samples <- params$in_samples in_well <- params$in_well if(is.null(params$in_min_cutoff)) { in_min_cutoff <- "auto" } else { in_min_cutoff <- as.numeric(params$in_min_cutoff) } if(is.null(params$in_eel)) { in_eel <- TRUE } else { in_eel <- as.logical(params$in_eel) } out_dir <- params$out_dir out_mat <- file.path(out_dir, paste0(in_well, "_hto_count_matrix.csv.gz")) out_tbl <- file.path(out_dir, paste0(in_well, "_hto_category_table.csv.gz")) out_json <- file.path(out_dir, paste0(in_well, "_hto_processing_metrics.json")) stm(paste0("IN HTO Tag_Counts.csv : ", in_file)) stm(paste0("IN SampleSheet.csv : ", in_samples)) stm(paste0("IN Well : ", in_well)) stm(paste0("IN Min. Cutoff : ", in_min_cutoff)) stm(paste0("IN Expect equal load : ", in_eel)) stm(paste0("OUT Count matrix : ", out_mat)) stm(paste0("OUT Category table : ", out_tbl)) stm(paste0("OUT JSON metrics : ", out_json))
print(c( paste0("IN Tag_Counts.csv : ", in_file), paste0("IN HTO sample sheet : ", in_samples), paste0("IN Well : ", in_well), paste0("IN Min. Cutoff : ", in_min_cutoff), paste0("IN Expect equal load: ", in_eel), paste0("OUT Count matrix : ", out_mat), paste0("OUT Category table : ", out_tbl), paste0("OUT JSON metrics : ", out_json) ))
if(!file.exists(in_file)) { stm(paste0("ERROR: Cannot find IN Tag_Counts.csv:", in_file)) stop() } else { stm(paste0("Found Tag Counts file:", in_file)) } if(!file.exists(in_samples)) { stm(paste0("ERROR: Cannot find IN SampleSheet.csv:", in_samples)) stop() } else { stm(paste0("Found SampleSheet.csv:", in_samples)) }
in_hto_key <- read.csv(in_samples, header = TRUE, stringsAsFactors = FALSE) qc_table(in_hto_key)
in_hto_table <- fread(in_file)
stm("Formatting HTO matrix and filtering for valid barcodes") keep_cols <- colnames(in_hto_table)[colnames(in_hto_table) %in% in_hto_key$hash_name] in_hto_mat <- t(as.matrix(in_hto_table[, ..keep_cols])) in_hto_mat <- as(in_hto_mat, "dgCMatrix") colnames(in_hto_mat) <- in_hto_table$cell_barcode rownames(in_hto_mat) <- in_hto_key$hash_tag[match(rownames(in_hto_mat), in_hto_key$hash_name)] in_hto_mat <- add_missing_hto_rows(in_hto_mat, valid_htos = in_hto_key$hash_tag) in_hto_mat <- in_hto_mat[,colSums(in_hto_mat) > 1]
stm("Binarizing HTO counts to call positive and negative hashes") if(in_min_cutoff == "auto") { binary_results <- binarize_hash_matrix(in_hto_mat, valid_htos = in_hto_key$hash_tag, use_median_cut = TRUE, expect_equal_loading = in_eel) } else { binary_results <- binarize_hash_matrix(in_hto_mat, valid_htos = in_hto_key$hash_tag, use_median_cut = FALSE, min_cut = as.numeric(in_min_cutoff), expect_equal_loading = in_eel) }
stm("Converting binary results to categories") category_results <- categorize_binary_hash_matrix(binary_results$bmat)
stm(paste0("Writing count matrix to ", out_mat)) out_hto_mat <- as(in_hto_mat, "matrix") out_hto_mat <- as.data.table(out_hto_mat) out_hto_mat <- cbind(data.table(hto_barcode = rownames(in_hto_mat)), out_hto_mat) fwrite(out_hto_mat, file = out_mat)
stm(paste0("Writing category table to ", out_tbl)) hto_barcodes_to_sample_ids <- function(x, bc_key) { x <- as.character(x) res <- character() for(i in seq_along(x)) { bcs <- unlist(strsplit(x[i], ";")[[1]]) res[i] <- paste(bc_key$sample_id[match(bcs, bc_key$hash_tag)], collapse = ";") } res } hash_category_table <- category_results$hash_category_table hash_category_table$sample_id <- hto_barcodes_to_sample_ids(hash_category_table$hto_barcode, in_hto_key) rownames(hash_category_table) <- NULL fwrite(hash_category_table, file = out_tbl)
qc_list <- list(report_type = "hto_processing", report_datetime = as.character(start_time), report_uuid = ids::uuid(use_time = TRUE), package = "HTOparser", package_version = sessionInfo()$otherPkgs$HTOparser$Version, well_id = in_well)
bsummary <- binary_results$bsummary bsummary$sample_id <- in_hto_key$sample_id[match(bsummary$hto_barcode, in_hto_key$hash_tag)] bsummary <- setcolorder(bsummary, c("sample_id","hto_barcode", "cutoff","n_pos","n_neg","n_below_threshold", "frac_pos","frac_neg","frac_below_threshold")) rownames(bsummary) <- NULL bsummary$hash_tag <- in_hto_key$hash_name[match(bsummary$hto_barcode, in_hto_key$hash_tag)] qc_list$sample_hto_stats <- lapply(1:nrow(bsummary), function(x) { list(hto_barcode = as.character(bsummary$hto_barcode[x]), hash_tag = as.character(bsummary$hash_tag[x]), cutoff = bsummary$cutoff[x], n_pos = bsummary$n_pos[x], n_neg = bsummary$n_neg[x], n_below_threshold = bsummary$n_below_threshold[x], frac_pos = bsummary$frac_pos[x], frac_neg = bsummary$frac_neg[x], frac_below_threshold = bsummary$frac_below_threshold[x]) }) n_no_sample_id <- sum(is.na(bsummary$sample_id)) if(n_no_sample_id > 0) { bsummary$sample_id[is.na(bsummary$sample_id)] <- paste0("NA_",1:n_no_sample_id) } names(qc_list$sample_hto_stats) <- bsummary$sample_id qc_table(bsummary)
stm("Generating plots for report") plot_list <- lapply( 1:nrow(in_hto_key), function(x) { barcode <- in_hto_key$hash_tag[x] sample_id <- in_hto_key$sample_id[x] vals <- in_hto_mat[barcode,] cutoff <- bsummary$cutoff[bsummary$hto_barcode == barcode] n_vals_below_min_cut <- sum(vals <= 5) vals_above_min_cut <- vals[vals > 5] if(length(vals_above_min_cut) > 0) { val_df <- data.frame(vals = log10(vals_above_min_cut)) cut_df <- data.frame(cutoff = log10(cutoff)) ggplot() + geom_histogram(data = val_df, aes(x = vals), fill = "darkgreen", binwidth = 0.05) + geom_vline(data = cut_df, aes(xintercept = cutoff), size = 1, color = "dodgerblue") + scale_x_continuous("log10(HTO Count)", limits = c(0,5)) + scale_fill_identity() + scale_color_identity() + ggtitle(paste(sample_id, "|", barcode)) + theme_bw(base_size = 8) } else { text_df <- data.frame(x = 2.5, y = 0.5, label = "Hash not detected") ggplot() + geom_text(data = text_df, aes(x = x, y = y, label = label)) + scale_x_continuous("log10(HTO Count)", limits = c(0,5)) + scale_y_continuous("count", limits = c(0,1)) + ggtitle(paste(sample_id, "|", barcode)) + theme_bw(base_size = 8) } } )
suppressWarnings( plot_grid(plotlist = plot_list, nrow = ceiling(length(plot_list) / 2), ncol = 2) )
rownames(category_results$hash_summary) <- NULL qc_list$hto_category_stats <- lapply(1:nrow(category_results$hash_summary), function(x) { list(n_cells = category_results$hash_summary$n_category[x], frac_cells = category_results$hash_summary$frac_category[x]) }) names(qc_list$hto_category_stats) <- category_results$hash_summary$hto_category qc_table(category_results$hash_summary)
singlet_summary <- make_singlet_summary(category_results$hash_category_table, valid_htos = in_hto_key$hash_tag) singlet_summary$sample_id <- in_hto_key$sample_id[match(singlet_summary$hto_barcode, in_hto_key$hash_tag)] singlet_summary <- setcolorder(singlet_summary, c("sample_id", "hto_barcode", "n_singlets", "frac_singlets")) n_no_sample_id <- sum(is.na(singlet_summary$sample_id)) if(n_no_sample_id > 0) { singlet_summary$sample_id[is.na(singlet_summary$sample_id)] <- paste0("NA_",1:n_no_sample_id) } for(sample_id in singlet_summary$sample_id) { qc_list$sample_hto_stats[[sample_id]]$n_singlets <- singlet_summary$n_singlets[singlet_summary$sample_id == sample_id] } qc_table(singlet_summary)
singlet_plot_data <- singlet_summary singlet_plot_data$x <- 1:nrow(singlet_summary) ggplot() + geom_rect(data = singlet_plot_data, aes(xmin = x - 0.4, xmax = x + 0.4, ymin = 0, ymax = n_singlets)) + geom_text(data = singlet_plot_data, aes(x = x, y = n_singlets * 1.02, label = n_singlets), vjust = 0, size = 3) + ggtitle("Singlet Counts") + scale_x_continuous("", breaks = singlet_plot_data$x, labels = paste0(singlet_plot_data$hto_barcode, "\n", singlet_plot_data$sample_id)) + scale_y_continuous("N Singlets") + theme_bw(base_size = 8) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3))
stm(paste0("Writing JSON to ", out_json)) qc_list_json <- jsonlite::toJSON(qc_list, auto_unbox = TRUE, pretty = TRUE) writeLines(qc_list_json, out_json)
sessionInfo()
Total time elapsed
end_time <- Sys.time() diff_time <- end_time - start_time time_message <- paste0("Elapsed Time: ", round(diff_time, 3), " ", units(diff_time)) print(time_message) stm(time_message) stm("HTO count processing complete.")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.