# Output formatting
# These functions build a script based on the user inputs.
# Each function returns code to be included in the script output.
# Most code snippets create a table to include in the Results workbook.
# For each table created, a pair of strings is appended to `table_pairs`.
# The first string is code to "clean" the table; the second is code
# to add the table to the final workbook.
# Environment to share among formatting functions
# This is a bit of a hack, an R6 class or nested functions would be cleaner.
format_env = new.env(parent = emptyenv())
# Format everything.
format_all = function(all_data) {
phenotype_values = all_data$phenotype_values
# Filter null values that happen when the controls are created,
# then missing phenotype
phenos = purrr::compact(phenotype_values, 'phenotype') %>%
purrr::discard(~.x$phenotype %in% c(''))
names(phenos) = purrr::map_chr(phenos, 'phenotype') %>%
phenoptr:::phenotype_names()
.by = all_data$by
# Flags for various sections present
has = list()
has$phenotypes = length(phenos) > 0
has$density = has$phenotypes && !is.null(all_data$summary_path)
has$expression = any(purrr::map_lgl(phenos, ~!.x$expression %in% c('', 'NA')))
has$h_score = !is.null(all_data$score_path)
has$include_nearest = all_data$include_nearest && length(phenos) >= 2
has$include_nearest_details =
has$include_nearest && all_data$include_distance_details
has$include_count_within = (all_data$include_count_within
&& length(all_data$radii) > 0 && length(phenos) >= 2)
has$include_count_within_details =
has$include_count_within && all_data$include_distance_details
# Re-initialize
# `table_pairs` accumulates pairs of code snippets for table cleanup and
# worksheet writing.
# It must be at global scope to be shared between all these functions.
assign("table_pairs", list(), envir=format_env)
# Code generation
paste0(
format_header(),
format_path(all_data$input_path, all_data$field_col),
format_tissue_categories(all_data$tissue_categories),
format_phenotypes(phenos, .by),
ifelse(has$density, format_density(all_data$summary_path), ''),
format_expression(phenos),
ifelse(has$h_score, format_h_score(all_data$score_path, phenos), ''),
ifelse(has$include_nearest,
format_nearest_neighbors(all_data$output_dir,
has$include_nearest_details), ''),
ifelse(has$include_count_within,
format_count_within(all_data$output_dir, all_data$radii,
has$include_count_within_details), ''),
format_cleanup(all_data$slide_id_prefix, all_data$use_regex, has),
format_trailer(all_data$output_dir, has))
}
# Front matter loads required packages
format_header = function() {
stringr::str_glue(
'# Created by phenoptr {packageVersion("phenoptr")}',
' and phenoptrReports {packageVersion("phenoptrReports")} on {Sys.Date()}
# http://akoyabio.github.io/phenoptr
# http://akoyabio.github.io/phenoptrReports
suppressPackageStartupMessages(library(dplyr))
library(phenoptr)
library(phenoptrReports)
library(openxlsx)
\n\n')
}
# Format reading cell seg data and making a summary table
format_path = function(path, field_col) {
table_pairs = get("table_pairs", envir=format_env)
table_pairs = c(table_pairs,
list(c(cleanup_code('summary_table'),
worksheet_code('write_summary_sheet',
'summary_table'))))
assign("table_pairs", table_pairs, envir=format_env)
stringr::str_glue('# Read the consolidated data file
csd_path =
"{escape_path(path)}"
csd = read_cell_seg_data(csd_path, col_select="phenoptrReports")
# Make a table summarizing the number of fields per slide
summary_table = csd %>%
group_by(`Slide ID`) %>%
summarize(`Number of fields`=n_distinct(`{field_col}`))
\n\n')
}
# Format tissue categories
format_tissue_categories = function(cats) {
cats = cats %>% purrr::compact() %>% purrr::discard(~.x=='')
if (length(cats)==0) return('tissue_categories = NA\n\n\n')
cat_str = paste(cats, collapse='", "')
stringr::str_glue('tissue_categories = c("{cat_str}")\n\n\n')
}
# Format the phenotype definitions, phenotype counts and percentages
format_phenotypes = function(vals, .by) {
phenos = purrr::map_chr(vals, 'phenotype')
# This allows multiple expression markers per pheno
phenos = unique(phenos)
# Always do all cells
if (!any(stringr::str_detect(phenos,
stringr::regex('Total|All', ignore_case=TRUE))))
phenos = c(unique(phenos), 'Total Cells')
table_pairs = get("table_pairs", envir=format_env)
table_pairs = c(table_pairs, list(
c(cleanup_code('counts'),
worksheet_code('write_counts_sheet', 'counts')),
c(cleanup_code('percents'),
worksheet_code('write_percents_sheet', 'percents'))
))
assign("table_pairs", table_pairs, envir=format_env)
phenos_string = paste(phenos, collapse='", "')
stringr::str_glue('# Define phenotypes
phenotypes = parse_phenotypes("{phenos_string}")
# Column to aggregate by
.by = "{.by}"
# Count phenotypes per tissue category
counts = count_phenotypes(csd, phenotypes, tissue_categories, .by=.by)
percents = counts_to_percents(counts)
\n\n')
}
# Format density calculation
format_density = function(summary_path) {
table_pairs = get("table_pairs", envir=format_env)
table_pairs = c(table_pairs,
list(c(cleanup_code('densities'),
worksheet_code('write_density_sheet', 'densities'))))
assign("table_pairs", table_pairs, envir=format_env)
stringr::str_glue(
'# Path to a cell seg summary file, used for the tissue category area
summary_path =
"{escape_path(summary_path)}"
# Using the counts computed above and the tissue area from the summary,
# compute cell densities for each phenotype
densities = compute_density_from_cell_summary(counts, summary_path,
tissue_categories, .by=.by)
\n\n')
}
# Format the expression parameters and computation of mean expression
format_expression = function(vals) {
# Filter out phenotypes with no expression requested
phenos = vals %>%
purrr::discard(~.x$expression %in% c('', 'NA'))
if (length(phenos) == 0)
return('expression_params = NULL\n\n')
table_pairs = get("table_pairs", envir=format_env)
table_pairs = c(table_pairs,
list(c(cleanup_code('expression_means'),
worksheet_code('write_expression_sheet',
'expression_means'))))
assign("table_pairs", table_pairs, envir=format_env)
# Map phenotype names to expression
pairs = purrr::imap_chr(phenos,
~stringr::str_glue('"{.y}" = "{.x$expression}"'))
phenos_string = paste(pairs, collapse=',\n ')
stringr::str_glue(
'# Associate phenotype names with expression columns to measure.
expression_params = list(
{phenos_string}
)
# Compute mean expression per phenotype
expression_means = csd %>%
compute_mean_expression_many(phenotypes, expression_params,
tissue_categories, .by=.by)
\n\n')
}
# Format calculation of H-Score for all cells and optional phenotypes
format_h_score = function(score_paths, phenos) {
result = '# Compute H-Score\n'
table_pairs = get("table_pairs", envir=format_env)
purrr::iwalk(score_paths, function(score_path, ix) {
base_table_name = stringr::str_glue('h_score_{ix}')
table_pairs <<- c(table_pairs,
list(c(cleanup_code(base_table_name),
worksheet_code('write_h_score_sheet', base_table_name))))
# First the overall H-Score
result <<- stringr::str_glue(
"{result}score_path_{ix} =
'{escape_path(score_path)}'
h_score_{ix} = compute_h_score_from_score_data(csd, score_path_{ix},
tissue_categories, .by=.by)
\n\n")
# Add in any optional scoring by appending to result and table_pairs
wants_scoring = purrr::map_lgl(phenos,
~(!is.null(.x$score) && .x$score==TRUE))
scoring_phenos = phenos[wants_scoring] %>%
purrr::map_chr('phenotype') %>%
names() %>%
unique()
# Names for the constructed data tables
# We need to distinguish e.g. CD8+ and CD8- here so replace + and -
# with valid characters before calling make.names.
table_names = scoring_phenos %>%
stringr::str_replace_all(c('\\+'='p', '-'='m')) %>%
make.names() %>%
stringr::str_replace_all('\\.+', '_') %>%
{ stringr::str_glue('h_score_{ix}_{.}') }
purrr::pmap(list(scoring_phenos, table_names),
function(pheno, table_name) {
table_pairs <<-
c(table_pairs,
list(c(cleanup_code(table_name),
stringr::str_glue("write_h_score_sheet(wb, {table_name},
marker='{pheno}')\n\n"))))
result <<- stringr::str_glue(
"{result}{table_name}=compute_h_score_from_score_data(
csd, score_path_{ix}, tissue_categories, .by=.by,
phenotypes[['{pheno}']])
\n\n")
})
})
assign("table_pairs", table_pairs, envir=format_env)
result
}
# Format calculation of nearest neighbors
format_nearest_neighbors = function(output_dir, include_distance_details) {
table_pairs = get("table_pairs", envir=format_env)
table_pairs = c(table_pairs,
list(c(cleanup_code('nearest_neighbors'),
worksheet_code('write_nearest_neighbor_summary_sheet',
'nearest_neighbors'))))
assign("table_pairs", table_pairs, envir=format_env)
if (include_distance_details)
stringr::str_glue(
'# Summarize nearest neighbor distances
nearest_detail_path = file.path(
"{escape_path(output_dir)}",
"nearest_neighbors.txt")
nearest_neighbors = nearest_neighbor_summary(
csd, phenotypes, tissue_categories, nearest_detail_path, .by=.by,
extra_cols=expression_params)
\n\n')
else
stringr::str_glue(
"# Summarize nearest neighbor distances
nearest_neighbors = nearest_neighbor_summary(
csd, phenotypes, tissue_categories, .by=.by)
\n\n")
}
# Format calculation of `count_within`
format_count_within = function(output_dir, radii,
include_count_within_details) {
table_pairs = get("table_pairs", envir=format_env)
table_pairs = c(table_pairs,
list(c(cleanup_code('count_within'),
worksheet_code('write_count_within_sheet',
'count_within'))))
assign("table_pairs", table_pairs, envir=format_env)
if (include_count_within_details)
stringr::str_glue(
'# Summary of cells within a specific distance
radii = {deparse(radii)}
count_detail_path = file.path(
"{escape_path(output_dir)}",
"count_within.txt")
count_within = count_within_summary(
csd, radii, phenotypes, tissue_categories,
count_detail_path, .by=.by, extra_cols=expression_params)
\n\n')
else
stringr::str_glue(
"# Summary of cells within a specific distance
radii = {deparse(radii)}
count_within = count_within_summary(csd, radii, phenotypes,
tissue_categories, .by=.by)
\n\n")
}
# Format generation of cleanup function and calls to it for each table
format_cleanup = function(slide_id_prefix, use_regex, has) {
if (!has$phenotypes || is.null(slide_id_prefix) || slide_id_prefix == '')
return('')
# We are going to use a regex to remove the Slide ID prefix.
# If the user did not request regex, escape any special characters in
# the prefix they provided and ensure that it is a prefix match.
if (!use_regex) {
slide_id_prefix = escapeRegex(slide_id_prefix)
slide_id_prefix = paste0('^', slide_id_prefix)
}
# Now slide_id_prefix is a valid regex. We still have to double-escape \
# to put it into a string literal.
slide_id_prefix = stringr::str_replace_all(slide_id_prefix,
stringr::fixed('\\'), '\\\\')
# Note: Don't use mutate() in cleanup(),
# it removes attribute from h_score table
start = stringr::str_glue("# Clean up the aggregation column
# Do this at the end or it will break merges
cleanup = function(d) {{
by_col = ifelse(.by %in% names(d), .by, 'Slide ID')
d[[by_col]] = stringr::str_remove_all(d[[by_col]], '{slide_id_prefix}')
d
}}
\n\n")
# Add a cleanup call for each table
table_pairs = get("table_pairs", envir=format_env)
purrr::walk(table_pairs, ~{
start <<- stringr::str_glue("{start}{.x[[1]]}")
})
paste(start, '\n')
}
# Format writing the workbook and creating the chart report
format_trailer = function(output_dir, has) {
start =
'# Write it all out to an Excel workbook
wb = createWorkbook()
'
# Add a write call for each table
table_pairs = get("table_pairs", envir=format_env)
purrr::walk(table_pairs, ~{
start <<- stringr::str_glue("{start}{.x[[2]]}")
})
end = stringr::str_glue(
'
workbook_path = file.path(
"{escape_path(output_dir)}",
"Results.xlsx")
if (file.exists(workbook_path)) file.remove(workbook_path)
saveWorkbook(wb, workbook_path)
# Write summary charts
charts_path = file.path(
"{escape_path(output_dir)}",
"Charts.docx")
if (file.exists(charts_path)) file.remove(charts_path)
write_summary_charts(workbook_path, charts_path, .by=.by)
# Save session info
info_path = file.path(
"{escape_path(output_dir)}",
"session_info.txt")
write_session_info(info_path)
')
paste0(start, end)
}
## Helper functions
# Create a call to `cleanup` for the given table
cleanup_code = function(table_name) {
stringr::str_glue("{table_name} = cleanup({table_name})\n\n")
}
# Create call to write a worksheet
worksheet_code = function(worksheet_function, table_name) {
stringr::str_glue("{worksheet_function}(wb, {table_name})\n\n")
}
# Escape a file path by changing all \ to /
escape_path = function(path) {
stringr::str_replace_all(path, '\\\\', '/')
}
# Hmisc::escapeRegex
escapeRegex = function(string) {
gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", string)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.