In akoyabio/phenoptrReports: Create reports using Phenoptics data

knitr::opts_chunk$set(echo=FALSE,fig.width=9, fig.height=6, 
                      comment=NA, warning=FALSE, message=FALSE)

# The default plot hook for vignettes creates a markdown image 'tag'
# which breaks in pandoc if there are any spaces in the image path.
# The standard HTML plot hook inserts an <img> tag which renders correctly.
knitr::knit_hooks$set(plot = knitr:::hook_plot_html)

suppressPackageStartupMessages(library(tidyverse))
library(phenoptr)

if (is.null(params$csd_path) == is.null(params$csd))
  stop('csd_path or csd must be provided but not both.')

csd = if (!is.null(params$csd)) params$csd else read_cell_seg_data(params$csd_path)

# Require Slide ID and field column
if (!'Slide ID' %in% names(csd))
  stop('The cell seg summary report requires a "Slide ID" column.')
field_col = phenoptr::field_column(csd)

# If csd_path was provided, look for a companion 'rejected' file
rejected = NULL
if (!is.null(params$csd_path) && 
    str_detect(params$csd_path, 'cell_seg_data.txt')) {
  # Try summary first
  rejected_path = str_replace(params$csd_path, 'cell_seg_data',
                              'rejected_cell_seg_data_summary')
  if (!file.exists(rejected_path))
    rejected_path = str_replace(params$csd_path, 'cell_seg_data',
                              'rejected_cell_seg_data')
  if(file.exists(rejected_path)) {
    rejected = read_cell_seg_data(rejected_path)

    # Don't report on empty rejected file
    if (nrow(rejected) == 0)
      rejected = NULL
  }
}

if (!is.null(params$dataset_name)) {
  cat('Summary of cell seg data for **', params$dataset_name, '**.\n\n', sep='')
} else if (!is.null(params$csd_path)) {
  cat('Summary of cell seg data from  \n`', params$csd_path, '`.\n\n')
}

Slides and fields

This file contains data on r n_distinct(csd[[field_col]]) fields taken from r n_distinct(csd[['Slide ID']]) slides:

csd %>% group_by(`Slide ID`) %>% 
  summarize(`Number of fields`=n_distinct(!!rlang::sym(field_col)))

if (!is.null(rejected)) {
  cat('\n\n### Rejected fields\n\n')
  cat(n_distinct(rejected[[field_col]]), 
      ' fields from ', n_distinct(rejected[['Slide ID']]), 
      ' slides were rejected in the merge step:\n\n', sep='')

  rejected %>% group_by(`Slide ID`) %>% 
    summarize(`Number of fields`=n_distinct(!!rlang::sym(field_col)))
}

Tissue categories

The tissue categories present are

cats = unique(csd$`Tissue Category`)
cat('\n- ', paste(cats, collapse='\n- '), '\n\n', sep='')

Phenotypes

The phenotypes present, and their total counts, are

if ('Phenotype' %in% names(csd)) {
  # Old-style phenotypes (single column)
  counts = csd %>% count(Phenotype) %>% 
    rename(Count='n') %>% 
    filter(Phenotype != '')
} else {
  # Phenotype per marker or multi-schema
  counts = csd %>% select(starts_with('Phenotype')) %>% 
    gather() %>% 
    filter(!str_detect(value, '-$')) %>% 
    count(value) %>% 
    rename(Phenotype='value', Count='n') %>% 
    filter(Phenotype != '')
}

# count total and N/A rows
total = count(csd) %>% rename(`Total Cells`='n')
count_NA = csd %>% 
  filter_at(vars(starts_with('Phenotype')), any_vars((. == ''))) %>% 
  count() %>% 
  rename('N/A'='n')

counts %>% spread(Phenotype, Count) %>% bind_cols(count_NA, total)

Phenotype counts per slide

if ('Phenotype' %in% names(csd)) {
  # Old-style phenotypes (single column)
  counts = csd %>% count(`Slide ID`, `Phenotype`) %>% 
    rename(Count='n') %>% 
    filter(Phenotype != '')
} else {
  # Phenotype per marker
  counts = csd %>% select(`Slide ID`, starts_with('Phenotype')) %>% 
    gather('key', 'value', -`Slide ID`) %>% 
    filter(!str_detect(value, '-$')) %>% 
    group_by(`Slide ID`) %>% 
    count(value) %>% 
    rename(Phenotype='value', Count='n') %>%
    filter(Phenotype != '')
}

# count total and N/A rows
totals = csd %>% group_by(`Slide ID`) %>% 
  count() %>% 
  rename(`Total Cells`='n')
counts_NA = csd %>% group_by(`Slide ID`) %>% 
  filter_at(vars(starts_with('Phenotype')), any_vars((. == ''))) %>% 
  count() %>% 
  rename('N/A'='n')

 counts %>% spread(Phenotype, Count, fill=0) %>% 
   left_join(counts_NA, by='Slide ID') %>% 
   left_join(totals, by='Slide ID')

# For phenotype per marker we can show UpSet plots. For classic phenotyping
# they don't add much to the tabular summaries.
if (!'Phenotype' %in% names(csd) && require(UpSetR)) {
  # Some explanation
  cat('\n\n### UpSet plots of phenotype combinations\n\n')
  cat('These "UpSet" plots visualize combinations of phenotypes.
      The horizontal bars show counts of the individual phenotypes.
      The vertical bars show counts of the combination phenotypes
      present in the data. The central matrix shows the combinations
      graphically.')

  # UpSet plot for the whole dataset
  cat('\n\n#### Phenotype combinations, all data\n\n')
  print(upset_plot(csd))

  # And for each slide
  for (slide in unique(csd$`Slide ID`)) {
    cat('\n\n#### Phenotype combinations, ', slide, '\n\n', sep='')
    p = upset_plot(csd %>% filter(`Slide ID`==slide))
    if (is.null(p)) cat('This slide has only one positive phenotype.\n\n') else print(p)
  }
}