In akoyabio/phenoptr: inForm Helper Functions

# This document creates a spatial distribution report for two phenotypes in a
# single frame with optional positivity thresholds for each phenotype. It
# requires a cell seg table with phenotypes. A composite
# image, if available, will be used as a background for the plots.
# Note: this is run in the environment of `spatial_distribution_report`
# so local variables defined there are available here.
library(ggplot2)
library(magrittr)

knitr::opts_chunk$set(echo=FALSE,fig.width=11, fig.height=8, 
                      comment=NA, warning=FALSE, message=FALSE)

# The default plot hook for vignettes creates a markdown image 'tag'
# which breaks in pandoc if there are any spaces in the image path.
# The standard HTML plot hook inserts an <img> tag which renders correctly.
knitr::knit_hooks$set(plot = knitr:::hook_plot_html)

csd = read_cell_seg_data(cell_seg_path, pixels_per_micron='auto')

# Get field metadata from the component data file
component_path = sub('_cell_seg_data.txt', '_component_data.tif', cell_seg_path)
if(!file.exists(component_path))
  stop('Spatial distribution report requires a matching component data file.')
field_info = get_field_info(component_path)

# Window for spatial processing and limit for charts
xlim=c(field_info$location[1], field_info$location[1]+field_info$field_size[1])
ylim=c(field_info$location[2], field_info$location[2]+field_info$field_size[2])
window = spatstat.geom::owin(xrange=xlim, yrange=ylim)


# Segmented image for background
if (grepl('cell_seg_data.txt', cell_seg_path))
{
  image_endings = c(
    'composite_image.tif',
    'composite_image.jpg',
    'image_with_tissue_seg.tif',
    'image_with_tissue_seg.jpg',
    'tissue_seg_map.tif',
    'tissue_seg_map.jpg'
  )
  for (ending in image_endings) {
    tissue_seg_path = sub('cell_seg_data.txt', ending, cell_seg_path)
    if (file.exists(tissue_seg_path)) break
  }
}

if (exists('tissue_seg_path') && file.exists(tissue_seg_path))
{
  if (grepl('jpg$', tissue_seg_path))
    background = jpeg::readJPEG(tissue_seg_path)
  else background = tiff::readTIFF(tissue_seg_path)

  background = as.raster(background)
} else {
  background = NULL
}

# Remove def'n of tissue_seg_path so we will recompute next time for new image
rm(tissue_seg_path)

Data

Spatial distribution of phenotypes from

r cell_seg_path

Selected phenotypes

for (name in phenotypes)
  cat('-', name, '\n')

First-order statistics

cat('Number of cells:', nrow(csd), '\n\n')

# Count phenotypes
counts = tibble::tibble()

# First the ones defined in the cell seg data.
# Use `select_rows` here to accommodate consolidated data.
for (pheno in unique_phenotypes(csd))
  counts = dplyr::bind_rows(counts, 
                            tibble::tibble(Phenotype=pheno, 
                                           Count=sum(select_rows(csd, pheno))))

# Phenotypes not in the cell seg data
phenotype_specials = setdiff(phenotypes, unique_phenotypes(csd))
for (special in phenotype_specials)
  counts = dplyr::bind_rows(counts, tibble::tibble(Phenotype=special, 
                 Count=sum(select_rows(csd, phenotype_rules[[special]]))))

counts %>% dplyr::mutate(Proportion=scales::percent(round(Count/nrow(csd), 2))) %>% 
  knitr::kable(caption='Cell count and fraction of total per phenotype',
               table.attr='style="width: 30%"')

Cell and phenotype locations

All cells

# Colorbrewer Dark2 palette
dark2 = c('#1b9e77','#d95f02','#7570b3','#e7298a',
          '#66a61e','#e6ab02','#a6761d','#666666')

# If csd doesn't have a Phenotype column, make one
csd2 = make_phenotype_column(csd)
p = ggplot(csd2, aes(x=`Cell X Position`, y=`Cell Y Position`, color=Phenotype))
p = add_scales_and_background(p, background, xlim, ylim)
p = p + geom_point() 
if (length(unique(csd2$Phenotype))<=8)
  p = p + scale_color_manual(values=dark2)
p + labs(title='Locations of all cells')

p + facet_wrap(~Phenotype) + guides(color="none")

Nearest neighbors, selected phenotypes

pair_counts = NULL
for (pair in pairs) {
  pheno1 = list(
    name=pair[1], color=colors[pair[[1]]], select=phenotype_rules[[pair[1]]])

  pheno2 = list(
    name=pair[2], color=colors[pair[[2]]], select=phenotype_rules[[pair[2]]])

  # Get point pattern datasets and distance matrices; direction matters...
  pheno_data1 = phenotype_as_ppp(csd, pheno1, window)
  pheno_data2 = phenotype_as_ppp(csd, pheno2, window)

  nn_dist12 = find_nearest_neighbor(pheno_data1, pheno_data2)
  nn_dist21 = find_nearest_neighbor(pheno_data2, pheno_data1)

  print(nn_plot(pheno_data1, pheno_data2, nn_dist12, background, xlim, ylim))
  print(nn_plot(pheno_data2, pheno_data1, nn_dist21, background, xlim, ylim))

  # Find mutual pairs by merging nn_dist12 and nn_dist21
  # First get just the nnDist21 cell ids and rename them to match nnDist12
  nn_mutual = nn_dist21[, c('Cell ID', 'To Cell ID')]
  names(nn_mutual) = c('To Cell ID', 'Cell ID')
  nn_mutual = merge(nn_mutual, nn_dist12)
  print(nn_plot_mutual(pheno_data1, pheno_data2, nn_mutual, background, xlim, ylim))

  pair_counts = dplyr::bind_rows(pair_counts, tibble::tibble(
    From = pheno_data1$pheno$name,
    To = pheno_data2$pheno$name,
    'From Count' = nrow(pheno_data1$data),
    'To Count' = nrow(pheno_data2$data),
    Pairs = nrow(nn_mutual),
    'From Fraction' = round(Pairs/`From Count`, 3),
    'To Fraction' = round(Pairs/`To Count`, 3)
  ))
}