In PerkinElmer/phenoptr: inForm Helper Functions

# This document creates a spatial distribution report for two phenotypes in a
# single frame with optional positivity thresholds for each phenotype. It
# requires a cell seg table with phenotypes. A composite
# image, if available, will be used as a background for the plots.
# Note: this is run in the environment of `spatial_distribution_report`
# so local variables defined there are available here.
library(ggplot2)
library(magrittr)

knitr::opts_chunk$set(echo=FALSE,fig.width=11, fig.height=8, 
                      comment=NA, warning=FALSE, message=FALSE)

csd = read_cell_seg_data(cell_seg_path)
phenotype_specials = setdiff(phenotypes, unique(csd$Phenotype))

window = spatstat::owin(xrange=c(0, max(csd$`Cell X Position`)),
                        yrange=c(0, max(csd$`Cell Y Position`)))

# Segmented image for background
if (grepl('cell_seg_data.txt', cell_seg_path))
{
  image_endings = c(
    'image_with_tissue_seg.tif',
    'image_with_tissue_seg.jpg',
    'tissue_seg_map.tif',
    'tissue_seg_map.jpg',
    'composite_image.tif',
    'composite_image.jpg'
  )
  for (ending in image_endings) {
    tissue_seg_path = sub('cell_seg_data.txt', ending, cell_seg_path)
    if (file.exists(tissue_seg_path)) break
  }
}

if (exists('tissue_seg_path') && file.exists(tissue_seg_path))
{
  if (grepl('jpg$', tissue_seg_path))
    background = jpeg::readJPEG(tissue_seg_path)
  else background = tiff::readTIFF(tissue_seg_path)

  # Tone it down a bit by bringing it closer to neutral gray
  background = (background - 0.75) / 8 + 0.75 
  background = as.raster(background)
  xlim = dim(background)[2] / pixels_per_micron
  ylim = dim(background)[1] / pixels_per_micron
} else {
  # If no background image guess at dimensions
  background = NA
  xlim = max(csd$`Cell X Position`)
  ylim = max(csd$`Cell Y Position`)
}

# Remove def'n of tissue_seg_path so we will recompute next time for new image
rm(tissue_seg_path)

Data

Spatial distribution of phenotypes from

r cell_seg_path

Selected phenotypes

for (name in phenotypes)
  cat('-', name, '\n')

First-order statistics

cat('Number of cells:', nrow(csd), '\n\n')

counts = csd %>% dplyr::group_by(Phenotype) %>% 
  dplyr::summarize(Count=n())
for (special in phenotype_specials)
  counts = dplyr::add_row(counts, Phenotype=special, 
                 Count=sum(select_rows(csd, phenotype_rules[[special]])))

  counts %>% dplyr::mutate(Proportion=scales::percent(round(Count/nrow(csd), 2))) %>% 
  knitr::kable(caption='Cell count and fraction of total per phenotype',
               table.attr='style="width: 30%"')

Cell and phenotype locations

All cells

# Colorbrewer Dark2 palette
dark2 = c('#1b9e77','#d95f02','#7570b3','#e7298a',
          '#66a61e','#e6ab02','#a6761d','#666666')
p = ggplot(csd, aes(x=`Cell X Position`, y=`Cell Y Position`, color=Phenotype))
p = add_scales_and_background(p, background, xlim, ylim)
p = p + geom_point() 
if (length(unique(csd$Phenotype<=8)))
  p = p + scale_color_manual(values=dark2)
p + labs(title='Locations of all cells')

p + facet_wrap(~Phenotype) + guides(color=FALSE)

Nearest neighbors, selected phenotypes

pair_counts = NULL
for (pair in pairs) {
  pheno1 = list(
    name=pair[1], color=colors[pair[[1]]], select=phenotype_rules[[pair[1]]])

  pheno2 = list(
    name=pair[2], color=colors[pair[[2]]], select=phenotype_rules[[pair[2]]])

  # Get point pattern datasets and distance matrices; direction matters...
  pheno_data1 = phenotype_as_ppp(csd, pheno1, window)
  pheno_data2 = phenotype_as_ppp(csd, pheno2, window)

  nn_dist12 = find_nearest_neighbor(pheno_data1, pheno_data2)
  nn_dist21 = find_nearest_neighbor(pheno_data2, pheno_data1)

  print(nn_plot(pheno_data1, pheno_data2, nn_dist12, background, xlim, ylim))
  print(nn_plot(pheno_data2, pheno_data1, nn_dist21, background, xlim, ylim))

  # Find mutual pairs by merging nn_dist12 and nn_dist21
  # First get just the nnDist21 cell ids and rename them to match nnDist12
  nn_mutual = nn_dist21[, c('Cell ID', 'To Cell ID')]
  names(nn_mutual) = c('To Cell ID', 'Cell ID')
  nn_mutual = merge(nn_mutual, nn_dist12)
  print(nn_plot_mutual(pheno_data1, pheno_data2, nn_mutual, background, xlim, ylim))

  pair_counts = dplyr::bind_rows(pair_counts, tibble::data_frame(
    From = pheno_data1$pheno$name,
    To = pheno_data2$pheno$name,
    'From Count' = nrow(pheno_data1$data),
    'To Count' = nrow(pheno_data2$data),
    Pairs = nrow(nn_mutual),
    'From Fraction' = round(Pairs/`From Count`, 3),
    'To Fraction' = round(Pairs/`To Count`, 3)
  ))
}