R/pcr_utils.R

Defines functions flag_samples pal_pcr_flag find_plates

Documented in find_plates flag_samples pal_pcr_flag

# PCR utility functions

#' A QC function for flagging technical replicate issues.
#'
#' Pass bare columns to `...` to specify the technical replicate grouping.
#'
#' @param data Data frame. Must have the columns: target, contents.
#' @param ... Grouping columns specified as bare column names.
#' Using `target` and `contents` is always reccomended. Use additional information
#' as needed, like `primer_tech` and `cell_type`.
#' @param .max_cq Numeric. The maximum Cq value considered detectable.
#' @param .keep Boolean. Should intermediate QC related columns be kept in the
#' returned data frame.
#' @return Data frame. `data` with at least a new column `flag`, with names
#' that match `pcr_pal_flag()`.
#' @examples
#' \dontrun{
#' samps %>% flag_samples()
#'
#' # lower the 'max-cq' cutoff
#' samps %>% flag_samples(max_cq = 30)
#'
#' # don't return intermediate columns
#' samps %>% flag_samples(keep = FALSE)
#'
#' # change goruping if multiple cell types and primer technolgies are present for each sample
#' samples %>% flag_samples(cell_type, primer_tech)
#' }
#' @importFrom rlang ensyms eval_tidy
#' @importFrom purrr map_df
#' @importFrom dplyr case_when select
#' @export
flag_samples <- function(data, ..., .max_cq = 35, .keep = TRUE) {
  vars <- rlang::ensyms(...)

  vars <- purrr::map(vars, function(x) rlang::eval_tidy(x, data))

  data <- split(data, vars, drop = TRUE) %>%
    purrr::map_df(~mutate(.,
      bllod = dplyr::case_when(
        any(cq > .max_cq) || any(is.na(cq)) ~ "Out of Range",
        TRUE ~ "In Range"
      ),
      cq_distance = abs(cq[1] - cq[2]),
      flag = dplyr::case_when(
        bllod == "Out of Range" ~ "Non-detectable",
        cq_distance > .5 ~ "High technical variation",
        TRUE ~ "Detectable"
      )
    ))

  if (!.keep) data <- dplyr::select(data, -cq_distance, -bllod)

  data
}

#' Get a named palette for coloring samples by flag in a PCR experiment.
#'
#' Designed to recoginized labels from `flag_samples`.
#'
#' @md
#' @return Named character vector for use with scale_color_manual
#' @examples
#' \dontrun{
#' ggplot(pcr_data, aes(tx, cq, color = flag)) +
#'   geom_point(position = "jitter") +
#'   scale_color_manual(values = pal_pcr_flag)
#' }
#' @export pal_pcr_flag
pal_pcr_flag <- function() {
  c(
    `Detectable` = "#000000FF",
    `High technical variation` = "#FF7F0EFF",
    `Non-detectable` = "#D62728FF"
  )
}


#' Find 96-well plate layouts in a data frame
#' 
#' @param d data.frame where plate layouts can be located by a set
#'  of row identifiers (A-H) and column identifiers (1-12). 
#' @return list of three elements: `layout`, `row`, and `column`. `layout` is
#'  a list of `data.frame` objects, one for each plate found in the layout. 
#'  `row` and `column` are numeric vectors that indicate which row(s) and column(s)
#'  contain the top-left cells for tables in a plate layout.
#' @export 
#' @examples 
#' \dontrun{
#' d <- read.xlsx('myexcel.xlsx', skipEmptyCols=FALSE,
#'     skipEmptyRows=FALSE, colNames=FALSE)
#' pl <- find_plates(d)
#' }
find_plates <- function(d) {
  row_map <- apply(d, c(1, 2), function(x) (trimws(x) %in% LETTERS[1:8]))
  col_map <- apply(d, c(1, 2), function(x) (gsub('\\.0+', '', trimws(x)) %in% 1:12))
  # Find corner of each plate
  coord_map <- row_map + col_map
  find_pivot <- function(x, subseq) {
    x %<>% as.numeric
    i <- 1:(length(x) - length(subseq) + 1)
    #map(i, ~ .:(.+length(subseq) - 1))
    map_lgl(i,
            ~ all(subseq == x[.:(.+length(subseq) - 1)] |
                    any(grepl('PLATE', x[.:(.+length(subseq) - 1)]))
            ))
  }
  subseqs <- list(
    c(0, rep(1, 8)),
    c(0, rep(1, 12))
  )
  
  col_pivots <- purrr::map(1:ncol(row_map),
                    ~ find_pivot(row_map[,.],
                                 subseq=subseqs[[1]]) %>%
                      which)
  names(col_pivots) <- 1:ncol(row_map)
  col_pivots %<>% Filter(function(x) (length(x) > 0), .)
  # Simplifying assumption that plates are only stacked vertically
  if (length(col_pivots) > 1) {
    col_pivot <- list(col_pivots[[1]])
    names(col_pivot) <- names(col_pivots)[1]
  }
  row_pivots <- purrr::map(1:nrow(col_map),
                    ~ find_pivot(col_map[.,] %>% as.numeric,
                                 subseq=subseqs[[2]]) %>%
                      which)
  names(row_pivots) <- 1:nrow(coord_map)
  row_pivots %<>% Filter(function(x) (length(x) > 0), .)
  if (length(row_pivots) == 0) {
    stop("Cannot find any 8 x 12 plate layouts.")
  }
  row_index <- intersect(names(row_pivots), col_pivots[[1]])
  col_index <- intersect(names(col_pivots), row_pivots[[1]])
  pivots <- list()
  for (i in 1:length(row_index)) {
    pivots[[i]] <- c(row_index[i], col_index)
  }
  layout_pl <- list()
  lmap <- 1:8
  names(lmap) <- LETTERS[1:8]
  if (length(pivots) > 0) {
    k <- 1
    for (pivot in pivots) {
      layout_pl[[k]] <- d[(pivot[1] + 1):(pivot[1] + 8),
                          (pivot[2] + 1):(pivot[2] + 12)] %>%
        assayr2::melt_plate() %>%
        dplyr::filter(!is.na(content)) %>%
        dplyr::mutate(plate = k) %>%
        dplyr::mutate(orig_row = lmap[row] + row_index[k],
                      orig_col = as.numeric(column) + col_index,
                      orig_coord = paste0(orig_row, '.', orig_col))
      k <- k + 1
    }
    list(layout = layout_pl,
         column = col_index,
         row = row_index)
  }
}

#' Add targets to data.frame given a color legend
#' 
#' WARNING: Requires xlsx, which is not only suggested by assayr2
#' @param d File path to xlsx file that was consumed by find_plates.
#' @param plates list Output of find_plates.
#' @param infile Character vector representing 
#'     input file that was used to produce plates.
#' @param sheet Integer indicating which sheet should be read. 
add_targets <- function(d, plates, infile, sheet) {
  # Read sheet with xlsx for getting colors =======
  wb <- xlsx::loadWorkbook(infile)
  sheet <- getSheets(wb)[[sheet]]
  rows <- xlsx::getRows(sheet)
  cells <- xlsx::getCells(rows)
  styles <- sapply(cells, getCellStyle)
  cellColor <- function(style) {
    fg  <- style$getFillForegroundXSSFColor()
    rgb <- tryCatch(fg$getRgb(), error = function(e) NULL)
    rgb <- paste(rgb, collapse = "")
    return(rgb)
  }
  colors <- purrr::map(styles, ~ cellColor(.))
  for (i in 1:length(plates[[1]])) {
    row_range <- plates$row[i]:(plates$row[i] + 8)
    col_range <- (plates$column + 13):ncol(d)
    coords <- expand.grid(row_range, col_range) %>%
      dplyr::mutate(coord = paste0(Var1, '.',Var2)) %>%
      dplyr::select(coord) %>%
      .$coord
    color_coords <- sapply(coords, function(x) (colors[[x]])) %>%
      Filter(function(x) (is.character(x)), .) %>%
      Filter(function(x) (x != ""), .)
    # These coordinates in the legend have color
    # color_coords <- sapply(coords, function(x) (colors[[x]])) 
    cell_values <- sapply(cells, getCellValue)
    color_values <- sapply(names(color_coords), function(x) (cell_values[[x]]))
    
    color_map <- color_values
    names(color_map) <- color_coords
    plates[[1]][[i]]$orig_coord %>% unique
    which(!(plates[[1]][[i]]$orig_coord %in% names(colors)))
    mis <- c()
    plates[[1]][[i]]$target <- purrr::map_chr(plates[[1]][[i]]$orig_coord, 
                                              ~ tryCatch({
                                                color_map[[colors[[.]]]]},
                                                error = function(e) {
                                                  mis <<- c(mis, .)
                                                  NA
                                                }))
  }
  if (length(mis) > 0) {
    warning(paste0("Could not find targets for coordinates: ", paste0(mis, collapse=', ')))
  }
  layouts <- map(plates[[1]], ~ dplyr::select(., content, row, column, plate, target))
  layouts
}


#' Read color PCR layouts
#' 
#' Read PCR layouts where targets have been encoded as a background cell color.
#' Warning: Requires xlsx package, which assayr2 only suggests and does not require. 
#' @param infile character Excel file ('.xlsx')
#' @param sheet integer representing sheet number
#' @export
read_color_layouts <- function(infile, sheet=1) {
  d <- openxlsx::read.xlsx(infile,
                           skipEmptyRows=FALSE,
                           skipEmptyCols=FALSE,
                           colNames=FALSE, sheet=sheet)
  plates <- find_plates(d)
  add_targets(d, plates, infile, sheet)
}
hemoshear/assayr2 documentation built on Nov. 8, 2019, 6:13 p.m.