assayr2: Tools for analyzing assay data

Documented in mp_fit_standards

#' Fit a 4-parameter log-logistic model for each analyte in
#'  a MagPix kit
#'
#' @md
#' @param d Output from `assayr2::()`
#' @param wells_to_omit If not NA, the standard wells to remove.
#' @return list with 4 elements: data, curves, models, and omitted data
#' @examples
#' \dontrun{
#' file <- system.file("extdata", "Magpix_example.csv", package = "assayr2")
#' d <- read_magpix(file)
#' sc <- fitStandardCurves(d)
#' }
#' @importFrom dplyr group_by filter inner_join mutate select_if
#' @importFrom tidyr nest unnest
#' @importFrom purrr map
#' @importFrom drc drm
#' @export
mp_fit_standards <- function(d, wells_to_omit=NA) {
  model_data <- tibble::as.tibble(d[[2]]) %>%
    dplyr::group_by(target) %>%
    tidyr::nest()
  obs_standard <- dplyr::filter(d[[1]], grepl("Standard", content))
  
  # Remove cases with 0 intensity
  to_remove <- dplyr::filter(obs_standard, is.infinite(log10_intensity))
  if (nrow(to_remove) > 0) {
    msg <- paste0("Removed ", nrow(to_remove), " standard observations for 0 signal")
    warning(msg)
    obs_standard <- dplyr::filter(obs_standard, !(is.infinite(log10_intensity)))
  }
  # Remove cases specified by user
  if (!is.na(wells_to_omit)) {
    obs_standard %<>% dplyr::filter(!well %in% wells_to_omit)
  }

  standard_data <- dplyr::inner_join(d[[2]], obs_standard) %>%
    tibble::as.tibble() %>%
    dplyr::group_by(target) %>%
    tidyr::nest() %>% 
    dplyr::mutate(model = purrr::map(data, 
                                     ~ drc::drm(log10_intensity ~ concentration, data = ., fct = drc::LL.4())),
                  curves = purrr::map(model, 
                                      ~ get_curves(.)))
  
  models <- standard_data$model
  names(models) <- standard_data$target
  
  # returns a 4 element list
  list(
    data = tidyr::unnest(standard_data, data) %>% 
      dplyr::select_if(~ !is.list(.)),
    curves = tidyr::unnest(standard_data, curves) %>% 
      dplyr::select_if(~ !is.list(.)),
    models = models,
    omitted_standards = to_remove
  )
}

#' Predict analyte concentrations from observed intensities
#' 
#' @md
#' @param d Output from `assayr2::readMagPix()` 
#' @param sc Output from `assayr2::fitStandardCurves()`
#' @return A `data.frame` of samples processed with predicted concentrations, QC flags and target ID.
#' @examples
#' file <- system.file("extdata", "Magpix_example.csv", package = "assayr2")
#' magpix <- read_magpix(file)
#' sc <- mp_fit_standards(magpix)
#' samples <- mp_predict_concentrations(magpix, sc)
#' @importFrom dplyr filter group_by summarise inner_join mutate select_if
#' @importFrom tidyr nest unnest
#' @importFrom tibble tibble
#' @export
mp_predict_concentrations <- function(d, sc) {
  # flag samples below LLoQ
  bounds <- dplyr::filter(d[[1]], 
                                 grepl("Standard", 
                                       content), intensity > 0) %>%
    dplyr::group_by(target) %>%
    dplyr::summarise(min_standard_intensity = min(intensity),
                     max_standard_intensity = max(intensity))
  
  d[[1]] %<>% dplyr::inner_join(bounds) %>%
    dplyr::mutate(below_lloq = intensity < min_standard_intensity,
                  above_uloq = intensity > max_standard_intensity) %>%
    dplyr::mutate(flag = ifelse(below_lloq, "Below Range", "In Range")) %>%
    dplyr::mutate(flag = ifelse(above_uloq, "Above Range", flag))
  
  tmp <- tibble::tibble(sc$models)
  
  dplyr::filter(d[[1]], grepl("Unknown", content)) %>%
    as.data.frame() %>%
    dplyr::group_by(target) %>%
    tidyr::nest() %>%
    dplyr::inner_join( tibble::tibble(target = names(sc$models), model = sc$models) ) %>%
    dplyr::mutate(data = purrr::map2(data, model,
      ~ dplyr::mutate(.x,
                      pg_ml = inverse_predict(.y, .x$log10_intensity),
                      lloq_concentration = inverse_predict(.y, log10(.x$min_standard_intensity))))) %>%
    tidyr::unnest(data) %>% 
    dplyr::select_if(~ !is.list(.))
}

#' High level wrapper to process raw MagPix files.
#' 
#' @param infiles Character vector denoting file paths for raw data files
#' @param plate_ids Character vector of plate identifiers (since sometimes, it is not always in order)
#' @param concentrations Optional argument, list where keys are analyte identifiers and values
#'   are numeric vectors indicating standard concentrations used.
#' @param wells_to_omit If not NA, a list or equal length to plate_ids with the
#' standard wells to remove. Elements are passed to `mp_fit_standards(wells_to_omit=...)`
#' individually, so an element of `NA` will result in no wells being dropped
#' from the corresponding plate.
#' @param value Name of the absorbance value to use ('Median' or 'Net MFI').
#' @return MagPix object
#' @examples
#'  \dontrun{
#'  file <- system.file("extdata", "Magpix_example.csv", package = "assayr2")
#'  infiles <- c(file, file)
#'  d <- mp_express(infiles, 1:2)
#' # If the line above fails, then we might need to pass an extra argument
#' # for concentrations we can see in the template file
#'  d <- mp_express(files, 1:2, list('BMP-10' = c(3250, 1083, 361, 120, 40, 13, 4)))
#' }
#' @importFrom assertthat assert_that
#' @importFrom evaluate evaluate
#' @importFrom purrr map map_lgl map2 map2_df
#' @importFrom dplyr mutate
#' @importFrom ggplot2 ggplot geom_line geom_rug scale_x_log10 scale_y_log10 annotation_logticks facet_wrap labs theme element_text geom_text geom_point
#' @export
mp_express <- function(infiles, plate_ids, concentrations, wells_to_omit=NA,
                       value='Net MFI') {
  if (missing(concentrations)) {
    concentrations <- list()
  }
  # unique plate identifiers should be present for each infile
  assertthat::assert_that(length(infiles) == length(plate_ids),
                          msg = 'Length of infiles should be the same as the number of plate identifers')
  
  # See if the standard data in the file is correct
  a <- evaluate::evaluate("temp <- purrr::map(infiles, ~ assayr::readMagPix(.))")
  missing_standards <- purrr::map(a, ~ .$message) %>%
  { Filter(function(x) (!(is.null(x))), .)}
  # If the standard data are incorrect and the user has not provided
  #  an alternative vector, then print the problematic analytes to console
  if (any(purrr::map_lgl(missing_standards, 
                     ~ grepl('Invalid standard concentrations', .))) && 
      length(concentrations) == 0) {
    for (i in 1:length(missing_standards)) {
      if (grepl('Invalid standard concentrations', missing_standards[i])) {
        cat(paste0(missing_standards[[i]], '\n'))
      }
    }
    stop("Raw standard data are missing/incorrect. Pass in concentrations as a named list, 
         where the keys are analytes and elements are numeric vectors.")
  }
  # Read the raw data if the data are correct and/or the user 
  #  has provided vectors for the problematic analytes
  d <- purrr::map(infiles, ~ read_magpix(., value=value))
  # Change concentrations to those that user provides
  if (length(concentrations) > 0) {
    for (j in 1:length(concentrations)) {
      this_analyte <- names(concentrations)[j]
      for (i in 1:length(d)) {
        d[[i]][[2]]$concentration[which(d[[i]][[2]]$target == this_analyte)] <- 
          concentrations[[this_analyte]]
      }
    }
  }
  # Fit standard curves
  sc <- tryCatch({purrr::map2(d, wells_to_omit, 
                              ~ mp_fit_standards(.x, wells_to_omit = .y))},
                 error = function(e) {NA})
  if (is.na(sc)) {
    stop("Could not fit standard curves. Try passing in a concentration list for the problematic analyte(s).")
  }
  # Predict concentration from intensity and add plate ids
  sample_data <- purrr::map2(d, sc, 
                             ~ mp_predict_concentrations(.x, .y)) %>%
    purrr::map2_df(plate_ids,
                   ~ dplyr::mutate(.x, plate = .y))
  # Collapse standard curve data and data.frame with curves to plot
  sc_data <- purrr::map2_df(sc, plate_ids, 
                            ~ .x$data %>% dplyr::mutate(plate = .y))
  sc_curves <- purrr::map2_df(sc, plate_ids, 
                              ~ .x$curves %>% dplyr::mutate(plate = .y))
  
  list(sample = sample_data, sc_data = sc_data, curves = sc_curves)
  }


.standard_curve_plot <- function(sample_data, standard_data, standard_curves,
                                 geom, title, subtitle) {
  
  p <- ggplot2::ggplot(standard_data, ggplot2::aes(x=concentration, y=intensity)) +
    ggplot2::geom_line(data=standard_curves, aes(y=10^log10_intensity)) + 
    ggplot2::geom_rug(data=sample_data, ggplot2::aes(x=pg_ml),
                      color='red') + 
    ggplot2::scale_x_log10() +
    ggplot2::scale_y_log10() +
    ggplot2::annotation_logticks() +
    theme_assayr() +
    ggplot2::facet_wrap(~ plate) +
    ggplot2::labs(x="Standard Concentration (pg/ml)",
                  y="Intensity", title=title,subtitle=subtitle) +
    ggplot2::theme(axis.text.x = element_text(size=rel(1.5)),
                   axis.text.y = ggplot2::element_text(size=rel(1.5)),
                   axis.title.y = ggplot2::element_text(size=rel(1.5)),
                   axis.title.x = ggplot2::element_text(size=rel(1.5)))
  if (geom == 'point') {
    p + ggplot2::geom_point()
  } else {
    p + ggplot2::geom_text(aes(label=paste(plate, well)))
  }
}


#' Generate a list of standard curve plots, where each element is a plot for one analyte
#' @param magpix MagPix object generated by magPixExpress.
#' @param title Character vector for plot title.
#' @param geom Control the geom used. If "point" will be `geom_point()`, if anything
#' else will be `geom_text()`.
#' @param chunk_size Number of analytes that should be displayed in each plot
#' @importFrom dplyr filter
#' @importFrom ggplot2 facet_wrap
#' @export
mp_plot <- function(magpix, title, chunk_size, geom) {
  sanalytes <- sort(unique(magpix[[1]]$target)) 
  if (chunk_size > 1) {
    sanalytes <- split(sanalytes, ceiling(seq_along(sanalytes) / chunk_size))
  }
  plots <- vector("list", length(sanalytes))
  k <- 1
  for (analyte in sanalytes) {
    these_sample_data <- dplyr::filter(magpix[[1]], target %in% analyte)
    these_std_data <- dplyr::filter(magpix[[2]], target %in% analyte)
    these_std_curves <- dplyr::filter(magpix[[3]], target %in% analyte)
    plots[[k]] <- .standard_curve_plot(these_sample_data, these_std_data, these_std_curves,
                                       geom=geom, title = title, subtitle = analyte)
    if (is.list(sanalytes)) {
      plots[[k]] <- plots[[k]] + ggplot2::facet_wrap(target ~ plate) + 
        labs(subtitle=NULL)
    }
    k <- k + 1
  }
  plots
}

#' Get character vector of analytes measured in 
#'   MagPix assay
#'   
#' @param x A data frame of MagPix data
#' @return A character vector of detected analytes
#' @export
analytes <- function(x) {
  sort(unique(x$sample$target))
}

#' Add metadata to a MagPix object
#' 
#' @md
#' @param x Magpix object
#' @param templates data.frame with columns found in device layout table
#'    in addition to : `time`, `sample_id`, `plate`, and  `well`.
#'   `time` represents the number of days which proteins are allowed to accumulate
#'   `sample_id` is an intermediate mapping from device to well
#'   `plate` is an identifier for the assay plate
#'   `well` is a string denoting a unique position on the assay plate
#' @return MagPix object with metadata added to the `sample` attribute
#'     and a new attribute `media_blanks`, which is a data.frame of the media
#'     blanks run on the plates. `time` is standardized to read as 'Day x' for 
#'     consistency. 
#' @importFrom assertthat assert_that
#' @importFrom dplyr filter left_join
#' @export
mp_add_metadata <- function(x, templates) {
  # Normalize data 
  assertthat::assert_that(
    'time' %in% colnames(templates),
    msg = '`time` must be a column in `templates`.')
  templates$time %<>% gsub("day", "Day", .) %>% trimws
  templates$time %<>% factor(levels = c("Day 7", "Day 10"))
  assertthat::assert_that(
    all(c('plate', 'well') %in% colnames(templates)),
    msg = '`templates` is missing columns `plate` and `well` to join on.')
  x$sample %<>% dplyr::left_join(templates)
  if ('device' %in% colnames(templates)) {
    x$media_blanks <- dplyr::filter(x$sample, device %in% LETTERS)
    assertthat::assert_that(nrow(x$media_blanks) > 0,
                            msg = 'No media blanks found in templates.')
    x$sample %<>% dplyr::filter(!(device %in% LETTERS)) 
  } else {
    x$media_blanks <- NA
  }
  x
}

#' Print method for MagPix objects
#' @param x A data frame of MagPix data
#' @export
mp_print <- function(x) {
  cat(paste0(length(analytes(x)), ' unique analytes.\n'))
  cat(paste0(length(unique(x$sample$plate)), ' plates.\n'))
  if ('time' %in% colnames(x$sample)) {
    cat(paste0(length(unique(x$sample$time)), ' unique time points.'))
  } else {
    cat("No metadata added. See ?addMetadata()\n")
  }
}

#' A color pallete for flagging MagPix samples
#' 
#' Uses `ggsci::pal_d3()`
#' @md
#' @importFrom ggsci pal_d3
#' @export
pal_magpix_flag <- function() {
  x <- ggsci::pal_d3()(4)
  x <- x[c(1, 2, 4)]
  names(x) <- c('In Range', 'Below Range', 'Above Range')
  x
}  

#' Generate one or multiple dotplots for a single analyte in a MagPix assay
#' 
#' @md
#' @param x Magpix object
#' @param mapping ggplot2 mapping created with `aes()`, must include x, y, and color aesthetics
#' @param analyte String denoting analyte to be plotted
#' @param facets formula for faceting
#' @param geom can be 'point' or 'text'
#' @param color Needs to be flag if color is mapped to flag
#' @param x_text_angle Angle of text on the x axis
#' @param title Title of the plot
#' @param label_pos Where should the labels go?
#' @param data_geom_size Passed onto `geom_point(size = ...)`
#' @param label_geom_size Passed onto `geom_label(size = ...)`
#' @param log_y_axis Boolean, should the y-axis be logged via `+ scale_y_log10()`?
#' @param chunk_size Integer, the number of analytes plotted in each plot.
#' @param text_label Column in `x` for mapping to `aes(label= ...)`.
#' @name mp_dotplot
#' @importFrom dplyr filter mutate
#' @importFrom ggplot2 ggplot scale_color_manual theme scale_shape_manual scale_shape_discrete stat_summary labs geom_line geom_point aes position_dodge
#' @importFrom ggsci scale_color_d3
#' @export
mp_dotplot <- function(x, mapping, analyte, facets, geom='point',
                           color='flag', x_text_angle=0,
                           title='', label_pos='top',
                           data_geom_size=3.5, label_geom_size=3.5,
                           log_y_axis=F, text_label='device') {
  d <- dplyr::filter(x$sample, target %in% analyte)
  # Make paste of device and plate for plotting technical replicates
  d %<>% dplyr::mutate(device_plate = paste0(device, plate))
  
  # Color plots by QC flags 
  if (color == 'flag') {
    p <- ggplot2::ggplot(d, mapping) +
      ggplot2::scale_color_manual(name=NULL, values=pal_magpix_flag()) +
      ggplot2::theme(legend.position = 'top')
  } else if ('color' %in% names(mapping)) { # Other color scheme
    print('color in aes')
    p <- ggplot2::ggplot(d, mapping) +
      ggsci::scale_color_d3(name=NULL, 'category20')
  } else {
    p <- ggplot2::ggplot(d, mapping) 
  }
  if (x_text_angle > 0) {
    p <- p + ggplot2::theme(axis.text.x = ggplot2::element_text(angle=x_text_angle, vjust=1, hjust=1))
  }
  # Add custom shape pal 
  if ('shape' %in% names(mapping)) {
    if (mapping['shape'] == 'flag') {
      p <- p + ggplot2::scale_shape_manual(name=NULL, 
                                           values = c(`Above Range` = 2,
                                                      `In Range` = 1,
                                                      `Below Range` = 3))
    } else {
      p <- p + ggplot2::scale_shape_discrete(name=NULL)
    }
  }
  # Facet by user-specified formula
  if (!(missing(facets))) {
    p <- p + facets
  }
  if (length(analyte) == 1) {
    # Add label based on length of `analyte` 
    p <- p + ggplot2::labs(y= paste0('[', unique(d$target), '] (pg/ml)'),
                           x=NULL, subtitle=unique(d$target), title=title)
  } else {
    p <- p + ggplot2::labs(y='[Analyte] (pg/ml)', x=NULL,
                           title=title)
  }
  
  # Add geom for data    
  if (geom == 'point') {
    p <- p + 
      ggplot2::geom_point(ggplot2::aes(group=device_plate), size=data_geom_size,
                          position = ggplot2::position_dodge(width=0.5)) +
      ggplot2::geom_line(ggplot2::aes(group=device_plate),
                         position = ggplot2::position_dodge(width=0.5)) 
  } else if (geom == 'text') {
    if (text_label == 'device') {
      p <- p +
        ggplot2::stat_summary(fun.y = mean, ggplot2::aes(group=device_plate, label=device),
                              geom='text', position = ggplot2::position_dodge(width=0.3))
    }
    else if (text_label == 'plate_well') {
      p <- p +
        ggplot2::geom_text(
          ggplot2::aes(group=device_plate, label=paste(plate, "-", well)),
          position = ggplot2::position_dodge(width=0.3))
    }
    else {
      stop("if geom == 'text', then text_label must be 'device' or 'plate_well'")
    }
  }
  # Add confidence intervals and p-values if they have been computed
  if ('stats' %in% attributes(x)$names) {
    # only draw stats if they exist and at least one CI exists
    s <- dplyr::filter(x$stats, target %in% analyte)
    v <- dplyr::filter(x$vehicle_means, target %in% analyte)
    if (label_pos == 'top') {
      y_val <- Inf
      vjust_val <- 1.2
    } else {
      y_val <- -Inf
      vjust_val <- -0.4
    }
    if (nrow(v) > 0) {
      p <- p +
        ggplot2::geom_crossbar(data=s, ggplot2::aes(ymin=lwr, y=est, ymax=upr,
                                                    color=NULL)) +
        ggplot2::geom_hline(data=v, ggplot2::aes(yintercept=est, color=NULL),
                            linetype=3) +
        ggplot2::geom_label(data=s, ggplot2::aes(label = label, y=y_val,
                                                 color=NULL),
                            alpha=0.6, vjust=vjust_val,
                            size = label_geom_size) +
        ggplot2::scale_y_continuous(expand = c(a=0.05, b=0.05, c=0.2, d=0.05))
    }
  }
  # Log y-axis
  if (log_y_axis) {
    p <- p + 
      ggplot2::scale_y_log10(expand = c(a=0.05, b=0.05, c=0.2, d=0.05)) +
      ggplot2::annotation_logticks(sides='l')
  }
  p
}


#'@rdname mp_dotplot
#'@export
mp_dotplots <- function(x, mapping, facets, geom='point', color='flag', 
                            x_text_angle=0, title='', label_pos='top', chunk_size=1,
                            data_geom_size = 3.5, label_geom_size = 3.5,
                            log_y_axis=F, text_label='device') {
  plots <- list()
  if (chunk_size == 1) {
    for (i in 1:length(analytes(x))) {
      plots[[i]] <- mp_dotplot(x, mapping, analytes(x)[i], 
                            facets, geom, color, 
                            x_text_angle, title, label_pos,
                            data_geom_size, label_geom_size,
                            log_y_axis=F, text_label)
    }
  } else {
    chunks <- split(analytes(x), ceiling(seq_along(analytes(x))/chunk_size))
    for (i in 1:length(chunks)) {
      plots[[i]] <- mp_dotplot(x, mapping, chunks[[i]],
                            facets, geom, color,
                            x_text_angle, title, label_pos,
                            data_geom_size, label_geom_size,
                            log_y_axis, text_label)
    }
  }
  plots
  
}