#' @title Compute Cy3 scaling factors using emprical reference
#'
#' @description
#' PBM arrays are scanned twice, once for Cy3-tagged dUTPs to quantify
#' dsDNA abundance at each probe, and again for the Alexa488-tagged
#' protein. The Cy3 scans can be used to first filter out probes which
#' appear to have poor dsDNA enrichment, and second, to scale Alexa488
#' intensities to account for differences in dsDNA abundance between
#' probes.
#'
#' Since probe intensities in the Cy3 scans are independent of the
#' assayed Alexa488-tagged protein, Cy3 scans should be comparable across
#' experiments. Based on this observation, a global Cy3 reference can be
#' constructed by taking the empirical average over a large number of Cy3
#' scans. Filtering and scaling can then be performed based on deviations
#' from this reference. Given a PBMExperiment of Cy3 scans and an
#' empirical reference generated by \code{cy3GenerateRef}, this function
#' returns the residuals from the reference, and the corresponding
#' observed-to-expected ratios for each probe as new assays added to the
#' original Cy3 PBMExperiment object.
#'
#' The returned PBMExperiment object can be passed to \code{cy3Normalize}
#' with a PBMExperiment of Alexa488 scan intensities
#' to filter low quality probes and/or normalize the Alexa488 intensities by the
#' computed ratios.
#'
#' @param pe a PBMExperiment object containing Cy3 intensity data.
#' @param refpe a PBMExperiment object containing Cy3 reference
#' intensities as a single assay named \code{"ref"}, most likely
#' generated by a call to \code{cy3GenerateRef}.
#' @param assay a numeric index or string specifying the Cy3 intensity assay.
#' (default = \code{SummarizedExperiment::assayNames(pe)[1]})
#' @param useMean a logical value whether to use the probe-level mean, rather than
#' the probe-level median, from the Cy3 reference (default = TRUE)
#' @param standardize a logical value whether to standardize residuals using
#' MAD (median absolute deviation about the median) intensity
#' computed for PBM Cy3 reference data for probe filtering. (default = TRUE)
#' @param threshold a numeric threshold on the absolute value of the log2 ratio between
#' observed and expected Cy3 intensities. If \code{standardize} is
#' specified, the log2 ratios are first scaled before comparing against
#' the threshold. In this case, the threshold is also scaled by the
#' median of the probe-level MAD intensities across all probes. (default = \code{1/2})
#' @param verbose a logical value whether to print verbose output during
#' analysis. (default = FALSE)
#'
#' @return
#' Original Cy3 PBMExperiment object with additional assays corresponding
#' to ratio of observed to expected probe intensities, and whether probes were
#' flagged as low-quality based on \code{abs(log2(ratio)) > threshold}.
#'
#' @seealso \code{\link{cy3GenerateRef}}, \code{\link{cy3Normalize}}
#' @importFrom dplyr as_tibble select bind_cols group_by ungroup left_join mutate one_of
#' @importFrom tidyr pivot_longer pivot_wider
#' @export
#' @author Patrick Kimes
cy3FitEmpirical <- function(pe, refpe, assay = SummarizedExperiment::assayNames(pe)[1],
useMean = TRUE, standardize = TRUE, threshold = 1/2,
verbose = FALSE) {
stopifnot(is(pe, "PBMExperiment"))
stopifnot(is(refpe, "PBMExperiment"))
## check validity of Cy3 empirical reference
stopifnot("ref" %in% SummarizedExperiment::assayNames(refpe))
stopifnot("sfactor" %in% names(metadata(refpe)))
stopifnot("params" %in% names(metadata(refpe)))
stopifnot("probe_mad" %in% colnames(refpe))
if (useMean) {
stopifnot("probe_mean" %in% colnames(refpe))
} else {
stopifnot("probe_median" %in% colnames(refpe))
}
if (verbose) {
cat("|| upbm::cy3FitEmpirical \n")
cat("|| - Starting calculation of Cy3 deviations from empirical reference",
"for", ncol(pe), "Cy3 PBM scans.\n")
}
if (verbose) {
cat("|| - Filtering probes according to", length(pe@probeFilter),
"probeFilter rule(s).\n")
ntotal <- nrow(pe)
}
## filter using rules
npe <- pbmFilterProbes(pe)
if (verbose) {
cat("|| - Data filtered from", ntotal, "probes to", nrow(npe), "probes.\n")
}
## determine overlap in probe columns
ovnames <- intersect(npe@probeCols, refpe@probeCols)
## check that merging shouldn't be a problem
if (length(ovnames) == 0L) {
stop("rowData of 'pe' and 'refpe' do not have any matching probe design columns.\n",
"Check that rowData contains sufficient probe design columns.\n",
"'pe' rowData probeCols: ", paste0(pe@probeCols, collapse = ", "), "\n",
"'refpe' rowData probeCols: ", paste0(refpe@probeCols, collapse = ", "))
}
## determine row/probe metadata for Cy3 scans
rowdat1 <- as.data.frame(rowData(npe), optional = TRUE)
rowdat1 <- dplyr::as_tibble(rowdat1)
rowdat1 <- dplyr::select(rowdat1, one_of(ovnames))
## check 'pe' rowData alone is alright
if (nrow(rowdat1) > nrow(dplyr::distinct(rowdat1))) {
stop("rowData of 'pe' cannot be used to uniquely identify probes.\n",
"Check that rowData contain probe design columns ",
"which are unique to each probe (after filtering).")
}
## determine row/probe metadata for Cy3 reference
rowdat2 <- as.data.frame(rowData(refpe), optional = TRUE)
rowdat2 <- dplyr::as_tibble(rowdat2)
rowdat2 <- dplyr::select(rowdat2, one_of(ovnames))
## check 'refpe' rowData alone is alright
if (nrow(rowdat2) > nrow(dplyr::distinct(rowdat2))) {
stop("rowData of 'refpe' cannot be used to uniquely identify probes.\n",
"Check that rowData contain probe design columns ",
"which are unique to each probe (after filtering).")
}
## extract intensities for Cy3 scans
pdat1 <- SummarizedExperiment::assay(npe, assay)
pdat1 <- as.data.frame(pdat1, optional = TRUE)
pdat1 <- dplyr::as_tibble(pdat1)
pdat1 <- dplyr::bind_cols(pdat1, rowdat1)
pdat1 <- tidyr::pivot_longer(pdat1, names_to = "condition",
values_to = "intensity", colnames(npe))
## extract intensities for Cy3 reference
pdat2 <- SummarizedExperiment::assay(refpe, "ref")
pdat2 <- as.data.frame(pdat2, optional = TRUE)
pdat2 <- dplyr::as_tibble(pdat2)
pdat2 <- dplyr::bind_cols(pdat2, rowdat2)
## join observed and reference tables
pdat <- dplyr::left_join(pdat1, pdat2, by = ovnames)
if (nrow(pdat) < nrow(pdat1)) {
stop("rowData of 'pe' and 'refpe' could not be used to fully match all probes in 'pe'.\n",
"Check that rowData contains sufficient probe design columns.\n",
"'pe' rowData probeCols: ", paste0(pe@probeCols, collapse = ", "), "\n",
"'refpe' rowData probeCols: ", paste0(refpe@probeCols, collapse = ", "))
}
if (verbose) {
cat("|| - Using reference mean intensity for calculation.\n")
}
## set the probe reference
if (useMean) {
pdat$probe_ref <- pdat$probe_mean
} else {
pdat$probe_ref <- pdat$probe_median
}
## verify that most values aren't NAs
pna <- mean(is.na(pdat$probe_ref))
if (pna > 0.2) {
stop("After merging observed and reference Cy3 intensities, > 20% of",
"probes have NA reference intensities.\n",
"Percent NAs = ", 100*round(pna, 4), "%.\n",
"Please check 'refpe' again before proceeding.")
} else if (pna > 0.01) {
warning("After merging observed and reference Cy3 intensities, > 1% of",
"probes have NA reference intensities.\n",
"Percent NAs = ", 100*round(pna, 4), "%.\n")
}
## register samples if Cy3 ref was computed with registered scans
if (metadata(refpe)$params$register) {
if (verbose) {
cat("|| - Using registered Cy3 intensities for calculation.\n")
}
pdat <- dplyr::group_by(pdat, condition)
pdat <- dplyr::mutate(pdat, sintensity = intensity / median(intensity, na.rm = TRUE))
pdat <- dplyr::ungroup(pdat)
pdat <- dplyr::mutate(pdat, sintensity = sintensity * metadata(refpe)$sfactor)
} else {
if (verbose) {
cat("|| - Using unregistered Cy3 intensities for calculation.\n")
}
pdat <- dplyr::mutate(pdat, sintensity = intensity)
}
## log2 tranform and compute ratios
pdat <- dplyr::mutate(pdat, sintensity = log2(sintensity + metadata(refpe)$params$offset))
pdat <- dplyr::mutate(pdat, pscores = sintensity - probe_ref)
pdat <- dplyr::mutate(pdat, pratios = 2^pscores)
## scale threshold cutoff if standardize specified
if (standardize) {
if (verbose) {
cat("|| - Using studentized/standardized deviations for filtering.\n")
}
median_mad <- median(SummarizedExperiment::assay(refpe, "ref")$probe_mad, na.rm = TRUE)
pdat <- dplyr::mutate(pdat, pscores = pscores / probe_mad * median_mad)
} else if (verbose) {
cat("|| - Using non-studentized/standardized deviations for filtering.\n")
}
## identify outlier probes
pdat <- dplyr::mutate(pdat, pdrop = abs(pscores) > threshold)
## create assays
pexps <- dplyr::select(pdat, one_of(ovnames), condition, probe_ref)
pexps <- dplyr::mutate(pexps, probe_ref = 2^probe_ref - metadata(refpe)$params$offset)
pexps <- tidyr::pivot_wider(pexps, names_from = condition, values_from = probe_ref)
pratios <- dplyr::select(pdat, one_of(ovnames), condition, pratios)
pratios <- tidyr::pivot_wider(pratios, names_from = condition, values_from = pratios)
pscores <- dplyr::select(pdat, one_of(ovnames), condition, pscores)
pscores <- tidyr::pivot_wider(pscores, names_from = condition, values_from = pscores)
plowq <- dplyr::select(pdat, one_of(ovnames), condition, pdrop)
plowq <- tidyr::pivot_wider(plowq, names_from = condition, values_from = pdrop)
## left join to original rowData to get full set
full_rowdat <- as.data.frame(rowData(pe), optional = TRUE)
full_rowdat <- dplyr::as_tibble(full_rowdat)
full_rowdat <- dplyr::select(full_rowdat, one_of(ovnames))
pexps <- dplyr::left_join(dplyr::select(full_rowdat, one_of(ovnames)), pexps, by = ovnames)
pratios <- dplyr::left_join(dplyr::select(full_rowdat, one_of(ovnames)), pratios, by = ovnames)
pscores <- dplyr::left_join(dplyr::select(full_rowdat, one_of(ovnames)), pscores, by = ovnames)
plowq <- dplyr::left_join(dplyr::select(full_rowdat, one_of(ovnames)), plowq, by = ovnames)
pexps <- DataFrame(pexps, check.names = FALSE)
pexps <- pexps[, rownames(colData(pe)), drop = FALSE]
pratios <- DataFrame(pratios, check.names = FALSE)
pratios <- pratios[, rownames(colData(pe)), drop = FALSE]
pscores <- DataFrame(pscores, check.names = FALSE)
pscores <- pscores[, rownames(colData(pe)), drop = FALSE]
plowq <- DataFrame(plowq, check.names = FALSE)
plowq <- plowq[, rownames(colData(pe)), drop = FALSE]
## add to PBMExperiment object
SummarizedExperiment::assay(pe, "expected") <- pexps
SummarizedExperiment::assay(pe, "ratio") <- pratios
SummarizedExperiment::assay(pe, "scores") <- pscores
SummarizedExperiment::assay(pe, "lowq") <- plowq
if (verbose) {
cat("|| - Finished calculation of Cy3 deviations.\n")
cat("|| - Returning PBMExperiment with", nrow(pe), "rows and", ncol(pe), "columns.\n")
}
return(pe)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.