#' Merge Phenotype and Image Data
#'
#' Merge "phenotype" data describing the samples in an image data.frame.
#'
#' @param phenoData A \code{data.frame} containing variables for the multiplicity,
#' typically \code{moi} and either \code{well} or \code{file} associated with
#' each unique multiplicity.
#' @param imageData A \code{data.frame} generated by \code{\link{parseImages}}.
#' @param moi Character name for multiplicity variable, either "moi" or "x".
#' Only consulted when \code{phenoData} has no \code{type} variable: the first
#' of these names found in \code{phenoData} identifies the control rows
#' (those where the value is 0).
#' @param stringsAsFactors Convert string variables to factors if \code{TRUE}
#' (default value). Note that if present, \code{column} and \code{row} are
#' preserved as factors regardless of this conversion.
#' @param formatString Character string as \code{\link{sprintf}} format for the
#' column; default value of \code{NULL} pads the column value with zeros.
#' See details for fixing a mismatch between imageData and phenoData.
#'
#' @details
#'
#' The format for well names (and column names) may differ between the imageData
#' and phenoData. An "Error in mergePdata(pd, df)" might report "A1, A2, <...>
#' are missing from pd." This can happen if, for example, the imageData uses only
#' a few of the possible 96 wells in phenoData. Change the \code{formatString}
#' parameter to a value such as "\%02d" to force a consistent format. This value
#' is passed to the \code{\link{well.info}} as the parameter \code{format}.
#'
#' This function performs a \code{data.frame} merge with additional checks. It
#' does not (yet) respect the 'plate' variable and only merges according to the
#' \code{well} or \code{file} variables. For now, it is necessary to split
#' the phenoData and imageData by \code{plate}, merge each separately,
#' and then join the data back together. The following might work with phenoData
#' in \code{pd} and imageData in \code{df}:
#'
#' \preformatted{
#' g <- pd$plate
#' pd <- split(pd, g)
#' df <- split(df, df$plate)
#' df <- Map(mergePdata, pd, df)
#' df <- do.call(rbind, df)
#' rownames(df) <- NULL
#' pd <- unsplit(pd, g)
#' }
#'
#' The two data frames must share a \code{well} or a \code{file} variable;
#' an error is raised otherwise because a merge without a shared key would
#' pair every row of \code{phenoData} with every row of \code{imageData}.
#' A \code{unit} variable (\code{"[no unit]"}) and a \code{type} variable
#' (\code{"control"} where the multiplicity is 0, otherwise \code{"standard"})
#' are added to \code{phenoData} when absent. Variables in \code{imageData}
#' that are also present in \code{phenoData} are replaced by the values in
#' \code{phenoData}, and the rows of the result follow the row order of
#' \code{imageData}.
#'
#' @return
#'
#' Merged \code{data.frame} with harmonized well information (if appropriate)
#' and additional data provided in \code{phenoData}. The variables \code{row},
#' \code{column}, and \code{well} are expected to be factors.
#'
#' @export
#'
mergePdata <- function(phenoData, imageData, moi = c("moi", "x"),
  stringsAsFactors = TRUE, formatString = NULL)
{
  # capture the caller's expressions for error messages now; substitute()
  # only returns the expression while the argument has not been reassigned
  pdName <- paste(deparse(substitute(phenoData)), collapse = "")
  idName <- paste(deparse(substitute(imageData)), collapse = "")

  # check formatString argument
  if (!is.null(formatString))
    formatString <- as.character(formatString)[1]

  # intercept data with 'plate' present ('[[' avoids partial name matching)
  if ("plate" %in% names(phenoData) || "plate" %in% names(imageData)) {
    if (nlevels(factor(phenoData[["plate"]])) > 1 ||
        nlevels(factor(imageData[["plate"]])) > 1)
      warning("see mergePdata() help to use 'plate' variable for merging")
  }

  # process and check data according to the presence of "well" or "file"
  if ("well" %in% names(imageData)) {
    if (!"well" %in% names(phenoData))
      stop("'well' found in ", idName, " but not in ", pdName)

    # harmonized well information, one entry per row of phenoData
    pdInfo <- well.info(phenoData$well, format = formatString)
    idWell <- unique(well.info(imageData$well, format = formatString)$well)
    diffWell <- setdiff(idWell, unique(pdInfo$well))
    if (length(diffWell) != 0) {
      message("Perhaps 'formatString' should be set to \"%02d\"?")
      stop(paste(diffWell, collapse = ", "), " are missing from ", pdName)
    }

    # replace (or add) harmonized row, column and well in phenotype data;
    # the full-length vector is used (not the unique values) so that
    # repeated wells in phenoData are neither recycled nor mislabelled
    phenoData$column <- pdInfo$column
    phenoData$row <- pdInfo$row
    phenoData$well <- pdInfo$well
  }
  else if (!"file" %in% names(imageData)) {
    stop("A 'file' or 'well' variable must be present in ", idName)
  }
  else if (!"file" %in% names(phenoData)) {
    # without a shared key merge() would return a Cartesian product
    stop("'file' found in ", idName, " but not in ", pdName)
  }

  # check for 'unit' variable and assign if needed
  if (!"unit" %in% names(phenoData))
    phenoData$unit <- rep("[no unit]", nrow(phenoData))

  # check for 'type' variable and assign if needed; the multiplicity
  # variable is looked up in the order requested by 'moi', falling back
  # to "moi" and then "x"
  if (!"type" %in% names(phenoData)) {
    type <- rep("standard", nrow(phenoData))
    moiVars <- unique(c(intersect(as.character(moi), c("moi", "x")),
      "moi", "x"))
    moiVar <- moiVars[moiVars %in% names(phenoData)][1]
    if (!is.na(moiVar))
      type[which(phenoData[[moiVar]] == 0)] <- "control"
    phenoData$type <- type
  }

  # remove any variables in imageData that are present in phenoData EXCEPT
  # for variables used to merge data frames: 'file' and/or 'well'
  vars <- setdiff(names(imageData), names(phenoData))
  vars <- unique(c(vars, intersect(c("file", "well"), names(imageData))))
  imageData <- imageData[vars]

  # add temporary sorting variable for imageData; the name must be unique
  # across BOTH data frames or it would act as an unintended merge key
  nms <- make.unique(c(names(phenoData), names(imageData), "srt"))
  sval <- nms[length(nms)]
  imageData[[sval]] <- seq_len(nrow(imageData))

  # merge on the shared key(s), keeping every row of imageData, and then
  # restore the original order of imageData
  key <- intersect(names(phenoData), names(imageData))
  res <- merge(phenoData, imageData, by = key, all.y = TRUE)
  res <- res[order(res[[sval]]), , drop = FALSE]
  res <- res[names(res) != sval]

  # convert strings to factors if requested
  if (isTRUE(as.logical(stringsAsFactors))) {
    sel <- vapply(res, is.character, logical(1))
    res[sel] <- lapply(res[sel], as.factor)
  }

  # reorganize data: identifying variables first, everything else after
  pdnames <- c("dir", "file", "well", "column", "row", "frame", "type", "unit")
  first <- pdnames[pdnames %in% names(res)]
  last <- names(res)[!names(res) %in% pdnames]
  res <- res[c(first, last)]

  # exclude unused levels (allows pd to hold more information than required)
  res <- droplevels(res)
  rownames(res) <- NULL
  return(res)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.