#' subtract baseline from signal
#'
#' Calculate baseline signal and subtract it from all observations.
#'
#' @param x a \code{data.frame} or \code{grouped data frame}
#' @param variables character vector of variables (columns) to modify
#' @param reference logical predicate that identifies reference observations, given as string or bare;
#' will be passed to \code{\link{subset}}
#' @param method method with which to compute the baseline, mean or median; given as symbol or string
#' @param by_group character vector of variables to group by (\code{\link[dplyr]{group_by}})
#' in case the baseline is to be taken from means of sets of observations
#' rather than from all observations directly
#'
#' @return a \code{data.frame} with modified variables
#'
#' @section Grouped data frames (\code{dplyr} package):
#' The method for class \code{grouped_df} is home brewed as I don't know
#' how to properly handle this class.
#'
#' @section Warning:
#' Be careful when defining references observations. If a group ends up without reference,
#' the procedure will crash. \code{reference} should not make use of a grouping variable.
#' It is safest to create a separate viariable for the purpose of \code{reference}.
#'
#' @export
#'
baseline <- function(x, variables, reference, method = mean, by_group) {
UseMethod('baseline')
}
#' @export
#' @describeIn baseline
#' computes mean/median of desired variables with \code{vapply}
#' and subtracts them from their respective variables using \code{mapply}
baseline.data.frame <- function(x, variables, reference, method = mean, by_group) {
# check arguments
if (!is.data.frame(x)) stop('x must be a data frame')
if (nrow(x) == 0) stop('x is an empty data frame')
if (missing(variables)) {
message('no variables selected; taking all numeric variables except "well" and "column"')
variables <- setdiff(names(Filter(is.numeric, x)), c('well', 'column'))
} else {
if (!is.character(variables)) stop('varaibles must be a character vector')
if (!all(variables %in% names(x))) stop('invalid variables selected')
if (!all(vapply(x[variables], is.numeric, logical(1)))) stop('non-numeric variables selected')
}
if (missing(reference)) stop('"reference" is missing with no default')
if (!missing(by_group) && !all(by_group %in% names(x))) stop('invalid grouping selected: "by_group"')
# capture logical predicate for subset and, if string, convert to expression
r <- substitute(reference)
r <- if (is.call(r)) r else if (is.character(r)) substitute(eval(parse(text = r)))
# get method
method <- match.fun(method)
# capture column order
column_order <- names(x)
# isolate reference set
x_ref <-
if (missing(reference)) x else
subset(x, subset = eval(r))
if (nrow(x_ref) == 0) {
stop('reference set is empty; reconsider grouping and reference specification')
}
# separate variables to be normalized from remaining ones
x_vars <- x[variables]
x_rems <- x[setdiff(names(x), variables)]
if (!all(sapply(x_vars, is.numeric))) stop('some "variables" are not numeric')
# average reference subgroups if grouping variables are identified
if (!missing(by_group)) {
x_ref <-
do.call(rbind,
by(data = x_ref,
INDICES = x_ref[by_group],
FUN = function(x) {for(i in variables) x[[i]] <- method(x[[i]]); return(x)},
simplify = FALSE))
}
# calculate baselines
baselines. <- vapply(x_ref[variables], method, na.rm = T, numeric(1))
# subtract baselines
x_normalized <- mapply(FUN = '-', x_vars, baselines.)
# add normalized columns to the unchanged ones
y <- cbind(x_rems, x_normalized)
y <- y[, column_order]
return(y)
}
#' @export
#' @describeIn baseline see \code{\link[metamethods]{data.frame__to__grouped_df}}
baseline.grouped_df <- metamethods::data.frame__to__grouped_df(baseline.data.frame)
# x1 <- data.frame(
# well = 1:100,
# int1 = c(rnorm(10, 10, 1), rnorm(10, 150, 2), rnorm(80, 78, 4)),
# class = c(rep('low', 10), rep('high', 10), rep('mid', 80)),
# plate = 'plate 1',
# replica = rep(c('replica 1', 'replica 2'), 50)
# )
# x2 <- x1 %>% dplyr::mutate(int1 = int1 * 1.25, plate = 'plate 2')
# x <- rbind(x1, x2)
# y <- x %>% dplyr::group_by(plate) %>% baseline('int1', class == 'low') %>% data.frame
# rbind(x, y) %>% dplyr::mutate(set = rep(c('x', 'y'), each = 200)) %>%
# ggplot2::ggplot(ggplot2::aes(x = class, y = int1, color = set)) +
# ggplot2::facet_wrap(~plate) +ggplot2::geom_point(position = ggplot2::position_jitterdodge())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.