Nothing
#' Creates a Standardized Data Frame with Rapid Equilibrium Dialysis (RED)
#' Plasma Protein Binding (PPB) Data (Level-1)
#'
#' This function formats data describing mass spectrometry (MS) peak areas
#' from samples collected as part of \emph{in vitro} measurements of chemical fraction
#' unbound in plasma using rapid equilibrium dialysis
#' \insertCite{waters2008validation}{invitroTKstats}.
#' The input data frame is organized into a standard set of columns and written
#' to a tab-separated text file.
#'
#' The data frame of observations should be annotated according to these types:
#' \tabular{rrrrr}{
#' No Plasma Blank (no chemical, no plasma) \tab NoPlasma.Blank\cr
#' Plasma Blank (no chemical, just plasma) \tab Plasma.Blank\cr
#' Plasma well concentration \tab Plasma\cr
#' Phosphate-buffered well concentration\tab PBS\cr
#' Time zero plasma concentration \tab T0\cr
#' Plasma stability sample \tab Stability\cr
#' Acceptor compartment of the equilibrium evaluation \tab EC_acceptor\cr
#' Donor compartment of the equilibrium evaluation (chemical spiked side) \tab EC_donor\cr
#' Calibration Curve \tab CC\cr
#' }
#' Chemical concentration is calculated qualitatively as a response and
#' returned as a column in the output data frame:
#'
#' Response <- AREA / ISTD.AREA * ISTD.CONC
#'
#' If the output level-1 result table is chosen to be exported and an output
#' directory is not specified, it will be exported to the user's R session
#' temporary directory. This temporary directory is a per-session directory
#' whose path can be found with the following code: \code{tempdir()}. For more
#' details, see \url{https://www.collinberke.com/til/posts/2023-10-24-temp-directories/}.
#'
#' As a best practice, \code{INPUT.DIR} and/or \code{OUTPUT.DIR} should be
#' specified to simplify the process of importing and exporting files. This
#' practice ensures that the exported files can easily be found and will not be
#' exported to a temporary directory.
#'
#' @param FILENAME (Character) A string used to identify the output level-1 file.
#' "<FILENAME>-fup-RED-Level1.tsv", and/or used to identify the input level-0 file,
#' "<FILENAME>-fup-RED-Level0.tsv" if importing from a .tsv file. (Defaults to "MYDATA".)
#'
#' @param data.in (Data Frame) A level-0 data frame containing mass-spectrometry peak areas,
#' indication of chemical identity, and measurement type. The data frame should
#' contain columns with names specified by the following arguments:
#'
#' @param sample.col (Character) Column name of \code{data.in} containing the unique mass
#' spectrometry (MS) sample name used by the laboratory. (Defaults to
#' "Lab.Sample.Name".)
#'
#' @param date (Character) The laboratory measurement date, format "MMDDYY" where
#' "MM" = 2 digit month, "DD" = 2 digit day, and "YY" = 2 digit year. (Defaults to \code{NULL}.)
#' (Note: Single entry only, use only if all data were collected on the same date.)
#'
#' @param date.col (Character) Column name containing \code{date} information.
#' (Defaults to "Date".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{date}.)
#'
#' @param compound.col (Character) Column name of \code{data.in} containing the test compound.
#' (Defaults to "Compound.Name".)
#'
#' @param dtxsid.col (Character) Column name of \code{data.in} containing EPA's DSSTox Structure
#' ID (\url{http://comptox.epa.gov/dashboard}). (Defaults to "DTXSID".)
#'
#' @param lab.compound.col (Character) Column name of \code{data.in} containing the test compound
#' name used by the laboratory. (Defaults to "Lab.Compound.Name".)
#'
#' @param type.col (Character) Column name of \code{data.in} containing the sample type (see table
#' under Details). (Defaults to "Sample.Type".)
#'
#' @param cal (Character) MS calibration the samples were based on. Typically, this uses
#' indices or dates to represent if the analyses were done on different machines on
#' the same day or on different days with the same MS analyzer. (Defaults to \code{NULL}.)
#' (Note: Single entry only,
#' use only if all data were collected based on the same calibration.)
#'
#' @param cal.col (Character) Column name containing \code{cal}
#' information. (Defaults to "Cal".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{cal}.)
#'
#' @param dilution (Numeric) Number of times the sample was diluted before MS
#' analysis. (Defaults to \code{NULL}.) (Note: Single entry only, use only if all
#' samples underwent the same number of dilutions.)
#'
#' @param dilution.col (Character) Column name containing \code{dilution}
#' information. (Defaults to "Dilution.Factor".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{dilution}.)
#'
#' @param time (Numeric) Incubation time (in hours) - from the start of incubation to
#' when the sample measurements were taken. (Defaults to \code{NULL}.) (Note: Single entry only, use only if all
#' samples were taken after the same amount of incubation time.)
#'
#' @param time.col (Character) Column name containing \code{time} information.
#' (Defaults to "Time".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{time}.)
#'
#' @param istd.col (Character) Column name of \code{data.in} containing the
#' MS peak area for the internal standard. (Defaults to "ISTD.Area".)
#'
#' @param istd.name (Character) The identity of the internal standard. (Defaults to \code{NULL}.)
#' (Note: Single entry only, use only if all tested compounds use the same internal standard.)
#'
#' @param istd.name.col (Character) Column name containing \code{istd.name} information.
#' (Defaults to "ISTD.Name".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{istd.name}.)
#'
#' @param istd.conc (Numeric) The concentration for the internal standard. (Defaults to \code{NULL}.)
#' (Note: Single entry only, use only if all tested compounds have the same
#' internal standard concentration.)
#'
#' @param istd.conc.col (Character) Column name containing \code{istd.conc} information.
#' (Defaults to "ISTD.Conc".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{istd.conc}.)
#'
#' @param test.nominal.conc (Numeric) The nominal concentration added to the RED assay
#' at time 0. (Defaults to \code{NULL}.) (Note: Single entry only, use only
#' if all tested compounds used the same concentration at time 0.)
#'
#' @param test.nominal.conc.col (Character) Column name containing \code{test.nominal.conc}
#' information. (Defaults to "Test.Target.Conc".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{test.nominal.conc}.)
#'
#' @param plasma.percent (Numeric) The percent of the physiological plasma concentration
#' used in RED assay. (Defaults to \code{NULL}.) (Note: Single entry only, use only
#' if all compounds were tested with the same plasma percent.)
#'
#' @param plasma.percent.col (Character) Column name containing \code{plasma.percent}
#' information. (Defaults to "Plasma.Percent".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{plasma.percent}.)
#'
#' @param test.conc (Numeric) The standard test chemical concentration for
#' the fup RED assay. (Defaults to \code{NULL}.) (Note: Single entry only,
#' use only if the same standard concentration was used for all tested compounds.)
#'
#' @param test.conc.col (Character) Column name containing \code{test.conc}
#' information. (Defaults to "Test.Compound.Conc".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{test.conc}.)
#'
#' @param area.col (Character) Column name of \code{data.in} containing the target analyte (that
#' is, the test compound) MS peak area. (Defaults to "Area".)
#'
#' @param biological.replicates (Character) Replicates with the same analyte. Typically, this uses
#' numbers or letters to index. (Defaults to \code{NULL}.) (Note: Single entry only,
#' use only if none of the test compounds have replicates.)
#'
#' @param biological.replicates.col (Character) Column name of \code{data.in} containing the number or
#' the indices of replicates with the same analyte. (Defaults to "Biological.Replicates".)
#' (Note: \code{data.in} does not necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{biological.replicates}.)
#'
#' @param technical.replicates (Character) Repeated measurements from one sample. Typically, this uses
#' numbers or letters to index. (Defaults to \code{NULL}.) (Note: Single entry only,
#' use only if none of the test compounds have replicates.)
#'
#' @param technical.replicates.col (Character) Column name of \code{data.in} containing the number or
#' the indices of replicates taken from the one sample. (Defaults to "Technical.Replicates".)
#' (Note: \code{data.in} does not necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{technical.replicates}.)
#'
#' @param analysis.method (Character) The analytical chemistry analysis method,
#' typically "LCMS" or "GCMS", liquid chromatography or gas chromatography–mass spectrometry, respectively.
#' (Defaults to \code{NULL}.) (Note: Single entry only,
#' use only if the same method was used for all tested compounds.)
#'
#' @param analysis.method.col (Character) Column name containing \code{analysis.method}
#' information. (Defaults to "Analysis.Method".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{analysis.method}.)
#'
#' @param analysis.instrument (Character) The instrument used for chemical analysis,
#' for example "Waters ACQUITY I-Class UHPLC - Xevo TQ-S uTQMS". (Defaults to \code{NULL}.)
#' (Note: Single entry only, use only if the same instrument was used for all tested compounds.)
#'
#' @param analysis.instrument.col (Character) Column name containing \code{analysis.instrument}
#' information. (Defaults to "Analysis.Instrument".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{analysis.instrument}.)
#'
#' @param analysis.parameters (Character) The parameters used to identify the
#' compound on the chemical analysis instrument. (Defaults to \code{NULL}.)
#' (Note: Single entry only, use only if the same parameters were used for all tested compounds.)
#'
#' @param analysis.parameters.col (Character) Column name containing \code{analysis.parameters}
#' information. (Defaults to "Analysis.Parameters".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{analysis.parameters}.)
#'
#' @param note.col (Character) Column name of \code{data.in} containing additional notes on
#' test compounds. (Defaults to "Note".)
#'
#' @param level0.file (Character) The level-0 file from which the \code{data.in} were obtained.
#' (Defaults to \code{NULL}.) (Note: Single entry only, use only if all rows in \code{data.in}
#' were obtained from the same level-0 file.)
#'
#' @param level0.file.col (Character) Column name containing \code{level0.file} information.
#' (Defaults to "Level0.File".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{level0.file}.)
#'
#' @param level0.sheet (Character) The specific sheet name of level-0 file from which the
#' \code{data.in} is obtained from, if the level-0 file is an Excel workbook.
#' (Defaults to \code{NULL}.) (Note: Single entry only, use only if all rows in \code{data.in}
#' were obtained from the same sheet in the same level-0 file.)
#'
#' @param level0.sheet.col (Character) Column name containing \code{level0.sheet} information.
#' (Defaults to "Level0.Sheet".) (Note: \code{data.in} does not
#' necessarily have this field. If this field is missing, it can be auto-filled with the value
#' specified in \code{level0.sheet}.)
#'
#' @param output.res (Logical) When set to \code{TRUE}, the result
#' table (level-1) will be exported to the user's per-session temporary directory
#' or \code{OUTPUT.DIR} (if specified) as a .tsv file.
#' (Defaults to \code{FALSE}.)
#'
#' @param save.bad.types (Logical) When set to \code{TRUE}, export data removed
#' due to inappropriate sample types. See the Detail section for the required sample types.
#' (Defaults to \code{FALSE}.)
#'
#' @param sig.figs (Numeric) The number of significant figures to round the exported result table (level-1).
#' (Defaults to \code{5}.)
#'
#' @param INPUT.DIR (Character) Path to the directory where the input level-0 file exists.
#' If \code{NULL}, looking for the input level-0 file in the current working
#' directory. (Defaults to \code{NULL}.)
#'
#' @param OUTPUT.DIR (Character) Path to the directory to save the output file.
#' If \code{NULL}, the output file will be saved to the user's per-session temporary
#' directory or \code{INPUT.DIR} if specified. (Defaults to \code{NULL}.)
#'
#' @param verbose (\emph{logical}) Indicate whether printed statements should be shown.
#' (Default is TRUE.)
#'
#' @return A level-1 data frame with a standardized format containing a
#' standardized set of columns and column names with plasma protein
#' binding (PPB) data from an rapid equilibrium dialysis (RED) assay.
#'
#' @author John Wambaugh
#'
#' @examples
#'
#' ## Load the example level-0 data
#' level0 <- invitroTKstats::fup_red_L0
#'
#' ## Run it through level-1 processing function
#' ## This example shows the use of the data.in argument which allows users to pass
#' ## in a data frame from the R session.
#' ## If the input level-0 data exists in an external file such as a .tsv file,
#' ## users may import it using FILENAME and INPUT.DIR to specify the file name
#' ## and its directory path, respectively.
#' level1 <- format_fup_red(data.in = level0,
#' sample.col ="Sample",
#' date.col="Date",
#' compound.col="Compound",
#' lab.compound.col="Lab.Compound.ID",
#' type.col="Sample.Type",
#' dilution.col="Dilution.Factor",
#' technical.replicates.col ="Replicate",
#' biological.replicates = 1,
#' cal=1,
#' area.col = "Peak.Area",
#' istd.conc = 10/1000,
#' istd.col= "ISTD.Peak.Area",
#' test.conc.col = "Compound.Conc",
#' test.nominal.conc = 10,
#' plasma.percent = 100,
#' time.col = "Time",
#' analysis.method = "LCMS",
#' analysis.instrument = "Waters ACQUITY I-Class UHPLC - Xevo TQ-S uTQMS",
#' analysis.parameters = "RT",
#' note.col=NULL,
#' output.res = FALSE
#' )
#'
#'
#' @references
#' \insertRef{waters2008validation}{invitroTKstats}
#'
#' @import Rdpack
#'
#' @export format_fup_red
format_fup_red <- function(
FILENAME = "MYDATA",
data.in,
sample.col="Lab.Sample.Name",
date=NULL,
date.col="Date",
compound.col="Compound.Name",
dtxsid.col="DTXSID",
lab.compound.col="Lab.Compound.Name",
type.col="Sample.Type",
cal=NULL,
cal.col="Cal",
dilution=NULL,
dilution.col="Dilution.Factor",
time = NULL,
time.col="Time",
istd.col="ISTD.Area",
istd.name=NULL,
istd.name.col="ISTD.Name",
istd.conc=NULL,
istd.conc.col="ISTD.Conc",
test.nominal.conc=NULL,
test.nominal.conc.col="Test.Target.Conc",
plasma.percent=NULL,
plasma.percent.col="Plasma.Percent",
test.conc=NULL,
test.conc.col="Test.Compound.Conc",
area.col="Area",
biological.replicates = NULL,
biological.replicates.col = "Biological.Replicates",
technical.replicates = NULL,
technical.replicates.col = "Technical.Replicates",
analysis.method=NULL,
analysis.method.col="Analysis.Method",
analysis.instrument=NULL,
analysis.instrument.col="Analysis.Instrument",
analysis.parameters=NULL,
analysis.parameters.col="Analysis.Parameters",
note.col="Note",
level0.file=NULL,
level0.file.col="Level0.File",
level0.sheet=NULL,
level0.sheet.col="Level0.Sheet",
output.res = FALSE,
save.bad.types = FALSE,
sig.figs = 5,
INPUT.DIR = NULL,
OUTPUT.DIR = NULL,
verbose = TRUE)
{
if (!missing(data.in)) {
data.in <- as.data.frame(data.in)
} else if (!is.null(INPUT.DIR)) {
data.in <- read.csv(file=paste0(INPUT.DIR, "/", FILENAME,"-fup-RED-Level0.tsv"),
sep="\t",header=TRUE)
} else {
data.in <- read.csv(file=paste0(FILENAME,"-fup-RED-Level0.tsv"),
sep="\t",header=TRUE)
}
if (is.null(note.col))
{
data.in[,"Note"] <- ""
note.col <- "Note"
}
# determine the path for output files
if (!is.null(OUTPUT.DIR)) {
file.path <- OUTPUT.DIR
} else if (!is.null(INPUT.DIR)) {
file.path <- INPUT.DIR
} else {
file.path <- tempdir()
}
# These arguments allow the user to specify a single value for every observation
# in the table:
if (!is.null(cal)) data.in[,cal.col] <- cal
if (!is.null(date)){
# if numeric, convert to string and ensuring leading zero is kept for single digit months
if (is.numeric(date)) date <- base::sprintf("%06d", date)
data.in[,date.col] <- date
}
if (!is.null(time)) data.in[,time.col] <- time
if (!is.null(dilution)) data.in[,dilution.col] <- dilution
if (!is.null(istd.name)) data.in[,istd.name.col] <- istd.name
if (!is.null(istd.conc)) data.in[,istd.conc.col] <- istd.conc
if (!is.null(test.conc)) data.in[,test.conc.col] <-
test.conc
if (!is.null(test.nominal.conc)) data.in[,test.nominal.conc.col] <-
test.nominal.conc
if (!is.null(plasma.percent)) data.in[,plasma.percent.col] <-
plasma.percent
if (!is.null(analysis.method)) data.in[,analysis.method.col]<- analysis.method
if (!is.null(analysis.instrument)) data.in[,analysis.instrument.col] <-
analysis.instrument
if (!is.null(analysis.parameters)) data.in[,analysis.parameters.col] <-
analysis.parameters
if (!is.null(level0.file)) data.in[,level0.file.col] <- level0.file
if (!is.null(level0.sheet)) data.in[,level0.sheet.col] <- level0.sheet
if (!is.null(biological.replicates)) data.in[,biological.replicates.col]<- biological.replicates
if (!is.null(technical.replicates)) data.in[,technical.replicates.col]<- technical.replicates
# We need all these columns in data.in
fup.red.cols <- c(L1.common.cols,
time.col = "Time",
test.conc.col = "Test.Compound.Conc",
test.nominal.conc.col = "Test.Nominal.Conc",
plasma.percent.col = "Percent.Physiologic.Plasma"
)
## throw warning for tech reps but require bio reps
if (!biological.replicates.col %in% colnames(data.in)) {
stop(paste("Missing columns named: Biological.Replicates"))
} else if (any(is.na(data.in[,biological.replicates.col]))) {
stop(paste("Provide non-NA value for Biological.Replicates"))
}
if (!technical.replicates.col %in% colnames(data.in)) {
data.in[,technical.replicates.col] <- NA
warning("Technical replicates were not provided and are all assigned to NA.\n")
}
# Assign bio and tech replicate col names
fup.red.cols <- c(fup.red.cols, biological.replicates.col = "Biological.Replicates",
technical.replicates.col = "Technical.Replicates")
cols <- unlist(mget(names(fup.red.cols)))
if (!(all(cols %in% colnames(data.in))))
{
stop(paste("Missing columns named:",
paste(cols[!(cols%in%colnames(data.in))],collapse=", ")))
}
# Only include the data types used:
req.types = c("Plasma", "PBS", "T0", "Plasma.Blank", "NoPlasma.Blank",
"CC", "Stability", "EC_acceptor", "EC_donor")
data.out <- subset(data.in,data.in[,type.col] %in% req.types)
data.in.badtype <- subset(data.in,!(data.in[,type.col] %in% req.types))
# Force code to throw error if data.in accessed after this point:
rm(data.in)
# Option to export data with bad types
if (nrow(data.in.badtype) != 0) {
if (save.bad.types) {
write.table(data.in.badtype,
file=paste0(file.path, "/", FILENAME,"-fup-RED-Level0-badtype.tsv"),
sep="\t",
row.names=FALSE,
quote=FALSE)
if(verbose){
cat(paste0("Data with inappropriate sample types were removed. Removed samples were exported to ",
FILENAME,"-fup-RED-Level0-badtype.tsv", " in the following directory: ", file.path), "\n")
}
} else {
warning("Data with inappropriate sample types were removed.")
}
}
# Organize the columns:
data.out <- data.out[,cols]
colnames(data.out) <- fup.red.cols
# calculate the response:
data.out[,"Response"] <- data.out[,"Area"] /
data.out[,"ISTD.Area"] * data.out[,"ISTD.Conc"]
if (output.res) {
rounded.data.out <- data.out
# Round results to desired number of sig figs
if (!is.null(sig.figs)){
rounded.data.out[,"Area"] <- signif(rounded.data.out[,"Area"], sig.figs)
rounded.data.out[,"ISTD.Area"] <- signif(rounded.data.out[,"ISTD.Area"], sig.figs)
rounded.data.out[,"Response"] <- signif(rounded.data.out[,"Response"], sig.figs)
if(verbose){cat(paste0("\nData to export has been rounded to ", sig.figs, " significant figures.\n"))}
}
# Write out a "level-1" file (data organized into a standard format):
write.table(rounded.data.out,
file=paste0(file.path, "/", FILENAME,"-fup-RED-Level1.tsv"),
sep="\t",
row.names=FALSE,
quote=FALSE)
if(verbose){
cat(paste0("A level-1 file named ",FILENAME,"-fup-RED-Level1.tsv",
" has been exported to the following directory: ", file.path), "\n")
}
}
summarize_table(data.out,
req.types=c("Plasma","PBS","Plasma.Blank","NoPlasma.Blank"),
verbose = verbose)
return(data.out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.