Nothing
#' Calculate a Point Estimate of Intrinsic Hepatic Clearance (Clint) (Level-3)
#'
#' This function calculates a point estimate of intrinsic hepatic clearance (Clint)
#' using mass spectrometry (MS) peak area data collected as part of \emph{in vitro} measurements
#' of chemical clearance, as characterized by the disappearance of parent compound over
#' time when incubated with primary hepatocytes \insertCite{shibata2002prediction}{invitroTKstats}.
#'
#' The input to this function should be "level-2" data. Level-2 data is level-1,
#' data formatted with the \code{\link{format_clint}} function, and curated
#' with a verification column. "Y" in the verification column indicates the
#' data row is valid for analysis.
#'
#' The data frame of observations should be annotated according to
#' these types:
#' \tabular{rrrrr}{
#' Blank \tab Blank\cr
#' Hepatocyte incubation concentration vs. time \tab Cvst\cr
#' }
#'
#' Clint is calculated using \code{\link{lm}} to perform a linear regression of
#' MS response as a function of time.
#'
#' If the output level-3 result table is chosen to be exported and an output
#' directory is not specified, it will be exported to the user's R session
#' temporary directory. This temporary directory is a per-session directory
#' whose path can be found with the following code: \code{tempdir()}. For more
#' details, see \url{https://www.collinberke.com/til/posts/2023-10-24-temp-directories/}.
#'
#' As a best practice, \code{INPUT.DIR} (when importing a .tsv file) and/or \code{OUTPUT.DIR} should be
#' specified to simplify the process of importing and exporting files. This
#' practice ensures that the exported files can easily be found and will not be
#' exported to a temporary directory.
#'
#' @param FILENAME A string used to identify the input level-2 file,
#' "<FILENAME>-Clint-Level2.tsv" (if importing from a .tsv file), and/or used
#' to identify the output level-3 file, "<FILENAME>-Clint-Level3.tsv" (if exporting).
#'
#' @param data.in (Data Frame) A level-2 data frame generated from the
#' \code{format_clint} function with a verification column added by
#' \code{sample_verification}. Complement with manual verification if needed.
#'
#' @param good.col (Character) Column name indicating which rows have been
#' verified, data rows valid for analysis are indicated with a "Y".
#' (Defaults to "Verified".)
#'
#' @param output.res (Logical) When set to \code{TRUE}, the result
#' table (level-3) will be exported to the user's per-session temporary directory
#' or \code{OUTPUT.DIR} (if specified) as a .tsv file.
#' (Defaults to \code{FALSE}.)
#'
#' @param sig.figs (Numeric) The number of significant figures to round the exported result table (level-3).
#' (Note: console print statements are also rounded to specified significant figures.)
#' (Defaults to \code{3}.)
#'
#' @param INPUT.DIR (Character) Path to the directory where the input level-2 file exists.
#' If \code{NULL}, looking for the input level-2 file in the current working
#' directory. (Defaults to \code{NULL}.)
#'
#' @param OUTPUT.DIR (Character) Path to the directory to save the output file.
#' If \code{NULL}, the output file will be saved to the user's per-session
#' temporary directory or \code{INPUT.DIR} if specified. (Defaults to \code{NULL}.)
#'
#' @param verbose (\emph{logical}) Indicate whether printed statements should be shown.
#' (Default is TRUE.)
#'
#' @return A level-3 data frame with one row per chemical, contains a point estimate of intrinsic
#' clearance (Clint), estimates of Clint of assays performed at 1 and 10 uM (if tested),
#' the p-value and the Akaike Information Criterion (AIC) of the linear
#' regression fit for all chemicals in the input data frame.
#'
#' @author John Wambaugh
#'
#' @examples
#' ## Load example level-2 data
#' level2 <- invitroTKstats::clint_L2
#'
#' ## scenario 1:
#' ## input level-2 data from the R session and do not export the result table
#' level3 <- calc_clint_point(data.in = level2, output.res = FALSE)
#'
#' ## scenario 2:
#' ## import level-2 data from a 'tsv' file and export the result table to
#' ## same location as INPUT.DIR
#' \dontrun{
#' ## Refer to sample_verification help file for how to export level-2 data to a directory.
#' ## Unless a different path is specified in OUTPUT.DIR,
#' ## the result table will be saved to the directory specified in INPUT.DIR.
#' ## Will need to replace FILENAME and INPUT.DIR with name prefix and location of level-2 'tsv'.
#' level3 <- calc_clint_point(# e.g. replace with "Examples" from "Examples-Clint-Level2.tsv"
#' FILENAME="<level-2 FILENAME prefix>",
#' INPUT.DIR = "<level-2 FILE LOCATION>",
#' output.res = TRUE)
#' }
#'
#' ## scenario 3:
#' ## input level-2 data from the R session and export the result table to the
#' ## user's temporary directory
#' ## Will need to replace FILENAME with desired level-2 filename prefix.
#' \dontrun{
#' level3 <- calc_clint_point(# e.g. replace with "MYDATA"
#' FILENAME = "<desired level-2 FILENAME prefix>",
#' data.in = level2,
#' output.res = TRUE)
#' # To delete, use the following code. For more details, see the link in the
#' # "Details" section.
#' file.remove(list.files(tempdir(), full.names = TRUE,
#' pattern = "<desired level-2 FILENAME prefix>-Clint-Level3.tsv"))
#' }
#'
#' @references
#' \insertRef{shibata2002prediction}{invitroTKstats}
#'
#' @importFrom stats4 mle coef AIC
#' @importFrom utils read.csv write.table
#'
#' @import Rdpack
#'
#' @export calc_clint_point
calc_clint_point <- function(
FILENAME,
data.in,
good.col="Verified",
output.res=FALSE,
sig.figs = 3,
INPUT.DIR=NULL,
OUTPUT.DIR = NULL,
verbose = TRUE)
{
#assigning global variables
Compound.Name <- Response <- Sample.Type <- Time <- NULL
if (!missing(data.in)) {
clint.data <- as.data.frame(data.in)
} else if (!is.null(INPUT.DIR)) {
clint.data <- read.csv(file=paste0(INPUT.DIR, "/", FILENAME,"-Clint-Level2.tsv"),
sep="\t",header=TRUE)
} else {
clint.data <- read.csv(file=paste0(FILENAME,"-Clint-Level2.tsv"),
sep="\t",header=TRUE)
}
clint.data <- subset(clint.data,!is.na(Compound.Name))
clint.data <- subset(clint.data,!is.na(Response))
clint.cols <- c(L1.common.cols,
time.col = "Time",
test.conc.col = "Test.Compound.Conc",
test.nominal.conc.col = "Test.Nominal.Conc",
density.col = "Hep.Density"
)
list2env(as.list(clint.cols), envir = environment())
cols <- c(unlist(mget(names(clint.cols))), "Response", good.col)
if (!any(c("Biological.Replicates", "Technical.Replicates") %in% colnames(clint.data)))
stop("Need at least one column representing replication, i.e. Biological.Replicates or Technical.Replicates. Run format_clint first (level-1) then curate to (level-2).")
if (!(all(cols %in% colnames(clint.data))))
{
warning("Run format_clint first (level-1) then curate to (level-2).")
stop(paste("Missing columns named:",
paste(cols[!(cols%in%colnames(clint.data))],collapse=", ")))
}
# Only include the data types used:
clint.data <- subset(clint.data,clint.data[,type.col] %in% c(
"Blank","Cvst"))
# Only used verified data:
clint.data <- subset(clint.data, clint.data[,good.col] == "Y")
# Clean up data:
clint.data <- subset(clint.data,!is.na(Response))
clint.data[clint.data$Response<0,"Response"] <- 0
clint.data[clint.data$Sample.Type=="Blank" & is.na(clint.data$Time),"Time"] <- 0
out.table <-NULL
num.chem <- 0
num.cal <- 0
# decay - This function calculates the test compound concentration at time t, using a model of
# exponential decay with time C(t) = C_0*e^{-mt}, where C_0 is the
# test compound concentration at time 0, m is a rate constant (argument k_elim),
# and t is the incubation time in hour.
decay <- function(time.hours,conc,cal,k_elim) cal*conc*exp(-k_elim*time.hours)
# Negative log-likelihood of the linear regression fit
lldecay <- function(cal,k_elim,sigma)
{
if (sigma < 0.0001) sigma <- 0.0001
N <- dim(this.data)[1]
pred <- decay(
time.hours=this.data$Time,
conc=this.data$Test.Nominal.Conc,
cal=cal,
k_elim=k_elim)
ll <- log(1/sigma/sqrt(2*pi))*N
res <- pred-this.data$Response
ll <- ll+sum(-1/2*res^2/sigma^2)
if (is.na(ll)){
stop("lldecay - Estimated Log-likelihood is `NA`.")
# browser()
}
return(-ll)
}
# satdecay - This function calculates the test compound concentration at time t,
# using a model of exponential decay with time while considering a saturation probability.
# C(t) = C_0*e^{-m*sat*t}. C_0 is the test compound concentration at time 0, m is the elimination
# rate constant (argument k_elim), and t is the incubation time in hours.
# sat is the probability of saturation, defined as observing a lower clearance at a higher
# concentration. At 1 uM, sat is 1, meaning saturation is unlikely at the current concentration
# and is going to be observed at a higher concentration. At 10 uM, sat is between 0 and 1,
# meaning full saturation may or may not have been reached.
satdecay <- function(time.hours,conc,cal,k_elim,sat) cal*conc*exp(-k_elim*ifelse(conc==10,sat,1)*time.hours)
# Negative log-likelihood of the linear regression fit
llsatdecay <- function(cal,k_elim,sigma,sat)
{
if (sigma < 0.0001) sigma <- 0.0001
N <- dim(this.data)[1]
pred <- satdecay(
time.hours=this.data$Time,
conc=this.data$Test.Nominal.Conc,
cal=cal,
k_elim=k_elim,
sat=sat)
ll <- log(1/sigma/sqrt(2*pi))*N
res <- pred-this.data$Response
ll <- ll+sum(-1/2*res^2/sigma^2)
if (is.na(ll)){
stop("llsatdecay - Estimated Log-likelihood is `NA`.")
# browser()
}
return(-ll)
}
for (this.chem in unique(clint.data[,compound.col]))
{
this.subset <- subset(clint.data,clint.data[,compound.col]==this.chem)
this.dtxsid <- this.subset$dtxsid[1]
this.row <- cbind(this.subset[1,c(compound.col,dtxsid.col,lab.compound.col)],
data.frame(Calibration="All Data",
Clint=NaN,
Clint.pValue=NaN))
this.cvt <- subset(this.subset,Sample.Type=="Cvst")
this.blank <- subset(this.subset,Sample.Type=="Blank")
if (length(unique(this.cvt$Dilution.Factor))>1){
stop("calc_clint_point - Cvst samples for `",this.chem,"` have more than one `Dilution.Factor`.")
# browser()
}
df.cvt <- this.cvt$Dilution.Factor[1]
if (length(unique(this.cvt$Hep.Density))>1){
stop("calc_clint_point - Cvst samples for `",this.chem,"` have more than one `Hep.Density`.")
# browser()
}
hep.density <- this.cvt$Hep.Density[1]
if (dim(this.cvt)[1] > 1)
{
this.data <- rbind(this.blank,this.cvt)
this.data[this.data$Sample.Type=="Blank","Test.Nominal.Conc"] <- 0
this.data[this.data$Sample.Type=="Blank","Time"] <- 0
this.data[this.data$Sample.Type=="Cvst","Response"] <-
this.data[this.data$Sample.Type=="Cvst","Response"]*df.cvt
min.response <- sort(unique(this.data$Response))
min.response <- min.response[min.response!=0]
min.response <- min.response[1]
this.data[this.data$Response==0,"Response"] <- min.response/2
num.chem <- num.chem + 1
num.cal <- num.cal + length(unique(this.data[,"Calibration"]))
this.data$Response <- this.data$Response /
mean(subset(this.data,Time==0)$Response)
this.fit <- try(mle(lldecay,
start=list(cal=1, k_elim=0.1, sigma=0.1),
lower=list(cal=0,k_elim=0,sigma = 0.0001)))
this.null <- try(mle(lldecay,
start=list(cal=1, sigma=0.1),
lower=list(cal=0, sigma = 0.0001),
fixed=list(k_elim=0)))
if (!inherits(this.fit, "try-error") & !inherits(this.null, "try-error"))
{
# k_elim has units 1/h, convert to uL/min/10^6 hepatocytes
# hep density is 10^6 hepatocytes/mL
this.row$Clint <- 1000*coef(this.fit)["k_elim"]/hep.density/60
this.row$Clint.pValue <- min(exp(-(AIC(this.null)-AIC(this.fit))),1)
this.row$Fit <- paste(paste(unique(this.data$Test.Nominal.Conc),collapse=", "),"uM")
this.row$AIC <- AIC(this.fit)
this.row$AIC.Null <- AIC(this.null)
this.row$Clint.1 <- NA
this.row$Clint.10 <- NA
this.row$AIC.Sat <- NA
this.row$Sat.pValue <- NA
if (all(c(1,10)%in%unique(this.data$Test.Nominal.Conc)))
{
this.sat.fit <- try(mle(llsatdecay,
start=list(cal=1, k_elim=0.1, sigma=0.1, sat=0.5),
lower=list(cal=0, k_elim=0, sigma = 0.0001, sat=0),
upper=list(sat=1)))
if (!inherits(this.sat.fit, "try-error"))
{
this.row$Clint.1 <- 1000*coef(this.sat.fit)["k_elim"]/hep.density/60
this.row$Clint.10 <- 1000*coef(this.sat.fit)["k_elim"]*
coef(this.sat.fit)["sat"]/hep.density/60
this.row$AIC.Sat <- AIC(this.sat.fit)
if (this.row$Clint.pValue==1) test.AIC <- this.row$AIC.Null
else test.AIC <- this.row$AIC
this.row$Sat.pValue <- min(exp(-(test.AIC-AIC(this.sat.fit))),1)
} else{
# warning message to users to indicate failed fitting
warning("calc_clint_point - Saturation decay fit resulted in an error when fitting. Returning `NA` for the following ourputs:\n\t",
paste0(c("Clint.1","Clint.10","AIC.Sat","AIC","Sat.pValue"),collapse = ", "))
# assign NA's to outputs since saturation decay fit failed
this.row$Clint.1 <- NA
this.row$Clint.10 <- NA
this.row$AIC.Sat <- NA
this.row$Sat.pValue <- NA
# browser()
}
}
if(verbose){
if (!is.null(sig.figs)){
# Print results to desired sig.figs or default of 3
print(paste(
this.row$Compound.Name,
"Cl_int =",
signif(this.row$Clint,sig.figs),
"uL/min/million hepatocytes, p-Value =",
signif(this.row$Clint.pValue,sig.figs),
"."
))
} else {
# If sig.figs = NULL
print(paste(
this.row$Compound.Name,
"Cl_int =",
this.row$Clint,
"uL/min/million hepatocytes, p-Value =",
this.row$Clint.pValue,
"."
))
}
}
} else {
for (col in c("Fit","AIC","AIC.Null","Clint.1","Clint.10","AIC.Sat","Sat.pValue"))
this.row[,col] <- NA
this.row$Clint <- "Linear Regression Failed"
if(verbose){cat("Linear regression failed for:",this.chem,".\n")}
plot(this.data$Time, this.data$Response,main = this.chem)
# browser()
}
out.table <- rbind(out.table, this.row)
}
}
out.table <- as.data.frame(out.table)
rownames(out.table) <- make.names(out.table$Compound.Name, unique=TRUE)
#out.table <- apply(out.table,2,unlist)
out.table[!(out.table[,"Clint"]%in%"Linear Regression Failed"),"Clint"] <-
as.numeric(out.table[
!(out.table[,"Clint"]%in%"Linear Regression Failed"),"Clint"])
out.table[,"Clint.1"] <- as.numeric(out.table[,"Clint.1"])
out.table[,"Clint.10"] <- as.numeric(out.table[,"Clint.10"])
out.table[,"Clint.pValue"] <- as.numeric(out.table[,"Clint.pValue"])
out.table[,"AIC"] <- as.numeric(out.table[,"AIC"])
out.table[,"AIC.Null"] <- as.numeric(out.table[,"AIC.Null"])
out.table[,"AIC.Sat"] <- as.numeric(out.table[,"AIC.Sat"])
out.table[,"Sat.pValue"] <- as.numeric(out.table[,"Sat.pValue"])
if (output.res) {
# Determine the path for output
if (!is.null(OUTPUT.DIR)) {
file.path <- OUTPUT.DIR
} else if (!is.null(INPUT.DIR)) {
file.path <- INPUT.DIR
} else {
file.path <- tempdir()
}
rounded.out.table <- out.table
# Round results to desired number of sig figs
if (!is.null(sig.figs)){
rounded.out.table[!(rounded.out.table[,"Clint"]%in%"Linear Regression Failed"),"Clint"] <-
signif(as.numeric(rounded.out.table[
!(rounded.out.table[,"Clint"]%in%"Linear Regression Failed"),"Clint"]),sig.figs)
rounded.out.table[,"Clint.1"] <- signif(rounded.out.table[,"Clint.1"],sig.figs)
rounded.out.table[,"Clint.10"] <- signif(rounded.out.table[,"Clint.10"],sig.figs)
rounded.out.table[,"Clint.pValue"] <- signif(rounded.out.table[,"Clint.pValue"],sig.figs)
rounded.out.table[,"AIC"] <- signif(rounded.out.table[,"AIC"],sig.figs)
rounded.out.table[,"AIC.Null"] <- signif(rounded.out.table[,"AIC.Null"],sig.figs)
rounded.out.table[,"AIC.Sat"] <- signif(rounded.out.table[,"AIC.Sat"],sig.figs)
rounded.out.table[,"Sat.pValue"] <- signif(rounded.out.table[,"Sat.pValue"],sig.figs)
if(verbose){cat(paste0("\nData to export has been rounded to ", sig.figs, " significant figures.\n"))}
}
# Write out a "level-3" file:
write.table(rounded.out.table,
file=paste0(file.path, "/", FILENAME,"-Clint-Level3.tsv"),
sep="\t",
row.names=FALSE,
quote=FALSE)
if(verbose){
# Print notification message stating where the file was output to
cat(paste0("A level-3 file named ",FILENAME,"-Clint-Level3.tsv",
" has been exported to the following directory: ", file.path), "\n")
}
}
if(verbose){
print(paste("Intrinsic clearance (Clint) calculated for",num.chem,"chemicals."))
print(paste("Intrinsic clearance (Clint) calculated for",num.cal,"measurements."))
}
return(out.table)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.