# qc.R
#' Overlapping confidence intervals for LL4 parameters (normal vs. robust)
#'
#' @param fit A `drc` object (4-parameter fit) (or list of `drc` objects)
#' @param robust_fit A `drc` object that has been transformed by robustifyDrc (4-parameter fit) (or list of `drc` objects)
#' @return TRUE if at least one pair of parameter estimates (robust vs. non robust) are non-overlapping.
#' If return_frame is TRUE, then a dataframe with columns indicating which parameters are non-overlappi
#' @examples
#' library(drc)
#' library(tibble)
#' library(magrittr)
#' library(ggplot2)
#' data("curves")
#' model <- curves$standard_model[[1]]
#' robust_model <- curves$standard_model[[1]] %>%
#' assayr2::robustify_drc(form = as.formula(log10_data ~ standard_conc))
#' ll4_robust_parameter_overlap(model, robust_model)
#' ll4_robust_parameter_overlap(list(model, model),
#' list(robust_model, robust_model))
#' @export
ll4_robust_parameter_overlap <- function(fit, robust_fit) {
stopifnot(class(fit) == "drc" | class(fit) == "list")
if (class(fit) == "list" & !(missing(robust_fit))) {
stopifnot(length(fit) == length(robust_fit))
}
# handle missing robust fit
if (missing(robust_fit)) {
if (class(fit) == "drc") {
robust_fit <- robustify_drc(fit, formula(fit))
} else {
robust_fit <- purrr::map(fit, ~robustify_drc(., formula(.)))
}
}
.ll4_parameter_overlap <- function(fit, robust_fit) {
ci_fit <- confint(fit)
ci_rfit <- confint(robust_fit)
res <- data.frame(
parameter = tolower(LETTERS[2:5]),
gt_robust = ci_fit[, 1] > ci_rfit[, 2],
lt_robust = ci_fit[, 2] < ci_rfit[, 1]
) %>%
dplyr::mutate(overlapping = !(gt_robust | lt_robust))
res %<>% dplyr::filter(!(is.nan(overlapping)) & !(is.na(overlapping)))
!(all(res$overlapping))
}
# one pair of models
if (class(fit) == "drc" & class(robust_fit) == "drc") {
.ll4_parameter_overlap(fit, robust_fit)
} else { # > 1 pair of models
purrr::map2_lgl(fit, robust_fit, ~.ll4_parameter_overlap(.x, .y))
}
}
#' Check for outliers using weights from Tukey's Biweight mean
#'
#' @param robust_fit A model object of class "drc" that has been processed for
#' robust estimation.
#' @param threshold A numeric vector of length 1 indicating the maximum weight
#' that a sample is labeled as an outlier
#' @return boolean TRUE if outliers are present
#' @export
detect_outliers <- function(robust_fit, threshold = 0.1) {
any(robust_fit$weights < threshold)
}
#' Check if an asymptote estimate for a 4-parameter log-logistic function
#' is considered to be noisy
#'
#' @param object drc object 4-parameter log-logistic
#' @param bound `lower` or `upper` for the lower or upper bound of the CI
#' @return boolean TRUE if the ic50 estimate is poor with respect to the bound specified
#' @examples
#' data("isobutyryl_curves")
#' poor_ic50_estimate(isobutyryl_curves[[5]], bound = "upper")
#' @export
poor_ic50_estimate <- function(object, bound) {
stopifnot(bound %in% c("lower", "upper"))
ci <- confint(object)
cutoffs <- list(
upper = coef(object)[4] * 3,
lower = coef(object)[4] / 3
)
flag <- switch(bound,
upper = ci[4, 2] > cutoffs[[bound]], # ic50 upper,
lower = ci[4, 1] < cutoffs[[bound]] # ic50 lower
)
if (is.nan(flag) | is.na(flag)) { # must be poor, because CI is unbounded
flag <- TRUE
}
flag
}
#' Check if a parameter estimate is unbounded
#'
#' @param object A model object of class "drc", fit with 4-param log-logistic function.
#' @param parameter A character string specifying the paramter to check.
#' Available option are upper asymtote ("upper"), lower asymtote ("lower),
#' IC50 ("ic50"), or slope coefficient ("slope").
#' @return boolean TRUE if the estimate for the parameter is unbounded
#' @export
unbounded_parameter_ci <- function(object, parameter) {
stopifnot(parameter %in% c("upper", "lower", "ic50", "slope"))
coefs <- coef(object)
if (coefs[3] > coefs[2]) {
row_number <- switch(parameter,
upper = 3, lower = 2, slope = 1, ic50 = 4
)
} else {
row_number <- switch(parameter,
upper = 2, lower = 3, slope = 1, ic50 = 4
)
}
ci <- suppressWarnings(confint(object))
any(is.nan(ci[row_number, ]))
}
#' Curve span
#'
#' Calculate the range of the curve relative to the asymptotes
#'
#' @param object drc object, 4-parameter log-logistic function
#' @importFrom stats coef fitted
#' @export
curve_span <- function(object) {
curve_range <- range(
fitted(object)[which.min(object$dataList$dose)],
fitted(object)[which.max(object$dataList$dose)]
) %>%
diff() %>%
abs()
model_width <- abs(coef(object)[3] - coef(object)[2])
unname(curve_range / model_width)
}
#' Check if curve is saturated
#'
#' @param object drc object, 4-parameter log-logistic function
#' @param threshold Ask Justin.
#' @return TRUE if there is evidence to believe the curve
#' is not saturated by the data
#' @export
unsaturated_curve <- function(object, threshold = 0.9) {
curve_span(object) < threshold
}
#' Calculate Normalized-Root-Mean-Square-Deviation (NRMSD) for drc model.
#'
#' @param object A model object of class "drc"
#' @param cutoff_hep A cutoff value for hepatocytes in PAH experiments.
#' @param cutoff_huh A cutoff value for HUH7 in PAU experiments.
#' @examples
#' data("nrmsd_qc_positive_cases")
#' nrmsd_qc(nrmsd_qc_positive_cases[[1]])
#' @export
nrmsd_qc <- function(object, cutoff_hep = .1433786, cutoff_huh = .1647703) {
# why is this an anoymous function?
.nrmsd <- function(object) {
sum_sq_resids <- object$predres[, 2] %>% sum(.^2)
rmsd <- sqrt(sum_sq_resids / nrow(object$predres))
y_range <- range(object$predres[, 1])[2] - range(object$predres[, 1])[1]
return(rmsd / y_range)
}
# determine which cutoff value to use
if (grepl("PAH", unique(object$origData$run))) {
cutoff <- cutoff_hep
}
else {
cutoff <- cutoff_huh
}
# return logical
return(.nrmsd(object) > cutoff)
}
#' Encode results of QC check in an integer warning code and human-readable message
#'
#' @param object drc object fit as 4 -parameter log-logistic function
#' @return character vector of error codes separated by a comma
#' @examples
#' data("isobutyryl_curves")
#' ll4_qc_warnings(isobutyryl_curves[[5]])
#' @export
ll4_qc_warnings <- function(object) {
robust_fit <- robustify_drc(object, formula(object))
in_binary <- c(
unbounded_parameter_ci(object, parameter = "slope"),
unbounded_parameter_ci(object, parameter = "lower"),
unbounded_parameter_ci(object, parameter = "upper"),
unbounded_parameter_ci(object, parameter = "ic50"),
ll4_robust_parameter_overlap(object, robust_fit),
detect_outliers(robust_fit, threshold = 0.1),
unsaturated_curve(object),
poor_ic50_estimate(object, bound = "upper"),
poor_ic50_estimate(object, bound = "lower"),
nrmsd_qc(object)
)
which(in_binary == TRUE) %>% paste(collapse = ",")
}
#' Calculate the weighted Shannon entropy score (WES)
#'
#' Corresponds to the WES calculation outlined in Shockley K.R., 2014.
#'
#' @param resp A numeric vector of values representing the dose response measurements.
#' @param pos_ctl_fit An object of class drc fit with \code{drm()} and the argument \code{fct = LL.4()}.
#' @param curve_type String. Either "negative", "positive", or "auto" indicating the (expected) sign of the slope of the curve.
#' @param LLoD Numeric. The lower limit of detection.
#' @importFrom stats fitted
#' @export
wes <- function(resp, pos_ctl_fit, curve_type = "auto", LLoD = NULL) {
curve_types <- c("positive", "negative", "auto")
ind <- pmatch(curve_type, curve_types)
if (is.na(ind)) {
stop(
"invalid curve type:", paste0(' "', curve_type, '"'),
". Must be one of ", paste(paste0('"', curve_types, '"'), collapse = " ")
)
}
curve_type <- curve_types[ind]
baseline <- fitted(pos_ctl_fit)[which.min(pos_ctl_fit$dataList$dose)]
termline <- fitted(pos_ctl_fit)[which.max(pos_ctl_fit$dataList$dose)]
if (curve_type == "auto") {
if (termline > baseline) {
curve_type <- "positive"
} else {
curve_type <- "negative"
}
}
if (!is.null(LLoD)) {
if (curve_type == "positive") {
LLoD <- baseline * 1.25
} else {
LLoD <- termline * 1.25
}
}
r <- abs(resp - baseline) / abs(termline - baseline)
p <- r / sum(r)
w <- rep(1, length(r))
w2 <- rep(r / LLoD)
ind <- r < LLoD
w[ind] <- w2[ind]
wes <- w * p * log2(p)
wes[is.nan(wes)] <- 0
wes <- -1 * sum(wes)
return(wes)
}
.calc_mb <- function(object, s, i) {
baseline <- fitted(object)[which.min(object$dataList$dose)]
hs <- fitted(object)[which.max(object$dataList$dose)] - baseline
b <- object$coefficients["b:(Intercept)"]
c <- object$coefficients["c:(Intercept)"]
d <- object$coefficients["d:(Intercept)"]
e <- object$coefficients["e:(Intercept)"]
e <- log(e)
if (hs > 0) {
p <- ((d - c) * (1 - s)) / (baseline + i - c + c * s) - 1
} else {
p <- ((d - c) * (1 + s)) / (baseline - i - c - c * s) - 1
}
if (p <= 0) {
return(NaN)
}
mb <- (1 / b) * log(p) + e
return(unname(mb))
}
.calc_auc <- function(object, lb, ub) {
b <- object$coefficients["b:(Intercept)"]
c <- object$coefficients["c:(Intercept)"]
d <- object$coefficients["d:(Intercept)"]
e <- object$coefficients["e:(Intercept)"]
e <- log(e)
upper <- (c * ub) - ((d - c) / b) * log(1 + exp(b * (e - ub)))
lower <- (c * lb) - ((d - c) / b) * log(1 + exp(b * (e - lb)))
area <- unname(upper - lower)
return(area)
}
.auc_low_error <- function(object, lb, ub, s, i) {
b <- object$coefficients["b:(Intercept)"]
c <- object$coefficients["c:(Intercept)"]
d <- object$coefficients["d:(Intercept)"]
e <- object$coefficients["e:(Intercept)"]
e <- log(e)
upper <- (1 - s) * ((c * ub) - ((d - c) / b) * log(1 + exp(b * (e - ub)))) - (i * ub)
lower <- (1 - s) * ((c * lb) - ((d - c) / b) * log(1 + exp(b * (e - lb)))) - (i * lb)
area <- unname(upper - lower)
return(area)
}
.auc_up_error <- function(object, lb, ub, s, i) {
b <- object$coefficients["b:(Intercept)"]
c <- object$coefficients["c:(Intercept)"]
d <- object$coefficients["d:(Intercept)"]
e <- object$coefficients["e:(Intercept)"]
e <- log(e)
upper <- (1 + s) * ((c * ub) - ((d - c) / b) * log(1 + exp(b * (e - ub)))) + (i * ub)
lower <- (1 + s) * ((c * lb) - ((d - c) / b) * log(1 + exp(b * (e - lb)))) + (i * lb)
area <- unname(upper - lower)
return(area)
}
#' Calculate the error-ajusted activity area of a dose response curve
#'
#' Adjusted activity area corresponds to the area between the curve and the
#' line representing the baseline concentration, minus the overlapping error region.
#'
#' @param object An object of class drc fit with \code{drm()} and the argument \code{fct = LL.4()}.
#' @examples
#' fit <- drc::drm(disp~wt, data=mtcars, fct=drc::LL.4())
#' adj_activity_area(fit)
#' @importFrom stats anova fitted lm coef
#' @importFrom dplyr mutate
#' @export
adj_activity_area <- function(object) {
resp <- data.frame(object$predres)
resp <- dplyr::mutate(resp, abs_dev = abs(Residuals))
m0 <- lm(abs_dev ~ 1, data = resp)
m1 <- lm(abs_dev ~ Predicted.values, data = resp)
p <- anova(m0, m1)$`Pr(>F)`[2]
min_pred <- min(resp$Predicted.values) * coef(m1)[2] + coef(m1)[1]
if (p <= 0.05 & coef(m1)[2] > 0 & min_pred > 0) {
slope <- coef(m1)[2]
intercept <- coef(m1)[1]
} else {
slope <- 0
intercept <- coef(m0)[1]
if (intercept == 0) {
return(activity_area(object))
}
}
dose_range <- range(object$dataList$dose)
ub <- log(dose_range[2])
lb <- log(dose_range[1])
mb <- .calc_mb(object, slope, intercept)
if (is.nan(mb)) {
return(0)
}
if ((mb > ub) | (mb < lb)) {
return(0)
}
baseline <- fitted(object)[which.min(object$dataList$dose)]
hs <- fitted(object)[which.max(object$dataList$dose)] - baseline
if (hs > 0) {
res <- .auc_low_error(object, mb, ub, slope, intercept) - (baseline * (ub - mb))
} else {
res <- (baseline * (ub - mb)) - .auc_up_error(object, mb, ub, slope, intercept)
}
return(res)
}
#' Calculate the activity area of a dose response curve
#'
#' Activity area corresponds to the area between the curve and the
#' line representing the baseline concentration.
#'
#' @param object An object of class drc fit with \code{drm()} and the argument \code{fct = LL.4()}.
#' @examples
#' \dontrun{
#' fit <- drc::drm(disp~wt, data=mtcars, fct=drc::LL.4())
#' activity_area(fit)
#' }
#' @importFrom stats fitted
#' @export
activity_area <- function(object) {
dose_range <- range(object$dataList$dose)
ub <- log(dose_range[2])
lb <- log(dose_range[1])
area <- .calc_auc(object, lb, ub)
baseline <- fitted(object)[which.min(object$dataList$dose)]
resp_area <- abs((baseline * (ub - lb)) - area)
return(resp_area)
}
#' Calculate the maximum response of a DRC
#'
#' Returns the absolute value of the range of the curve
#'
#' @param object An object of class drc fit with \code{drm()} and the argument \code{fct = LL.4()}.
#' fit <- drc::drm(disp~wt, data=mtcars, fct=drc::LL.4())
#' RMAX(fit)
#' @importFrom stats fitted
#' @export
RMAX <- function(object) {
res <- fitted(object) %>%
range() %>%
diff() %>%
abs()
return(res)
}
# Utilities for running a full QC check for a set of dose-response curves ----
#' Get a table of dose-response curve data that should
#' be checked for quality
#' @param db_path (character) File path to a SQLite database with QC messages that correspond to integer identifiers
#' @param rds_path (character) File path to RDS file with all dose-response that exists for HST-explorer
#' @param analytes (character) Identifiers for analytes that should be checked see assayr2::see_whitelist()$targ
#' for valid identifiers
#' @param debug (bool) set to TRUE if qc_columns should be added for debugging
#' @return list where first element is a table of curves that need QC, second
#' element is a table of curves that do not need QC
#' @export
get_curves_needing_qc <- function(db_path = "/data/shiny/HST_explorer/qc-warnings.sqlite",
rds_path = "/data/shiny/HST_explorer/nate_db.RDS",
analytes = c("13C-Isobutyryl"),
debug = FALSE) {
curves <- readRDS(rds_path)
all_curve_ids <- curves %>% dplyr::count(
tx_run,
donor,
tx_challenge_abstract,
tx_cmpd,
isotope, targ,
media
)
# assume nate_db.RDS has QC column
if (debug) {
curves$qc <- FALSE
curves$qc_warnings <- ""
}
if (!missing(analytes) | is.null(analytes)) {
# Force qc flag to TRUE to avoid a QC check for analytes which we
# have not investigated
curves$qc <- ifelse(!(curves$targ %in% analytes),
TRUE, curves$qc
)
curves_needing_qc <- dplyr::filter(
curves,
targ %in% analytes,
!qc
)
curves %<>% dplyr::filter(
!(targ %in% analytes),
qc
)
} else {
curves_needing_qc <- dplyr::filter(curves, !qc)
curves %<>% dplyr::filter(qc)
}
list(curves_needing_qc, curves)
}
#' Run QC check for a set of dose-response curves
#'
#' @param curves_needing_qc (data.frame) Table of data
#' for curves which need QC (first element of get_curves_needing_qc output)
#' @param curves (data.frame) Table of data for curves
#' which do not need QC (second element of get_curves_needing_qc output)
#' @param analytes (character) Identifiers for analytes that should be checked see assayr2::see_whitelist()$targ
#' for valid identifiers
#' @param db_path A valid path to SQLite DB.
#' @return Data.frame with original data combined with data that have
#' been checked for dose-response curve quality
#' @export
drc_qc <- function(curves_needing_qc, curves, analytes, db_path = "/data/shiny/HST_explorer/qc-warnings.sqlite") {
# update QC table if new files have been added
if (nrow(curves_needing_qc) > 0) {
# Get table of unique curves with relevant identifiers
labeled <- tibble::as.tibble(curves_needing_qc) %>%
tidyr::nest(-tx_run, -targ, -donor, -media)
# Convert concentration to numeric ======================
labeled$data %<>% purrr::map(~dplyr::mutate(., tx_conc_numeric = as.numeric(tx_conc %>% as.character())))
labeled$data %<>% purrr::map(~dplyr::mutate(.,
tx_conc_numeric = ifelse(tx_conc_numeric == 0,
assayr2::new_zeros(tx_conc_numeric), tx_conc_numeric
)
))
# keep only 4-point curves
# labeled <- labeled[which(purrr::map_lgl(labeled$data, ~ nrow(.) >= 30)),]
# Fit dose-response curves for each data set =============
prop_drc <- purrr::map(labeled$data, ~
tryCatch({
drc::drm(conc_corrected ~ tx_conc,
data = .,
fct = drc::LL.4(),
control = drc::drmc(errorm = F, useD = FALSE)
)
},
error = function(e) {
NA
}
))
print("Finished fitting models...")
# Run suite of QC-tests on each model ====================
warning_codes <- purrr::map(prop_drc, ~tryCatch({
assayr2::ll4_qc_warnings(.)
}, error = function(e) {
NA
}))
labeled$qc_code <- unlist(warning_codes)
rm(warning_codes)
# Map warning codes to human-readable messages ===========
db_conn <- RSQLite::dbConnect(RSQLite::SQLite(), db_path)
warning_messages <- RSQLite::dbGetQuery(db_conn, "SELECT * FROM warning_messages")
RSQLite::dbDisconnect(db_conn)
warning_code2message <- function(x) {
codes <- strsplit(x, split = ",")[[1]] %>% unlist()
messages <- dplyr::filter(warning_messages, warning_code %in% codes)
paste(messages$warning_message, sep = "\n", collapse = "\n")
}
labeled$qc_warning <- purrr::map_chr(
labeled$qc_code,
~warning_code2message(.)
)
# Restore identifier variables
cols <- Filter(
function(x) !(x %in% c("data", "tx_conc_numeric")),
colnames(labeled)
)
for (i in 1:nrow(labeled)) {
for (col in cols) {
labeled$data[[i]][col] <- labeled[[col]][i]
}
}
# Flatten data that has just been checked
qcd_curves <- do.call("rbind", labeled$data)
qcd_curves$tx_conc_numeric <- NULL
# Order of columns may have changed, so
# make them match original data
qcd_curves <- qcd_curves[colnames(curves_needing_qc)]
# Check if we still have the same number of unique curves as
# we started with
validate_n_curves <- function(qcd_curves, curves_needing_qc) {
a <- n_new_qcd_curves <- qcd_curves %>% dplyr::count(
tx_run,
donor,
tx_challenge_abstract,
tx_cmpd,
isotope, targ,
media
) %>% nrow()
b <- curves_needing_qc %>% dplyr::count(
tx_run,
donor,
tx_challenge_abstract,
tx_cmpd,
isotope, targ,
media
) %>% nrow()
assertthat::are_equal(a, b, msg = "Could not QC one or more dose-response curves.")
}
valid_curves <- validate_n_curves(qcd_curves, curves_needing_qc)
# Combine with original data (13C-Isobutyryl curves that have already been checked
# and curves for all analytes except for 13C-Isobutyryl)
do.call("rbind", list(curves, qcd_curves))
} else {
stop("Empty data.frame passed as argument for curves_needing_qc")
}
}
#' Wrapper to run entire QC check for a given machine
#' @param db_path (character) File path to a SQLite database with QC messages that correspond to integer identifiers
#' @param rds_path (character) File path to RDS file with all dose-response that exists for HST-explorer
#' @param analytes (character) Identifiers for analytes that should be checked see assayr2::see_whitelist()$targ
#' for valid identifiers
#' @param backup_rds_dir Ask Justin.
#' @param debug (bool) set to TRUE if qc_columns should be added for debugging
#' @export run_qc_check
run_qc_check <- function(db_path, rds_path, analytes, backup_rds_dir = ".", debug = F) {
# Backup original =================================
d <- readRDS(rds_path)
human_time <- Sys.time() %>% gsub(" ", "_", .) %>% gsub("\\:", "-", .)
outfile <- file.path(backup_rds_dir, paste0("nate_db", "_", human_time, ".RDS"))
saveRDS(d, outfile)
# Run QC check ====================================
res <- get_curves_needing_qc(db_path, rds_path, analytes, debug = debug)
res <- drc_qc(res[[1]], res[[2]], analytes)
# print(head(res))
# Save data with new QC ==========================
# saveRDS(res, file = rds_path)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.