Nothing
#' Run parametric fits using types of models on concentration-response datasets
#'
#' Confidence intervals of activity metrics can be obtained through bootstrap approach.
#' The bootstrap samples are generated by adding the residuals (the difference between the original responses and the Hill fit) to
#' the fitted response (strictly to Hill equation).
#'
#' @param d Datasets with concentration-response data.
#' An example is [zfishbeh].
#' mask column is optional.
#' @inheritParams fit_modls
#' @param keep_sets Output datasets. Multiple values are allowed.
#' Default values are fit_set and resp_set.
#' fit_set is a must.
#' \itemize{
#' \item fit_set: a tibble with output from model fits
#' \item resp_set: a tibble with fitted response data from the winning model
#' }
#' @param n_samples NULL (default) for no bootstrap samples are generated or number of samples to be generated from bootstrapping.
#' When n_samples is not NULL, fit_modls = "hill" will be set automatically.
#' @param ... The named input configurations for replacing the default configurations.
#' The input configuration needs to add model type as the prefix.
#' For example, hill_pdir = -1 will set the Hill fit only to the decreasing direction.
#'
#' @return A list of named components: result and result_nested.
#' The result component is also a list of output sets depending on the parameter, *keep_sets*.
#' The result_nested component is a tibble with input data nested in a column, input,
#' and output data nested in a column, output.
#'
#' The prefix of the column names in the *fit_set* are the used models.
#' The *win_modl* is the winning model.
#'
#' @export
#'
#' @seealso [fit_modls()] for model fit information and the following analyses using [summarize_fit_output()].
#' for dichotomous response (see [zfishdev]), use `create_dataset()` first.
#'
#' @examples
#'
#' # default
#' fitd <- run_fit(zfishbeh)
#'
#' # use only hill model and fit only to the decreasing direction, keep only the fit_set output
#' fitd <- run_fit(zfishbeh, modls = "hill", keep_sets = "fit_set", hill_pdir = -1)
#'
#' # fit to the bootstrap samples
#' fitd <- run_fit(zfishbeh, n_samples = 2)
#'
#'
run_fit <- function(d, modls = c("hill", "cnst"), keep_sets = c('fit_set', 'resp_set'), n_samples = NULL, ...) {
# argument checks
args <- list(...)
d <- .check_dat_base(d, check_one_concresp = FALSE)
d <- create_resp_mask(d, mask = 0) #only user put the mask column, disable the
keep_sets <- .check_keep_sets(keep_sets, c('fit_set', 'resp_set'), must_set = "fit_set")
modls <- match.arg(modls, c("hill", "cnst"), several.ok = TRUE)
n_samples <- .check_n_samples(n_samples)
# currently only hill can be used to generate simulated samples
if (!is.null(n_samples)) modls <- 'hill'
# nest the data and calculate fits
nestd <- nest_fit_dataset(d, nest_cols = c("endpoint", "chemical"))
fitd <- cal_fit_dataset(nestd = nestd, modls = modls, args = args, fit_type = "original")
# run simulated datasets
if(!is.null(n_samples)) {
simud <- create_hillsimu_dataset(fitd = fitd, n_samples = n_samples, pdir = args$hill_pdir)
nestd <- nest_fit_dataset(simud, nest_cols = c("endpoint", "chemical", "sample_id"))
fitd <- cal_fit_dataset(nestd = nestd, modls = modls, args = args, fit_type = "hill_simu")
}
# clean the output and unnest the results (sets)
fitd_clean <- clean_fit_output(fitd = fitd, keep_sets = keep_sets)
sets <- merge_fit_output(fitd_clean, keep_sets = keep_sets)
# return the list
result <- list(result = sets, result_nested = fitd)
class(result) <- c("rcurvep", class(result))
return(result)
}
#' Nest the dataset for preparing the ensuing calculations.
#'
#' @inheritParams run_fit
#' @param nest_cols Column names of the dataset.
#'
#' @return A nested tibble with a new column, input.
#' @keywords internal
#' @noRd
#'
nest_fit_dataset <- function(d, nest_cols) {
nest_colsq <- rlang::syms(c(nest_cols, 'conc'))
result <- d %>%
dplyr::arrange(!!!nest_colsq) %>%
tidyr::nest(input = -tidyselect::one_of(nest_cols))
return(result)
}
#' Perform parametric fitting using types of models on the nested dataset.
#'
#' The calculation can be performed either on the original dataset
#' or on the simulated dataset (Hill only).
#'
#' @param nestd The nested data from \code{nest_fit_dataset}.
#' @inheritParams fit_modls
#' @param args The additional arguments for running the \code{run_fit}.
#' @param fit_type The fitting type. Either original or hill_simu. Only value is allowed.
#'
#' @return A tibble.
#' @keywords internal
#' @noRd
#'
#'
cal_fit_dataset <- function(nestd, modls, args, fit_type = c("original", "hill_simu")) {
if (fit_type == "original") {
result <- nestd %>%
dplyr::mutate(
output = purrr::map(
.data$input,
~ do.call(
fit_modls,
c(list(Conc = .x$conc, Resp = .x$resp, Mask = .x$mask, modls = modls), args)
)
)
)
} else if (fit_type == "hill_simu") {
args[['hill_pdir']] <- NULL
result <- nestd %>%
dplyr::mutate(
output = purrr::map(
.data$input,
~ do.call(
fit_modls,
c(list(Conc = .x$conc, Resp = .x$resp, hill_pdir = unique(.x$direction), Mask = NULL, modls = modls), args))
)
)
}
return(result)
}
#' Clean the dataset results after fitting based on multiple values of keep_sets.
#'
#' @param fitd The output from \code{cal_fit_dataset()}.
#' @inheritParams run_fit
#' @param thr_resp The response cutoff to calculate the potency. Default = NULL.
#' @param perc_resp The percentage cutoff to calculate the potency. Default = NULL.
#'
#' @return A tibble with results nested in new columns.
#' fit_set to out_paras.
#' resp_set to out_resps.
#' act_set to in_summary and activity.
#'
#' @keywords internal
#' @noRd
#'
clean_fit_output <- function(fitd, keep_sets, thr_resp = NULL, perc_resp = NULL) {
result <- suppressMessages(
purrr::reduce(
purrr::map(
keep_sets, clean_fit_output_in,
fitd = fitd, thr_resp = thr_resp, perc_resp = perc_resp),
dplyr::left_join)
)
return(result)
}
#' Clean the dataset results after fitting based on one keep_set.
#'
#' @inheritParams summarize_fit_output
#' @param keep_set Only value is allowed, fit_set, resp_set, act_set.
#'
#' @return A tibble with nested results in new columns.
#' fit_set to out_paras.
#' resp_set to out_resps.
#' act_set to in_summary and activity.
#'
#' @keywords internal
#' @noRd
#'
clean_fit_output_in <- function(fitd, keep_set, thr_resp = NULL, perc_resp = NULL) {
if (keep_set == "fit_set") {
result <- fitd %>%
dplyr::mutate(
out_paras = purrr::map(.data$output, extract_fit_para)
)
}
if (keep_set == 'resp_set') {
result <- fitd %>%
dplyr::mutate(
out_resps = purrr::map2(.data$input, .data$output, extract_fit_resp)
)
}
if (keep_set == "act_set") {
result <- fitd %>%
dplyr::mutate(
in_summary = purrr::map(.data$input, extract_input_summary),
activity = purrr::map2(.data$input, .data$output, extract_fit_activity, thr_resp = thr_resp, perc_resp = perc_resp)
)
}
result <- result %>% dplyr::select(-.data$input, -.data$output)
return(result)
}
#' Merge the nested results from fitting into tibble.
#'
#' @param fitd The output from \code{clean_fit_output()}.
#' @inheritParams run_fit
#'
#' @return A list of tibbles (unnested) named by the keep_sets.
#' @keywords internal
#' @noRd
merge_fit_output <- function(fitd, keep_sets) {
tbl_names <- list(
fit_set = c("out_paras"),
resp_set = c("out_resps"),
act_set = c("in_summary", "activity")
)
result <- purrr::map(
keep_sets, function(x, tbl_names, obj)
suppressWarnings(tidyr::unnest(
obj %>% dplyr::select(tidyselect::one_of(c("endpoint", "chemical", "sample_id", tbl_names[[x]])))
)),
tbl_names = tbl_names,
obj = fitd
) %>% rlang::set_names(keep_sets)
return(result)
}
#' Create a simulated dataset based on Hill fit results.
#'
#' @param fitd The output from the \code{cal_fit_dataset()}.
#' @inheritParams run_fit
#' @param pdir The preferred direction of the fit.
#'
#' @return A tibble with new columns, including sample_id and direction.
#' @keywords internal
#' @noRd
#'
create_hillsimu_dataset <- function(fitd, n_samples, pdir) {
# a new column simud is added, which has a nested tibble
# The nested tibble has a new sample_id column
result <- fitd %>%
dplyr::mutate(
simud = purrr::map2(
.data$input, .data$output,
~ dplyr::bind_rows(
replicate(n_samples, create_hillsimu_resp(.x, .y), simplify = FALSE),
.id = "sample_id"))
)
# create direction column and remove the original fit input and output
result <- result %>%
dplyr::mutate(
direction = purrr::map_dbl(
.data$output, get_hillfit_direction, pdir = pdir)
) %>%
dplyr::select(-.data$input, -.data$output) %>%
tidyr::unnest(cols = "simud") %>%
dplyr::select(-.data$mask) # do not need the mask anymore
return(result)
}
#' Create simulated responses by the hill fit results.
#'
#' The new responses is the fitted responses + sampled residuals.
#'
#' @param inp A list (tibble) of with conc and resp columns.
#' @param out A list of named models.
#'
#' @return The vector of responses.
#' @keywords internal
#' @noRd
create_hillsimu_resp <- function(inp, out) {
result <- inp
yhat <- tcplHillVal(inp$conc, out[['hill']]$tp, out[['hill']]$ga, out[['hill']]$gw)
resid <- inp$resp - yhat
sampleresids <- sample(resid,length(resid),replace = TRUE)
this_y <- yhat + sampleresids
result$resp <- this_y
return(result)
}
#' Get the direction to the next round of Hill fitting.
#'
#' If pdir is null, then use the fit results to get the direction.
#' Otherwise, use the user input.
#'
#' @param out The list of models. The component of hill is necessary.
#' @param pdir The preferred direction.
#'
#' @return An integer 1 or -1.
#' @keywords internal
#' @noRd
#'
get_hillfit_direction <- function(out, pdir) {
tp <- out[['hill']]$tp
# if not null and
if (!is.null(pdir)) {
if (length(pdir) == 1) {
return(pdir)
}
}
result <- 1
if(!is.na(tp)) {
if (tp < 0) result <- -1
}
return(result)
}
#' Summarize the results from the parametric fitting using types of models
#'
#' The function first extracts the activity data based on the fit the supplied input parameters.
#' In addition, summary of activity data (e.g., confidence interval, hit confidence) can be produced.
#'
#' A tibble, act_set is generated. When (extract_only = FALSE), a tibble, act_summary is generated with confidence intervals of the activity metrics.
#' The quantile approach is used to calculate the confidence interval.
#' For potency activity metrics, if value is NA, highest tested concentration is used in the summary.
#' For other activity metrics, if value is NA, 0 is used in the summary.
#'
#' @param d The output from the [run_fit()].
#' @param thr_resp The response cutoff to calculate the potency. Default = NULL.
#' @param perc_resp The percentage cutoff to calculate the potency. Default = NULL.
#' @param ci_level The confidence level for the activity metrics. Default is = 0.95.
#' @param extract_only Whether act_summary data should be produced. Default = FALSE.
#'
#' @return A list of named components: result and result_nested (and act_summary).
#' The result and result_nested are the copy from the output of [run_fit()].
#' An act_set is added under the result component.
#' If (extract_only = FALSE), an act_summary is added.
#' @export
#' @seealso [run_fit()]
#' @examples
#'
#' # generate some fit outputs
#'
#' ## fit only
#' fitd1 <- run_fit(zfishbeh)
#'
#' ## fit + bootstrap samples
#' fitd2 <- run_fit(zfishbeh, n_samples = 3)
#'
#'\donttest{
#' # only to extract the activity data
#' sumd1 <- summarize_fit_output(fitd1, extract_only = TRUE)
#'
#' # calculate EC20 instead of default EC10
#' sumd1 <- summarize_fit_output(fitd1, extract_only = TRUE, perc_resp = 20)
#'
#' # calculate POD using a higher noise level (e.g., 40)
#' ## this number depends on the response unit
#' sumd1 <- summarize_fit_output(fitd1, extract_only = TRUE, thr_resp = 40)
#'
#' # calculate confidence intervals based on the bootstrap samples
#' sumd2 <- summarize_fit_output(fitd2)
#'
#'}
#'
summarize_fit_output <- function(d, thr_resp = 20, perc_resp = 10, ci_level = 0.95, extract_only = FALSE) {
fitd <- d$result_nested
fitd_clean <- clean_fit_output(fitd, keep_sets = "act_set", thr_resp = thr_resp, perc_resp = perc_resp)
act_set <- merge_fit_output(fitd_clean, keep_sets = c("act_set"))
d[['result']] <- c(d$result, act_set)
if (!extract_only) {
lsets <- d$result
base_cols <- get_base_cols(lsets)
lsets_n <- get_nested_joined_sets(lsets, base_cols)
lsets_n <- make_act_na_highconc(lsets_n)
lsets_n <- make_act_na_0(lsets_n)
lsets_n <- summarize_actsets(lsets_n, ci_level = ci_level)
d[['act_summary']] <- unnest_joined_sets(lsets_n, base_cols, "act_summary")
}
class(d) <- c("rcurvep", class(d))
return(d)
}
#' Make activity NA as 0 in a dataset.
#'
#' @param nestd The output from \code{get_nested_joined_sets()}
#'
#' @return The original tibble with a added column, act_set.
#' @keywords internal
#' @noRd
make_act_na_0 <- function(nestd) {
result <- nestd %>%
dplyr::mutate(
act_set = purrr::map(.data$act_set, make_act_na_0_in)
)
return(result)
}
#' Make activity NA as 0 for some selected columns.
#'
#' @param act_set A tibble.
#'
#' @return A tibble with values in the selected columns modified.
#' @keywords internal
#' @noRd
#'
#'
make_act_na_0_in <- function(act_set) {
potency_cols <- c("slope", "Emax", "max_resp", "min_resp")
suppressWarnings(result <- act_set %>%
dplyr::mutate_at(
dplyr::vars(tidyselect::one_of(potency_cols)),
~ replace(.x, is.na(.x), 0)
))
return(result)
}
#' Convert the output data of a model to the structure of tibble.
#'
#' The model type information is added as the prefix of the column names.
#'
#' @param modl A list from one model.
#'
#' @return A tibble with parameters.
#' @keywords internal
#' @noRd
#'
tibble_fit_para <- function(modl) {
pars <- tibble::as_tibble(modl)
modl_type <- pars$modl
result <- pars %>%
dplyr::select(-.data$modl) %>%
dplyr::rename_all(.funs = list(~ stringr::str_c(modl_type, "_", .)))
return(result)
}
#' Convert the output data from a list of models to the structure of tibble.
#'
#' A column, win_modl, indicating the winning model, is also added.
#'
#' @param lmodl A list of models.
#'
#' @return A tibble with output parameters from types of model fitting + a column, win_modl.
#' @keywords internal
#' @noRd
extract_fit_para <- function(lmodl) {
result <- purrr::map_dfc(lmodl, tibble_fit_para)
result[['win_modl']] <- lmodl[[1]]$modl
return(result)
}
#' Extract fit responses based on the winning model
#'
#' If the winning model is cnst, the median of responses is reported per concentration.
#'
#' @inheritParams create_hillsimu_resp
#'
#' @return The tibble of the input with an added column, fitted_resp.
#' @keywords internal
#' @noRd
extract_fit_resp <- function(inp, out) {
result <- inp
win_modl <- out[[1]]
fit_resps <- rep(as.numeric(NA), length(inp$conc))
if (win_modl$modl == "cnst") {
fit_resps <- rep(median(inp$resp), length(inp$conc))
} else if (win_modl$modl == "hill") {
fit_resps <- tcplHillVal(inp$conc, win_modl$tp, win_modl$ga, win_modl$gw)
}
# add the results to a new column
result[['fitted_resp']] <- fit_resps
return(result)
}
#' Extract activity from the model fits.
#'
#'
#' If the cnst is the winning model and the median of responses larger than the thr_resp,
#' it is considered as an hit.
#' The median of responses is reported as Emax and
#' the lowest concentration is the other potenty metrics.
#' The hit is considered having POD < max tested concentration.
#'
#' @inheritParams create_hillsimu_resp
#' @inheritParams summarize_fit_output
#'
#' @return A list of activity metrics + hit.
#' @keywords internal
#' @noRd
#'
extract_fit_activity <- function(inp, out, thr_resp, perc_resp) {
win_modl <- out[[1]]
med_resp <- median(inp$resp)
EC50 <- NA_real_
Emax <- NA_real_
POD <- NA_real_
ECxx <- NA_real_
slope <- NA_real_
hit <- 0
if (win_modl$modl == "cnst") {
if (abs(med_resp) > thr_resp) {
hit <- 1
min_conc <- min(inp$conc)
Emax <- med_resp
EC50 <- min_conc
POD <- min_conc
ECxx <- min_conc
}
} else if (win_modl$modl == "hill") {
Emax <- win_modl$tp
EC50 <- win_modl$ga
slope <- win_modl$gw
if (!is.na(Emax)) {
# perc_resp
ECxx <- tcplHillACXX(perc_resp, win_modl$tp, win_modl$ga, win_modl$gw)
# thr_resp
if (Emax < 0) thr_resp <- thr_resp*-1
if (abs(Emax) > abs(thr_resp)) POD <- tcplHillConc(thr_resp, win_modl$tp, win_modl$ga, win_modl$gw)
# hit?
if (!is.na(POD) && POD < max(inp$conc)) hit <- 1
}
}
result <- tibble::tibble(
EC50 = EC50,
slope = slope,
Emax = Emax,
ECxx = ECxx,
POD = POD,
hit = hit
)
return(result)
}
extract_hill_activity <- function(out, bmr) {
if (out$tp < 0) bmr <- bmr*-1
result <- list(
EC50 = out[['ga']],
Emax = out[['tp']],
POD = suppressWarnings(tcplHillConc(bmr, out$tp, out$ga, out$gw))
) %>% tibble::as_tibble()
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.