Nothing
#' VPC function for categorical
#'
#' Creates a VPC plot from observed and simulation data for categorical variables.
#'
#' @param sim a data.frame with observed data, containing the independent and dependent variable, a column indicating the individual, and possibly covariates. E.g. load in from NONMEM using \link{read_table_nm}
#' @param obs a data.frame with observed data, containing the independent and dependent variable, a column indicating the individual, and possibly covariates. E.g. load in from NONMEM using \link{read_table_nm}
#' @param psn_folder instead of specifying "sim" and "obs", specify a PsN-generated VPC-folder
#' @param bins either "density", "time", or "data", "none", or one of the approaches available in classInterval() such as "jenks" (default) or "pretty", or a numeric vector specifying the bin separators.
#' @param n_bins when using the "auto" binning method, what number of bins to aim for
#' @param bin_mid either "mean" for the mean of all timepoints (default) or "middle" to use the average of the bin boundaries.
#' @param obs_cols observation dataset column names (list elements: "dv", "idv", "id", "pred")
#' @param sim_cols simulation dataset column names (list elements: "dv", "idv", "id", "pred")
#' @param show what to show in VPC (obs_ci, pi, pi_as_area, pi_ci, obs_median, sim_median, sim_median_ci)
#' @param software name of software platform using (e.g. nonmem, phoenix)
#' @param ci confidence interval to plot. Default is (0.05, 0.95)
#' @param uloq Number or NULL indicating upper limit of quantification. Default is NULL.
#' @param lloq Number or NULL indicating lower limit of quantification. Default is NULL.
#' @param plot Boolean indicting whether to plot the ggplot2 object after creation. Default is FALSE.
#' @param xlab label for x-axis
#' @param ylab label for y-axis
#' @param title title
#' @param smooth "smooth" the VPC (connect bin midpoints) or show bins as rectangular boxes. Default is TRUE.
#' @param vpc_theme theme to be used in VPC. Expects list of class vpc_theme created with function vpc_theme()
#' @param facet either "wrap", "columns", or "rows"
#' @param labeller ggplot2 labeller function to be passed to underlying ggplot object
#' @param vpcdb boolean whether to return the underlying vpcdb rather than the plot
#' @param verbose show debugging information (TRUE or FALSE)
#' @return a list containing calculated VPC information (when vpcdb=TRUE), or a ggplot2 object (default)
#' @export
#' @seealso \link{sim_data}, \link{vpc}, \link{vpc_tte}, \link{vpc_cens}
#' @examples
#'
#' ## See vpc.ronkeizer.com for more documentation and examples
#' library(vpc)
#'
#' # simple function to simulate categorical data for single individual
#' sim_id <- function(id = 1) {
#' n <- 10
#' logit <- function(x) exp(x) / (1+exp(x))
#' data.frame(id = id, time = seq(1, n, length.out = n),
#' dv = round(logit((1:n) - n/2 + rnorm(n, 0, 1.5))) )
#' }
#' ## simple function to simulate categorical data for a trial
#' sim_trial <- function(i = 1, n = 20) { # function to simulate categorical data for a trial
#' data.frame(sim = i, do.call("rbind", lapply(1:n, sim_id)))
#' }
#'
#' ## simulate single trial for 20 individuals
#' obs <- sim_trial(n = 20)
#'
#' ## simulate 200 trials of 20 individuals
#' sim <- do.call("rbind", lapply(1:200, sim_trial, n = 20))
#'
#' ## Plot categorical VPC
#' vpc_cat(sim = sim, obs = obs)
vpc_cat <- function(sim = NULL,
obs = NULL,
psn_folder = NULL,
bins = "jenks",
n_bins = "auto",
bin_mid = "mean",
obs_cols = NULL,
sim_cols = NULL,
software = "auto",
show = NULL,
ci = c(0.05, 0.95),
uloq = NULL,
lloq = NULL,
xlab = NULL,
ylab = NULL,
title = NULL,
smooth = TRUE,
vpc_theme = NULL,
facet = "wrap",
labeller = NULL,
plot = TRUE,
vpcdb = FALSE,
verbose = FALSE) {
if(is.null(obs) & is.null(sim)) {
stop("At least a simulation or an observation dataset are required to create a plot!")
}
if(!is.null(psn_folder)) {
if(!is.null(obs)) {
obs <- read_table_nm(paste0(psn_folder, "/m1/", dir(paste0(psn_folder, "/m1"), pattern="original.npctab")[1]))
}
if(!is.null(sim)) {
sim <- read_table_nm(paste0(psn_folder, "/m1/", dir(paste0(psn_folder, "/m1"), pattern="simulation.1.npctab")[1]))
}
software = "nonmem"
}
if (!is.null(obs)) {
software_type <- guess_software(software, obs)
} else {
software_type <- guess_software(software, sim)
}
## define what to show in plot
show <- replace_list_elements(show_default, show)
## define column names
cols <- define_data_columns(sim, obs, sim_cols, obs_cols, software_type)
if(!is.null(obs)) {
old_class <- class(obs)
class(obs) <- c(software_type, old_class)
}
if(!is.null(sim)) {
old_class <- class(sim)
class(sim) <- c(software_type, old_class)
}
## parse data into specific format
if(!is.null(obs)) {
obs <- filter_dv(obs, verbose)
obs <- format_vpc_input_data(obs, cols$obs, lloq, uloq, strat = NULL, bins, FALSE, 0, "observed", verbose)
}
if(!is.null(sim)) {
sim <- filter_dv(sim, verbose)
sim <- format_vpc_input_data(sim, cols$sim, lloq, uloq, strat = NULL, bins, FALSE, 0, "simulated", verbose)
sim$sim <- add_sim_index_number(sim, id = "id")
}
if (class(bins) != "numeric") {
if(!is.null(obs)) {
bins <- auto_bin(obs, bins, n_bins)
} else { # get from sim
bins <- auto_bin(sim, bins, n_bins)
}
if (is.null(bins)) {
msg("Automatic binning unsuccessful, try increasing the number of bins, or specify vector of bin separators manually.", verbose)
}
}
bins <- unique(bins)
if(!is.null(obs)) {
obs <- bin_data(obs, bins, "idv")
}
if(!is.null(sim)) {
sim <- bin_data(sim, bins, "idv")
}
## parsing
fact_perc <- function(x, fact) { sum(x == fact) / length(x) } # below lloq, default
obs$dv <- as.factor(obs$dv)
lev <- levels(obs$dv)
if (!is.null(sim)) {
tmp1 <- sim %>%
dplyr::group_by(sim, bin)
for (i in seq(lev)) {
if (i == 1) {
aggr_sim <- tmp1 %>%
dplyr::summarise(fact_perc(dv, lev[i]))
} else {
suppressMessages({
aggr_sim <- dplyr::bind_cols(aggr_sim, tmp1 %>%
dplyr::summarise(fact_perc(dv, lev[i])) %>%
dplyr::ungroup() %>%
dplyr::select(-sim, -bin))
})
}
}
aggr_sim <- dplyr::bind_cols(aggr_sim, tmp1 %>%
dplyr::summarise(mean(idv)) %>%
dplyr::ungroup() %>%
dplyr::select(-sim, -bin))
colnames(aggr_sim) <- c("sim", "bin", paste0("fact_", lev), "mn_idv")
tmp3 <- tidyr::pivot_longer(aggr_sim, names_to = "strat", cols = paste0("fact_", lev)) %>%
dplyr::arrange(sim, strat, bin) %>%
dplyr::mutate(strat = stringr::str_replace(strat, "fact_", ""))
vpc_dat <- tmp3 %>%
dplyr::group_by(strat, bin) %>%
dplyr::summarise(q50.low = quantile(value, ci[1]),
q50.med = quantile(value, 0.5),
q50.up = quantile(value, ci[2]),
bin_mid = mean(mn_idv)) %>%
dplyr::ungroup()
vpc_dat$bin_min <- rep(bins[1:(length(bins)-1)], length(unique(vpc_dat$strat)))[vpc_dat$bin]
vpc_dat$bin_max <- rep(bins[2:length(bins)], length(unique(vpc_dat$strat)))[vpc_dat$bin]
if(bin_mid == "middle") {
vpc_dat$bin_mid <- apply(cbind(vpc_dat$bin_min, vpc_dat$bin_max), 1, mean)
}
} else {
vpc_dat <- NULL
}
if(!is.null(obs)) {
tmp <- obs %>% dplyr::group_by(bin)
for (i in seq(lev)) {
if (i == 1) {
aggr_obs <- tmp %>%
dplyr::summarise(fact_perc(dv, lev[i]))
} else {
aggr_obs <- cbind(aggr_obs, tmp %>%
dplyr::summarise(fact_perc(dv, lev[i])) %>%
dplyr::ungroup() %>%
dplyr::select(-bin) )
}
}
tmp1 <- cbind(aggr_obs, tmp %>%
dplyr::summarise(mean(idv)) %>%
dplyr::ungroup() %>%
dplyr::select(-bin))
colnames(tmp1) <- c("bin", paste0("fact_", lev), "bin_mid")
tmp2 <- tidyr::pivot_longer(tmp1, names_to = "strat", cols = paste0("fact_", lev)) %>%
dplyr::arrange(strat, bin) %>%
dplyr::mutate(strat = stringr::str_replace(strat, "fact_", ""))
tmp2$bin_min <- rep(bins[1:(length(bins)-1)], length(unique(tmp2$strat)) )[tmp2$bin]
tmp2$bin_max <- rep(bins[2:length(bins)], length(unique(tmp2$strat)) )[tmp2$bin]
if(bin_mid == "middle") {
tmp2$bin_mid <- apply(dplyr::bind_cols(tmp2$bin_min, tmp2$bin_max), 1, mean)
}
aggr_obs <- tmp2
colnames(aggr_obs)[4] <- "obs50"
} else {
aggr_obs <- NULL
}
## plotting starts here
show$median_ci = FALSE
show$obs_dv = FALSE
show$obs_ci = FALSE
show$sim_median = TRUE
show$sim_median_ci = TRUE
show$pi_as_area = FALSE
show$pi_ci = FALSE
show$pi = FALSE
vpc_db <- list(sim = sim,
vpc_dat = vpc_dat,
stratify = "strat", # the stratification is the various categories!
stratify_original = "strat",
aggr_obs = aggr_obs,
obs = obs,
bins = bins,
facet = facet,
labeller = labeller,
type = "categorical",
xlab = xlab,
ylab = ylab)
if(vpcdb) {
return(vpc_db)
} else {
pl <- plot_vpc(db = vpc_db,
show = show,
vpc_theme = vpc_theme,
smooth = smooth,
log_y = FALSE,
title = title)
pl <- pl + ggplot2::ylim(c(0,1))
return(pl)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.