#' Match observed concentration-time data to the correct simulated
#' concentration-time data
#'
#' \code{match_obs_to_sim} will match observed concentration-time data to the
#' correct simulated concentration-time data and figure out what the dose number
#' should be at each time as well as figure out what the correct compound names
#' should be since that's not included in observed-data Excel files for the
#' Simulator. It will then add the observed data to your existing
#' concentration-time data.frame.
#'
#' @param ct_dataframe a data.frame of concentration-time data generated by
#' running either \code{\link{extractConcTime}} or
#' \code{\link{extractConcTime_mult}}. Not quoted.
#' @param obs_dataframe a data.frame of observed concentration-time data
#' generated by running either \code{\link{extractConcObsTime}} or
#' \code{\link{extractObsConcTime_mult}}. Not quoted.
#' @param obs_to_sim_assignment optionally specify which observed files should
#' be compared to which simulator files. If left as NA, this will assume that
#' all obsereved data should match all simulated data. To specify, use a
#' data.frame like this:
#' \code{obs_to_sim_assignment = data.frame(ObsFile = c("obs data 1.xlsx",
#' "obs data 2.xlsx"), File = c("mdz-5mg-qd.xlsx", "mdz-5mg-qd-cancer.xlsx"))}
#' or use a named character vector: \code{obs_to_sim_assignment =
#' c("obs data 1.xlsx" = "mdz-5mg-qd.xlsx", "obs data 2.xlsx" =
#' "mdz-5mg-qd-cancer.xlsx")}
#' @param existing_exp_details the output from running
#' \code{\link{extractExpDetails}} or \code{\link{extractExpDetails_mult}} on
#' the same simulations that were used for making \code{ct_dataframe}. If you
#' don't supply this, we'll run \code{\link{extractExpDetails_mult}} behind
#' the scenes, which will increase the time this takes to run. If you would
#' like to have this calculate dose numbers on data where you wouldn't have
#' simulation experimental details (example: you've only got observed data),
#' then supply a single-row data.frame with the following columns: \code{File}
#' (character, just set this to "all" or some other placeholder text),
#' \code{DoseInt_sub} (numeric), \code{StartHr_sub} (numeric; probably 0),
#' \code{NumDoses_sub} (numeric), and \code{Regimen_sub} (character;
#' presumably "Multiple Dose" here). If you want the dose number for other
#' compound IDs, then replace "_sub" with, e.g., "_inhib". Please run
#' \code{view(ExpDetailDefinitions)} to see examples of acceptable suffixes.
#'
#' @return returns a data.frame where the information from obs_dataframe has
#' dose numbers, compound names, inhibitor names, etc. to match ct_dataframe,
#' and obs_dataframe has been added to ct_dataframe.
#' @export
#'
#' @examples
#' # None yet
#'
match_obs_to_sim <- function(ct_dataframe,
obs_dataframe,
obs_to_sim_assignment = NA,
existing_exp_details = NA){
# NB: Does NOT currently check that dosing data match between sim and obs
# files. Could add that in the future as a safety check.
# error catching -------------------------------------------------------
# Check whether tidyverse is loaded
if("package:tidyverse" %in% search() == FALSE){
stop("The SimcypConsultancy R package requires the package tidyverse to be loaded, and it doesn't appear to be loaded yet. Please run\nlibrary(tidyverse)\n ...and then try again.",
call. = FALSE)
}
if(nrow(ct_dataframe) == 0){
warning("Please check your input. The data.frame you supplied for ct_dataframe doesn't have any rows.",
call. = FALSE)
return(ct_dataframe)
}
if(nrow(obs_dataframe) == 0){
warning("Please check your input. The data.frame you supplied for obs_dataframe doesn't have any rows.",
call. = FALSE)
return(ct_dataframe)
}
# Adjusting for slight differences in column names.
names(obs_dataframe)[tolower(names(obs_dataframe)) == "file"] <- "File"
names(obs_dataframe)[tolower(names(obs_dataframe)) == "obsfile"] <- "ObsFile"
if("simfile" %in% tolower(names(obs_dataframe)) &
"file" %in% tolower(names(obs_dataframe)) == FALSE){
names(obs_dataframe)[tolower(names(obs_dataframe)) == "simfile"] <- "File"
}
if(all(c("File", "CompoundID", "Compound", "Conc", "Conc_units",
"Time", "Time_units") %in% names(ct_dataframe)) == FALSE){
stop("It doesn't appear that you have supplied output from extractConcTime or extractConcTime_mult as input for ct_dataframe. This function only works when ct_dataframe is output from those functions.",
call. = FALSE)
}
if(all(c("ObsFile", "CompoundID", "Conc", "Conc_units",
"Time", "Time_units") %in% names(obs_dataframe)) == FALSE){
stop("It doesn't appear that you have supplied output from extractObsConcTime or extractObsConcTime_mult as input for ct_dataframe. This function only works when obs_dataframe is output from those functions.",
call. = FALSE)
}
if("logical" %in% class(obs_to_sim_assignment)){
if(all(is.na(obs_to_sim_assignment)) == FALSE){
stop("You have supplied a value for obs_to_sim_assigment that isn't a character vector, a data.frame, or just the value NA, which are the only acceptable options for obs_to_sim_assigment. Please check your input and try again.",
call. = FALSE)
}
} else if(any(c("data.frame", "character") %in%
class(obs_to_sim_assignment)) == FALSE){
stop("You have supplied a value for obs_to_sim_assigment that isn't a character vector, a data.frame, or just the value NA, which are the only acceptable options for obs_to_sim_assigment. Please check your input and try again.",
call. = FALSE)
}
# Main body of function -------------------------------------------------
if(is.logical(existing_exp_details)){
existing_exp_details <-
extractExpDetails_mult(sim_data_files = unique(ct_dataframe$File))
}
existing_exp_details <- harmonize_details(existing_exp_details)
# Summary of approach: Use expand_grid on obs_dataframe to include all
# possible appropriate File values and then bind obs_dataframe to
# ct_dataframe.
# Making obs_to_sim_assignment a data.frame regardless of what it was
# originally. We'll then split it into a list of data.frames and loop
# through to assign.
if("character" %in% class(obs_to_sim_assignment)){
if(all(obs_to_sim_assignment == "use existing_exp_details")){
ObsAssign <- existing_exp_details$MainDetails %>%
select(File, ObsOverlayFile) %>%
rename(ObsFile = ObsOverlayFile)
} else {
if(is.null(names(obs_to_sim_assignment))){
stop(wrapn("You appear to have supplied a character vector for the argument 'obs_to_sim_assignment', but you haven't supplied names for that vector, so we don't know how to match your simulated and observed files. Please check the help file for the argument 'obs_to_sim_assignment' and try again."),
call. = FALSE)
}
ObsAssign <- data.frame(ObsFile = names(obs_to_sim_assignment),
File = obs_to_sim_assignment)
}
} else if("logical" %in% class(obs_to_sim_assignment)){
# This is when they have left obs_to_sim_assignment as NA and want all the
# obs data to match all the sim.
ObsAssign <- expand_grid(File = unique(ct_dataframe$File),
ObsFile = unique(obs_dataframe$ObsFile))
} else {
# This is when they have supplied a data.frame.
ObsAssign <- obs_to_sim_assignment
}
# Adding ".xml" to any files that don't have it already since people often
# omit the extension or use the wrong one. Checking that the file exists.
ObsAssign <- ObsAssign %>%
mutate(Extension = str_extract(basename(ObsFile), pattern = "\\.(?<=\\.)[^\\.]+$"),
ObsFile_xml = case_when(is.na(Extension) ~ paste0(ObsFile, ".xml"),
Extension == ".xml" ~ ObsFile,
Extension == ".xlsx" ~ sub("xlsx$", "xml", ObsFile)),
ObsFile_xlsx = case_when(is.na(Extension) ~ paste0(ObsFile, ".xlsx"),
Extension == ".xlsx" ~ ObsFile,
Extension == ".xml" ~ sub("xml$", "xlsx", ObsFile)),
ObsFile_xml_exists = file.exists(ObsFile_xml),
ObsFile_xlsx_exists = file.exists(ObsFile_xlsx),
# Preferentially using the XML file since people don't always save
# the xlsx version of the file
ObsFileToUse = case_when(ObsFile_xml_exists == TRUE ~ ObsFile_xml,
ObsFile_xml_exists == FALSE &
ObsFile_xlsx_exists == TRUE ~ ObsFile_xlsx,
.default = ObsFile_xml))
# Matching the file extension in obs_dataframe or this will have trouble
obs_dataframe <- obs_dataframe %>%
left_join(ObsAssign %>% select(ObsFile, ObsFileToUse),
by = "ObsFile") %>%
select(-ObsFile) %>%
rename(ObsFile = ObsFileToUse)
ObsAssign <- ObsAssign %>%
select(-ObsFile) %>%
rename(ObsFile = ObsFileToUse)
# Making sure we have all the info we need.
if(all(ObsAssign$File[complete.cases(ObsAssign$File)] %in%
existing_exp_details$MainDetails$File) == FALSE){
suppressMessages(
existing_exp_details <-
extractExpDetails_mult(sim_data_files = ObsAssign$File,
existing_exp_details = existing_exp_details))
}
# This will cause problems if any of the sets of data have 0 rows. Checking
# for that.
MissingFile <- setdiff(unique(ObsAssign$File), unique(ct_dataframe$File))
if(length(MissingFile) > 0){
warning(paste0("The file(s) ",
str_comma(paste0("`", MissingFile, "`")),
" is/are included in `obs_to_sim_assignment` but not present in the simulated concentration-time data. It/They cannot be matched to any observed data.\n"),
call. = FALSE)
ObsAssign <- ObsAssign %>% filter(File %in% unique(ct_dataframe$File))
}
MissingObsFile <- setdiff(unique(ObsAssign$ObsFile),
unique(obs_dataframe$ObsFile))
if(length(MissingObsFile) > 0){
warning(paste0("The observed data file(s) ",
str_comma(paste0("`", MissingObsFile, "`")),
" is/are included in `obs_to_sim_assignment` but not present in the observed concentration-time data. It/They cannot be matched to any simulated data.\n"),
call. = FALSE)
ObsAssign <- ObsAssign %>% filter(ObsFile %in% unique(obs_dataframe$ObsFile))
}
# NB: Splitting by ObsFile b/c you could have more than one obs file per
# sim file, e.g., when there are observed data for more than 3 compound
# IDs, the maximum allowed in the PE data entry template. You could also
# have more than one File per ObsFile, though, so need to split by that as
# well.
obs_dataframe <- split(obs_dataframe, f = obs_dataframe$ObsFile) # NOTE: SPLIT BY OBSFILE
ct_dataframe <- split(ct_dataframe, f = ct_dataframe$File) # NOTE: SPLIT BY FILE
ObsAssign <- split(ObsAssign, f = ObsAssign$ObsFile)
for(j in names(ObsAssign)){
# j = ObsFile
ObsAssign[[j]] <- split(ObsAssign[[j]],
f = ObsAssign[[j]]$File)
ObsData_j <- list()
for(k in names(ObsAssign[[j]])){
# k = File
# Checking for whether there was a custom-dosing regimen b/c that
# messes up EVERYTHING. If there was, Dose_x needs to be NA.
CustomDosingCheck <- existing_exp_details$MainDetails %>%
filter(File == k) %>%
select(any_of(c("Dose_sub", "Dose_inhib", "Dose_inhib2"))) %>%
pivot_longer(cols = everything(),
names_to = "CompoundID", values_to = "Value") %>%
filter(Value == "custom dosing")
# Setting observed data dose to NA if it's custom dosing.
for(cmpd in CustomDosingCheck$CompoundID){
obs_dataframe[[j]][, cmpd] <- NA
}
# NB: This does not include matching by dose, which may change over the
# course of the study or simulation. This is assuming that the observed
# file in question definitely should match the simulated data in
# question, so that should NOT be a problem.
ObsData_j[[k]] <- obs_dataframe[[j]] %>%
select(-Compound) %>%
mutate(File = k) %>%
left_join(ct_dataframe[[k]] %>%
select(any_of(c("File", "CompoundID", "Compound"))) %>%
unique(),
relationship = "many-to-many",
by = c("CompoundID", "File"))
# Observed files often only include the 1st dose, even if it was
# a multiple-dose simulation, so we can't trust the obs file to
# include all the dose numbers at all the right times and instead
# should get the dose number and dosing interval from the sim
# file existing_exp_details.
Deets <- existing_exp_details
Deets$MainDetails <- Deets$MainDetails %>% filter(File == k)
ObsData_j[[k]] <- calc_dosenumber(ct_dataframe = ObsData_j[[k]],
existing_exp_details = Deets)
# Matching units. Need to do this one compound at a time.
ObsData_j[[k]] <- split(ObsData_j[[k]], f = ObsData_j[[k]]$CompoundID)
ct_dataframe[[k]] <- split(ct_dataframe[[k]], f = ct_dataframe[[k]]$CompoundID)
for(cmpd in intersect(names(ObsData_j[[k]]),
names(ct_dataframe[[k]]))){
ObsData_j[[k]][[cmpd]] <-
convert_units(DF_to_convert = ObsData_j[[k]][[cmpd]],
DF_with_good_units = ct_dataframe[[k]][[cmpd]],
MW = as.numeric(
Deets$MainDetails[
paste0("MW", AllCompounds$Suffix[AllCompounds$CompoundID == cmpd])]))
}
ObsData_j[[k]] <- bind_rows(ObsData_j[[k]])
ct_dataframe[[k]] <- bind_rows(ct_dataframe[[k]])
# Adding inhibitor name as needed
MyPerpetrator <- determine_myperpetrator(Deets,
prettify_compound_names = FALSE)
ObsData_j[[k]]$Inhibitor[ObsData_j[[k]]$Inhibitor != "none"] <-
MyPerpetrator
rm(Deets)
}
obs_dataframe[[j]] <- bind_rows(ObsData_j)
}
ct_dataframe <- bind_rows(ct_dataframe) %>%
bind_rows(bind_rows(obs_dataframe) %>% unique()) %>%
filter(complete.cases(CompoundID)) %>% unique()
return(ct_dataframe)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.