#' Extract fm and fe valuesthat change with time from a Simulator output Excel
#' file
#'
#' \code{extractFmFe} extracts fm and fe data from a simulator output Excel
#' file. A tab named something like "Time variant \%fm and fe" must be present
#' to get dynamic fm and fe values, but you can get the static values if the tab
#' "% fm and fe SS" is present. Dynamic fm and fe data are required if you want
#' to use these data with the function \code{\link{fm_plot}} to make a graph of
#' how the fm values change over time.
#'
#' @param sim_data_file name of the Excel file containing the simulated dynamic
#' fm and fe data, in quotes
#' @param returnOnlyMaxMin TRUE (default) or FALSE for whether to return only
#' maximum and minimum fm values -- basically, return the table in the upper
#' left corner of the "Time variant \%fm and fe" tab.
#' @param returnAggregateOrIndiv Return aggregate and/or individual simulated fm
#' and fe data? This currently only applies to the dynamic fms and has not yet
#' been developed for the static fms, which will only return aggregate data at
#' present. Options are "individual", "aggregate", or "both" (default).
#' Aggregated data are not calculated here but are pulled from the simulator
#' output rows labeled as "mean".
#' @param existing_exp_details If you have already run
#' \code{\link{extractExpDetails_mult}} or \code{\link{extractExpDetails}} to
#' get all the details from the "Input Sheet" (e.g., when you ran
#' extractExpDetails you said \code{exp_details = "Input Sheet"} or
#' \code{exp_details = "all"}), you can save some processing time by supplying
#' that object here, unquoted. If left as NA, this function will run
#' \code{extractExpDetails} behind the scenes to figure out some information
#' about your experimental set up.
#'
#' @return a data.frame
#'
#' @export
#' @examples
#' # None yet
#'
extractFmFe <- function(sim_data_file,
returnOnlyMaxMin = TRUE,
returnAggregateOrIndiv = "both",
existing_exp_details = NA){
# Error catching --------------------------------------------------------------------
# Check whether tidyverse is loaded
if("package:tidyverse" %in% search() == FALSE){
stop("The SimcypConsultancy R package also requires the package tidyverse to be loaded, and it doesn't appear to be loaded yet. Please run `library(tidyverse)` and then try again.")
}
# If they didn't include ".xlsx" at the end, add that.
sim_data_file <- paste0(sub("\\.wksz$|\\.dscw$|\\.xlsx$", "", sim_data_file), ".xlsx")
# Checking for file name issues
CheckFileNames <- check_file_name(sim_data_file)
BadFileNames <- CheckFileNames[!CheckFileNames == "File name meets naming standards."]
if(length(BadFileNames)> 0){
BadFileNames <- paste0(names(BadFileNames), ": ", BadFileNames)
warning(paste0("The following file names do not meet file-naming standards for the Simcyp Consultancy Team:\n",
str_c(paste0(" ", BadFileNames), collapse = "\n"), "\n"),
call. = FALSE)
}
if(any(c(length(returnAggregateOrIndiv) < 1,
length(returnAggregateOrIndiv) > 2,
any(unique(returnAggregateOrIndiv) %in% c("aggregate", "individual", "both") == FALSE)))) {
stop("returnAggregateOrIndiv must be 'aggregate', 'individual', or 'both'.",
call. = FALSE)
}
# Main body of function ----------------------------------------------------------------
# Getting summary data for the simulation(s)
if("logical" %in% class(existing_exp_details)){
existing_exp_details <- extractExpDetails(sim_data_file)
Deets <- existing_exp_details[["MainDetails"]]
} else {
existing_exp_details <- harmonize_details(existing_exp_details)
existing_exp_details <-
filter_sims(existing_exp_details, sim_data_file, "include")
Deets <- existing_exp_details[["MainDetails"]]
if(nrow(Deets) == 0){
existing_exp_details <-
extractExpDetails(sim_data_file)
Deets <- existing_exp_details[["MainDetails"]]
}
}
if(is.null(Deets)){
# Skipping the warning b/c they will have already gotten it from
# extractExpDetails.
return(data.frame())
}
if(Deets$PopRepSim == "Yes"){
warning(wrapn(paste0("The simulator file supplied, `",
sim_data_file,
"`, is for a population-representative simulation and thus doesn't have any aggregate data. Please be warned that some plotting functions will not work well without aggregate data.")),
call. = FALSE)
}
# Figuring out which sheet to extract and dealing with case since that
# apparently changes between Simulator versions.
AllSheets <- str_split(Deets$SheetNames, pattern = "` `")[[1]]
AllSheets <- gsub("`", "", AllSheets)
SheetNames_dynamic <- AllSheets[str_detect(tolower(AllSheets), "time variant .fm and fe")]
SheetNames_static <- AllSheets[which(AllSheets == "% fm and fe SS")]
if(length(SheetNames_dynamic) == 0 &
length(SheetNames_static) == 0){
warning(wrapn(paste0("The simulator output file provided, '",
sim_data_file,
"', does not appear to have a sheet titled 'Time variant %fm and fe', which is what we need for extracting dynamic fm and fe values, nor a sheet titled '% fm and fe SS', which is what we need for static fm and fe values. We cannot return any fm data.")),
call. = FALSE)
return(data.frame())
}
# Dynamic fm data extraction -----------------------------------------------
# Preferentially getting dynamic fm data
if(length(SheetNames_dynamic) == 1){
# Reading in simulated abundance-time profile data
sim_data_xl <- suppressMessages(
readxl::read_excel(path = sim_data_file,
sheet = SheetNames_dynamic,
range = switch(as.character(returnOnlyMaxMin),
"TRUE" = "A1:E50", # Guessing that there wouldn't be more than 50 lines here... Reasonable?
"FALSE" = NULL),
col_names = FALSE))
## Extracting only max and min --------------------------------------------
if(returnOnlyMaxMin){
EndRow <- which(is.na(sim_data_xl$...1[3:nrow(sim_data_xl)]))[1] + 1
suppressMessages(
Out <- sim_data_xl[3:EndRow, 1:5] %>%
rename(Max = ...2,
tmax = ...3,
Min = ...4,
tmin = ...5) %>%
mutate(Tissue = str_extract(tolower(...1), "liver|kidney|renal"),
Enzyme = str_extract(...1, "(CYP|UGT)[1-9][ABCDEFJ][1-9]{1,2}|Additional"),
PerpPresent = str_detect(tolower(...1), "with inh"),
Parameter = str_extract(...1, "fm|fe"),
across(.cols = c(Max, tmax, Min, tmin),
.fns = as.numeric),
File = sim_data_file) %>%
select(Parameter, Tissue, Enzyme, PerpPresent, Max, tmax, Min, tmin, File)
)
return(Out)
}
## Extracting aggregate data ---------------------------------------------
if(any(c("aggregate", "both") %in% returnAggregateOrIndiv)){
StartRow_agg <- which(sim_data_xl$...1 == "Population Statistics")
TimeRow <- which(str_detect(sim_data_xl$...1, "^Time "))
TimeRow <- TimeRow[TimeRow > StartRow_agg][1]
# Figuring out which rows contain which data
FirstBlank <- intersect(which(is.na(sim_data_xl$...1)),
which(1:nrow(sim_data_xl) > TimeRow))[1]
FirstBlank <- ifelse(is.na(FirstBlank), nrow(sim_data_xl), FirstBlank)
NamesToCheck <- sim_data_xl$...1[TimeRow[1]:(FirstBlank-1)]
RowsToUse <- which(str_detect(tolower(NamesToCheck),
"\\((liver|kidney|renal)\\)"))
IDs_agg <- data.frame(ColOrig = paste0("V", 1:(length(RowsToUse) + 1)),
ID = c("Time", NamesToCheck[RowsToUse])) %>%
# NOTE: Just guessing for now what any inhibitors might be named b/c
# I don't have an example file for that atm.
mutate(CompoundID = str_extract(tolower(ID), "sub( met)?|sub( pri met[12])?|inhib"),
CompoundID = case_when(CompoundID == "sub" ~ "substrate",
CompoundID == "sub met" ~ "primary metabolite 1",
CompoundID == "sub pri met1" ~ "primary metabolite 1",
CompoundID == "sub pri met2" ~ "primary metabolite 2",
CompoundID == "inhib" ~ "inhibitor 1"),
Tissue = gsub("\\(|\\)", "",
str_extract(tolower(ID),
"\\((liver|kidney|renal)\\)")),
Enzyme = str_extract(ID,
"(CYP|UGT)[1-9][ABCDEFJ][1-9]{1,2}"),
Trial = str_trim(str_extract(tolower(ID),
"(geometric)? mean| 5(th)? percentile| 95(th)? percentile|median")),
Trial = case_when(Trial == "5th percentile" ~ "per5",
Trial == "95th percentile" ~ "per95",
Trial == "geometric mean" ~ "geomean",
TRUE ~ Trial),
PerpPresent = str_detect(ID, "with inh"),
Parameter = str_trim(str_extract(ID, " fe | fm ")))
sim_data_mean <- sim_data_xl[c(TimeRow, RowsToUse + TimeRow - 1), ] %>%
t() %>%
as.data.frame() %>% slice(-(1:3)) %>%
mutate_all(as.numeric) %>%
rename(Time = V1) %>%
pivot_longer(names_to = "ColOrig", values_to = "Fraction",
cols = -Time) %>%
left_join(IDs_agg, by = "ColOrig")
}
## Extracting individual data --------------------------------------------
if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
StartRow_indiv <- which(sim_data_xl$...1 == "Individual Statistics")
TimeRow <- which(str_detect(sim_data_xl$...1, "^Time "))
TimeRow <- TimeRow[TimeRow > StartRow_indiv][1]
# Figuring out which rows contain which data
FirstBlank <- intersect(which(is.na(sim_data_xl$...1)),
which(1:nrow(sim_data_xl) > TimeRow))[1]
FirstBlank <- ifelse(is.na(FirstBlank), nrow(sim_data_xl) + 1, FirstBlank)
RowsToUse <- which(str_detect(tolower(sim_data_xl$...1),
"\\((liver|kidney|renal)\\)"))
RowsToUse <- RowsToUse[RowsToUse > StartRow_indiv & RowsToUse < FirstBlank]
IDs_indiv <- data.frame(ColOrig = paste0("V", 1:(length(RowsToUse) + 1)),
ID = c("Time", t(sim_data_xl[RowsToUse, 1])),
Individual = sim_data_xl[c(TimeRow, RowsToUse), 2],
Trial = sim_data_xl[c(TimeRow, RowsToUse), 3]) %>%
rename(Individual = ...2,
Trial = ...3) %>%
# NOTE: Just guessing for now what any inhibitors might be named b/c
# I don't have an example file for that atm.
mutate(CompoundID = str_extract(tolower(ID), "sub( met)?|sub( pri met[12])?|inhib"),
CompoundID = case_when(CompoundID == "sub" ~ "substrate",
CompoundID == "sub met" ~ "primary metabolite 1",
CompoundID == "sub pri met1" ~ "primary metabolite 1",
CompoundID == "sub pri met2" ~ "primary metabolite 2",
CompoundID == "inhib" ~ "inhibitor 1"),
Tissue = gsub("\\(|\\)", "",
str_extract(tolower(ID),
"\\((liver|kidney|renal)\\)")),
Enzyme = str_extract(ID,
"(CYP|UGT)[1-3][ABCDEJ][1-9]{1,2}"),
PerpPresent = str_detect(ID, "with inh"),
Parameter = str_trim(str_extract(ID, " fe | fm ")))
sim_data_indiv <- sim_data_xl[c(TimeRow, RowsToUse), ] %>%
t() %>%
as.data.frame() %>% slice(-(1:3)) %>%
mutate_all(as.numeric) %>%
rename(Time = V1) %>%
pivot_longer(names_to = "ColOrig", values_to = "Fraction",
cols = -Time) %>%
left_join(IDs_indiv, by = "ColOrig")
}
## Putting everything together ------------------------------------------
TimeUnits <- sim_data_xl$...1[which(str_detect(sim_data_xl$...1, "^Time"))][1]
TimeUnits <- ifelse(TimeUnits == "Time (h)", "hours", "minutes")
Data <- list()
if(any(c("aggregate", "both") %in% returnAggregateOrIndiv)){
Data[["agg"]] <- sim_data_mean %>%
arrange(Trial, Time)
}
if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
Data[["indiv"]] <- sim_data_indiv %>%
mutate(Individual = as.character(Individual),
Trial = as.character(Trial)) %>%
arrange(Individual, Time)
}
Data <- bind_rows(Data)
if("individual" %in% returnAggregateOrIndiv){
Data <- Data %>%
mutate(Individual = ifelse(is.na(Individual), Trial, Individual))
}
# Noting which compound names belong to which compound IDs
AllCompounds <- c("substrate" = Deets$Substrate,
"primary metabolite 1" = Deets$PrimaryMetabolite1,
"primary metabolite 2" = Deets$PrimaryMetabolite2,
"secondary metabolite" = Deets$SecondaryMetabolite,
"inhibitor 1" = Deets$Inhibitor1,
"inhibitor 1 metabolite" = Deets$Inhibitor1Metabolite,
"inhibitor 2" = Deets$Inhibitor2)
# Adding DoseNumber so that we can skip extractExpDetails in ct_plot when
# the user requests a specific dose.
MyIntervals <-
c("substrate" = Deets$DoseInt_sub,
"primary metabolite 1" = Deets$DoseInt_sub,
"primary metabolite 2" = Deets$DoseInt_sub,
"secondary metabolite" = Deets$DoseInt_sub,
"inhibitor 1" = ifelse(is.null(Deets$DoseInt_inhib),
NA, Deets$DoseInt_inhib),
"inhibitor 1 metabolite" = ifelse(is.null(Deets$DoseInt_inhib),
NA, Deets$DoseInt_inhib),
"inhibitor 2" = ifelse(is.null(Deets$DoseInt_inhib2),
NA, Deets$DoseInt_inhib2))
MyStartTimes <-
c("substrate" = Deets$StartHr_sub,
"primary metabolite 1" = Deets$StartHr_sub,
"primarymetabolite 2" = Deets$StartHr_sub,
"secondary metabolite" = Deets$StartHr_sub,
"inhibitor 1" = ifelse(is.null(Deets$StartHr_inhib), NA,
Deets$StartHr_inhib),
"inhibitor 2" = ifelse(is.null(Deets$StartHr_inhib2), NA,
Deets$StartHr_inhib2),
"inhibitor 1 metabolite" = ifelse(is.null(Deets$StartHr_inhib), NA,
Deets$StartHr_inhib))
MyMaxDoseNum <-
c("substrate" = ifelse(Deets$Regimen_sub == "Single Dose",
1, Deets$NumDoses_sub),
"primary metabolite 1" = ifelse(Deets$Regimen_sub == "Single Dose",
1, Deets$NumDoses_sub),
"primarymetabolite 2" = ifelse(Deets$Regimen_sub == "Single Dose",
1, Deets$NumDoses_sub),
"secondary metabolite" = ifelse(Deets$Regimen_sub == "Single Dose",
1, Deets$NumDoses_sub),
"inhibitor 1" = ifelse(is.null(Deets$NumDoses_inhib), NA,
ifelse(Deets$Regimen_inhib == "Single Dose",
1, Deets$NumDoses_inhib)),
"inhibitor 2" = ifelse(is.null(Deets$NumDoses_inhib2), NA,
ifelse(Deets$Regimen_inhib2 == "Single Dose",
1, Deets$NumDoses_inhib2)),
"inhibitor 1 metabolite" = ifelse(is.null(Deets$NumDoses_inhib), NA,
ifelse(Deets$Regimen_inhib == "Single Dose",
1, Deets$NumDoses_inhib)))
# Converting data to numeric while also retaining names
suppressWarnings(
MyIntervals <- sapply(MyIntervals, FUN = as.numeric))
suppressWarnings(
MyStartTimes <- sapply(MyStartTimes, FUN = as.numeric))
suppressWarnings(
MyMaxDoseNum <- sapply(MyMaxDoseNum, FUN = as.numeric))
Data <- Data %>%
mutate(StartHr_sub = MyStartTimes["substrate"],
TimeSinceDose1_sub = Time - StartHr_sub,
DoseInt_sub = MyIntervals["substrate"],
MaxDoseNum_sub = MyMaxDoseNum["substrate"],
DoseNum_sub = Time %/% DoseInt_sub + 1,
# Taking care of possible artifacts
DoseNum_sub = ifelse(DoseNum_sub < 0, 0, DoseNum_sub),
DoseNum_sub = ifelse(DoseNum_sub > MaxDoseNum_sub,
MaxDoseNum_sub, DoseNum_sub),
# If it was a single dose, make everything after StartHr dose
# 1 and everything before StartHr dose 0. if it was a single
# dose, then DoseInt is NA.
DoseNum_sub = ifelse(is.na(DoseInt_sub),
ifelse(TimeSinceDose1_sub < 0, 0, 1), DoseNum_sub),
StartHr_inhib1 = MyStartTimes["inhibitor 1"],
TimeSinceDose1_inhib1 = Time - StartHr_inhib1,
DoseInt_inhib1 = MyIntervals["inhibitor 1"],
MaxDoseNum_inhib1 = MyMaxDoseNum["inhibitor 1"],
DoseNum_inhib1 = Time %/% DoseInt_inhib1 + 1,
# Taking care of possible artifacts
DoseNum_inhib1 = ifelse(DoseNum_inhib1 < 0, 0, DoseNum_inhib1),
DoseNum_inhib1 = ifelse(DoseNum_inhib1 > MaxDoseNum_inhib1,
MaxDoseNum_inhib1, DoseNum_inhib1),
# If it was a single dose, make everything after StartHr dose
# 1 and everything before StartHr dose 0. if it was a single
# dose, then DoseInt is NA.
DoseNum_inhib1 = ifelse(is.na(DoseInt_inhib1),
ifelse(TimeSinceDose1_inhib1 < 0, 0, 1), DoseNum_inhib1),
StartHr_inhib2 = MyStartTimes["inhibitor 2"],
TimeSinceDose1_inhib2 = Time - StartHr_inhib2,
DoseInt_inhib2 = MyIntervals["inhibitor 2"],
MaxDoseNum_inhib2 = MyMaxDoseNum["inhibitor 2"],
DoseNum_inhib2 = Time %/% DoseInt_inhib2 + 1,
# Taking care of possible artifacts
DoseNum_inhib2 = ifelse(DoseNum_inhib2 < 0, 0, DoseNum_inhib2),
DoseNum_inhib2 = ifelse(DoseNum_inhib2 > MaxDoseNum_inhib2,
MaxDoseNum_inhib2, DoseNum_inhib2),
# If it was a single dose, make everything after StartHr dose
# 1 and everything before StartHr dose 0. if it was a single
# dose, then DoseInt is NA.
DoseNum_inhib2 = ifelse(is.na(DoseInt_inhib2),
ifelse(TimeSinceDose1_inhib2 < 0, 0, 1), DoseNum_inhib2))
# Checking for when the simulation ends right at the last dose b/c
# then, setting that number to 1 dose lower
if(length(Data %>% filter(DoseNum_sub == max(Data$DoseNum_sub)) %>%
pull(Time) %>% unique()) == 1){
MyMaxDoseNum_sub <- max(Data$DoseNum_sub)
Data <- Data %>%
mutate(DoseNum_sub = ifelse(DoseNum_sub == MyMaxDoseNum_sub,
MyMaxDoseNum_sub - 1, DoseNum_sub))
}
if(length(Data %>% filter(DoseNum_inhib1 == max(Data$DoseNum_inhib1)) %>%
pull(Time) %>% unique()) == 1){
MyMaxDoseNum_inhib1 <- max(Data$DoseNum_inhib1)
Data <- Data %>%
mutate(DoseNum_inhib1 = ifelse(DoseNum_inhib1 == MyMaxDoseNum_inhib1,
MyMaxDoseNum_inhib1 - 1, DoseNum_inhib1))
}
if(length(Data %>% filter(DoseNum_inhib2 == max(Data$DoseNum_inhib2)) %>%
pull(Time) %>% unique()) == 1){
MyMaxDoseNum_inhib2 <- max(Data$DoseNum_inhib2)
Data <- Data %>%
mutate(DoseNum_inhib2 = ifelse(DoseNum_inhib2 == MyMaxDoseNum_inhib2,
MyMaxDoseNum_inhib2 - 1, DoseNum_inhib2))
}
# Noting exactly what the perpetrators were
AllPerpetrators <- c(Deets$Inhibitor1, Deets$Inhibitor2,
Deets$Inhibitor1Metabolite)
AllPerpetrators <- AllPerpetrators[complete.cases(AllPerpetrators)]
# Finalizing, tidying, selecting only useful columns
Data <- Data %>%
mutate(Time_units = tolower({{TimeUnits}}),
File = sim_data_file,
Inhibitor = ifelse(PerpPresent,
str_comma(AllPerpetrators), "none"),
Substrate = Deets$Substrate,
Compound = AllCompounds[CompoundID]) %>%
arrange(across(any_of(c("Parameter", "Tissue", "Enzyme",
"Substrate", "Inhibitor",
"Individual", "Trial", "Time")))) %>%
select(any_of(c("Compound", "CompoundID", "Inhibitor",
"Parameter", "Enzyme", "Tissue",
"Individual", "Trial", "Time", "Fraction",
"Time_units",
"DoseNum_sub", "DoseNum_inhib1", "DoseNum_inhib2",
"File"))) %>%
purrr::discard(~all(is.na(.)))
return(Data)
} else {
# Static fm data extraction -----------------------------------------------
# Reading in simulated abundance-time profile data
sim_data_xl <- suppressMessages(
readxl::read_excel(path = sim_data_file,
sheet = SheetNames_static,
col_names = FALSE))
# Only doing aggregate data for now. Determining column positions.
Row3 <- as.character(t(sim_data_xl[3, ]))
StartCols_main <- which(str_detect(Row3, "Statistics"))
StartCol_BL <- StartCols_main[1]
EndCol_BL <- as.character(t(sim_data_xl[4, (StartCol_BL + 1):ncol(sim_data_xl)]))
EndCol_BL <- which(is.na(EndCol_BL))[1] - 1 + StartCol_BL
EndCol_BL <- ifelse(is.na(EndCol_BL), ncol(sim_data_xl), EndCol_BL)
# These do not seem to change
StartRow <- 5
EndRow <- 18
# BL fms
Out <- sim_data_xl[StartRow:EndRow, StartCol_BL:EndCol_BL]
names(Out) <- c("Statistic",
t(as.character(sim_data_xl[StartRow - 1, (StartCol_BL+1):EndCol_BL])))
suppressWarnings(
Out <- Out %>%
mutate(across(.cols = -Statistic,
.fns = as.numeric)) %>%
mutate(Statistic = renameStats(Statistic),
DDI = FALSE,
File = sim_data_file)
)
if(length(StartCols_main) == 2){
StartCol_DDI <- StartCols_main[2]
EndCol_DDI <- as.character(t(sim_data_xl[4, (StartCol_DDI + 1):ncol(sim_data_xl)]))
EndCol_DDI <- which(is.na(EndCol_DDI))[1] + StartCol_DDI - 1
EndCol_DDI <- ifelse(is.na(EndCol_DDI), ncol(sim_data_xl), EndCol_DDI)
# DDI fms
DDI <- sim_data_xl[StartRow:EndRow, StartCol_DDI:EndCol_DDI]
names(DDI) <- c("Statistic",
t(as.character(sim_data_xl[StartRow - 1, (StartCol_DDI+1):EndCol_DDI])))
suppressWarnings(
DDI <- DDI %>%
mutate(across(.cols = -Statistic,
.fns = as.numeric)) %>%
mutate(Statistic = renameStats(Statistic),
DDI = TRUE,
File = sim_data_file)
)
Out <- bind_rows(Out, DDI) %>%
select(File, Statistic, everything())
}
return(Out)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.