#' Extract enzyme abundance data from a simulator output Excel file
#'
#' Extracts enzyme abundance data from a simulator output Excel file. The
#' appropriate tab must be present in the output file. For detailed instructions
#' and examples, please see the SharePoint file "Simcyp PBPKConsult R Files -
#' Simcyp PBPKConsult R Files/SimcypConsultancy function examples and
#' instructions/Enzyme abundance plots/Enzyme-abundance-plot-examples.docx".
#' (Sorry, we are unable to include a link to it here.)
#'
#' \strong{Note:} Unlike the similar function for extracting drug
#' concentrations, \code{\link{extractConcTime}}, this function has not been
#' set up yet to get the dose number for a custom-dosing regimen.
#'
#' @param sim_data_file name of the Excel file containing the simulated
#'   enzyme-abundance-time data, in quotes. If the name does not end in "xlsx",
#'   ".xlsx" is appended.
#' @param enzyme the enzyme of interest, e.g., "CYP3A4" (default), "UGT1A1",
#'   etc. Spaces, underscores, or hyphens in enzyme names will be ignored. Not
#'   case sensitive. Must be a single value.
#' @param tissue From which tissue should the desired enzyme abundance be
#'   extracted? Options are "liver" (default), "gut", or "kidney". Not case
#'   sensitive. Note: If "gut" is selected, the output will return both colon
#'   and small intestine abundances.
#' @param returnAggregateOrIndiv Return aggregate and/or individual simulated
#'   enzyme abundance data? Options are "individual", "aggregate", or "both"
#'   (default). Aggregated data are not calculated here but are pulled from the
#'   simulator output rows labeled as, e.g., "mean".
#' @param existing_exp_details If you have already run
#'   \code{\link{extractExpDetails_mult}} or \code{\link{extractExpDetails}} to
#'   get all the details from the "Input Sheet" (e.g., when you ran
#'   extractExpDetails you said \code{exp_details = "Input Sheet"} or
#'   \code{exp_details = "all"}), you can save some processing time by supplying
#'   that object here, unquoted. If left as NA, this function will run
#'   \code{extractExpDetails} behind the scenes to figure out some information
#'   about your experimental set up.
#'
#' @return A data.frame of enzyme abundance with time with the following
#'   columns (columns that do not apply to the data requested are omitted). If
#'   the request cannot be fulfilled (bad tissue or enzyme, missing sheet), a
#'   warning is given and an empty data.frame is returned. \describe{
#'
#'   \item{Enzyme}{the enzyme whose abundance is listed}
#'
#'   \item{Tissue}{the tissue; for gut data, this is "colon" or "small
#'   intestine"}
#'
#'   \item{Substrate}{what the substrate was in the simulation}
#'
#'   \item{Inhibitor}{what the perpetrator was in the simulation if there was
#'   one or "none" otherwise}
#'
#'   \item{Simulated}{always TRUE; all data returned here are simulated}
#'
#'   \item{IndivOrAgg}{"individual" or "aggregate"}
#'
#'   \item{Species}{the species listed in the experimental details}
#'
#'   \item{Individual}{the individual for the given profile, which will be a
#'   number for a simulated individual or the name of the aggregate statistic,
#'   e.g., "mean", "geomean", "per5" or "per95", for aggregate data. Only
#'   present when individual data were requested.}
#'
#'   \item{Trial}{the trial number for that set of simulations or "mean",
#'   etc. for the aggregate data}
#'
#'   \item{Time}{the time as listed in the simulator output}
#'
#'   \item{Abundance}{abundance of the enzyme listed}
#'
#'   \item{Time_units}{units used for time, "hours" or "minutes"}
#'
#'   \item{DoseNum_x}{the dose number for the substrate (suffix is "_sub"),
#'   inhibitor 1 ("_inhib1"), or inhibitor 2 ("_inhib2")}
#'
#'   \item{Dose_int_x}{the dosing interval for that compound. This will be NA
#'   when no numeric interval is available, e.g., for single-dose or
#'   custom-dosing regimens.}
#'
#'   \item{TimeSinceDose1_x}{time since dose 1 for that compound}
#'
#'   \item{File}{the simulator output file the data came from}
#'
#'   }
#'
#'
#' @export
#' @examples
#' extractEnzAbund(sim_data_file = "../Example simulator output MD.xlsx",
#'                 enzyme = "CYP3A4", tissue = "liver")
#'
#'
extractEnzAbund <- function(sim_data_file,
                            enzyme = "CYP3A4",
                            tissue = "liver",
                            returnAggregateOrIndiv = "both",
                            existing_exp_details = NA){
   
   # Error catching ----------------------------------------------------------
   
   # Check whether tidyverse is loaded
   if("package:tidyverse" %in% search() == FALSE){
      stop("The SimcypConsultancy R package also requires the package tidyverse to be loaded, and it doesn't appear to be loaded yet. Please run `library(tidyverse)` and then try again.")
   }
   
   # If they didn't include ".xlsx" at the end, add that.
   sim_data_file <- ifelse(str_detect(sim_data_file, "xlsx$"),
                           sim_data_file, paste0(sim_data_file, ".xlsx"))
   
   # Checking for file name issues
   CheckFileNames <- check_file_name(sim_data_file)
   BadFileNames <- CheckFileNames[!CheckFileNames == "File name meets naming standards."]
   if(length(BadFileNames)> 0){
      BadFileNames <- paste0(names(BadFileNames), ": ", BadFileNames)
      warning(paste0("The following file names do not meet file-naming standards for the Simcyp Consultancy Team:\n",
                     str_c(paste0(" ", BadFileNames), collapse = "\n"), "\n"),
              call. = FALSE)
   }
   
   if(any(c(length(returnAggregateOrIndiv) < 1,
            length(returnAggregateOrIndiv) > 2,
            any(unique(returnAggregateOrIndiv) %in% c("aggregate", "individual", "both") == FALSE)))) {
      stop("returnAggregateOrIndiv must be 'aggregate', 'individual', or 'both'.",
           call. = FALSE)
   }
   
   if(length(tissue) != 1){
      warning("You must enter one and only one tissue option. (Default is liver.)\n",
              call. = FALSE)
      return(data.frame())
   }
   
   # Harmonizing case BEFORE validating so that, e.g., "Liver" is accepted.
   tissue <- tolower(tissue)
   
   if(tissue %in% c("gut", "liver", "kidney") == FALSE){
      warning("The tissue you entered is not one of the options. Please select one of 'gut', 'liver', or 'kidney' for the tissue.\n",
              call. = FALSE)
      return(data.frame())
   }
   
   # Everything below treats enzyme as a single string, so guarding against
   # vectors and NULL here rather than failing inside an if() later.
   if(length(enzyme) != 1){
      warning("You must enter one and only one enzyme. (Default is CYP3A4.)\n",
              call. = FALSE)
      return(data.frame())
   }
   
   enzyme <- gsub(" |_|-", "", toupper(enzyme))
   
   # Checking that what they're asking for is possible
   # NOTE(review): enzyme has had spaces removed and been upper-cased above,
   # so the "UGT User Defined" entries below can never match as written --
   # confirm how user-defined UGTs should be requested.
   UGTs <- paste0("UGT", c(paste0("1A", c(1,3:10)),
                           paste0("2B", c(4, 7, 10, 11, 15, 17, 28)),
                           " User Defined"))
   PossibleEnz <- switch(
      tissue,
      "liver" = c(paste0("CYP", c("1A1", "1A2", "2A6", "2B6", "2C8", "2C9",
                                  "2C18", "2C19", "2D6", "2E1", "2J2", "3A4",
                                  "3A5", "3A7")),
                  UGTs),
      "gut" = c(paste0("CYP", c("2C9", "2C19", "2D6", "2J2", "3A4", "3A5")),
                UGTs),
      "kidney" = UGTs)
   
   if(enzyme %in% PossibleEnz == FALSE){
      warning(wrapn(paste0("You requested ", enzyme, " levels in the ", tissue,
                           ", which is not among the possible outputs from the Simcyp Simulator, so we cannot return any data.")),
              call. = FALSE)
      return(data.frame())
   }
   
   
   # Main body of function ---------------------------------------------------
   
   # Getting summary data for the simulation(s)
   if("logical" %in% class(existing_exp_details)){ # logical when user has supplied NA
      Deets <- extractExpDetails(sim_data_file, exp_details = "Summary and Input")[["MainDetails"]]
   } else {
      Deets <- filter_sims(existing_exp_details, sim_data_file, "include")
      Deets <- harmonize_details(Deets)[["MainDetails"]] %>%
         filter(File == sim_data_file)
      
      # The supplied details didn't cover this file, so extracting them now.
      if(nrow(Deets) == 0){
         Deets <- extractExpDetails(sim_data_file, exp_details = "Summary and Input")[["MainDetails"]]
      }
   }
   
   if(Deets$PopRepSim == "Yes"){
      warning(paste0("The simulator file supplied, `",
                     sim_data_file,
                     "`, is for a population-representative simulation and thus doesn't have any aggregate data. Please be warned that some plotting functions will not work well without aggregate data.\n"),
              call. = FALSE)
   }
   
   # Figuring out which sheet to extract and dealing with case since that
   # apparently changes between Simulator versions.
   AllSheets <- gsub("`", "", str_split_1(Deets$SheetNames, pattern = "` `"))
   SheetSuffix <- switch(tissue,
                         "liver" = "(liver)",
                         "gut" = "(gut)",
                         "kidney" = "(kidney)")
   SheetToExtract <- AllSheets[which(tolower(AllSheets) ==
                                        paste(tolower(enzyme), SheetSuffix))]
   
   if(length(SheetToExtract) == 0){
      warning(paste0("The simulator output file `",
                     sim_data_file, "` does not appear to have the sheet we need for the enzyme abundances requested. We were looking for a sheet titled `",
                     paste(toupper(enzyme), SheetSuffix),
                     "` and could not find it, so these data cannot be returned.\n"),
              call. = FALSE)
      return(data.frame())
   }
   
   # Reading in simulated abundance-time profile data
   sim_data_xl <- suppressMessages(
      readxl::read_excel(path = sim_data_file,
                         sheet = SheetToExtract,
                         col_names = FALSE))
   
   # Column 1 holds the row labels that everything below searches on.
   Labels <- sim_data_xl$...1
   LabelsLower <- tolower(Labels)
   BlankRows <- which(is.na(Labels))
   
   # If the tissue was gut, there are separate data sets for small intestine
   # and colon or for gut and colon depending on simulator version or maybe on
   # whether it was an ADAM model (still figuring out what causes "SI" to be
   # replaced with "Gut" in output). Checking for that. An entry is NA when
   # that part was not found and the labels include blanks.
   GutPatterns <- c("colon" = "\\(colon\\)",
                    "small intestine" = "\\(si\\)|\\(gut\\)")
   GutParts <- c("colon", "small intestine")[
      c(any(str_detect(LabelsLower, GutPatterns[["colon"]])),
        any(str_detect(LabelsLower, GutPatterns[["small intestine"]])))]
   PresentGutParts <- GutParts[complete.cases(GutParts)]
   
   ## Local helpers ----------------------------------------------------------
   
   # Transposes the requested sheet rows into a numeric data.frame with one
   # column (V1, V2, ...) per sheet row. The first three sheet columns (label,
   # individual, trial) are dropped so that only the time-course values remain.
   rows_to_df <- function(rows){
      sim_data_xl[rows, ] %>%
         t() %>%
         as.data.frame() %>%
         slice(-(1:3)) %>%
         mutate(across(.cols = everything(), .fns = as.numeric))
   }
   
   # Last row before the next blank label after start_row, or the last row of
   # the sheet when no blank follows.
   section_end <- function(start_row){
      NextBlank <- BlankRows[BlankRows > start_row][1]
      if(is.na(NextBlank)) nrow(sim_data_xl) else NextBlank - 1
   }
   
   # Sheet rows for one gut part, from that part's "Time" row through the end
   # of its section. NamesToCheck must start at sheet row TimeRows[1].
   find_gut_rows <- function(part, TimeRows, NamesToCheck){
      PartRows <- which(str_detect(NamesToCheck, GutPatterns[[part]])) +
         TimeRows[1] - 1
      # The part's time row is the one directly above one of its data rows.
      PartTimeRow <- intersect(TimeRows + 1, PartRows) - 1
      PartTimeRow:section_end(PartTimeRow)
   }
   
   # Long-format aggregate data from a time row plus a named vector of
   # statistic rows; the names become the values in the Trial column.
   tidy_aggregate <- function(time_row, stat_rows, tissue_label){
      AggData <- rows_to_df(c(time_row, stat_rows))
      names(AggData) <- c("Time", names(stat_rows))
      AggData %>%
         pivot_longer(names_to = "Trial", values_to = "Abundance",
                      cols = -Time) %>%
         mutate(Enzyme = enzyme,
                Tissue = tissue_label)
   }
   
   # Long-format individual data. data_rows must start with the time row;
   # id_rows are the sheet rows whose columns 2 and 3 supply the individual
   # and trial for each remaining data row.
   tidy_individual <- function(data_rows, id_rows, tissue_label){
      IndData <- rows_to_df(data_rows) %>%
         rename(Time = "V1")
      SubjTrial <- sim_data_xl[id_rows, 2:3] %>%
         rename(Individual = ...2, Trial = ...3) %>%
         mutate(SubjTrial = paste0("ID", Individual, "_", Trial))
      names(IndData)[2:ncol(IndData)] <- SubjTrial$SubjTrial
      
      IndData %>%
         pivot_longer(names_to = "SubjTrial", values_to = "Abundance",
                      cols = -Time) %>%
         mutate(Enzyme = enzyme,
                Tissue = tissue_label,
                SubjTrial = sub("ID", "", SubjTrial)) %>%
         separate(SubjTrial, into = c("Individual", "Trial"),
                  sep = "_") %>%
         mutate(across(.cols = c("Individual", "Trial"),
                       .fns = as.numeric))
   }
   
   # Stacks data without and with a perpetrator, marking which is which.
   add_inhib_data <- function(no_inhib, with_inhib){
      bind_rows(no_inhib,
                with_inhib %>% mutate(PerpPresent = TRUE)) %>%
         mutate(PerpPresent = ifelse(is.na(PerpPresent),
                                     FALSE, PerpPresent))
   }
   
   # Named vector of sheet rows holding each aggregate statistic for non-gut
   # sheets, either for rows without ("with inh" absent) or with a perpetrator.
   # Statistics that are not found are simply absent from the result.
   find_stat_rows <- function(NamesToCheck, offset, with_inhib){
      HasInhib <- str_detect(NamesToCheck, "with inh")
      Keep <- if(with_inhib) HasInhib else !HasInhib
      
      c("mean" =
           which(str_detect(NamesToCheck, "mean") &
                    !str_detect(NamesToCheck, "geometric") & Keep) + offset,
        "per5" =
           which(str_detect(NamesToCheck," 5(th)? percentile") &
                    !str_detect(NamesToCheck, "95") & Keep) + offset,
        "per95" =
           which(str_detect(NamesToCheck, " 95(th)? percentile") &
                    Keep) + offset,
        "per10" =
           which(str_detect(NamesToCheck," 10(th)? percentile") &
                    Keep) + offset,
        "per90" =
           which(str_detect(NamesToCheck, " 90(th)? percentile") &
                    Keep) + offset,
        "geomean" =
           which(str_detect(NamesToCheck, "geometric mean") &
                    Keep) + offset,
        "median" =
           which(str_detect(NamesToCheck, "median") & Keep) + offset)
   }
   
   
   # Extracting aggregate data -----------------------------------------------
   if(any(c("aggregate", "both") %in% returnAggregateOrIndiv)){
      
      if(all(complete.cases(GutParts)) && tissue == "gut"){
         
         sim_data_mean <- list()
         
         # The two "Time" rows after "Population Statistics" head the two
         # gut-part sections.
         StartRow_agg <- which(Labels == "Population Statistics")
         TimeRows <- which(str_detect(Labels, "^Time "))
         TimeRows <- TimeRows[TimeRows > StartRow_agg][1:2]
         
         # Figuring out which rows contain which data
         FirstBlank <- BlankRows[BlankRows > TimeRows[2]][1]
         if(is.na(FirstBlank)){
            FirstBlank <- nrow(sim_data_xl)
         }
         NamesToCheck <- LabelsLower[TimeRows[1]:(FirstBlank-1)]
         
         # Checking for inhibitor
         PerpPresent <- any(str_detect(NamesToCheck, "with inh"), na.rm = TRUE)
         
         for(i in PresentGutParts){
            
            PartRows <- find_gut_rows(i, TimeRows, NamesToCheck)
            PartNames <- LabelsLower[PartRows]
            Offset <- PartRows[1] - 1
            NoInhib <- !str_detect(PartNames, "with inh")
            
            # Checking which cells contain mean, 5th, and 95th percentile
            # data.
            RowsToUse <-
               c("mean" = which(str_detect(PartNames,
                                           "enzyme value.*mean") &
                                   NoInhib) + Offset,
                 "per5" = which(str_detect(PartNames,
                                           "enzyme.* 5th percentile") &
                                   NoInhib) + Offset,
                 "per95" = which(str_detect(PartNames,
                                            "enzyme.*95th percentile") &
                                    NoInhib) + Offset)
            
            sim_data_mean[[i]] <- tidy_aggregate(PartRows[1], RowsToUse, i)
            
            if(PerpPresent){
               RowsToUse_inhib <-
                  c("mean" = which(str_detect(PartNames,
                                              "enzyme value with inh mean")) +
                       Offset,
                    "per5" = which(str_detect(PartNames,
                                              "enzyme value with inh 5th percentile")) +
                       Offset,
                    "per95" = which(str_detect(PartNames,
                                               "enzyme value with inh 95th percentile")) +
                       Offset)
               
               sim_data_mean[[i]] <- add_inhib_data(
                  sim_data_mean[[i]],
                  tidy_aggregate(PartRows[1], RowsToUse_inhib, i))
            }
         }
         
         sim_data_mean <- bind_rows(sim_data_mean)
         
      } else {
         # non-gut tissue
         
         TimeRow <- which(str_detect(Labels, "^Time "))
         TimeRow <- TimeRow[TimeRow > which(Labels == "Population Statistics")][1]
         
         # Figuring out which rows contain which data
         FirstBlank <- BlankRows[BlankRows > TimeRow][1]
         if(is.na(FirstBlank)){
            FirstBlank <- nrow(sim_data_xl)
         }
         NamesToCheck <- LabelsLower[TimeRow:(FirstBlank-1)]
         
         sim_data_mean <- tidy_aggregate(
            TimeRow,
            find_stat_rows(NamesToCheck, TimeRow - 1, with_inhib = FALSE),
            tissue)
         
         # Checking for inhibitor
         PerpPresent <- any(str_detect(NamesToCheck, "with inh"), na.rm = TRUE)
         
         if(PerpPresent){
            sim_data_mean <- add_inhib_data(
               sim_data_mean,
               tidy_aggregate(
                  TimeRow,
                  find_stat_rows(NamesToCheck, TimeRow - 1, with_inhib = TRUE),
                  tissue))
         }
      }
   }
   
   
   # Extracting individual data ----------------------------------------------
   if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
      
      if(length(PresentGutParts) > 0 && tissue == "gut"){
         
         sim_data_ind <- list()
         
         StartRow_ind <- which(Labels == "Individual Statistics")
         TimeRows <- which(str_detect(Labels, "^Time "))
         TimeRows <- TimeRows[TimeRows > StartRow_ind][1:2]
         
         # Individual data run to the end of the sheet.
         NamesToCheck <- LabelsLower[TimeRows[1]:nrow(sim_data_xl)]
         
         # Checking for inhibitor
         PerpPresent <- any(str_detect(NamesToCheck, "with inh"), na.rm = TRUE)
         
         for(i in PresentGutParts){
            
            PartRows <- find_gut_rows(i, TimeRows, NamesToCheck)
            HasInhib <- str_detect(LabelsLower[PartRows], "with inh")
            
            # The time row has no "with inh" in its label, so it is the first
            # of these rows; the rest are one row per individual.
            NoInhibRows <- PartRows[which(!HasInhib)]
            sim_data_ind[[i]] <- tidy_individual(NoInhibRows,
                                                 NoInhibRows[-1], i)
            
            if(PerpPresent){
               InhibRows <- PartRows[which(HasInhib)]
               sim_data_ind[[i]] <- add_inhib_data(
                  sim_data_ind[[i]],
                  tidy_individual(c(PartRows[1], InhibRows), InhibRows, i))
            }
         }
         
         sim_data_ind <- bind_rows(sim_data_ind)
         
      } else {
         # non-gut tissue
         
         StartRow_ind <- which(Labels == "Individual Statistics")
         TimeRow <- which(str_detect(Labels, "^Time "))
         TimeRow <- TimeRow[TimeRow > StartRow_ind][1]
         
         # Individual data run to the end of the sheet. Blank labels give NA
         # below and are dropped by which().
         NamesToCheck <- LabelsLower[TimeRow:nrow(sim_data_xl)]
         HasInhib <- str_detect(NamesToCheck, "with inh")
         
         # The time row is the first of these rows.
         NoInhibRows <- which(!HasInhib) + TimeRow - 1
         sim_data_ind <- tidy_individual(NoInhibRows, NoInhibRows[-1], tissue)
         
         # Checking for inhibitor
         PerpPresent <- any(HasInhib, na.rm = TRUE)
         
         if(PerpPresent){
            InhibRows <- which(HasInhib) + TimeRow - 1
            # Individuals and trials for the with-inhibitor rows are taken
            # from the rows without inhibitor, i.e., the two sets are assumed
            # to list the same individuals in the same order.
            sim_data_ind <- add_inhib_data(
               sim_data_ind,
               tidy_individual(c(TimeRow, InhibRows), NoInhibRows[-1], tissue))
         }
      }
   }
   
   
   # Putting everything together ---------------------------------------------
   
   TimeUnits <- Labels[which(str_detect(Labels, "^Time"))][1]
   TimeUnits <- ifelse(TimeUnits == "Time (h)", "Hours", "Minutes")
   
   Data <- list()
   
   if(any(c("aggregate", "both") %in% returnAggregateOrIndiv)){
      Data[["agg"]] <- sim_data_mean %>%
         arrange(Trial, Time) %>%
         mutate(IndivOrAgg = "aggregate",
                Simulated = TRUE)
   }
   
   if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
      Data[["indiv"]] <- sim_data_ind %>%
         mutate(Individual = as.character(Individual),
                Trial = as.character(Trial),
                Simulated = TRUE,
                IndivOrAgg = "individual") %>%
         arrange(Individual, Time)
   }
   
   Data <- bind_rows(Data)
   
   # Aggregate rows have no individual, so labeling them with the statistic
   # name, e.g., "mean". This must also happen for "both", which is why the
   # condition matches the one used for extracting the individual data.
   if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
      Data <- Data %>%
         mutate(Individual = ifelse(is.na(Individual), Trial, Individual))
   }
   
   # Adding DoseNumber so that we can skip extractExpDetails in ct_plot when
   # the user requests a specific dose. Items missing from Deets become NA.
   MyIntervals <-
      c("substrate" = Deets$DoseInt_sub,
        "inhibitor 1" = ifelse(is.null(Deets$DoseInt_inhib),
                               NA, Deets$DoseInt_inhib),
        "inhibitor 2" = ifelse(is.null(Deets$DoseInt_inhib2),
                               NA, Deets$DoseInt_inhib2))
   
   MyStartTimes <-
      c("substrate" = Deets$StartHr_sub,
        "inhibitor 1" = ifelse(is.null(Deets$StartHr_inhib), NA,
                               Deets$StartHr_inhib),
        "inhibitor 2" = ifelse(is.null(Deets$StartHr_inhib2), NA,
                               Deets$StartHr_inhib2))
   
   MyMaxDoseNum <-
      c("substrate" = ifelse(Deets$Regimen_sub == "Single Dose",
                             1, Deets$NumDoses_sub),
        "inhibitor 1" = ifelse(is.null(Deets$NumDoses_inhib), NA,
                               ifelse(Deets$Regimen_inhib == "Single Dose",
                                      1, Deets$NumDoses_inhib)),
        "inhibitor 2" = ifelse(is.null(Deets$NumDoses_inhib2), NA,
                               ifelse(Deets$Regimen_inhib2 == "Single Dose",
                                      1, Deets$NumDoses_inhib2)))
   
   # Converting data to numeric while also retaining names. Anything that is
   # not a number becomes NA, hence suppressing the coercion warnings.
   suppressWarnings(
      MyIntervals <- sapply(MyIntervals, FUN = as.numeric))
   suppressWarnings(
      MyStartTimes <- sapply(MyStartTimes, FUN = as.numeric))
   suppressWarnings(
      MyMaxDoseNum <- sapply(MyMaxDoseNum, FUN = as.numeric))
   
   # NOTE(review): dose numbers below are calculated from Time rather than
   # from TimeSinceDose1_x, so a compound whose first dose is after time 0 is
   # still counted from time 0 -- confirm that this is intended.
   Data <- Data %>%
      mutate(StartHr_sub = MyStartTimes["substrate"],
             TimeSinceDose1_sub = Time - StartHr_sub,
             DoseInt_sub = MyIntervals["substrate"],
             MaxDoseNum_sub = MyMaxDoseNum["substrate"],
             DoseNum_sub = Time %/% DoseInt_sub + 1,
             # Taking care of possible artifacts
             DoseNum_sub = ifelse(DoseNum_sub < 0, 0, DoseNum_sub),
             DoseNum_sub = ifelse(DoseNum_sub > MaxDoseNum_sub,
                                  MaxDoseNum_sub, DoseNum_sub),
             # If it was a single dose, make everything after StartHr dose
             # 1 and everything before StartHr dose 0. If it was a single
             # dose, then DoseInt is NA.
             DoseNum_sub = ifelse(is.na(DoseInt_sub),
                                  ifelse(TimeSinceDose1_sub < 0, 0, 1), DoseNum_sub),
             
             StartHr_inhib1 = MyStartTimes["inhibitor 1"],
             TimeSinceDose1_inhib1 = Time - StartHr_inhib1,
             DoseInt_inhib1 = MyIntervals["inhibitor 1"],
             MaxDoseNum_inhib1 = MyMaxDoseNum["inhibitor 1"],
             DoseNum_inhib1 = Time %/% DoseInt_inhib1 + 1,
             DoseNum_inhib1 = ifelse(DoseNum_inhib1 < 0, 0, DoseNum_inhib1),
             DoseNum_inhib1 = ifelse(DoseNum_inhib1 > MaxDoseNum_inhib1,
                                     MaxDoseNum_inhib1, DoseNum_inhib1),
             DoseNum_inhib1 = ifelse(is.na(DoseInt_inhib1),
                                     ifelse(TimeSinceDose1_inhib1 < 0, 0, 1), DoseNum_inhib1),
             
             StartHr_inhib2 = MyStartTimes["inhibitor 2"],
             TimeSinceDose1_inhib2 = Time - StartHr_inhib2,
             DoseInt_inhib2 = MyIntervals["inhibitor 2"],
             MaxDoseNum_inhib2 = MyMaxDoseNum["inhibitor 2"],
             DoseNum_inhib2 = Time %/% DoseInt_inhib2 + 1,
             DoseNum_inhib2 = ifelse(DoseNum_inhib2 < 0, 0, DoseNum_inhib2),
             DoseNum_inhib2 = ifelse(DoseNum_inhib2 > MaxDoseNum_inhib2,
                                     MaxDoseNum_inhib2, DoseNum_inhib2),
             DoseNum_inhib2 = ifelse(is.na(DoseInt_inhib2),
                                     ifelse(TimeSinceDose1_inhib2 < 0, 0, 1), DoseNum_inhib2))
   
   # Custom dosing regimens are not handled here yet; see the note in the
   # function documentation.
   
   # Checking for when the simulation ends right at the last dose b/c
   # then, setting that number to 1 dose lower
   for(DoseNumCol in c("DoseNum_sub", "DoseNum_inhib1", "DoseNum_inhib2")){
      HighestDoseNum <- max(Data[[DoseNumCol]])
      # which() drops NAs, so a compound that is absent is left alone.
      TimesAtHighest <- unique(
         Data$Time[which(Data[[DoseNumCol]] == HighestDoseNum)])
      
      if(length(TimesAtHighest) == 1){
         Data[[DoseNumCol]] <- ifelse(Data[[DoseNumCol]] == HighestDoseNum,
                                      HighestDoseNum - 1, Data[[DoseNumCol]])
      }
   }
   
   # Noting exactly what the perpetrators were
   AllPerpetrators <- c(Deets$Inhibitor1, Deets$Inhibitor2,
                        Deets$Inhibitor1Metabolite)
   AllPerpetrators <- AllPerpetrators[complete.cases(AllPerpetrators)]
   
   # Finalizing, tidying, selecting only useful columns
   Data <- Data %>%
      mutate(Time_units = tolower(TimeUnits),
             File = sim_data_file,
             # PerpPresent is a column of Data when a perpetrator was found
             # and otherwise the single FALSE set during extraction.
             Inhibitor = ifelse(PerpPresent,
                                str_comma(AllPerpetrators), "none"),
             Substrate = Deets$Substrate,
             # Adding a little bit more info
             Species = Deets$Species,
             # Dosing intervals under the names used in the output
             Dose_int_sub = DoseInt_sub,
             Dose_int_inhib1 = DoseInt_inhib1,
             Dose_int_inhib2 = DoseInt_inhib2) %>%
      arrange(across(any_of(c("File", "Enzyme", "Tissue",
                              "Substrate", "Inhibitor",
                              "Individual", "Trial", "Time")))) %>%
      select(any_of(c("Enzyme", "Tissue", "Substrate", "Inhibitor",
                      "Simulated", "IndivOrAgg", "Species",
                      "Individual", "Trial", "Time", "Abundance",
                      "Time_units",
                      "DoseNum_sub", "Dose_int_sub", "TimeSinceDose1_sub",
                      "DoseNum_inhib1", "Dose_int_inhib1", "TimeSinceDose1_inhib1",
                      "DoseNum_inhib2", "Dose_int_inhib2", "TimeSinceDose1_inhib2",
                      "File")))
   
   return(Data)
   
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.