# R/extractExpDetails.R
#
# Defines functions extractExpDetails
#
# Documented in extractExpDetails

#' Extract details about the experimental design
#'
#' \code{extractExpDetails} looks up experimental design details from a Simcyp
#' Simulator output file. For detailed instructions and examples, please see the
#' SharePoint file "Simcyp PBPKConsult R Files - Simcyp PBPKConsult R
#' Files/SimcypConsultancy function examples and instructions/Checking
#' simulation experimental
#' details/Checking-simulation-experimental-details.docx". (Sorry, we are unable
#' to include a link to it here.)
#'
#' @param sim_data_file name of the Excel file containing the simulator output,
#'   in quotes. \strong{A note:} There are just a few items that we will attempt
#'   to extract from the matching workspace file; for that information, we will
#'   look for a workspace file that is named \emph{identically} to the Excel
#'   file except for the file extension. We will ignore the date/time stamp that
#'   the autorunner adds as long as that stamp is in a format like this:
#'   "myfile - 2023-10-31 07-23-15.xlsx".
#' @param exp_details Experiment details you want to extract from the simulator
#'   output file. Options are \describe{
#'
#'   \item{"Summary and Input"}{Extract details available from the "Summary"
#'   tab and the "Input Sheet" tab (default)}
#'
#'   \item{"population tab"}{Extract details about the population used (data
#'   come from the tab with the same name as the population simulated)}
#'
#'   \item{"Simcyp inputs"}{Extract all the details that you normally fill out
#'   on the "Simcyp inputs (and QC)" tab of a compound data sheet plus trial
#'   design information}
#'
#'   \item{"workspace"}{Extract a limited set of details directly
#'   from the Simcyp Simulator workspace files. The set of possible details may
#'   be viewed by entering \code{view(AllWorkspaceDetails)} in the console. This
#'   \emph{only} works if the workspace file name perfectly matches the Excel
#'   results file name and is located in the same folder. Otherwise, this step
#'   in the data extraction will be skipped.}
#'
#'   \item{"all"}{Extract all possible parameters}}
#'
#'   \strong{NOTES:} \enumerate{\item{The default pulls parameters from the
#'   "Summary" tab and the "Input Sheet" tab. Note that the
#'   "Summary" tab does not include information on any compounds beyond the
#'   substrate and inhibitor 1.} \item{There are a few places where
#'   requesting one item as input will get you multiple items as output:
#'   intrinsic clearance, interaction parameters, and transport parameters. For
#'   example, if you request intrinsic clearance values (ex: "CLint_sub"),
#'   you'll get all the intrinsic clearance values for that compound, and
#'   they'll be named according to which parameter it is, which enzyme it's for,
#'   etc. Same thing with requesting interaction parameters (ex:
#'   "Interaction_inhib" to get all the interaction parameters for inhibitor 1)
#'   and transporter parameters (ex: "Transport_sub").}}
#' @return Returns a named list of the experimental details
#' @export
#'
#' @examples
#'
#' extractExpDetails(sim_data_file = "../Example simulator output.xlsx")
#' extractExpDetails(sim_data_file = "../Example simulator output MD + inhibitor.xlsx")
#' extractExpDetails(sim_data_file = "../Example simulator output.xlsx",
#'                   exp_details = "all")
#'
#'
#' 
extractExpDetails <- function(sim_data_file,
                              exp_details = "Summary and Input"){
   
   # Error catching ---------------------------------------------------------
   # Check whether tidyverse is loaded
   if("package:tidyverse" %in% search() == FALSE){
      stop("The SimcypConsultancy R package also requires the package tidyverse to be loaded, and it doesn't appear to be loaded yet. Please run `library(tidyverse)` and then try again.")
   }
   
   # If they didn't include ".xlsx" at the end, add that.
   sim_data_file <- paste0(sub("\\.wksz$|\\.dscw$|\\.xlsx$", "", sim_data_file), ".xlsx")
   
   # Checking that the file is, indeed, a simulator output file.
   SheetNames <- tryCatch(readxl::excel_sheets(sim_data_file),
                          error = openxlsx::getSheetNames(sim_data_file))
   if(all(c("Input Sheet", "Summary") %in% SheetNames) == FALSE){
      # Using "warning" instead of "stop" here b/c I want this to be able to
      # pass through to extractExpDetails_mult and just skip any files that
      # aren't simulator output.
      warning(wrapn(paste0("The file '", sim_data_file,
                           "' does not appear to be a Simcyp Simulator output Excel file. We cannot return any information for this file.")), 
              call. = FALSE)
      return(list())
   }
   
   # Checking for file name issues
   CheckFileNames <- check_file_name(sim_data_file)
   BadFileNames <- CheckFileNames[!CheckFileNames == "File name meets naming standards."]
   if(length(BadFileNames)> 0){
      BadFileNames <- paste0(names(BadFileNames), ": ", BadFileNames)
      warning(paste0("The following file names do not meet file-naming standards for the Simcyp Consultancy Team:\n", 
                     str_c(paste0("     ", BadFileNames), collapse = "\n"),
                     "\n"), 
              call. = FALSE)
   }
   
   # Cleaning up possible problems w/how exp_details by tab might be inputted
   if(length(exp_details) != 1){
      warning("You can only enter one value for what set of details you want for the argument `exp_details`. We'll set this to `all` for now.\n", 
              call. = FALSE)
      exp_details <- "all"
   }
   
   exp_details <- tolower(exp_details[1])
   
   if(str_detect(exp_details, "summary|input")){
      exp_details <- "summary and input"
   } else if(str_detect(exp_details, "population")){
      exp_details <- "population tab"
   } else if(str_detect(exp_details, "worksp")){
      exp_details <- "workspace"
   } 
   
   if(exp_details %in% c("summary and input", "population tab", "workspace", 
                         "all", "simcyp inputs") == FALSE){
      warning("The only options for the argument `exp_details` are `Summary and Input`, `population tab`, `workspace`, `Simcyp inputs`, or `all` (not case sensitive), and you've supplied something else. We'll set this to `all` for now.\n", 
              call. = FALSE)
      exp_details <- "all"
   }
   
   # Noting exp_details requested for later
   exp_details_input <- exp_details
   
   
   # Main body of function ----------------------------------------------------
   
   # Noting which details are possible, which columns to search for their
   # names, which columns contain their values for substrates or inhibitors,
   # and what kind of data to format the output as at the end. Using data
   # object AllExpDetails.
   
   # Summary tab only includes info on Substrate & Inhibitor1
   SumDeets <- AllExpDetails %>% 
      filter(DataSource == "Summary" &
                (is.na(CompoundID) | 
                    CompoundID %in% c("substrate", "inhibitor 1")) &
                !Detail %in% c("PrimaryMetabolite1", 
                               "PrimaryMetabolite2", 
                               "SecondaryMetabolite", 
                               "Inhibitor2", 
                               "Inhibitor1Metabolite")) %>% 
      rename(Deet = Detail) %>% arrange(Deet)
   
   # If it's on input sheet but isn't for a specific compound, then it's about
   # the trial design b/c we haven't set this up to pull any information from
   # the "Simulation Toolbox" or "Software Version Detail" sections.
   InputDeets <- AllExpDetails %>% filter(DataSource == "Input Sheet") %>% 
      rename(Deet = Detail) %>% arrange(Deet) %>% 
      mutate(CompoundID = ifelse(is.na(CompoundID), "Trial Design", CompoundID))
   
   PopDeets <- AllExpDetails %>% filter(DataSource == "population") %>% 
      rename(Deet = Detail) %>% arrange(Deet)
   
   # Determining info to pull
   exp_details <- 
      switch(exp_details_input, 
             "all" = unique(AllExpDetails$Detail), 
             "summary tab" = SumDeets$Deet, 
             "input sheet" = InputDeets$Deet, 
             "summary and input" = c(SumDeets$Deet, InputDeets$Deet),
             "population tab" = PopDeets$Deet, 
             "simcyp inputs" = AllExpDetails %>% 
                filter(complete.cases(CDSInputMatch)) %>% 
                pull(Detail))
   
   # There are some details that we will just ALWAYS need to include b/c so
   # many downstream functions rely on them. Making sure that these are
   # included. 
   exp_details <- 
      unique(c(exp_details, 
               AllCompounds$DetailNames, 
               "Units_AUC", "Units_Cmax", "Units_CL", "Units_tmax",
               "PopRepSim", "SimulatorUsed",
               paste0(rep(c("StartHr", "StartDayTime", "Regimen", "MW",
                            "Dose", "NumDoses", "DoseInt", "DoseRoute", 
                            "ReleaseProfileAvailable"),
                          each = 3), 
                      c("_sub", "_inhib", "_inhib2"))))
   
   # This needs to exist for all scenarios, even if we're not checking for it.
   ReleaseProfs <- NULL
   
   # Need to note original exp_details requested b/c I'm adding to it if people
   # request info from other tabs than what they've originally got. Note that
   # this is different from "exp_details_input" and serves a different purpose.
   exp_details_orig <- exp_details
   
   # When user requests HSA or AGP, they actually want all the individual betas
   # for that. Adjusting to account for that here.
   if("HSA" %in% exp_details_orig){
      exp_details <- unique(c(exp_details, "HSA_C0_female",
                              "HSA_C0_male", "HSA_C1_female",
                              "HSA_C1_male", "HSA_C2_female",
                              "HSA_C2_male", "HSA_male", "HSA_female"))
      exp_details <- exp_details[!exp_details == "HSA"]
   }
   
   if("HSA_male" %in% exp_details_orig){
      exp_details <- unique(c(exp_details, "HSA_male", "HSA_C0_male",
                              "HSA_C1_male", "HSA_C2_male"))
      exp_details <- exp_details[!exp_details == "HSA_male"]
   }
   
   if("HSA_female" %in% exp_details_orig){
      exp_details <- unique(c(exp_details, "HSA_female", "HSA_C0_female",
                              "HSA_C1_female", "HSA_C2_female"))
      exp_details <- exp_details[!exp_details == "HSA_female"]
   }
   
   if("AGP" %in% exp_details_orig){
      exp_details <- unique(c(exp_details, "AGP_male", "AGP_female"))
      exp_details <- exp_details[!exp_details == "AGP"]
   }
   
   if(any(exp_details %in% AllExpDetails$Detail == FALSE)){
      Problem <- str_comma(unique(setdiff(exp_details,
                                          AllExpDetails$Detail)))
      warning(paste0("These study details are not among the possible options: ",
                     Problem,
                     ", so they will be omitted. Please enter 'view(ExpDetailDefinitions)' into the console for all options.\n"),
              call. = FALSE)
      exp_details <- intersect(exp_details, AllExpDetails$Detail)
   }
   
   if(length(exp_details) == 0){
      stop("You must enter at least one study detail to extract.",
           call. = FALSE)
   }
   
   Out <- list()
   
   if(any(exp_details %in% PopDeets$Deet)){
      exp_details <- c(exp_details, "Population")
      exp_details <- unique(exp_details)
   }
   
   # Need to note when to look for custom dosing tabs
   CustomDosing <- NA
   
   # Pulling details from the summary tab ----------------------------------
   MySumDeets <- sort(intersect(exp_details, SumDeets$Deet))
   
   if(exp_details_input[1] %in% c("population tab")){
      MySumDeets <- c("Population", "SimulatorVersion")
   }
   
   if(length(MySumDeets) > 0){
      
      # Long file names cause problems for readxl but not openxlsx, for
      # some reason. That's why there's the error function calling on
      # openxlsx.
      SummaryTab <- suppressMessages(tryCatch(
         readxl::read_excel(path = sim_data_file, sheet = "Summary",
                            col_names = FALSE),
         error = openxlsx::read.xlsx(sim_data_file, sheet = "Summary",
                                     colNames = FALSE)))
      # If openxlsx read the file, the names are different. Fixing.
      if(names(SummaryTab)[1] == "X1"){
         names(SummaryTab) <- paste0("...", 1:ncol(SummaryTab))
      }
      
      # We'll select details based on whether this was a Discovery simulation. 
      Out[["SimulatorUsed"]] <- ifelse(str_detect(SummaryTab[1, 1], "Discovery"), 
                                       "Simcyp Discovery", "Simcyp Simulator")
      DiscoveryCol <- switch(Out[["SimulatorUsed"]], 
                             "Simcyp Discovery" = c("Simulator and Discovery", 
                                                    "Discovery only"), 
                             "Simcyp Simulator" = c("Simulator only", 
                                                    "Simulator and Discovery"))
      
      # Need to filter to keep only details that we can possibly find based on
      # what type of simulator was used
      SumDeets <- SumDeets %>% filter(SimulatorAvailability %in% DiscoveryCol)
      MySumDeets <- MySumDeets[MySumDeets %in% SumDeets$Deet]
      
      # sub function for finding correct cell
      pullValue <- function(deet){
         # Look up a single detail on the "Summary" tab.
         #
         # deet: one detail name; it is matched against SumDeets$Deet.
         #
         # Reads SumDeets (columns Regex_row, NameCol, ValueCol and Class) and
         # SummaryTab from the enclosing function rather than taking them as
         # arguments. Returns the tidied value for that detail. Side effect:
         # may overwrite SumDeets in the calling frame (see the fu,p handling
         # below).
         
         # Setting up regex to search
         ToDetect <- SumDeets %>% 
            filter(Deet == deet) %>% pull(Regex_row)
         NameCol <- SumDeets$NameCol[which(SumDeets$Deet == deet)]
         # Rows whose label in column NameCol matches the regex for this detail
         Row <- which(str_detect(SummaryTab[, NameCol] %>% pull(), ToDetect))
         # The value is read from the same row(s), in column ValueCol
         Val <- SummaryTab[Row, SumDeets$ValueCol[SumDeets$Deet == deet]] %>%
            pull()
         
         # Accounting for when fu,p is scripted: if the detail name starts with
         # "fu_" and any value contains "script", the value can't be coerced to
         # a number, so the class recorded for this detail is switched to
         # "character". The modified SumDeets is written back to the frame that
         # called pullValue so that later code sees the change.
         if(length(Val) > 0 && 
            (any(complete.cases(Val)) && 
             str_detect(deet, "^fu_") & any(str_detect(Val, "script")))){
            SumDeets$Class[SumDeets$Deet == deet] <- "character"
            assign("SumDeets", SumDeets, envir = parent.frame())
         }
         
         # Coercing to the class listed for this detail. Coercion warnings
         # (e.g., text that can't be converted to a number) are silenced.
         suppressWarnings(
            Val <- switch(SumDeets$Class[SumDeets$Deet == deet], 
                          "character" = as.character(Val),
                          "numeric" = as.numeric(Val))
         )
         
         # More than one matching row: combine everything into one value with
         # str_comma (helper defined elsewhere; presumably returns a single
         # comma-separated string -- confirm).
         if(length(Val) > 1){
            Val <- str_comma(Val)
         }
         
         # Tidying up some specific idiosyncrasies of simulator output
         # NOTE(review): when no row matched, Val is zero-length at this point;
         # the ifelse() calls below that use a length-1 test appear to turn
         # that into a single NA -- confirm before restructuring this section.
         
         # "n/a" in the output means missing
         Val <- ifelse(complete.cases(Val) & Val == "n/a", NA, Val)
         # For details whose name starts with "Unit", strip the label text and
         # parentheses around the unit itself
         Val <- ifelse(str_detect(deet, "^Unit"),
                       str_trim(gsub("\\(unbound\\)|\\(blood\\)|\\(unbound blood\\)|Dose \\(|\\)|CMax \\(|TMax \\(|AUC \\(|CL \\(Dose/AUC\\)\\(|\\(blood\\)",
                                     "", Val)), Val)
         # SimDuration is always returned as a number
         Val <- ifelse(deet %in% c("SimDuration"),
                       as.numeric(Val), Val)
         # Only keep the "Version NN" piece of the simulator version text
         Val <- ifelse(deet == "SimulatorVersion",
                       str_extract(Val, "Version [12][0-9]"),
                       Val)
         # For PKTissue_Discovery, the cell holds a CMax label; translate it
         # into the tissue it refers to. Labels that match none of the four
         # patterns below are left to case_match's default.
         if(deet == "PKTissue_Discovery"){
            ConcUnit <- str_extract(Val, "(ng|mg|µg|µM|nM)(/)?(mL|L)?")
            Val <- case_match(Val, 
                              paste0("CMax (", ConcUnit, ")") ~ "plasma", 
                              paste0("CMax (", ConcUnit, ")(blood)") ~ "blood",
                              paste0("CMax (", ConcUnit, ")(unbound)") ~ "unbound plasma", 
                              paste0("CMax (", ConcUnit, ")(unbound blood)") ~ "unbound blood")
         }
         
         return(Val)
      }
      
      # Checking whether this was an ADC sim b/c have to do this differently. 
      MySumDeets <- setdiff(MySumDeets, "ADCSimulation_sub")
      Out[["ADCSimulation_sub"]] <- 
         any(str_detect(as.character(SummaryTab[, 1]), 
                        SumDeets %>% filter(Deet == "ADCSimulation_sub") %>% 
                           pull(Regex_row)), na.rm = T)
      
      for(i in MySumDeets){
         Out[[i]] <- pullValue(i)
         
         if(str_detect(i, "^StartDayTime") & is.na(Out[[i]])){
            CustomDosing <- c(CustomDosing, TRUE)
         }
         
         if(i == "Population" & is.na(Out[[i]])){
            # This can happen when the simulator output is actually from Simcyp
            # Discovery or the Simcyp Animal Simulator. Look for
            # "species" in that case.
            Out[[i]] <- as.character(SummaryTab[which(SummaryTab$...1 == "Species"), 2])
         }
         
      }
      
      # Simcyp Discovery only allows simulations with a substrate or metabolite
      # 1 and no other compounds, so everything else will be NA or NULL.
      if(str_detect(SummaryTab[1, 1], "Discovery")){
         Out[c("Inhibitor1", "Inhibitor2", "Inhibitor1Metabolite", 
               "PrimaryMetabolite2", "SecondaryMetabolite")] <- NA
      }
      
      # Removing details that don't apply, e.g., _inhib parameters when there
      # was no inhibitor.
      if(length(Out$Inhibitor1) > 0 &&
         is.na(Out$Inhibitor1) & any(str_detect(names(Out), "_inhib$"))){
         Out <- Out[-which(str_detect(names(Out), "_inhib$"))]
      }
      
      if(length(Out$Inhibitor2) > 0 &&
         is.na(Out$Inhibitor2) & any(str_detect(names(Out), "_inhib2$"))){
         Out <- Out[-which(str_detect(names(Out), "_inhib2$"))]
      }
      
      if(length(Out$Inhibitor1Metabolite) > 0 &&
         is.na(Out$Inhibitor1Metabolite) & any(str_detect(names(Out), "_inhib1met$"))){
         Out <- Out[-which(str_detect(names(Out), "_inhib1met$"))]
      }
      
      if(length(Out$PrimaryMetabolite1) > 0 &&
         is.na(Out$PrimaryMetabolite1) & any(str_detect(names(Out), "_met$"))){
         Out <- Out[-which(str_detect(names(Out), "_met$"))]
      }
      
      if(length(Out$PrimaryMetabolite2) > 0 &&
         is.na(Out$PrimaryMetabolite2) & any(str_detect(names(Out), "_met2$"))){
         Out <- Out[-which(str_detect(names(Out), "_met2$"))]
      }
      
      if(length(Out$SecondaryMetabolite) > 0 &&
         is.na(Out$SecondaryMetabolite) & any(str_detect(names(Out), "secmet$"))){
         Out <- Out[-which(str_detect(names(Out), "_secmet$"))]
      }
   }
   
   # Pulling details from the Input Sheet tab ------------------------------
   MyInputDeets <- intersect(exp_details, InputDeets$Deet)
   # Not pulling the same info twice
   MyInputDeets <- setdiff(MyInputDeets, names(Out))
   
   if(length(MyInputDeets) > 0){
      
      InputTab <- suppressMessages(tryCatch(
         readxl::read_excel(path = sim_data_file, sheet = "Input Sheet",
                            col_names = FALSE),
         error = openxlsx::read.xlsx(sim_data_file, sheet = "Input Sheet",
                                     colNames = FALSE)))
      # If openxlsx read the file, the names are different. Fixing.
      if(names(InputTab)[1] == "X1"){
         names(InputTab) <- paste0("...", 1:ncol(InputTab))
      }
      
      # When Inhibitor 1 is not present, don't look for those values.
      if(any(str_detect(t(InputTab[5, ]), "Inhibitor 1"), na.rm = T) == FALSE){
         MyInputDeets <- MyInputDeets[!str_detect(MyInputDeets, "_inhib$|Inhibitor1")]
      }
      
      # When Inhibitor 2 is not present, don't look for those values.
      if(any(str_detect(t(InputTab[5, ]), "Inhibitor 2"), na.rm = T) == FALSE){
         MyInputDeets <- MyInputDeets[!str_detect(MyInputDeets, "_inhib2$|Inhibitor2")]
      }
      
      # When primary metabolite 1 is not present, don't look for those values.
      if(any(str_detect(t(InputTab[5, ]), "Sub Pri Metabolite1"), na.rm = T) == FALSE){
         MyInputDeets <- MyInputDeets[!str_detect(MyInputDeets, "_met1|PrimaryMetabolite1")]
      }
      
      # When primary metabolite 2 is not present, don't look for those values.
      if(any(str_detect(t(InputTab[5, ]), "Sub Pri Metabolite2"), na.rm = T) == FALSE){
         MyInputDeets <- MyInputDeets[!str_detect(MyInputDeets, "_met2|PrimaryMetabolite2")]
      }
      
      # When secondary metabolite is not present, don't look for those values.
      if(any(str_detect(t(InputTab[5, ]), "Sub Sec Metabolite"), na.rm = T) == FALSE){
         MyInputDeets <- MyInputDeets[!str_detect(MyInputDeets, "_secmet|SecondaryMetabolite")]
      }
      
      # When Inhibitor 1 metabolite is not present, don't look for those values.
      if(any(str_detect(t(InputTab[5, ]), "Inh 1 Metabolite"), na.rm = T) == FALSE){
         MyInputDeets <- MyInputDeets[!str_detect(MyInputDeets, "_inhib1met|Inhibitor1Metabolite")]
      }
      
      # We'll select details based on whether this was a Discovery simulation. 
      Out[["SimulatorUsed"]] <- ifelse(str_detect(InputTab[1, 1], "Discovery"), 
                                       "Simcyp Discovery", "Simcyp Simulator")
      
      DiscoveryCol <- switch(Out[["SimulatorUsed"]], 
                             "Simcyp Discovery" = c("Simulator and Discovery", 
                                                    "Discovery only"), 
                             "Simcyp Simulator" = c("Simulator only", 
                                                    "Simulator and Discovery"))
      
      # Need to filter to keep only details that we can possibly find based on
      # what type of simulator was used
      InputDeets <- InputDeets %>% 
         filter(Deet %in% MyInputDeets & SimulatorAvailability %in% DiscoveryCol)
      MyInputDeets <- MyInputDeets[MyInputDeets %in% InputDeets$Deet]
      
      # Looking for locations of columns.
      ColLocations <- c("substrate" = 1,
                        "Trial Design" = which(t(InputTab[5, ]) == "Trial Design"),
                        "inhibitor 1" = which(t(InputTab[5, ]) == "Inhibitor 1"),
                        "inhibitor 2" = which(t(InputTab[5, ]) == "Inhibitor 2"),
                        "primary metabolite 1" = which(t(InputTab[5, ]) == "Sub Pri Metabolite1"),
                        "primary metabolite 2" = which(t(InputTab[5, ]) == "Sub Pri Metabolite2"),
                        "secondary metabolite" = which(t(InputTab[5, ]) == "Sub Sec Metabolite"),
                        "inhibitor 1 metabolite" = which(t(InputTab[5, ]) == "Inh 1 Metabolite"))
      
      InputDeets$NameCol <- ColLocations[InputDeets$CompoundID]
      InputDeets$ValueCol <- InputDeets$NameCol + 1
      
      ## Main set of parameters -----------------------------------------
      
      # Dealing w/potential replicate values. CompoundType and pKa may be
      # replicated but will have the same value, so when we take the unique
      # value, that will drop away.
      
      # Checking for any ADAMI parameters. (May need to adapt this later for
      # other variations on ADAM models or anything else where there will be
      # multiple cells with identical labels.)
      ADAMIrow <- which(InputTab$...1 == "ADAMI Parameters")
      
      if(length(ADAMIrow) == 0){
         InputDeets <- InputDeets %>% 
            filter(!str_detect(Deet, "ADAMI"))
         ADAMIreps <- NA
         NonADAMIreps <- NA
         MyInputDeets <- intersect(MyInputDeets, InputDeets$Deet)
      } else {
         ADAMIreps <- InputDeets %>% filter(str_detect(Deet, "ADAMI")) %>% 
            pull(Deet)
         NonADAMIreps <- sub("_ADAMI", "", ADAMIreps)
      }
      
      # sub function for finding correct cell
      pullValue <- function(deet){
         # Look up a single detail on the "Input Sheet" tab.
         #
         # deet: one detail name; it is matched against InputDeets$Deet.
         #
         # Reads InputDeets (columns Regex_row, NameCol, ValueCol, OffsetRows
         # and Class), InputTab, ADAMIrow, ADAMIreps, NonADAMIreps and
         # AllCompounds from the enclosing function rather than taking them as
         # arguments. Returns a single tidied value, NA when nothing usable
         # was found. Side effect: may overwrite InputDeets in the calling
         # frame (see the fu,p handling below).
         
         # Setting up regex to search
         ToDetect <- InputDeets %>% 
            filter(Deet == deet) %>% pull(Regex_row)
         NameCol <- InputDeets$NameCol[which(InputDeets$Deet == deet)]
         # The value is OffsetRows rows away from the row with the matching
         # label.
         Row <- which(str_detect(InputTab[, NameCol] %>% pull(), ToDetect)) +
            (InputDeets %>% filter(Deet == deet) %>% pull(OffsetRows))
         
         if(length(Row) == 0){
            # Label not found. Note that this leaves Val as a single NA rather
            # than a zero-length vector.
            Val <- NA
         } else {
            # Labels that show up both in and outside of the ADAMI section:
            # ADAMI details come from rows after the "ADAMI Parameters" row
            # and their non-ADAMI counterparts from rows before it.
            if(deet %in% ADAMIreps){Row <- Row[Row > ADAMIrow]}
            if(deet %in% NonADAMIreps){Row <- Row[Row < ADAMIrow]}
            
            Val <- InputTab[Row,
                            InputDeets$ValueCol[
                               which(InputDeets$Deet == deet)]] %>% pull()
            
            # If it's a kp scalar value other than the main one, then "Predicted"
            # means there is no user-supplied value. Otherwise, the value is
            # in the cell right below, but only when the label of that row
            # matches the tissue named in deet.
            kpcheck <- str_detect(deet, "kp_scalar_") & 
               deet %in% paste0("kp_scalar", AllCompounds$Suffix) == FALSE
            if(kpcheck){
               if(complete.cases(Val) && Val == "Predicted"){
                  Val <- NA
               } else {
                  NameColBelow <- InputTab[Row + 1,
                                           InputDeets$NameCol[
                                              which(InputDeets$Deet == deet)]] %>% pull()
                  # isTRUE() b/c the cell below may be empty, in which case
                  # str_detect returns NA and a bare if() would stop with an
                  # error. An empty cell is treated as "no match".
                  if(isTRUE(str_detect(tolower(NameColBelow),
                                       tolower(gsub(paste0("kp_scalar_|",
                                                           str_c(AllCompounds$Suffix, collapse = "|")),
                                                    "", 
                                                    sub("additional_organ", "Additional Organ", deet)))))){
                     Val <- InputTab[Row + 1, InputDeets$ValueCol[
                        which(InputDeets$Deet == deet)]] %>% pull()
                     
                  } else {
                     Val <- NA
                  }
               }
            }
         }
         
         # If SimStartDayTime is not found, which will happen with animal
         # sims, it may be possible to piece together from other data. A
         # missing label gives a single NA above, not a zero-length vector, so
         # checking for both here; checking only the length meant that this
         # never ran. Only attempting this when both the "Start Day" and
         # "Start Time" rows are present; otherwise, Val stays missing.
         if(deet == "SimStartDayTime" && 
            (length(Val) == 0 || all(is.na(Val)))){
            StartDayRow <- which(InputTab$...1 == "Start Day")
            StartTimeRow <- which(InputTab$...1 == "Start Time")
            
            if(length(StartDayRow) > 0 && length(StartTimeRow) > 0){
               StartDay <- as.character(InputTab[[2]][StartDayRow[1]])
               StartTime <- as.character(InputTab[[2]][StartTimeRow[1]])
               # Start time is split into hours and minutes at the "h" after
               # removing the "m".
               StartTime <- str_split(sub("m", "", StartTime), "h")[[1]]
               Val <- 
                  paste0("Day ", StartDay, ", ",
                         formatC(as.numeric(StartTime[1]), width = 2, flag = "0"), ":",
                         formatC(as.numeric(StartTime[2]), width = 2, flag = "0"))
            }
         }
         
         # Ontogeny profile along w/CompoundType and pKa are often listed twice
         # in output for some reason. Only keeping the unique set of values for
         # all deets. This will still throw a warning if there is more than one
         # value, but we'd want to know that anyway, so that's why I'm not just
         # keeping the 1st value listed. Note that sort() also drops any NA.
         Val <- sort(unique(Val))
         
         # Accounting for when fu,p is scripted: the value is text then, so
         # the class recorded for this detail is switched to "character" and
         # the modified InputDeets is written back to the calling frame.
         if(length(Val) > 0 && 
            (any(complete.cases(Val)) && 
             str_detect(deet, "^fu_") & any(str_detect(Val, "script")))){
            InputDeets$Class[InputDeets$Deet == deet] <- "character"
            assign("InputDeets", InputDeets, envir = parent.frame())
         }
         
         # Coercing to the class listed for this detail. Coercion warnings
         # are silenced.
         suppressWarnings(
            Val <- switch(InputDeets$Class[InputDeets$Deet == deet], 
                          "character" = as.character(Val),
                          "numeric" = as.numeric(Val))
         )
         
         # More than one distinct value: combine into one value with str_comma
         # (helper defined elsewhere).
         if(length(Val) > 1){
            Val <- str_comma(Val)
         }
         
         # Tidying up some specific idiosyncrasies of simulator output
         # Nothing left or "n/a" both mean missing
         Val <- ifelse(length(Val) == 0 || 
                          (complete.cases(Val) & Val == "n/a"), NA, Val)
         # For details whose name starts with "Unit", strip the label text and
         # parentheses around the unit itself
         Val <- ifelse(str_detect(deet, "^Unit"),
                       str_trim(gsub("\\(unbound\\)|\\(blood\\)|\\(unbound blood\\)|Dose \\(|\\)|CMax \\(|TMax \\(|AUC \\(|CL \\(Dose/AUC\\)\\(|\\(blood\\)",
                                     "", Val)), Val)
         
         return(Val)
      }
      
      # pullValue doesn't work for CL, so those are separate. Also need
      # to do StartDayTime_x, SimulatorVersion, and ADCSimulation separately.
      MyInputDeets1 <-
         MyInputDeets[!str_detect(MyInputDeets, 
                                  "CLint_|Interaction_|^StartDayTime|Transport_|ADCSimulation|SimulatorVersion|OrganTissue")]
      
      if(length(MyInputDeets1) > 0){
         for(i in MyInputDeets1){
            Out[[i]] <- pullValue(i)
         }
      }
      
      
      ## Some overall simulation details -----------------------------------
      
      # Noting all sheet names. This saves time for later data extraction and
      # also helps with debugging and coding in general. 
      Out[["SheetNames"]] <- str_c(paste0("`", SheetNames, "`"), collapse = " ")
      
      # Checking whether this was an ADC sim. 
      if("ADCSimulation_sub" %in% MyInputDeets){
         Out[["ADCSimulation_sub"]] <- 
            any(str_detect(as.character(InputTab[, 1]), 
                           InputDeets %>% filter(Deet == "ADCSimulation_sub") %>%
                              pull(Regex_row)), na.rm = T)
      }
      
      # Checking simulator version
      Out[["SimulatorVersion"]] <- ifelse(str_detect(InputTab[1, 1], "Discovery"), 
                                          as.character(InputTab[1, 1]), 
                                          str_extract(as.character(InputTab[3, 1]),
                                                      "Version [12][0-9]"))
      
      ### Checking on release profiles -----------------------------------------
      if(Out[["SimulatorUsed"]] != "Simcyp Discovery" &&
         exists("InputTab", inherits = FALSE)){
         
         ReleaseProfs <- list()
         
         for(i in names(ColLocations)[!names(ColLocations) == "Trial Design"]){
            
            Suffix <- AllCompounds$Suffix[AllCompounds$CompoundID == i]
            
            if(any(str_detect(t(InputTab[, as.numeric(ColLocations[i])]), "Release Mean"),
                   na.rm = TRUE)){
               
               StartRow <- which(str_detect(t(InputTab[, ColLocations[i]]), "CR/MR Input"))[1] + 1
               EndRow <- which(str_detect(t(InputTab[, ColLocations[i]]), "Release Mean"))
               EndRow <- EndRow[which.max(EndRow)] + 1 # Looking for last "Release Mean" row and then the next row will be the CV for that. 
               
               Release_temp <- InputTab[StartRow:EndRow, ColLocations[i]:(ColLocations[i]+1)]
               names(Release_temp) <- c("NameCol", "ValCol")
               
               # Older versions of simulator do not have CV. Checking. 
               ReleaseCV <- Release_temp$ValCol[which(str_detect(Release_temp$NameCol, "CV"))]
               if(all(is.null(ReleaseCV))){ReleaseCV <- NA}
               
               suppressWarnings(
                  ReleaseProfs[[i]] <- data.frame(
                     CR_MR_input = Out[[paste0("CR_MR_Input", Suffix)]], 
                     Time = Release_temp$ValCol[which(str_detect(Release_temp$NameCol, "Time"))], 
                     Release_mean = Release_temp$ValCol[which(str_detect(Release_temp$NameCol, "Release Mean"))], 
                     Release_CV = ReleaseCV) %>% 
                     mutate(across(.cols = everything(), .fns = as.numeric), 
                            Release_CV = Release_CV / 100, # Making this a fraction instead of a number up to 100
                            File = sim_data_file, 
                            CompoundID = i, 
                            Compound = as.character(Out[AllCompounds$DetailNames[
                               AllCompounds$CompoundID == i]])) %>% 
                     select(File, CompoundID, Compound, Time, Release_mean, Release_CV)
               )
               
               rm(StartRow, EndRow, Release_temp)
               
            } else if(
               complete.cases(Out[[paste0("CR_MR_Input", 
                                          AllCompounds$Suffix[AllCompounds$CompoundID == i])]]) &&
               Out[[paste0("CR_MR_Input", 
                           AllCompounds$Suffix[AllCompounds$CompoundID == i])]] == 
               "Weibull"){
               
               Suffix <- AllCompounds$Suffix[AllCompounds$CompoundID == i]
               
               ReleaseProfs <- data.frame(
                  CR_MR_input = Out[[paste0("CR_MR_Input", Suffix)]], 
                  Parameter = c("Fmax", "alpha", "beta", "lag"), 
                  Value = c(Out[[paste0("ReleaseProfile_Fmax", Suffix)]], 
                            Out[[paste0("ReleaseProfile_alpha", Suffix)]], 
                            Out[[paste0("ReleaseProfile_beta", Suffix)]], 
                            Out[[paste0("ReleaseProfile_lag", Suffix)]]), 
                  CV = c(Out[[paste0("ReleaseProfile_Fmax_CV", Suffix)]], 
                         Out[[paste0("ReleaseProfile_alpha_CV", Suffix)]], 
                         Out[[paste0("ReleaseProfile_beta_CV", Suffix)]], 
                         Out[[paste0("ReleaseProfile_lag_CV", Suffix)]])) %>% 
                  mutate(CV = CV / 100, # Making this a fraction instead of a number up to 100
                         File = sim_data_file, 
                         CompoundID = i, 
                         Compound = as.character(Out[AllCompounds$DetailNames[
                            AllCompounds$CompoundID == i]])) %>% 
                  select(File, CompoundID, Compound, CR_MR_input, Parameter, Value, CV)
               
            } else {
               ReleaseProfs <- NULL
            }
         }
         
         ReleaseProfs <- bind_rows(ReleaseProfs)
      }
      
      ### Checking on dissolution profiles ------------------------------------
      # Dissolution profiles are laid out on the Input Sheet as rows of time,
      # dissolution (%) and, except in older Simulator versions, CV,
      # optionally grouped under one "Dissolution Profile (tissue)" heading per
      # tissue. This collects them into DissoProfs, one row per compound,
      # tissue and time point; DissoProfs is NULL when no profile is found.
      DissoRegex <- "Dissolution( Mean)? \\(\\%"
      
      if(Out[["SimulatorUsed"]] != "Simcyp Discovery" &&
         exists("InputTab", inherits = FALSE) &&
         any(str_detect(unlist(c(InputTab[, ColLocations])), DissoRegex),
             na.rm = TRUE)){
         
         DissoProfs <- list()
         
         for(i in names(ColLocations)[!names(ColLocations) == "Trial Design"]){
            
            # All the parameter names listed in this compound's name column
            CmpdNames <- t(InputTab[, ColLocations[i]])
            
            # It could be that one compound has dissolution profiles and
            # another compound does not. Checking that here, before any row
            # arithmetic, since it is not checked per compound in the "if"
            # statement at the top of this section.
            DissoRows <- which(str_detect(CmpdNames, DissoRegex))
            if(length(DissoRows) == 0){
               next
            }
            
            # Looking for last "Dissolution (%)" row and then the next row will
            # be the CV for that. This is the same whether or not any tissues
            # are specified.
            LastRow <- max(DissoRows) + 1
            
            # There may be more than one tissue. Checking for this. 
            DissoTissueRows <- which(str_detect(CmpdNames, "^Dissolution Profile"))
            
            if(length(DissoTissueRows) > 0){
               
               # Values for each tissue start on the row after its heading and
               # stop 2 rows before the next tissue's first value row, i.e.,
               # on the row before the next heading. The last tissue stops at
               # LastRow.
               StartRows <- DissoTissueRows + 1
               EndRows <- c(StartRows[-1] - 2, LastRow)
               
               # Tissue name is whatever is inside the parentheses in the heading
               DissoTissues <- gsub("\\(|\\)", "", 
                                    str_extract(CmpdNames[DissoTissueRows], 
                                                "\\(.*\\)"))
            } else {
               # If the tissue is not specified, then there will be only 1 set
               # of values, starting on the row before the first
               # "Dissolution (%)" row.
               StartRows <- DissoRows[1] - 1
               EndRows <- LastRow
               DissoTissues <- "not specified"
            }
            
            DissoProfs[[i]] <- list()
            
            for(tiss in seq_along(StartRows)){
               
               Disso_temp <- InputTab[StartRows[tiss]:EndRows[tiss],
                                      ColLocations[i]:(ColLocations[i]+1)]
               names(Disso_temp) <- c("NameCol", "ValCol")
               
               # Older versions of simulator do not have CV. Checking. NB:
               # Subsetting returns a zero-length vector rather than NULL when
               # nothing matches, so this must check the length. A single NA
               # is recycled to the length of the other columns.
               DissoCV <- Disso_temp$ValCol[which(str_detect(Disso_temp$NameCol, "CV"))]
               if(length(DissoCV) == 0){DissoCV <- NA}
               
               # Values are read in as text, so as.numeric may warn about NAs
               # introduced by coercion; those warnings are suppressed.
               suppressWarnings(
                  DissoProfs[[i]][[tiss]] <- 
                     data.frame(
                        Time = Disso_temp$ValCol[which(str_detect(Disso_temp$NameCol, "Time"))], 
                        Dissolution_mean = Disso_temp$ValCol[which(str_detect(Disso_temp$NameCol, DissoRegex))], 
                        Dissolution_CV = DissoCV) %>% 
                     mutate(across(.cols = everything(), .fns = as.numeric), 
                            Dissolution_CV = Dissolution_CV / 100, # Making this a fraction instead of a number up to 100
                            File = sim_data_file, 
                            Tissue = DissoTissues[[tiss]],
                            CompoundID = i, 
                            Compound = as.character(Out[AllCompounds$DetailNames[
                               AllCompounds$CompoundID == i]]))
               )
               
               rm(Disso_temp, DissoCV)
               
            }
            
            DissoProfs[[i]] <- bind_rows(DissoProfs[[i]])
            
         }
         
         if(length(DissoProfs) == 0){
            # Every compound was skipped, so there are no columns to select
            DissoProfs <- NULL
         } else {
            DissoProfs <- bind_rows(DissoProfs) %>% 
               select(File, Tissue, CompoundID, Compound, Time,
                      Dissolution_mean, Dissolution_CV)
         }
         
      } else {
         DissoProfs <- NULL
      }
      
      ### Concentration-dependent fu -----------------------------------------
      # Gathers any concentration-dependent fu profiles from the Input Sheet
      # into CDfupProfs, one row per compound and concentration. CDfupProfs is
      # NULL unless at least one compound's value column mentions a profile.
      CDfupProfs <- NULL
      
      if(Out[["SimulatorUsed"]] != "Simcyp Discovery" &&
         exists("InputTab", inherits = FALSE) &&
         any(str_detect(unlist(c(InputTab[, ColLocations + 1])), 
                        "Concentration-dependent fu profile"),
             na.rm = TRUE)){
         
         CDfupProfs <- list()
         
         for(i in names(ColLocations)[names(ColLocations) != "Trial Design"]){
            
            # The profile heading is in the value column; the entries are
            # taken from 2 rows below the first heading found.
            StartRow <- 2 + which(str_detect(t(InputTab[, ColLocations[i] + 1]), 
                                             "Concentration-dependent fu profile"))[1]
            
            # which() lists rows in ascending order, so the last element is
            # the final "fu" entry in the name column.
            EndRow <- which(str_detect(t(InputTab[, ColLocations[i]]), 
                                       "fu [0-9]"))
            EndRow <- EndRow[length(EndRow)]
            
            # Not every compound necessarily has a conc-dependent fu profile,
            # and the check at the top of this section only establishes that
            # at least one does. Skipping any compound without one.
            if(is.na(StartRow)){
               next
            }
            
            # Name column and value column for just the profile rows
            CDfup_temp <- 
               InputTab[StartRow:EndRow, ColLocations[i]:(ColLocations[i]+1)] %>% 
               rename(NameCol = 1, ValCol = 2)
            
            CDfupProfs[[i]] <- 
               data.frame(
                  Conc = CDfup_temp %>% 
                     filter(str_detect(NameCol, "Conc")) %>% 
                     pull(ValCol), 
                  fup = CDfup_temp %>% 
                     filter(str_detect(NameCol, "fu [0-9]")) %>% 
                     pull(ValCol)) %>%  
               mutate(across(.cols = everything(), .fns = as.numeric)) %>% 
               mutate(File = sim_data_file, 
                      CompoundID = i, 
                      Compound = as.character(Out[AllCompounds$DetailNames[
                         AllCompounds$CompoundID == i]])) %>% 
               select(File, CompoundID, Compound, Conc, fup)
            
            rm(StartRow, EndRow, CDfup_temp)
            
         }
         
         CDfupProfs <- bind_rows(CDfupProfs)
      }
      
      
      ### Concentration-dependent B/P -----------------------------------------
      # Gathers any concentration-dependent B/P profiles from the Input Sheet
      # into CDBPProfs, one row per compound and concentration. CDBPProfs is
      # NULL unless at least one compound's value column mentions a profile.
      CDBPProfs <- NULL
      
      if(Out[["SimulatorUsed"]] != "Simcyp Discovery" &&
         exists("InputTab", inherits = FALSE) &&
         any(str_detect(unlist(c(InputTab[, ColLocations + 1])), 
                        "Concentration-dependent B/P profile"),
             na.rm = TRUE)){
         
         CDBPProfs <- list()
         
         for(i in names(ColLocations)[names(ColLocations) != "Trial Design"]){
            
            # The profile heading is in the value column; the entries are
            # taken from 2 rows below the first heading found.
            StartRow <- 2 + which(str_detect(t(InputTab[, ColLocations[i] + 1]), 
                                             "Concentration-dependent B/P profile"))[1]
            
            # which() lists rows in ascending order, so the last element is
            # the final "B/P" entry in the name column.
            EndRow <- which(str_detect(t(InputTab[, ColLocations[i]]), 
                                       "B/P [0-9]"))
            EndRow <- EndRow[length(EndRow)]
            
            # Not every compound necessarily has a conc-dependent B/P profile,
            # and the check at the top of this section only establishes that
            # at least one does. Skipping any compound without one.
            if(is.na(StartRow)){
               next
            }
            
            # Name column and value column for just the profile rows
            CDBP_temp <- 
               InputTab[StartRow:EndRow, ColLocations[i]:(ColLocations[i]+1)] %>% 
               rename(NameCol = 1, ValCol = 2)
            
            CDBPProfs[[i]] <- 
               data.frame(
                  Conc = CDBP_temp %>% 
                     filter(str_detect(NameCol, "Conc")) %>% 
                     pull(ValCol), 
                  BP = CDBP_temp %>% 
                     filter(str_detect(NameCol, "B/P [0-9]")) %>% 
                     pull(ValCol)) %>%  
               mutate(across(.cols = everything(), .fns = as.numeric)) %>% 
               mutate(File = sim_data_file, 
                      CompoundID = i, 
                      Compound = as.character(Out[AllCompounds$DetailNames[
                         AllCompounds$CompoundID == i]])) %>% 
               select(File, CompoundID, Compound, Conc, BP)
            
            rm(StartRow, EndRow, CDBP_temp)
            
         }
         
         CDBPProfs <- bind_rows(CDBPProfs)
      }
      
      
      ### pH-dependent solubility -----------------------------------------
      # Gathers any pH-solubility profiles from the Input Sheet into pHSol,
      # one row per compound and pH entry. pHSol is NULL unless at least one
      # compound's value column mentions such a profile.
      pHSol <- NULL
      
      if(Out[["SimulatorUsed"]] != "Simcyp Discovery" &&
         exists("InputTab", inherits = FALSE) &&
         any(str_detect(unlist(c(InputTab[, ColLocations + 1])), 
                        "Solubility-pH profile|User defined pH-Solubility"),
             na.rm = TRUE)){
         
         pHSol <- list()
         
         for(i in names(ColLocations)[names(ColLocations) != "Trial Design"]){
            
            # The profile heading is in the value column; the entries are
            # taken from the row after the first heading found.
            StartRow <- 1 + which(str_detect(t(InputTab[, ColLocations[i] + 1]), 
                                             "Solubility-pH profile|User defined pH-Solubility"))[1]
            
            # which() lists rows in ascending order, so the last element is
            # the final solubility entry in the name column.
            EndRow <- which(str_detect(t(InputTab[, ColLocations[i]]), 
                                       "Entry [0-9]{1,} Solubility"))
            EndRow <- EndRow[length(EndRow)]
            
            # Not every compound necessarily has a pH-dependent solubility
            # profile, and the check at the top of this section only
            # establishes that at least one does. Skipping any compound
            # without one.
            if(is.na(StartRow)){
               next
            }
            
            # Name column and value column for just the profile rows
            pHSol_temp <- 
               InputTab[StartRow:EndRow, ColLocations[i]:(ColLocations[i]+1)] %>% 
               rename(NameCol = 1, ValCol = 2)
            
            pHSol[[i]] <- 
               data.frame(
                  pH = pHSol_temp %>% 
                     filter(str_detect(NameCol, "pH")) %>% 
                     pull(ValCol), 
                  Solubility = pHSol_temp %>% 
                     filter(str_detect(NameCol, "Entry [0-9]{1,} Solubility")) %>% 
                     pull(ValCol)) %>%  
               mutate(across(.cols = everything(), .fns = as.numeric)) %>% 
               mutate(File = sim_data_file, 
                      CompoundID = i, 
                      Compound = as.character(Out[AllCompounds$DetailNames[
                         AllCompounds$CompoundID == i]])) %>% 
               select(File, CompoundID, Compound, pH, Solubility)
            
            rm(StartRow, EndRow, pHSol_temp)
            
         }
         
         pHSol <- bind_rows(pHSol)
      }
      
      
      ## Pulling CL info ----------------------------------------------------
      # For every requested detail whose name contains "CLint_", find the
      # clearance-related rows in that compound's columns of the Input Sheet
      # and store each value in Out. The names in Out are built from the
      # parameter, the enzyme and pathway where applicable, and the compound
      # suffix extracted from the detail name.
      MyInputDeets2 <- MyInputDeets[str_detect(MyInputDeets, "CLint_")]
      
      if(length(MyInputDeets2) > 0){
         
         for(j in MyInputDeets2){
            
            Suffix <- str_extract(j, "_sub$|_inhib$|_inhib2$|_met1$|_met2$|_secmet$|_inhib1met$")
            NameCol <- InputDeets$NameCol[InputDeets$Deet == j]
            ValueCol <- InputDeets$ValueCol[InputDeets$Deet == j]
            
            # Rows that start any kind of clearance entry: an "Enzyme" row, a
            # biliary or additional clearance row, an "In Vivo Clear..." value,
            # or an organ clearance row.
            CLRows <- which(
               InputTab[ , NameCol] == "Enzyme" |
                  str_detect(InputTab[ , NameCol] %>%
                                pull(),
                             "^Biliary (CLint|Clearance)") |
                  str_detect(InputTab[ , NameCol] %>%
                                pull(),
                             "^Additional HLM CLint|^Additional Systemic Clearance|^Additional HKM CLint") |
                  str_detect(InputTab[ , ValueCol] %>%
                                pull(),
                             "In Vivo Clear") |
                  str_detect(InputTab[ , NameCol] %>%
                                pull(),
                             "(Liver|Intestine|Biliary) Clearance"))
            # Only keeping rows where the following row is not empty
            CLRows <- CLRows[complete.cases(InputTab[CLRows + 1, NameCol])]
            
            
            if(Out[["SimulatorUsed"]] == "Simcyp Discovery"){
               # Discovery sims have a slightly different setup on the Input
               # Sheet and only the 1st CLRows value should be used b/c that
               # section will contain all the info we need.
               CLRows <- CLRows[1]
               
               # Parameter names from the 1st CL row to the bottom of the
               # sheet. Position k in MyNames is row CLRows + k - 1 in InputTab.
               MyNames <- as.character(t(
                  InputTab[CLRows:nrow(InputTab), NameCol]))
               
               # Detail name to store in Out and the regex that finds its row
               DiscoveryDeets <- 
                  data.frame(
                     Detail = c("CLiv_InVivoCL", 
                                "CLpo_InVivoCL", 
                                "CLint_biliary",
                                "CLint_biliary_fuinc", 
                                "CLrenal", 
                                "CL_AddSystemic", 
                                "CL_PercentAvailReabsorption"),
                     RegexRow = c("CL.*iv.*[(]mL", 
                                  "CL.*po.*[(]mL", 
                                  "Biliary Clearance ..L/min",
                                  "Biliary fu inc", 
                                  "CL R .mL", 
                                  "^Additional Systemic Clearance", 
                                  "^Percent.*re-absorption"))
               
               for(i in seq_len(nrow(DiscoveryDeets))){
                  MyRow <- which(str_detect(MyNames, DiscoveryDeets$RegexRow[i]))
                  if(length(MyRow) == 0){
                     Out[[paste0(DiscoveryDeets$Detail[i], Suffix)]] <- NA
                  } else {
                     suppressWarnings(
                        Out[[paste0(DiscoveryDeets$Detail[i], Suffix)]] <-
                           as.numeric(InputTab[MyRow + CLRows - 1, ValueCol])
                     )
                  }
                  rm(MyRow)
               }
               
               # Liver and intestinal CL
               LivOrInt <- c("Liver", "Intestine")[
                  c(any(str_detect(MyNames, "Liver")), any(str_detect(MyNames, "Intestine")))]
               
               # "XXX" is a placeholder for the organ in both the detail name
               # and the regex; it gets filled in inside the loop over organs.
               # NB: The regex for the clearance type must be built per organ
               # rather than from whatever "i" was left over from the loop
               # above. NOTE(review): the double underscore in
               # "CL_XXX__UseSaturableKinetics" is kept as is in case anything
               # downstream expects that exact name -- confirm.
               LivIndCL <- 
                  data.frame(
                     Detail = c("CL_XXX_Type", 
                                "CL_XXX__UseSaturableKinetics", 
                                "CLint_XXX", 
                                "CL_Km_XXX", 
                                "CL_Vmax_XXX", 
                                "CL_XXX_fuinc", 
                                "CL_XXX_UseMetabolite", 
                                "CL_XXX_MetabPerc", 
                                "CL_XXX_ScrapingsCorrectionFactor", 
                                "CL_XXX_ElutionCorrectionFactor"), 
                     RegexRow = c("XXX Clearance Type", 
                                  "Use Saturable Kinetics", 
                                  "CLint", 
                                  "Km \\(", 
                                  "Vmax \\(", 
                                  "fu inc", 
                                  "Use Metabolite", 
                                  "Metabolite .%", 
                                  "\\(scrapings\\) Correction Factor", 
                                  "\\(elution\\) Correction Factor")
                  )
               
               if(length(LivOrInt[complete.cases(LivOrInt)]) > 0){
                  for(i in LivOrInt[complete.cases(LivOrInt)]){
                     # First and last positions in MyNames that mention this
                     # organ, either by name or by "LM" / "IM"
                     OrganRows <- range(
                        which(str_detect(MyNames, i) |
                                 str_detect(MyNames,
                                            paste0(str_sub(i, 1, 1), "M")))
                     )
                     
                     for(k in seq_len(nrow(LivIndCL))){
                        MyRow <- which(str_detect(MyNames[OrganRows[1]:OrganRows[2]],
                                                  sub("XXX", i, LivIndCL$RegexRow[k])))
                        if(length(MyRow) == 0){
                           Out[[paste0(sub("XXX", i, LivIndCL$Detail[k]), Suffix)]] <- NA
                        } else {
                           # MyRow is a position within this organ's part of
                           # MyNames, so it must be shifted by the start of
                           # the organ's rows as well as by the 1st CL row to
                           # get the row in InputTab.
                           suppressWarnings(
                              Out[[paste0(sub("XXX", i, LivIndCL$Detail[k]), Suffix)]] <-
                                 as.character(InputTab[MyRow + OrganRows[1] + CLRows - 2,
                                                       ValueCol])
                           )
                        }
                        rm(MyRow)
                     }
                  }
               }
               
            } else {
               # Regular Simulator data extraction starts here. 
               
               # Checking for interaction data
               IntRowStart <- which(str_detect(InputTab[, NameCol] %>%
                                                  pull(), "Ind max|^Ki |^MBI|Interaction"))[1] - 1
               
               # Anything at or below the start of the interaction info is not
               # clearance info
               if(complete.cases(IntRowStart)){
                  CLRows <- CLRows[CLRows < min(IntRowStart)]
               }
               
               for(i in CLRows){
                  
                  # CL for a specific enzyme
                  if(str_detect(as.character(InputTab[i, NameCol]), "Enzyme")){
                     
                     # Last row before the next empty cell in the name column
                     LastRow_i <- which(is.na(InputTab[, NameCol]))
                     LastRow_i <- LastRow_i[LastRow_i > i][1] - 1
                     
                     # Enzyme is in the cell next to "Enzyme"; the pathway is
                     # in the same column but one row up
                     Enzyme <- gsub(" ", "", InputTab[i, NameCol + 1])
                     Pathway <- gsub(" |-", "", InputTab[i - 1, NameCol + 1])
                     
                     # The CL values are on the row after "Enzyme" unless
                     # there's a genotype row, an ontogeny row, or, for
                     # user-defined enzymes, some other row in between.
                     if(as.character(InputTab[i+1, NameCol]) == "Genotype"){
                        Genotype <- InputTab[i+1, NameCol + 1]
                        Genotype <- gsub("\\*", "star", Genotype)
                        Genotype <- gsub("/", "", Genotype)
                        Enzyme <- paste0(Enzyme, "_", Genotype)
                        CLrow <- i + 2
                     } else if((str_detect(Enzyme, "User") &
                                !str_detect(InputTab[i+1, NameCol], "CLint|Vmax")) |
                               str_detect(tolower(InputTab[i + 1, NameCol]), 
                                          "ontogeny")){
                        CLrow <- i + 2
                     } else {
                        CLrow <- i + 1
                     }
                     
                     # NB: CLType is NA when the row label is not one of
                     # these, so the checks below use %in% rather than ==,
                     # which would throw an error inside "if" for NA.
                     CLType <- str_extract(InputTab[CLrow, NameCol],
                                           "CLint|Vmax|t1/2|Ind max")
                     
                     if(CLType %in% "CLint"){
                        
                        # NOTE TO CODERS: I'd been including units for CLint in
                        # the past, but I realized that I hadn't included units
                        # for other enzymes, so I decided later to omit them.
                        # Keeping this bit of code, albeit commented out, so
                        # that we can add them back easily if we want.
                        
                        # Units <- str_extract(InputTab[CLrow, NameCol], 
                        #                      "\\(.*\\)")
                        # Units <- gsub("\\(|\\)", "", Units)
                        # Units <- gsub("/| ", "_", Units)
                        # # Dealing with mu since it's causing some problems
                        # # downstream when a symbol
                        # Units <- gsub(rlang::chr_unserialise_unicode("<U+00B5>"), 
                        #               "u", Units)
                        
                        suppressWarnings(
                           Out[[paste0(
                              paste("CLint", Enzyme, Pathway, # Units,
                                    sep = "_"),
                              Suffix)]] <-
                              as.numeric(InputTab[CLrow, NameCol + 1])
                        )
                        
                        # fu,mic is on the row after CLint
                        suppressWarnings(
                           Out[[paste0(
                              paste("fu_mic", Enzyme,
                                    Pathway, sep = "_"),
                              Suffix)]] <-
                              as.numeric(InputTab[CLrow+1, NameCol + 1])
                        )
                        
                        # Check for any UGT-specific CL parameters
                        if(str_detect(Enzyme, "UGT") & 
                           any(str_detect(t(InputTab[i:LastRow_i, NameCol]),
                                          "rUGT"))){
                           
                           rUGTSysInfo <- InputTab[i:LastRow_i, c(NameCol, ValueCol)] %>% 
                              rename(Name = 1, Value = 2) %>% 
                              filter(str_detect(Name, "rUGT"))
                           
                           Out[[paste0("CLint_", Enzyme, "_", Pathway, "_rUGTSystem",
                                       Suffix)]] <-
                              rUGTSysInfo[which(str_detect(rUGTSysInfo$Name, 
                                                           "rUGTSystem")), ] %>% 
                              pull(Value)
                           
                           suppressWarnings(
                              Out[[paste0("CLint_", Enzyme, "_", Pathway, "_rUGTScalar_liver",
                                          Suffix)]] <-
                                 rUGTSysInfo[which(
                                    str_detect(tolower(rUGTSysInfo$Name), 
                                               "rugtscalar.*liver")), ] %>% 
                                 pull(Value) %>% as.numeric())
                           
                           suppressWarnings(
                              Out[[paste0("CLint_", Enzyme, "_", Pathway, "_rUGTScalar_intestine",
                                          Suffix)]] <-
                                 rUGTSysInfo[which(
                                    str_detect(tolower(rUGTSysInfo$Name), 
                                               "rugtscalar.*intestine")), ] %>% 
                                 pull(Value) %>% as.numeric())
                           
                           suppressWarnings(
                              Out[[paste0("CLint_", Enzyme, "_", Pathway, "_rUGTScalar_kidney",
                                          Suffix)]] <-
                                 rUGTSysInfo[which(
                                    str_detect(tolower(rUGTSysInfo$Name), 
                                               "rugtscalar.*kidney")), ] %>% 
                                 pull(Value) %>% as.numeric())
                           
                           rm(rUGTSysInfo)
                        }
                        
                        rm(Enzyme, Pathway, CLType)
                        next
                        
                     }
                     
                     if(CLType %in% "Vmax"){
                        suppressWarnings(
                           Out[[paste0(
                              paste("Vmax", Enzyme,
                                    Pathway, sep = "_"),
                              Suffix)]] <-
                              as.numeric(InputTab[CLrow, NameCol + 1])
                        )
                        
                        # Km is on the row after Vmax and fu,mic on the row
                        # after that
                        suppressWarnings(
                           Out[[paste0(
                              paste("Km", Enzyme,
                                    Pathway, sep = "_"),
                              Suffix)]] <-
                              as.numeric(InputTab[CLrow+1, NameCol + 1])
                        )
                        
                        suppressWarnings(
                           Out[[paste0(
                              paste("fu_mic", Enzyme,
                                    Pathway, sep = "_"),
                              Suffix)]] <-
                              as.numeric(InputTab[CLrow+2, NameCol + 1])
                        )
                        
                        rm(Enzyme, Pathway, CLType)
                        next
                     }
                     
                     if(CLType %in% "t1/2"){
                        suppressWarnings(
                           Out[[paste0(
                              paste("HalfLife", Enzyme,
                                    Pathway, sep = "_"),
                              Suffix)]] <-
                              as.numeric(InputTab[CLrow, NameCol + 1])
                        )
                        
                        rm(Enzyme, Pathway, CLType)
                        next
                     }
                  } 
                  
                  # Biliary CL
                  if(str_detect(as.character(InputTab[i, NameCol]), "^Biliary (CLint|Clearance)")){
                     suppressWarnings(
                        Out[[paste0("CLint_biliary", Suffix)]] <-
                           as.numeric(InputTab[i, NameCol + 1])
                     )
                  }
                  
                  # Other HLM CL
                  if(str_detect(as.character(InputTab[i, NameCol]), "^Additional HLM CLint")){
                     suppressWarnings(
                        Out[[paste0("CLint_AddHLM", Suffix)]] <-
                           as.numeric(InputTab[i, NameCol + 1])
                     )
                  }
                  
                  # Other HKM CL
                  if(str_detect(as.character(InputTab[i, NameCol]), "^Additional HKM CLint")){
                     suppressWarnings(
                        Out[[paste0("CLint_AddHKM", Suffix)]] <-
                           as.numeric(InputTab[i, NameCol + 1])
                     )
                  }
                  
                  # in vivo CL
                  if(str_detect(as.character(InputTab[i, ValueCol]),
                                "In Vivo Clearance")){
                     
                     # Parameter names from this row down to whichever comes
                     # first: the start of the interaction info, the row
                     # before the next CL row, or the bottom of the sheet.
                     # Position k in MyNames is row i + k - 1 in InputTab.
                     MyNames <- as.character(t(
                        InputTab[i:min(
                           c(IntRowStart, CLRows[which(CLRows == i) + 1] - 1, 
                             nrow(InputTab)), na.rm = TRUE), NameCol]))
                     
                     suppressWarnings(
                        Out[[paste0("CLiv_InVivoCL", Suffix)]] <- 
                           as.numeric(InputTab[
                              which(str_detect(MyNames,
                                               "CL.*iv.*[(](m)?L")) + i - 1,
                              ValueCol])
                     )
                     
                     suppressWarnings(
                        Out[[paste0("CLbiliary_InVivoCL", Suffix)]] <- 
                           as.numeric(InputTab[
                              which(str_detect(MyNames,
                                               "Biliary Clearance")) + i - 1,
                              ValueCol])
                     )
                     
                     suppressWarnings(
                        Out[[paste0("CLadditional_InVivoCL", Suffix)]] <- 
                           as.numeric(InputTab[
                              which(str_detect(MyNames,
                                               "Additional Systemic Clearance")) + i - 1,
                              ValueCol])
                     )
                     
                     suppressWarnings(
                        Out[[paste0("CLpo_InVivoCL", Suffix)]] <- 
                           as.numeric(InputTab[
                              which(str_detect(MyNames,
                                               "^CL .po.")) + i - 1,
                              ValueCol])
                     )
                     
                  }
                  
               }
               rm(CLRows, IntRowStart, NameCol, Suffix)
            }
         }
      } 
      
      ## Pulling interaction info -------------------------------------------
      MyInputDeets3 <- MyInputDeets[str_detect(MyInputDeets, "Interaction_")]
      
      if(length(MyInputDeets3) > 0){
         
         for(j in MyInputDeets3){
            
            Suffix <- str_extract(j, "_sub$|_inhib$|_inhib2$|_met1$|_secmet$|_inhib1met$")
            NameCol <- InputDeets$NameCol[InputDeets$Deet == j]
            ValueCol <- InputDeets$ValueCol[InputDeets$Deet == j]
            IntRows <- which(str_detect(InputTab[ , NameCol] %>% pull(),
                                        "^Enzyme$|^Transporter$"))
            IntRows <- IntRows[complete.cases(InputTab[IntRows + 1, NameCol])]
            
            # Only IntRows after the first instance of an
            # interaction type of term is found in NameCol. NB: I
            # thought it would work to just look for cells after
            # "interaction", but "interaction" hasn't always been
            # listed in the output files I've found.
            IntRowStart <- which(str_detect(InputTab[, NameCol] %>%
                                               pull(), "Ind [mM]ax|Ind [sS]lope|^Ki |^MBI"))[1] - 1
            TransporterTissues <- IntRows[which(
               str_detect(t(InputTab[IntRows, NameCol]), "Transporter"))]
            TransporterTissues <- TransporterTissues[which(
               str_detect(t(InputTab[TransporterTissues - 1, NameCol]), 
                          "Organ/Tissue"))] - 1
            TransporterTissues <- data.frame(
               Row = TransporterTissues, 
               Tissue = as.character(t(InputTab[TransporterTissues, ValueCol])))
            
            if(complete.cases(IntRowStart)){
               
               IntRows <- IntRows[IntRows >= IntRowStart]
               
               for(i in IntRows){
                  Enzyme <- gsub(" |\\(|\\)|-|/", "_", InputTab[i, NameCol + 1])
                  Enzyme <- gsub("_{2,}", "_", Enzyme)
                  Enzyme <- sub("_$", "", Enzyme)
                  NextEmptyCell <- which(is.na(InputTab[, NameCol + 1]))
                  NextEmptyCell <- NextEmptyCell[NextEmptyCell > i][1]
                  # If there's another interaction listed
                  # before the next empty cell, need to
                  # account for that.
                  NextInt <- IntRows[which(IntRows == i) + 1] - 1
                  NextInt <- ifelse(i == IntRows[length(IntRows)],
                                    nrow(InputTab), NextInt)
                  ThisIntRows <- i:(c(NextEmptyCell, NextInt)[which.min(c(NextEmptyCell, NextInt))])
                  ThisIntRows <- setdiff(ThisIntRows, NextEmptyCell)
                  
                  # Induction
                  IndParam1stRow <- which(str_detect(InputTab[ThisIntRows, NameCol] %>% pull(),
                                                     "Ind max|Ind Slope"))
                  if(length(IndParam1stRow) > 0){
                     IndModelCheck <- list(str_detect(t(InputTab[ThisIntRows, NameCol]), "Ind max"), 
                                           str_detect(t(InputTab[ThisIntRows, NameCol]), "Ind Slope"), 
                                           str_detect(t(InputTab[ThisIntRows, NameCol]), "Ind( )?C50"), 
                                           str_detect(t(InputTab[ThisIntRows, NameCol]), "fu inc"), 
                                           str_detect(t(InputTab[ThisIntRows, NameCol]), "\u03B3"))
                     IndModelCheck <- sapply(IndModelCheck, FUN = function(x) which(x == TRUE))
                     # Note: I can't seem to get regex to work for
                     # detecting a Greek character; I figured that the
                     # Hill coefficient gamma is the only time that an
                     # induction parameter name is only going to be one
                     # character long.
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("IndMax", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[IndModelCheck[[1]][1]], NameCol + 1])
                     )
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("IndSlope", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[IndModelCheck[[2]][1]], NameCol + 1])
                     )
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("IndC50", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[IndModelCheck[[3]][1]], NameCol + 1])
                     )
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("Ind_fu_inc", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[IndModelCheck[[4]][1]], NameCol + 1])
                     )
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("Ind_gamma", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[IndModelCheck[[5]][1]], NameCol + 1])
                     )
                     
                     rm(IndModelCheck)   
                  }
                  
                  # competitive inhibition
                  Ki <- which(str_detect(InputTab[ThisIntRows, NameCol] %>% pull(),
                                         "Ki "))
                  if(length(Ki) > 0){
                     
                     EnzTrans <- as.character(InputTab[i, NameCol])
                     
                     if(EnzTrans == "Transporter"){
                        Enzyme <-
                           paste0(Enzyme, "_",
                                  # setting the tissue 
                                  TransporterTissues %>% 
                                     filter(Row <= i) %>% 
                                     filter(Row == max(Row)) %>% 
                                     pull(Tissue))
                     }
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("Ki", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[Ki], NameCol + 1])
                     )
                     
                     # fu mic or fu inc
                     IncType <- str_extract(InputTab[ThisIntRows[Ki+1], NameCol] %>%
                                               pull(),
                                            "inc|mic")
                     suppressWarnings(
                        Out[[paste0(
                           paste(switch(IncType,
                                        "inc" = "Ki_fu_inc",
                                        "mic" = "Ki_fu_mic"),
                                 Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[Ki+1], NameCol + 1])
                     )
                     
                     rm(IncType, EnzTrans)
                  }
                  
                  # MBI
                  MBI <-  which(str_detect(InputTab[ThisIntRows, NameCol] %>% pull(),
                                           "MBI Kapp"))
                  if(length(MBI) > 0){
                     suppressWarnings(
                        Out[[paste0(
                           paste("MBI_Kapp", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[MBI], NameCol + 1])
                     )
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("MBI_kinact", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[MBI+1], NameCol + 1])
                     )
                     
                     suppressWarnings(
                        Out[[paste0(
                           paste("MBI_fu_mic", Enzyme,
                                 sep = "_"), Suffix)]] <-
                           as.numeric(InputTab[ThisIntRows[MBI+2], NameCol + 1])
                     )
                  }
                  
                  rm(Enzyme, NextEmptyCell, NextInt,
                     ThisIntRows, IndParam1stRow, Ki, MBI)
               }
            }
            
            rm(Suffix, IntRows, IntRowStart, NameCol)
         }
      }
      
      ## Dealing with StartDayTime_x --------------------------------------
      MyInputDeets4 <- MyInputDeets[str_detect(MyInputDeets, "^StartDayTime")]
      
      if(length(MyInputDeets4) > 0){
         for(j in MyInputDeets4){
            
            NameCol <- InputDeets$NameCol[which(InputDeets$Deet == j)]
            ValueCol <- InputDeets$ValueCol[InputDeets$Deet == j]
            Row_day <- which(str_detect(InputTab[, NameCol] %>% pull(), "Start Day"))
            # If this is not present, which sometimes happens with a custom
            # dosing schedule, then will need to pull info from custom
            # dosing sheet lower in script.
            if(length(Row_day) == 0){
               CustomDosing <- c(CustomDosing, TRUE)
            } else {
               Val_day <- InputTab[Row_day, InputDeets$ValueCol[
                  which(InputDeets$Deet == j)]] %>% pull()
               Row_time <- which(str_detect(InputTab[, NameCol] %>% pull(), "Start Time"))
               Val_time <- InputTab[Row_time, InputDeets$ValueCol[
                  which(InputDeets$Deet == j)]] %>% pull()
               
               # Dealing with inconsistencies in time format
               Val_time <- sub("m", "", Val_time)
               Val_time <- str_split(Val_time, pattern = "h")[[1]]
               Val_time <- str_c(str_pad(Val_time, width = 2, pad = "0"),
                                 collapse = ":")
               
               Out[[j]] <- paste(paste0("Day ", Val_day),
                                 Val_time, sep = ", ")
            }
         }
      }
      
      
      ## Transport parameters ----------------------------------------------
      MyInputDeets5 <- MyInputDeets[str_detect(MyInputDeets, "Transport_")]
      MyInputDeets5 <- InputDeets %>% 
         filter(Deet %in% MyInputDeets5 & complete.cases(NameCol)) %>%
         pull(Deet)
      
      if(length(MyInputDeets5) > 0){
         
         for(j in MyInputDeets5){
            
            Suffix <- str_extract(j, "_sub$|_inhib$|_inhib2$|_met1$|_secmet$|_inhib1met$")
            NameCol <- InputDeets$NameCol[InputDeets$Deet == j]
            ValueCol <- InputDeets$ValueCol[InputDeets$Deet == j]
            
            # There can be transporter interactions higher up on the tab,
            # but parameters that are actually just transport parameters all
            # come after the title "Transport".
            StartTrans <- which(InputTab[, NameCol] %>% pull() == "Transport") 
            
            if(length(StartTrans) > 0){
               TransRows <- which(str_detect(InputTab[ , NameCol] %>% pull(),
                                             "^Transporter"))
               TransRows <- TransRows[TransRows > StartTrans]
               
               if(length(TransRows) > 0){
                  
                  # Sometimes, the organ is only listed once and then not listed
                  # again for subsequent transporters until it changes. 
                  OrganRows <- which(str_detect(InputTab[, NameCol] %>% pull(), 
                                                "Organ/Tissue"))
                  OrganRows <- OrganRows[OrganRows >= min(TransRows) - 1 & 
                                            OrganRows < max(TransRows)]
                  
                  # BBB transporter parameters are set up differently, so
                  # removing any organ rows for those scenarios.
                  BrainRows <- OrganRows + 1
                  BrainRows <- BrainRows[which(str_detect(
                     InputTab[BrainRows, NameCol] %>% pull(), "BBB|BCSFB|ISF.ICF"))]
                  
                  OrganRows <- setdiff(OrganRows, BrainRows - 1)
                  
                  for(i in TransRows){
                     
                     # Last row always seems to contain RAF/REF or ISEF,T
                     TransRowLast <- which(str_detect(InputTab[ , NameCol] %>% pull(), 
                                                      "RAF/REF|ISEF"))
                     TransRowLast <- TransRowLast[TransRowLast > i]
                     TransRowLast <- ifelse(length(TransRowLast) == 0, 
                                            nrow(InputTab), TransRowLast[1])
                     TransRowNames <- InputTab[i:TransRowLast, NameCol] %>% pull(1)
                     Transporter <- gsub(" |\\(|\\)|-|/", "_", InputTab[i, NameCol + 1])
                     Transporter <- gsub("_{2,}", "_", Transporter)
                     Transporter <- sub("_$", "", Transporter)
                     Transporter <- case_match(Transporter, 
                                               "Apical_Efflux_Kidney" ~ "General_Apical_Efflux", 
                                               .default = Transporter)
                     
                     Location <- gsub(" |\\(|\\)|-|/", "", 
                                      InputTab[c(i:TransRowLast)[which(TransRowNames == "Location")],
                                               ValueCol] %>% pull(1))
                     Organ <- InputTab[max(OrganRows[OrganRows < i]), ValueCol] %>% 
                        pull() %>% str_comma() # This should have length 1, but adding str_comma just in case. 
                     
                     # Organ <- which(str_detect(as.character(t(
                     #    InputTab[(i-1):TransRowLast, NameCol])), "Organ/Tissue"))
                     # Organ <- ifelse(length(Organ) > 0, 
                     #                 as.character(InputTab[((i-1):TransRowLast)[Organ], ValueCol]), 
                     #                 "")
                     
                     ParamPrefix <- paste("Transporter", Organ, Transporter, Location, sep = "_")
                     
                     # Either CLint,T or Jmax and Km values will be listed
                     if(any(str_detect(TransRowNames, "CLint,T"))){
                        
                        suppressWarnings(
                           Out[[paste0(ParamPrefix, "_CLintT", Suffix)]] <- 
                              as.numeric(
                                 InputTab[c(i:TransRowLast)[which(str_detect(TransRowNames, "CLint,T"))],
                                          ValueCol] %>% pull(1))
                        )
                        
                     } else if(any(str_detect(TransRowNames, "Jmax"))){
                        
                        suppressWarnings(
                           Out[[paste0(ParamPrefix, "_Jmax", Suffix)]] <- 
                              as.numeric(
                                 InputTab[c(i:TransRowLast)[which(str_detect(TransRowNames, "Jmax"))],
                                          ValueCol] %>% pull(1))
                        )
                        
                        suppressWarnings(
                           Out[[paste0(ParamPrefix, "_Km", Suffix)]] <- 
                              as.numeric(
                                 InputTab[c(i:TransRowLast)[which(str_detect(TransRowNames, "Km"))],
                                          ValueCol] %>% pull(1))
                        )
                        
                     }
                     
                     # Checking for fuinc values b/c they're not always there
                     fuinc <- as.numeric(
                        InputTab[c(i:TransRowLast)[which(str_detect(TransRowNames, "fuinc"))],
                                 ValueCol] %>% pull(1))
                     
                     if(length(fuinc) > 0){
                        suppressWarnings(
                           Out[[paste0(ParamPrefix, "_fuinc", Suffix)]] <- fuinc
                        )
                     }
                     rm(fuinc)
                     
                     # Checking for RAF/REF values b/c they're not always there
                     RAFREF <- as.numeric(
                        InputTab[c(i:TransRowLast)[which(str_detect(TransRowNames, "ISEF|RAF|REF"))],
                                 ValueCol] %>% pull(1))
                     
                     if(length(RAFREF) > 0){
                        suppressWarnings(
                           Out[[paste0(ParamPrefix, "_RAFREF", Suffix)]] <- RAFREF
                        )
                     }
                     rm(RAFREF)
                     
                     rm(TransRowLast, Transporter, TransRowNames, Location, 
                        ParamPrefix)
                  }
               }
               
            }
         }
      }
   }
   
   # Dealing with custom dosing schedules ---------------------------------
   if(any(str_detect(exp_details, "StartDayTime")) & 
      any(str_detect(names(Out), "StartDayTime")) == FALSE |
      any(CustomDosing, na.rm = TRUE)){
      
      # When there's custom dosing for any of the substrate or inhibitors,
      # then the dosing start time should be pulled from a "Custom Dosing"
      # tab. Pulling any custom dosing sheets here.
      
      CustomDoseSheets <- SheetNames[str_detect(SheetNames, "Custom Dosing")]
      
      for(j in CustomDoseSheets){
         
         Suffix <- switch(str_extract(j, "Inh [12]|Sub"), 
                          "Inh 1" = "_inhib", 
                          "Inh 2" = "_inhib2", 
                          "Sub" = "_sub")
         
         CustomDose_xl <- suppressMessages(tryCatch(
            readxl::read_excel(path = sim_data_file, sheet = j,
                               col_names = FALSE),
            error = openxlsx::read.xlsx(sim_data_file, sheet = j,
                                        colNames = FALSE)))
         
         # If people have added anything to the sheet, that can mess up data
         # extraction. Here, we're at least checking for any columns that would
         # have NA values for the names b/c those are probably places where
         # people have added something extra off to the side of the data we
         # actually want.
         GoodCols <- t(CustomDose_xl[3, ]) %>% as.character()
         GoodCols <- which(complete.cases(GoodCols))
         
         CustomDosing <- CustomDose_xl[4:nrow(CustomDose_xl), GoodCols]
         names(CustomDosing) <- make.names(CustomDose_xl[3, GoodCols])
         CustomDosing <- CustomDosing %>% 
            rename(DoseNum = Dose.Number, 
                   Time1 = Time,
                   Dose_units = Dose.Units, 
                   DoseRoute = Route.of.Administration) %>% 
            mutate(Day = as.numeric(Day))
         
         TimeUnits <- names(CustomDosing)[str_detect(names(CustomDosing), "Offset")]
         names(CustomDosing)[str_detect(names(CustomDosing), "Offset")] <- "Time"
         
         MyCompoundID <- AllCompounds$CompoundID[AllCompounds$Suffix == Suffix]
         MyCompound <- as.character(Out[AllCompounds$DetailNames[AllCompounds$Suffix == Suffix]])
         
         CustomDosing <- CustomDosing %>% 
            # Removing any rows where Time is NA b/c those are likely places
            # where people have added some comments, etc. and not the main data
            # we want. The NA values mess up things downstream.
            filter(complete.cases(Time)) %>% 
            mutate(Time_units = ifelse(str_detect(TimeUnits, "\\.h\\.$"), 
                                       "h", "min"), 
                   File = sim_data_file, 
                   TimeOfDay = as.character(round_date(timeConv(
                      as.numeric(Time1)), unit = "minute")), 
                   TimeOfDay = sub("1899-12-30 ", "", TimeOfDay), 
                   CompoundID = MyCompoundID, 
                   Compound = MyCompound) %>% 
            mutate(across(.cols = matches("DoseNum|Time$|Dose$|^Day$"), 
                          .fns = as.numeric)) %>% 
            select(File, CompoundID, Compound, Day, TimeOfDay, 
                   Time, Time_units, DoseNum, 
                   Dose, Dose_units, DoseRoute)
         
         Out[[paste0("CustomDosing", Suffix)]] <- CustomDosing
         Out[[paste0("Dose", Suffix)]] <- "custom dosing"
         Out[[paste0("StartDayTime", Suffix)]] <- "custom dosing"
         Out[[paste0("StartHr", Suffix)]] <- CustomDosing$Time[CustomDosing$DoseNum == 1]
         Out[[paste0("DoseRoute", Suffix)]] <- "custom dosing"
         Out[[paste0("DoseInt", Suffix)]] <- "custom dosing"
         Out[[paste0("Regimen", Suffix)]] <- "Multiple Dose"
         
         rm(CustomDosing, Suffix, CustomDose_xl, MyCompoundID, MyCompound, TimeUnits)
         
      }
   }
   
   # Pulling details from the population tab -------------------------------
   MyPopDeets <- intersect(exp_details, PopDeets$Deet)
   
   # If user asks for population details, then function is set up to read both
   # what that population is and what the simulator version is by reading the
   # summary tab, so this next line should work and will not give them any
   # population details for Simcyp Discovery simulations until we set that up. 
   MyPopDeets <- intersect(MyPopDeets, 
                           (AllExpDetails %>% 
                               filter(SimulatorAvailability %in% DiscoveryCol) %>% 
                               pull(Detail)))
   
   if(length(MyPopDeets) > 0){
      # Getting name of that tab.
      if(exists("SheetNames", inherit = FALSE) == FALSE){
         SheetNames <- tryCatch(readxl::excel_sheets(sim_data_file),
                                error = openxlsx::getSheetNames(sim_data_file))
         
      }
      
      # If user has requested that the population tab be annotated, which is an
      # option!, then there will be 2 matches to the population sheet name. We
      # want the 1st one.
      PopSheet <- SheetNames[str_detect(tolower(SheetNames),
                                        str_sub(tolower(Out$Population), 1, 20))][1]
      
      PopTab <- suppressMessages(tryCatch(
         readxl::read_excel(path = sim_data_file, sheet = PopSheet,
                            col_names = FALSE),
         error = openxlsx::read.xlsx(sim_data_file, sheet = PopSheet,
                                     colNames = FALSE)))
      # If openxlsx read the file, the names are different. Fixing.
      if(names(PopTab)[1] == "X1"){
         names(PopTab) <- paste0("...", 1:ncol(PopTab))
      }
      
      MyPopDeets <- intersect(exp_details, PopDeets$Deet)
      
      # User can change the name of user-defined cytosolic phenotypes for GI
      # tract, kidney, and liver. Changing this back to "Cyt1" to work for
      # regex, though. For now, only extracting data for Cyt1 and not any more
      # user-defined cytosolic phenotype parameters, so ignoring the others.
      # If name is changed in one, it's changed in all. Columns are 3, 5, and
      # 9.
      if(any(str_detect(PopTab$...3, "Cyt1"), na.rm = T) == FALSE){
         StartCytRow <- which(str_detect(PopTab$...3, "^User Cyt$"))[1]
         
         NewName <- gsub("Abundance : | Population Scalar", "", PopTab[StartCytRow + 1, 3])
         
         PopTab$...3 <- sub(NewName, "User Cyt1", PopTab$...3)
         PopTab$...5 <- sub(NewName, "User Cyt1", PopTab$...5)
         PopTab$...9 <- sub(NewName, "User Cyt1", PopTab$...9)
         
      }
      
      # Sub function for finding the correct cell on the population tab.
      #
      # deet: name of the population detail to look up. It is matched against
      #    AllExpDetails$Detail (rows where DataSource is "population") to get
      #    the regex for the row label and against PopDeets$Deet to get the
      #    name column, the value column, and the class of the value.
      #
      # Returns the sorted, unique, non-missing value(s) found for that
      # detail, coerced to the class listed in PopDeets, with "n/a" converted
      # to NA. Returns NA when the tab has fewer columns than the name column
      # requires, when no row label matches, or when every matching cell is
      # missing.
      pullValue <- function(deet){
         
         # Setting up regex to search
         ToDetect <- AllExpDetails %>% 
            filter(Detail == deet & DataSource == "population") %>% 
            pull(Regex_row)
         
         # Looking up the position of this detail once and reusing it for
         # the name column, value column, and class.
         DeetRow <- which(PopDeets$Deet == deet)
         NameCol <- PopDeets$NameCol[DeetRow]
         
         if(ncol(PopTab) < NameCol){
            # This happens when it's an animal simulation.
            return(NA)
         }
         
         Row <- which(str_detect(PopTab[, NameCol] %>% pull(), ToDetect))
         Val <- NA
         
         if(length(Row) > 0){
            Val <- PopTab[Row, PopDeets$ValueCol[DeetRow]] %>% pull()
            Val <- sort(unique(Val))
            
            # sort() drops NA values by default, so when every matching cell
            # is missing, nothing is left. Returning NA in that case, just
            # like when no row matches, rather than a zero-length vector.
            if(length(Val) == 0){
               Val <- NA
            }
         }
         
         # Coercing to the expected class. Warnings are suppressed b/c
         # converting text such as "n/a" to numeric is expected to give NA.
         # If the class is not one of the listed options, the value is left
         # as is.
         suppressWarnings(
            Val <- switch(PopDeets$Class[DeetRow], 
                          "character" = as.character(Val),
                          "numeric" = as.numeric(Val), 
                          Val)
         )
         
         # Tidying up some specific idiosyncrasies of simulator output
         Val[Val %in% "n/a"] <- NA
         
         return(Val)
      }
      
      for(i in MyPopDeets){
         Out[[i]] <- pullValue(i)
      }
   }
   
   # Pulling from workspace file -------------------------------------------
   if(any(c("workspace", "all") %in% exp_details_input)){
      
      # Checking that the workspace file is available. This will ignore the
      # date/time stamp on the Excel results if it's still there. 
      
      WkspFile <- c("Simulator" = sub("( - [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}-[0-9]{2}-[0-9]{2})?\\.xlsx$",
                                      ".wksz", sim_data_file), 
                    "Discovery" = sub("( - [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}-[0-9]{2}-[0-9]{2})?\\.xlsx$",
                                      ".dscw", sim_data_file))
      WkspFile <- WkspFile[which(file.exists(WkspFile))]
      
      if(length(WkspFile) > 0){
         
         TEMP <- extractExpDetails_XML(
            sim_workspace_files = WkspFile, 
            compoundsToExtract = "all",
            exp_details = "all")
         
         TEMP$MainDetails <- TEMP$MainDetails %>% 
            # This currently removes anything that we already have from the
            # Excel file. May change that later to verify that Excel and
            # workspace match.
            select(!any_of(c("Substrate", "Inhibitor1", "Inhibitor2", 
                             "PrimaryMetabolite1", "PrimaryMetabolite2", 
                             "SecondaryMetabolite", "Inhibitor1Metabolite", 
                             paste0("DistributionModel",
                                    c("inhib1met", 
                                      "_met1", "_met2", "_secmet")))))
         
         # Note: Currently, we are not extracting anything from the workspace
         # that would be its own separate list. When we DO do that, we'll need
         # to adjust this code to bind the MainDetails and whatever that list
         # is.
         Out <- c(Out,
                  TEMP$MainDetails[
                     setdiff(names(TEMP$MainDetails)[
                        names(TEMP$MainDetails) != "Workspace"], 
                        names(Out))])
         
         rm(TEMP)
      }
   }
   
   
   # Calculated details & data cleanup ----------------------------------------
   
   if("StartHr_sub" %in% exp_details && 
      "StartDayTime_sub" %in% names(Out) &&
      complete.cases(Out$StartDayTime_sub) && 
      Out$StartDayTime_sub != "custom dosing"){
      Out[["StartHr_sub"]] <- difftime_sim(time1 = Out$SimStartDayTime,
                                           time2 = Out$StartDayTime_sub)
   }
   
   if(all(c("Inhibitor1", "StartDayTime_inhib") %in% names(Out)) &&
      complete.cases(Out$StartDayTime_inhib) &&
      Out$StartDayTime_inhib != "custom dosing"){
      Out[["StartHr_inhib"]] <- difftime_sim(time1 = Out$SimStartDayTime,
                                             time2 = Out$StartDayTime_inhib)
   }
   
   if(all(c("Inhibitor2", "StartDayTime_inhib2") %in% names(Out)) && 
      complete.cases(Out$StartDayTime_inhib2) &&
      Out$StartDayTime_inhib != "custom dosing"){
      Out[["StartHr_inhib2"]] <- difftime_sim(time1 = Out$SimStartDayTime,
                                              time2 = Out$StartDayTime_inhib2)
   }
   
   # Other functions call on "Inhibitor1", etc., so we need those objects to
   # exist, even if they were not used in this simulation. Setting them to NA if
   # they don't exist.
   MissingCmpd <- setdiff(AllCompounds$DetailNames, 
                          names(Out))
   MissingCmpd_list <- as.list(rep(NA, length(MissingCmpd)))
   names(MissingCmpd_list) <- MissingCmpd
   Out <- c(Out, MissingCmpd_list)
   
   # Always including the file name. 
   Out$File <- sim_data_file
   
   # Noting when workspace, if there is a matching one, was last changed.
   WorkspaceFile <- sub("xlsx", ifelse(Out$SimulatorUsed == "Discovery", 
                                       "dscw", "wksz"), sim_data_file)
   # Removing the file timestamp if there was one b/c that won't be part of the
   # workspace file name.
   WorkspaceFile <- sub(" - [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}-[0-9]{2}-[0-9]{2}", 
                        "", WorkspaceFile)
   
   Out$Workspace_TimeLastModified <- 
      ifelse(file.exists(WorkspaceFile), 
             as.character(file.info(WorkspaceFile)$mtime), NA)
   
   # Noting when this was run. 
   Out$expDetails_TimeStamp <- Sys.time()
   
   
   # Species should be lower case and not have "Sim" in front of it to work
   # more smoothly with other functions and also just look better. Setting
   # "beagle" to "dog" and setting it to "human" if it's missing, which it will
   # be for regular simulator output.
   if("Species" %in% names(Out)){
      Out$Species <- tolower(sub("Sim-", "", Out$Species))
      Out$Species <- ifelse(Out$Species == "beagle", "dog", Out$Species)
      
      if(is.na(Out$Species)){
         Out$Species <- "human"
      }
   }
   
   Out <- Out[sort(names(Out))]
   
   # Adding missing, necessary list items
   Missing1 <- setdiff(
      paste0(rep(c("DoseInt", "DoseRoute", "Regimen", "NumDoses"), each = 3), 
             c("_sub", "_inhib", "_inhib2")), 
      names(Out))
   
   if(length(Missing1) > 0){
      Missing <- as.list(matrix(data = NA, ncol = length(Missing1),
                                dimnames = list(NULL, Missing1)))
      names(Missing) <- Missing1
      
      Out <- c(Out, Missing)
   }
   
   # Fixing an issue that trips up other code down the line: Sometimes, the
   # user might specify a "multiple dose" regimen but then only administer
   # a single dose. That messes up, e.g., extractPK b/c it looks on the
   # wrong tab for the info it needs. When that happens, set the regimen to
   # "Single Dose".
   if(is.null(Out$Regimen_sub) == FALSE && 
      (complete.cases(Out$Regimen_sub) && Out$Regimen_sub == "Multiple Dose") &
      (complete.cases(Out$NumDoses_sub) && Out$NumDoses_sub == 1)){
      Out$Regimen_sub <- "Single Dose"
   }
   
   if(is.null(Out$Regimen_inhib) == FALSE && 
      (complete.cases(Out$Regimen_inhib) && Out$Regimen_inhib == "Multiple Dose") &
      (complete.cases(Out$NumDoses_inhib) && Out$NumDoses_inhib == 1)){
      Out$Regimen_inhib1 <- "Single Dose" 
   }
   
   if(is.null(Out$Regimen_inhib2) == FALSE && 
      (complete.cases(Out$Regimen_inhib2) && Out$Regimen_inhib2 == "Multiple Dose") &
      (complete.cases(Out$NumDoses_inhib2) && Out$NumDoses_inhib2 == 1)){
      Out$Regimen_inhib2 <- "Single Dose" 
   }
   
   # Making DoseInt_x and Dose_x numeric all the time. We'll get custom dosing
   # info from Regimen_x and DoseRoute_x.
   suppressWarnings(Out$DoseInt_sub <- as.numeric(Out$DoseInt_sub))
   suppressWarnings(Out$DoseInt_inhib <- as.numeric(Out$DoseInt_inhib))
   suppressWarnings(Out$DoseInt_inhib2 <- as.numeric(Out$DoseInt_inhib2))
   
   suppressWarnings(Out$Dose_sub <- as.numeric(Out$Dose_sub))
   suppressWarnings(Out$Dose_inhib <- as.numeric(Out$Dose_inhib))
   suppressWarnings(Out$Dose_inhib2 <- as.numeric(Out$Dose_inhib2))
   
   # At this point, DoseInt_x and Dose_x will be NA if it's a custom dosing
   # regimen. Setting the regimen to "multiple". We'll use that downstream for
   # checking for appropriate PK parameters, etc.
   
   Out$Regimen_sub <- ifelse(is.na(Out$DoseInt_sub) & 
                                (complete.cases(Out$DoseRoute_sub) && 
                                    Out$DoseRoute_sub == "custom dosing"), 
                             "Multiple Dose", Out$Regimen_sub)
   Out$Regimen_inhib <- ifelse(is.na(Out$DoseInt_inhib) & 
                                  (complete.cases(Out$DoseInt_inhib) && 
                                      Out$DoseRoute_inhib == "custom dosing"), 
                               "Multiple Dose", Out$Regimen_inhib)
   Out$Regimen_inhib2 <- ifelse(is.na(Out$DoseInt_inhib2) & 
                                   (complete.cases(Out$DoseInt_inhib2) && 
                                       Out$DoseRoute_inhib2 == "custom dosing"), 
                                "Multiple Dose", Out$Regimen_inhib2)
   
   # Splitting this up into main details -- a data.frame -- and then,
   # separately, whatever items need to be lists, e.g., custom dosing regimens
   # and dissolution profiles. 
   Main <- as.data.frame(Out[which(sapply(Out, length) == 1)])
   
   # Making absolutely sure that File is included in Main. When we run
   # harmonize_details, it will add it to the other items whenever there is at
   # least 1 row, but we need it in Main to do that.
   Main$File <- sim_data_file 
   
   ## Dosing -----------------------------------------------------------------
   # Setting up Dosing data.frame to include ALL dosing info, so custom dosing
   # when appropriate and, for compounds and/or simulations w/out custom dosing,
   # then a data.frame of all dosing events filled in based on interval, amount,
   # etc.
   
   Out <- list(MainDetails = Main, 
               CustomDosing = bind_rows(Out$CustomDosing_sub, 
                                        Out$CustomDosing_inhib, 
                                        Out$CustomDosing_inhib2), 
               DissolutionProfiles = DissoProfs,
               ReleaseProfiles = ReleaseProfs, 
               ConcDependent_fup = CDfupProfs, 
               ConcDependent_BP = CDBPProfs, 
               pH_dependent_solubility = pHSol)
   
   Out <- harmonize_details(Out)
   
   
   # Returning --------------------------------------------------------------
   
   for(j in names(Out)[unlist(lapply(Out, is.null)) == FALSE]){
      Out[[j]] <- Out[[j]] %>% 
         mutate(File = sim_data_file) %>% 
         select(File, everything())
   }
   
   return(Out)
   
}
shirewoman2/Consultancy documentation built on Feb. 18, 2025, 10 p.m.