R/extract_fu.R

Defines functions extract_fu

Documented in extract_fu

#' Extract fu,plasma values that change with time from a Simulator output Excel
#' file. UNDER CONSTRUCTION!
#'
#' \code{extract_fu} extracts the time-dependent fraction unbound in plasma from
#' a simulator output Excel file. A tab named something like "fu Profile (Sub)"
#' must be present. This currently only extracts data for the substrate, but if
#' you have an example with a different compound, please talk to Laura Shireman.
#' UNDER CONSTRUCTION!
#'
#' @param sim_data_files the Simcyp Simulator Excel results files containing the
#'   simulated time-dependent fu data, in quotes
#' @param returnAggregateOrIndiv Return aggregate and/or individual simulated fu
#'   data? Options are "individual", "aggregate", or "both" (default).
#'   Aggregated data are not calculated here but are pulled from the simulator
#'   output rows labeled as "mean".
#' @param existing_exp_details If you have already run
#'   \code{\link{extractExpDetails_mult}} or \code{\link{extractExpDetails}} to
#'   get all the details from the "Input Sheet" (e.g., when you ran
#'   extractExpDetails you said \code{exp_details = "Input Sheet"} or
#'   \code{exp_details = "all"}), you can save some processing time by supplying
#'   that object here, unquoted. If left as NA, this function will run
#'   \code{extractExpDetails} behind the scenes to figure out some information
#'   about your experimental set up.
#'
#' @return a data.frame
#'
#' @export
#' @examples
#' # None yet
#' 


extract_fu <- function(sim_data_files,
                       returnAggregateOrIndiv = "both", 
                       existing_exp_details = NA){
   
   # Error catching --------------------------------------------------------------------
   # Check whether tidyverse is loaded
   if("package:tidyverse" %in% search() == FALSE){
      stop("The SimcypConsultancy R package also requires the package tidyverse to be loaded, and it doesn't appear to be loaded yet. Please run `library(tidyverse)` and then try again.")
   }
   
   # If they didn't include ".xlsx" at the end, add that.
   sim_data_files <- paste0(sub("\\.wksz$|\\.dscw$|\\.xlsx$", "", sim_data_files), ".xlsx")
   
   # Checking for file name issues
   CheckFileNames <- check_file_name(sim_data_files)
   BadFileNames <- CheckFileNames[!CheckFileNames == "File name meets naming standards."]
   if(length(BadFileNames)> 0){
      BadFileNames <- paste0(names(BadFileNames), ": ", BadFileNames)
      warning(paste0("The following file names do not meet file-naming standards for the Simcyp Consultancy Team:\n", 
                     str_c(paste0("     ", BadFileNames), collapse = "\n"), "\n"),
              call. = FALSE)
   }
   
   if(any(c(length(returnAggregateOrIndiv) < 1,
            length(returnAggregateOrIndiv) > 2,
            any(unique(returnAggregateOrIndiv) %in% c("aggregate", "individual", "both") == FALSE)))) {
      stop("returnAggregateOrIndiv must be 'aggregate', 'individual', or 'both'.",
           call. = FALSE)
   }
   
   
   # Main body of function ----------------------------------------------------------------
   
   extract_fu_subfun <- function(sim_data_file){
      
      # This is currently only set up for the substrate, but we may change that
      # later. For now, setting the object compoundID as a placeholder since it
      # will only be "substrate" at the moment.
      compoundID <- "substrate"
      
      # Getting summary data for the simulation(s)
      
      if("logical" %in% class(existing_exp_details)){
         Deets <- extractExpDetails(sim_data_file,
                                    exp_details = "Summary and Input")[["MainDetails"]]
         
      } else {
         
         Deets <- filter_sims(existing_exp_details, sim_data_file, "include")
         Deets <- harmonize_details(Deets)[["MainDetails"]] %>% 
            filter(File == sim_data_file)
         
         if(nrow(Deets) == 0){
            Deets <- extractExpDetails(sim_data_file,
                                       exp_details = "Summary and Input")[["MainDetails"]]
         }
      }
      
      if(Deets$PopRepSim == "Yes"){
         warning(paste0("The simulator file supplied, `", 
                        sim_data_file, 
                        "`, is for a population-representative simulation and thus doesn't have any aggregate data. Please be warned that some plotting functions will not work well without aggregate data.\n"),
                 call. = FALSE)
      }
      
      # Figuring out which sheet to extract and dealing with case since that
      # apparently changes between Simulator versions.
      AllSheets <- readxl::excel_sheets(sim_data_file)
      SheetToExtract <- AllSheets[str_detect(tolower(AllSheets), "fu profile \\(sub\\)")]
      
      if(length(SheetToExtract) == 0){
         warning(paste0("The simulator output file provided, `", 
                        sim_data_file, 
                        "``, does not appear to have a sheet titled `Time variance %fu and fe`, which is what we need for extracting dynamic fu and fe values.\n"),
                 call. = FALSE)
         return(data.frame())
      }
      
      # Reading in simulated abundance-time profile data
      sim_data_xl <- suppressMessages(
         readxl::read_excel(path = sim_data_file,
                            sheet = SheetToExtract,
                            col_names = FALSE))
      
      # Extracting aggregate data ---------------------------------------------
      if(any(c("aggregate", "both") %in% returnAggregateOrIndiv)){
         
         StartRow_agg <- which(sim_data_xl$...1 == "Population Statistics")
         TimeRow <- which(str_detect(sim_data_xl$...1, "^Time "))
         TimeRow <- TimeRow[TimeRow > StartRow_agg][1]
         
         # Figuring out which rows contain which data
         FirstBlank <- intersect(which(is.na(sim_data_xl$...1)),
                                 which(1:nrow(sim_data_xl) > TimeRow))[1]
         FirstBlank <- ifelse(is.na(FirstBlank), nrow(sim_data_xl), FirstBlank)
         NamesToCheck <- sim_data_xl$...1[TimeRow[1]:(FirstBlank-1)]
         
         RowsToUse <- which(str_detect(NamesToCheck, "^fu "))
         
         IDs_agg <- data.frame(ColOrig = paste0("V", 1:(length(RowsToUse) + 1)), 
                               ID = c("Time", NamesToCheck[RowsToUse])) %>% 
            mutate(Trial = str_trim(str_extract(tolower(ID),
                                                "(geometric)? mean| 5(th)? percentile| 95(th)? percentile|median")), 
                   Trial = case_when(Trial == "5th percentile" ~ "per5", 
                                     Trial == "95th percentile" ~ "per95", 
                                     Trial == "geometric mean" ~ "geomean",
                                     TRUE ~ Trial))
         
         sim_data_mean <- sim_data_xl[c(TimeRow, RowsToUse + TimeRow - 1), ] %>% 
            t() %>%
            as.data.frame() %>% slice(-(1:3)) %>%
            mutate_all(as.numeric) %>% 
            rename(Time = V1) %>% 
            pivot_longer(names_to = "ColOrig", values_to = "fu", 
                         cols = -Time) %>%
            left_join(IDs_agg, by = "ColOrig")
      }
      
      
      # Extracting individual data --------------------------------------------
      if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
         
         StartRow_indiv <- which(sim_data_xl$...1 == "Individual Statistics")
         TimeRow <- which(str_detect(sim_data_xl$...1, "^Time "))
         TimeRow <- TimeRow[TimeRow > StartRow_indiv][1]
         
         # Figuring out which rows contain which data
         FirstBlank <- intersect(which(is.na(sim_data_xl$...1)),
                                 which(1:nrow(sim_data_xl) > TimeRow))[1]
         FirstBlank <- ifelse(is.na(FirstBlank), nrow(sim_data_xl) + 1, FirstBlank)
         
         RowsToUse <- which(str_detect(sim_data_xl$...1, "^fu$"))
         RowsToUse <- RowsToUse[RowsToUse > StartRow_indiv & RowsToUse < FirstBlank]
         
         IDs_indiv <- data.frame(ColOrig = paste0("V", 1:(length(RowsToUse) + 1)), 
                                 ID = c("Time", t(sim_data_xl[RowsToUse, 1])), 
                                 Individual = sim_data_xl[c(TimeRow, RowsToUse), 2], 
                                 Trial = sim_data_xl[c(TimeRow, RowsToUse), 3]) %>% 
            rename(Individual = ...2, 
                   Trial = ...3)
         
         sim_data_indiv <- sim_data_xl[c(TimeRow, RowsToUse), ] %>% 
            t() %>%
            as.data.frame() %>% slice(-(1:3)) %>%
            mutate_all(as.numeric) %>% 
            rename(Time = V1) %>% 
            pivot_longer(names_to = "ColOrig", values_to = "fu", 
                         cols = -Time) %>%
            left_join(IDs_indiv, by = "ColOrig")
         
      }
      
      # Putting everything together ------------------------------------------
      
      TimeUnits <- sim_data_xl$...1[which(str_detect(sim_data_xl$...1, "^Time"))][1]
      TimeUnits <- ifelse(TimeUnits == "Time (h)", "hours", "minutes")
      
      Data <- list()
      
      if(any(c("aggregate", "both") %in% returnAggregateOrIndiv)){
         Data[["agg"]] <- sim_data_mean %>%
            arrange(Trial, Time)
      }
      
      if(any(c("individual", "both") %in% returnAggregateOrIndiv)){
         Data[["indiv"]] <- sim_data_indiv %>%
            mutate(Individual = as.character(Individual),
                   Trial = as.character(Trial)) %>%
            arrange(Individual, Time)
      }
      
      Data <- bind_rows(Data)
      
      if("individual" %in% returnAggregateOrIndiv){
         Data <- Data %>%
            mutate(Individual = ifelse(is.na(Individual), Trial, Individual))
      }
      
      # Adding DoseNumber so that we can skip extractExpDetails in ct_plot when
      # the user requests a specific dose.
      MyIntervals <- 
         c("substrate" = Deets$DoseInt_sub,
           "primary metabolite 1" = Deets$DoseInt_sub,
           "primary metabolite 2" = Deets$DoseInt_sub,
           "secondary metabolite" = Deets$DoseInt_sub,
           "inhibitor 1" = ifelse(is.null(Deets$DoseInt_inhib),
                                  NA, Deets$DoseInt_inhib),
           "inhibitor 1 metabolite" = ifelse(is.null(Deets$DoseInt_inhib),
                                             NA, Deets$DoseInt_inhib),
           "inhibitor 2" = ifelse(is.null(Deets$DoseInt_inhib2),
                                  NA, Deets$DoseInt_inhib2))
      
      MyStartTimes <- 
         c("substrate" = Deets$StartHr_sub,
           "primary metabolite 1" = Deets$StartHr_sub,
           "primarymetabolite 2" = Deets$StartHr_sub,
           "secondary metabolite" = Deets$StartHr_sub,
           "inhibitor 1" = ifelse(is.null(Deets$StartHr_inhib), NA,
                                  Deets$StartHr_inhib),
           "inhibitor 2" = ifelse(is.null(Deets$StartHr_inhib2), NA,
                                  Deets$StartHr_inhib2),
           "inhibitor 1 metabolite" = ifelse(is.null(Deets$StartHr_inhib), NA,
                                             Deets$StartHr_inhib))
      
      MyMaxDoseNum <- 
         c("substrate" = ifelse(Deets$Regimen_sub == "Single Dose", 
                                1, Deets$NumDoses_sub),
           "primary metabolite 1" = ifelse(Deets$Regimen_sub == "Single Dose", 
                                           1, Deets$NumDoses_sub),
           "primarymetabolite 2" = ifelse(Deets$Regimen_sub == "Single Dose", 
                                          1, Deets$NumDoses_sub),
           "secondary metabolite" = ifelse(Deets$Regimen_sub == "Single Dose", 
                                           1, Deets$NumDoses_sub),
           "inhibitor 1" = ifelse(is.null(Deets$NumDoses_inhib), NA,
                                  ifelse(Deets$Regimen_inhib == "Single Dose", 
                                         1, Deets$NumDoses_inhib)),
           "inhibitor 2" = ifelse(is.null(Deets$NumDoses_inhib2), NA,
                                  ifelse(Deets$Regimen_inhib2 == "Single Dose", 
                                         1, Deets$NumDoses_inhib2)),
           "inhibitor 1 metabolite" = ifelse(is.null(Deets$NumDoses_inhib), NA,
                                             ifelse(Deets$Regimen_inhib == "Single Dose", 
                                                    1, Deets$NumDoses_inhib)))
      
      # Also adding compound name
      MyCompound <- 
         c("substrate" = Deets$Substrate,
           "primary metabolite 1" = Deets$PrimaryMetabolite1,
           "primary metabolite 2" = Deets$PrimaryMetabolite2,
           "secondary metabolite" = Deets$SecondaryMetabolite,
           "inhibitor 1" = Deets$Inhibitor1, 
           "inhibitor 1 metabolite" = Deets$Inhibitor1Metabolite, 
           "inhibitor 2" = Deets$Inhibitor2)
      
      # Converting data to numeric while also retaining names
      suppressWarnings(
         MyIntervals <- sapply(MyIntervals, FUN = as.numeric))
      suppressWarnings(
         MyStartTimes <- sapply(MyStartTimes, FUN = as.numeric))
      suppressWarnings(
         MyMaxDoseNum <- sapply(MyMaxDoseNum, FUN = as.numeric))
      
      Data <- Data %>%
         mutate(StartHr_sub = MyStartTimes["substrate"],
                TimeSinceDose1_sub = Time - StartHr_sub,
                DoseInt_sub = MyIntervals["substrate"],
                MaxDoseNum_sub = MyMaxDoseNum["substrate"],
                DoseNum_sub = Time %/% DoseInt_sub + 1,
                # Taking care of possible artifacts
                DoseNum_sub = ifelse(DoseNum_sub < 0, 0, DoseNum_sub),
                DoseNum_sub = ifelse(DoseNum_sub > MaxDoseNum_sub, 
                                     MaxDoseNum_sub, DoseNum_sub),
                # If it was a single dose, make everything after StartHr dose
                # 1 and everything before StartHr dose 0. if it was a single
                # dose, then DoseInt is NA.
                DoseNum_sub = ifelse(is.na(DoseInt_sub),
                                     ifelse(TimeSinceDose1_sub < 0, 0, 1), DoseNum_sub),
                StartHr_inhib1 = MyStartTimes["inhibitor 1"],
                TimeSinceDose1_inhib1 = Time - StartHr_inhib1,
                DoseInt_inhib1 = MyIntervals["inhibitor 1"],
                MaxDoseNum_inhib1 = MyMaxDoseNum["inhibitor 1"],
                DoseNum_inhib = Time %/% DoseInt_inhib1 + 1,
                # Taking care of possible artifacts
                DoseNum_inhib = ifelse(DoseNum_inhib < 0, 0, DoseNum_inhib),
                DoseNum_inhib = ifelse(DoseNum_inhib > MaxDoseNum_inhib1, 
                                       MaxDoseNum_inhib1, DoseNum_inhib),
                # If it was a single dose, make everything after StartHr dose
                # 1 and everything before StartHr dose 0. if it was a single
                # dose, then DoseInt is NA.
                DoseNum_inhib = ifelse(is.na(DoseInt_inhib1),
                                       ifelse(TimeSinceDose1_inhib1 < 0, 0, 1), DoseNum_inhib),
                StartHr_inhib2 = MyStartTimes["inhibitor 2"],
                TimeSinceDose1_inhib2 = Time - StartHr_inhib2,
                DoseInt_inhib2 = MyIntervals["inhibitor 2"],
                MaxDoseNum_inhib2 = MyMaxDoseNum["inhibitor 2"],
                DoseNum_inhib2 = Time %/% DoseInt_inhib2 + 1,
                # Taking care of possible artifacts
                DoseNum_inhib2 = ifelse(DoseNum_inhib2 < 0, 0, DoseNum_inhib2),
                DoseNum_inhib2 = ifelse(DoseNum_inhib2 > MaxDoseNum_inhib2, 
                                        MaxDoseNum_inhib2, DoseNum_inhib2),
                # If it was a single dose, make everything after StartHr dose
                # 1 and everything before StartHr dose 0. if it was a single
                # dose, then DoseInt is NA.
                DoseNum_inhib2 = ifelse(is.na(DoseInt_inhib2),
                                        ifelse(TimeSinceDose1_inhib2 < 0, 0, 1), DoseNum_inhib2))
      
      # Checking for when the simulation ends right at the last dose b/c
      # then, setting that number to 1 dose lower
      if(length(Data %>% filter(DoseNum_sub == max(Data$DoseNum_sub)) %>%
                pull(Time) %>% unique()) == 1){
         MyMaxDoseNum_sub <- max(Data$DoseNum_sub)
         Data <- Data %>%
            mutate(DoseNum_sub = ifelse(DoseNum_sub == MyMaxDoseNum_sub,
                                        MyMaxDoseNum_sub - 1, DoseNum_sub))
      }
      
      if(length(Data %>% filter(DoseNum_inhib == max(Data$DoseNum_inhib)) %>%
                pull(Time) %>% unique()) == 1){
         MyMaxDoseNum_inhib1 <- max(Data$DoseNum_inhib)
         Data <- Data %>%
            mutate(DoseNum_inhib = ifelse(DoseNum_inhib == MyMaxDoseNum_inhib1,
                                          MyMaxDoseNum_inhib1 - 1, DoseNum_inhib))
      }
      
      if(length(Data %>% filter(DoseNum_inhib2 == max(Data$DoseNum_inhib2)) %>%
                pull(Time) %>% unique()) == 1){
         MyMaxDoseNum_inhib2 <- max(Data$DoseNum_inhib2)
         Data <- Data %>%
            mutate(DoseNum_inhib2 = ifelse(DoseNum_inhib2 == MyMaxDoseNum_inhib2,
                                           MyMaxDoseNum_inhib2 - 1, DoseNum_inhib2))
      }
      
      # Finalizing, tidying, selecting only useful columns
      Data <- Data %>%
         mutate(File = sim_data_file,
                Compound = MyCompound[compoundID], 
                CompoundID = compoundID, 
                DoseNum = case_when(
                   {{compoundID}} %in% 
                      c("substrate", "primary metabolite 1", 
                        "primary metabolite 2", 
                        "secondary metabolite") ~ DoseNum_sub, 
                   {{compoundID}} %in% c("inhibitor 1", 
                                         "inhibitor  1 metabolite") ~ DoseNum_inhib, 
                   {{compoundID}} %in% c("inhibitor 2") ~ DoseNum_inhib2)) %>%
         arrange(across(any_of(c("CompoundID", "Compound",
                                 "Individual", "Trial", "Time")))) %>%
         select(any_of(c("Compound", "CompoundID", 
                         "Individual", "Trial", "Time", "fu", "Time_units", 
                         "DoseNum", "File"))) %>% 
         purrr::discard(~all(is.na(.)))
      
      return(Data)
   }
   
   Out <- list()
   for(ff in sim_data_files){
      Out[[ff]] <- extract_fu_subfun(sim_data_file = ff)
   }
   
   Out <- bind_rows(Out)
   
   return(Out)
   
}
shirewoman2/Consultancy documentation built on Feb. 18, 2025, 10 p.m.