R/eCT_pulldata.R

Defines functions eCT_pulldata

Documented in eCT_pulldata

#' Pull the appropriate data from the harmonized Excel tab
#'
#' INTERNAL USE ONLY.
#'
#' @param cmpd cmpd
#' @param AllPerpsPresent AllPerpsPresent
#' @param pull_interaction_data T or F
#' @param fromMultFunction T or F
#' @param Deets 1 row
#' @param ADAM T or F
#' @param ss tissue subtype
#' @param AdvBrainModel T or F
#' @param tissue tissue
#' @param SimConcUnits SimConcUnits
#' @param returnAggregateOrIndiv returnAggregateOrIndiv
#' @param sim_data_xl sim_data_xl
#'
#' @return data.frame of conc time data
#'
#' @examples
#' # none
#' 
eCT_pulldata <- function(sim_data_xl, 
                         cmpd, 
                         AllPerpsPresent, 
                         pull_interaction_data, 
                         fromMultFunction, 
                         Deets,
                         ADAM, 
                         ss, 
                         AdvBrainModel, 
                         tissue, 
                         SimTimeUnits,
                         SimConcUnits, 
                         returnAggregateOrIndiv){
   
   if(ADAM & cmpd != "substrate"){
      if(tissue %in% c("faeces", "gut tissue")){
         if(cmpd != "inhibitor 1"){
            warning(paste0(str_to_title(cmpd),
                           " concentrations are not available for ",
                           tissue, " and thus will not be extracted.\n"),
                    call. = FALSE)
            
            return()
         }
      } else {
         warning(paste0(str_to_title(cmpd),
                        " concentrations are not available for ",
                        tissue, " and thus will not be extracted.\n"),
                 call. = FALSE)
         
         return()
         
      }
   }
   
   MyCompound <-
      switch(cmpd,
             "substrate" = Deets$Substrate,
             "inhibitor 1" = Deets$Inhibitor1,
             "inhibitor 2" = Deets$Inhibitor2,
             "inhibitor 1 metabolite" = Deets$Inhibitor1Metabolite,
             "primary metabolite 1" = Deets$PrimaryMetabolite1,
             "primary metabolite 2" = Deets$PrimaryMetabolite2,
             "secondary metabolite" = Deets$SecondaryMetabolite) %>%
      as.character()
   
   MainCompoundIDs <- c("substrate", "primary metabolite 1", "primary metabolite 2",
                        "secondary metabolite",
                        "inhibitor 1", "inhibitor 2", "inhibitor 1 metabolite",
                        "inhibitor 2 metabolite")
   
   # if(all(CompoundType == "ADC")){
   #    MyCompound <- switch(cmpd,
   #                         "total protein" = paste("total", Deets$Substrate),
   #                         "conjugated protein" = paste("conjugated", Deets$Substrate),
   #                         "released payload" = Deets$PrimaryMetabolite1)
   # }
   
   # Determining rows to use ----------------------------------------------
   
   TimeRow <- which(str_detect(sim_data_xl$...1, "^Time "))[1]
   
   # Figuring out which rows contain which data
   FirstBlank <- intersect(which(is.na(sim_data_xl$...1)),
                           which(1:nrow(sim_data_xl) > TimeRow))[1]
   FirstBlank <- ifelse(is.na(FirstBlank), nrow(sim_data_xl) + 1, FirstBlank)
   # NamesToCheck <- sim_data_xl$...1[TimeRow:(FirstBlank-1)]
   NamesToCheck <- sim_data_xl$...1
   
   InteractionIndices <- which(str_detect(NamesToCheck, "WITHINTERACTION"))
   
   
   if(cmpd %in% c(MainCompoundIDs, "released payload") & 
      ADAM == FALSE & AdvBrainModel == FALSE){
      # cmpd is one of main compounds is not ADAM or AdvBrainModel 
      
      # released payload looks like primary metabolite 1, so extracting
      # those data here rather than with the rest of the ADC data
      
      CompoundIndices <- which(str_detect(NamesToCheck,
                                          switch(cmpd, 
                                                 "substrate" = "SUBSTRATE",
                                                 "primary metabolite 1" = "PRIMET1", 
                                                 "primary metabolite 2" = "PRIMET2", 
                                                 "secondary metabolite" = "SECMET", 
                                                 "inhibitor 1" = "PERPETRATOR1INHIB", 
                                                 "inhibitor 2" = "PERPETRATOR2", 
                                                 "inhibitor 1 metabolite" = "PERPETRATOR1MET")))
      
   } else if(ADAM|AdvBrainModel){
      # cmpd is ADAM model compound
      
      # Step 1: Find all rows w/ADAM model concentrations
      CompoundIndices <- which(str_detect(NamesToCheck, 
                                          "^Ms|^Dissolution Rate Solid State|^C Lumen Free|^C Lumen Total|^Heff|^Absorption Rate|^Mur|^Md|^Inh Md|^Luminal CLint|CTissue|ITissue|dissolved|absorbed|^C Enterocyte|Release fraction|CIntracranial|CBrainI[CS]F|CCSF(Spinal|Cranial)|Kpuu_I[CS]F|Kpuu I[CS]F|Kpuu_BrainMass|Kpuu brain mass|CTotal( )?Brain"))
      # IMPORTANT: If you change the above regex b/c you find some new weird way
      # that the Simulator output refers to things, ALSO CHANGE IT IN
      # extractConcTime.
      
      # Step 2: Find only those rows with this particular tissue subtype
      temp_regex <- paste0(ifelse(
         str_detect(SimConcUnits$TypeCode[SimConcUnits$Type == ss],
                    "dissolved|absorbed"),
         "", "^"),
         SimConcUnits$TypeCode[SimConcUnits$Type == ss])
      
      # gut tissue tab is set up slightly differently, so need to
      # adjust what regex to use.
      if(tissue == "gut tissue"){ # FIXME - CHECK THIS
         temp_regex <- ifelse(cmpd == "inhibitor 1",
                              "^ITissue", "^CTissue")
      }
      
      CompoundIndices <- intersect(
         which(str_detect(NamesToCheck, temp_regex)), 
         CompoundIndices)
      
   } else if(all(CompoundType == "ADC") & cmpd != "released payload" &
             length(AllPerpsPresent) == 0){
      
      CompoundIndices <- which(
         str_detect(tolower(sim_data_xl$...1),
                    switch(ss,
                           "total protein" = "protein total .dar0",
                           "conjugated protein" = "conjugated protein .dar1",
                           "total antibody" = "cantibody total",
                           "protein-conjugated substrate" = "protein conjugated drug" # CHECK THIS ONE with an example; just guessing for now
                    )))
      
   }
   
   if(pull_interaction_data){
      Include <- intersect(CompoundIndices, 
                           InteractionIndices)
   } else {
      Include <- setdiff(CompoundIndices, 
                         InteractionIndices)
   }
   
   # Need to remove trial means from here if they're included. 
   Include <- setdiff(Include, 
                      which(str_detect(tolower(sim_data_xl$...1), "trial")))
   
   # FIXME - Return to this and add trial means here. 
   IncludeTrialMeans <- intersect(Include, 
                                  which(str_detect(tolower(sim_data_xl$...1), "trial")))
   
   # If Include has length 0, then this particular set of data is not available,
   # e.g., metabolite for a solid tissue or interaction data for Heff data.
   # Return empty data.
   if(length(Include) == 0){
      return()
   }
   
   # NB: Making NamesToCheck lower case now that we've checked for the
   # all-caps compound IDs.
   NamesToCheck <- tolower(NamesToCheck)
   
   Out <- list()
   
   if("aggregate" %in% returnAggregateOrIndiv){
      
      RowsToUse <- c(
         "mean"    = intersect(
            which(str_detect(NamesToCheck, "mean") &
                     !str_detect(NamesToCheck, "geome(t)?ric")), # There's a spelling error in some simulator output, and geometric is listed as "geomeric".
            Include),
         "per5"    = intersect(
            which(str_detect(NamesToCheck," 5(th)? percentile|5th ptile|^5th percentile$") &
                     !str_detect(NamesToCheck, "95")),
            Include),
         "per95"   = intersect(
            which(str_detect(NamesToCheck, " 95(th)? percentile|95th ptile|^95th percentile$")),
            Include),
         "per10"   = intersect(
            which(str_detect(NamesToCheck," 10(th)? percentile|10th ptile")),
            Include),
         "per90"   = intersect(
            which(str_detect(NamesToCheck, " 90(th)? percentile|90th ptile")),
            Include),
         "geomean" = intersect(
            which(str_detect(NamesToCheck, "geome(t)?ric mean")),
            Include),
         "median"  = intersect(
            which(str_detect(NamesToCheck, "median")),
            Include))
      
      # dealing with instances where there are no aggregate data s/a animal sims
      if(length(RowsToUse) == 0){
         Out[["agg"]] <- list()
      } else {
         suppressWarnings(
            Out[["agg"]] <- sim_data_xl[c(TimeRow, RowsToUse), ] %>%
               t() %>%
               as.data.frame() %>%
               slice(-(1:3)) %>%
               mutate_all(as.numeric)
         )
         
         names(Out[["agg"]]) <- c("Time", names(RowsToUse))
         Out[["agg"]] <- Out[["agg"]] %>%
            pivot_longer(names_to = "Trial", values_to = "Conc",
                         cols = -c(Time)) %>%
            mutate(Compound = MyCompound,
                   CompoundID = cmpd,
                   Inhibitor = ifelse(pull_interaction_data |
                                         cmpd %in% c("inhibitor 1", 
                                                     "inhibitor 2", 
                                                     "inhibitor 1 metabolite"), 
                                      str_comma(AllPerpsPresent), 
                                      "none"),
                   Time_units = SimTimeUnits,
                   Conc_units = ifelse(ADAM|AdvBrainModel,
                                       SimConcUnits$ConcUnit[
                                          SimConcUnits$Type == ss],
                                       SimConcUnits),
                   Tissue = tissue,
                   Tissue_subtype = ifelse(ADAM|AdvBrainModel, ss, NA), 
                   IndivOrAgg = "aggregate")
      }
   } 
   
   if("individual" %in% returnAggregateOrIndiv){
      
      # Certain animal sims will only have "Statistics" above to the single row
      # of conc time data since they only have 1 individual.
      RowsToUse <- Include[which(Include > which(str_detect(
         sim_data_xl$...1, "^(Individual )?Statistics")))]
      
      if(Deets$Species != "human" & length(RowsToUse) == 0){
         # Sometimes, animal sims will list "Population Statistics" even though
         # there's only 1 individual.
         RowsToUse <- Include[which(Include > which(str_detect(
            sim_data_xl$...1, "Population Statistics")))]
         
      }
      
      suppressWarnings(
         Out[["indiv"]] <- sim_data_xl[c(TimeRow, RowsToUse), ] %>%
            t() %>%
            as.data.frame() %>% slice(-(1:3)) %>%
            mutate_all(as.numeric) %>%
            rename(Time = "V1")
      )
      
      if(length(RowsToUse) > 1){
         SubjTrial <- sim_data_xl[RowsToUse, 2:3] %>%
            rename(Individual = ...2, Trial = ...3) %>%
            mutate(SubjTrial = paste0("ID", Individual, "_", Trial))
         
         names(Out[["indiv"]])[2:ncol(Out[["indiv"]])] <- SubjTrial$SubjTrial
      }
      
      suppressWarnings(
         Out[["indiv"]] <- Out[["indiv"]] %>%
            pivot_longer(names_to = "SubjTrial", values_to = "Conc",
                         cols = -Time) %>%
            mutate(Compound = MyCompound,
                   CompoundID = cmpd,
                   Inhibitor = ifelse(pull_interaction_data |
                                         cmpd %in% c("inhibitor 1", 
                                                     "inhibitor 2", 
                                                     "inhibitor 1 metabolite"), 
                                      str_comma(AllPerpsPresent), 
                                      "none"), 
                   SubjTrial = sub("ID", "", SubjTrial),
                   Time_units = SimTimeUnits,
                   Conc_units = ifelse(ADAM|AdvBrainModel,
                                       SimConcUnits$ConcUnit[
                                          SimConcUnits$Type == ss],
                                       SimConcUnits),
                   Tissue = tissue,
                   Tissue_subtype = ifelse(ADAM|AdvBrainModel, ss, NA)) %>%
            separate(SubjTrial, into = c("Individual", "Trial"),
                     sep = "_") %>% 
            mutate(Individual = as.character(Individual),
                   Trial = as.character(Trial), 
                   IndivOrAgg = "individual")
      )
      
      # Adjusting for when there was only 1 individual
      if(length(RowsToUse) == 1){
         Out[["indiv"]]$Individual <- "1"
         Out[["indiv"]]$Trial <- "mean"
      }
      
      rm(RowsToUse)
      
   }
   
   Out <- bind_rows(Out)
   
   return(Out)
   
}
shirewoman2/Consultancy documentation built on Feb. 18, 2025, 10 p.m.