inst/doc/Vh_Linakis2020.R

## ----configure_knitr, eval = TRUE---------------------------------------------
knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)

## ----clear_memory, eval = TRUE------------------------------------------------
rm(list=ls()) 

## ----runchunks, eval = TRUE---------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE

## ----setup, eval = execute.vignette-------------------------------------------
#  knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
#  library(httk)
#  library(ggplot2)
#  library(gridExtra)
#  library(cowplot)
#  library(ggrepel)
#  library(dplyr)
#  library(stringr)
#  library(forcats)
#  library(smatr)

## ----data, eval = execute.vignette--------------------------------------------
#  met_data <- metabolism_data_Linakis2020
#  conc_data <- concentration_data_Linakis2020
#  #conc_data <- subset(concentration_data_Linakis2020, !(SAMPLING_MATRIX %in%
#  #                      c("EEB","MEB","EB")))
#  #conc_data <- concentration_data_Linakis2020
#  #  subset(concentration_data_Linakis2020, !(SOURCE_CVT %in% c(
#  #  "11453305")))
#  conc_data[,"DOSE_U"] <- ifelse(conc_data[,"DOSE_U"] == "ppm",
#                                 yes = "ppmv",
#                                 conc_data[,"DOSE_U"])
#  conc_data[,"ORIG_CONC_U"] <- ifelse(conc_data[,"ORIG_CONC_U"] == "ppm",
#                                      yes = "ppmv",
#                                      conc_data[,"ORIG_CONC_U"])
#  # Not sure what to do with percent:
#  conc_data <- subset(conc_data,toupper(ORIG_CONC_U) != "PERCENT")
#  # Rename this column:
#  colnames(conc_data)[colnames(conc_data)=="ORIG_CONC_U"] <- "CONC_U"
#  conc_data$ORIGINAL_CONC_U <- conc_data$CONC_U
#  conc_data$ORIGINAL_CONC <- conc_data$CONCENTRATION
#  # Maybe Linakis et al translated all concentrations to uM?
#  conc_data$CONC_U <- "uM"

## ----convert_to_ppmweight, eval = FALSE---------------------------------------
#  gas.media <- c("EB","MEB","EEB","EB (+W)")
#  gas.units <- unique(subset(conc_data,
#    SAMPLING_MATRIX %in% gas.media)$CONC_U)
#  target.unit <- "ppmv"
#  for (this.unit in gas.units)
#    if (this.unit != target.unit)
#    {
#      these.chems <- unique(subset(conc_data,
#        SAMPLING_MATRIX %in% gas.media &
#        CONC_U==this.unit)$DTXSID)
#      for (this.chem in these.chems)
#      {
#        this.factor <- convert_units(
#          input.units=this.unit,
#          output.units=target.unit,
#          dtxsid=this.chem, state="gas")
#        print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
#  
#        # Scale the observation
#        conc_data[conc_data$DTXSID==this.chem &
#                    conc_data$SAMPLING_MATRIX %in% gas.media &
#                    conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
#          this.factor * conc_data[
#            conc_data$DTXSID==this.chem &
#              conc_data$SAMPLING_MATRIX %in% gas.media &
#              conc_data$CONC_U==this.unit,"CONCENTRATION"]
#        # Change the reported unit
#        conc_data[conc_data$DTXSID==this.chem &
#                    conc_data$SAMPLING_MATRIX %in% gas.media &
#                    conc_data$CONC_U==this.unit,"CONC_U"] <-
#          target.unit
#  
#      }
#    }

## ----normalize_other_units, eval = FALSE--------------------------------------
#  tissue.media <- c("VBL","BL","ABL","PL","BL (+W)")
#  tissue.units <- unique(subset(conc_data,
#    SAMPLING_MATRIX %in% tissue.media)$CONC_U)
#  target.unit <- "uM"
#  for (this.unit in tissue.units)
#    if (this.unit != target.unit)
#    {
#      these.chems <- unique(subset(conc_data,
#        SAMPLING_MATRIX %in% tissue.media &
#        CONC_U==this.unit)$DTXSID)
#      for (this.chem in these.chems)
#      {
#        this.factor <- try(convert_units(
#          input.units=this.unit,
#          output.units=target.unit,
#          dtxsid=this.chem))
#        print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
#  
#        # Scale the observation
#        conc_data[conc_data$DTXSID==this.chem &
#                    conc_data$SAMPLING_MATRIX %in% tissue.media &
#                    conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
#          this.factor * conc_data[
#            conc_data$DTXSID==this.chem &
#              conc_data$SAMPLING_MATRIX %in% tissue.media &
#              conc_data$CONC_U==this.unit,"CONCENTRATION"]
#        # Change the reported unit
#        conc_data[conc_data$DTXSID==this.chem &
#                    conc_data$SAMPLING_MATRIX %in% tissue.media &
#                    conc_data$CONC_U==this.unit,"CONC_U"] <-
#          target.unit
#  
#      }
#    }

## ----summary, eval = execute.vignette-----------------------------------------
#  # Small molecule chemicals
#  summary(met_data$AVERAGE_MASS)
#  # Generally more lipophilic chemicals
#  summary(met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED)
#  # Unsurprisingly then, the chemicals are generally less water-soluble
#  summary(met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED)
#  # ~60% of samples in humans
#  table(conc_data$CONC_SPECIES)/nrow(conc_data)*100
#  # ~72% of samples are from blood
#  table(conc_data$SAMPLING_MATRIX)/nrow(conc_data)*100

## ----scenarios, eval = execute.vignette---------------------------------------
#  # Create a dataframe with 1 row for each unique external exposure scenario
#  unique_scenarios <- conc_data[with(conc_data,
#    order(PREFERRED_NAME,
#          CONC_SPECIES,
#          SAMPLING_MATRIX,
#          as.numeric(as.character(DOSE)),EXP_LENGTH,-TIME)),] %>%
#    distinct(DTXSID,DOSE,DOSE_U,EXP_LENGTH,CONC_SPECIES,SAMPLING_MATRIX, .keep_all = TRUE)

## ----runmodel, eval = execute.vignette----------------------------------------
#  # Store the output of each simulation:
#  simlist <- list()
#  # Store the Cvt data relevant to each simulation
#  obslist <- list()
#  # Conduct one simulation for each unique combination of chemical, species, dose:
#  for (i in 1:nrow(unique_scenarios))
#    if (unique_scenarios$CASRN[i] %in% get_cheminfo(model="gas_pbtk",
#                                                    suppress.messages = TRUE))
#  {
#    # Identify relevant Cvt data:
#      relconc <- subset(conc_data,conc_data$DTXSID == unique_scenarios$DTXSID[i] &
#        conc_data$DOSE == unique_scenarios$DOSE[i] &
#        conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] &
#        conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] &
#        conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i])
#      obslist[[i]] <- relconc
#  #
#  #
#  #
#  #
#  #
#  # THE FOLLOWING CODE RUNS solve_gas_pbtk FOR EACH SCENARIO
#  # (UNIQUE COMBINATION OF CHEMICAL, SPECIES, DOSE, ETC.)
#  #
#  #
#  #
#  #
#  #
#      solver.out <- try(suppressWarnings(as.data.frame(solve_gas_pbtk(
#          chem.cas = unique_scenarios$CASRN[i],
#          days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]),
#  # Make sure we get predicted conc's at the observed times:
#          times=unique(c(0,signif(obslist[[i]]$TIME,4))), # days
#          tsteps = 500,
#          exp.conc = as.numeric(unique_scenarios$DOSE[i]), # SED (06-22-2021) think this is ppmv for all scenarios
#           input.units = unique_scenarios$DOSE_U[i], # specify the units for exp.conc (ppmv)
#          exp.duration = unique_scenarios$EXP_LENGTH[i], # days
#          period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]), # days
#          species = as.character(unique_scenarios$CONC_SPECIES[i]),
#          monitor.vars = c(
#            "Cven",
#            "Clung",
#            "Cart",
#            "Cplasma",
#            "Calvppmv",
#            "Cendexhppmv",
#            "Cmixexhppmv",
#            "Calv",
#            "Cendexh",
#            "Cmixexh",
#            "Cmuc",
#            "AUC"),
#          vmax.km = FALSE,
#          suppress.messages = TRUE))))
#  #
#  #
#  #
#  #
#  #
#  #
#  #
#  #
#  #
#  #
#      if (class(solver.out) %in% "try-error")
#        solver.out <- data.frame(time=NA,Conc=NA)
#  
#      print(solver.out)
#  # Get the blood:plasma ratio:
#      this.Rb2p <- suppressWarnings(available_rblood2plasma(
#        chem.cas=unique_scenarios$CASRN[i],
#        species=as.character(unique_scenarios$CONC_SPECIES[i])))
#      solver.out$Rb2p <- this.Rb2p
#  
#      # The column simconc holds the appropriate prediction for the sampling matrix
#      # BL : blood
#      # EEB : end-exhaled breath
#      # MEB : mixed exhaled breath
#      # VBL : venous blood
#      # ABL : arterial blood
#      # EB : unspecified exhaled breath sample (assumed to be EEB)
#      # PL: plasma
#      # +W with work/exercise
#      #
#      # The model outputs are provided in the following units:
#  	  # uM: Cven, Clung, Cart, Cplasma, Calv, Cendexh, Cmixexh, Cmuc
#  	  # ppmv: Calvppmv, Cendexhppmv, Cmixexhppmv
#  	  # uM*days: AUC
#      if (unique_scenarios$SAMPLING_MATRIX[i] == "VBL" |
#        unique_scenarios$SAMPLING_MATRIX[i] == "BL" |
#        unique_scenarios$SAMPLING_MATRIX[i] == "BL (+W)")
#      {
#        solver.out$simconc <- solver.out$Cven*this.Rb2p
#        solver.out$unit <- "uM"
#      } else if (unique_scenarios$SAMPLING_MATRIX[i] == "ABL") {
#        solver.out$simconc <- solver.out$Cart*this.Rb2p
#        solver.out$unit <- "uM"
#      } else if (unique_scenarios$SAMPLING_MATRIX[i] == "EB" |
#        unique_scenarios$SAMPLING_MATRIX[i] == "EEB" |
#        unique_scenarios$SAMPLING_MATRIX[i] == "EB (+W)")
#      {
#        solver.out$simconc <- solver.out$Cendexh # uM
#        solver.out$unit <- "uM"
#      } else if (unique_scenarios$SAMPLING_MATRIX[i] == "MEB") {
#          solver.out$simconc <- solver.out$Cmixexh # uM
#          solver.out$unit <- "uM"
#      } else if (unique_scenarios$SAMPLING_MATRIX[i] == "PL") {
#        solver.out$simconc <- solver.out$Cplasma
#        solver.out$unit <- "uM"
#      } else {
#        solver.out$simconc <- NA
#        solver.out$unit <- NA
#      }
#      simlist[[i]] <- solver.out
#  }

## ----Makestudyplots, eval = execute.vignette----------------------------------
#  cvtlist <- list()
#  for(i in 1:nrow(unique_scenarios))
#  {
#      plot.data <- simlist[[i]]
#      relconc <- obslist[[i]]
#  
#      if (!is.null(plot.data))
#      {
#  #Right now this is only calculating real concentrations according to mg/L in blood
#      cvtlist[[i]] <- ggplot(data=plot.data, aes(time*24, simconc)) +
#        geom_line() +
#        xlab("Time (h)") +
#        ylab(paste0("Simulated ",
#          unique_scenarios$SAMPLING_MATRIX[i],
#          "\nConcentration (" ,
#          solver.out$unit, ")")) +
#        ggtitle(paste0(
#          unique_scenarios$PREFERRED_NAME[i],
#          " (",
#          unique_scenarios$CONC_SPECIES[i],
#          ", ",
#          round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
#          unique_scenarios$DOSE_U[i],
#          " for ",
#          round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
#          "h in ",
#          unique_scenarios$SAMPLING_MATRIX[i], ")")) +
#        geom_point(data = relconc, aes(TIME*24,CONCENTRATION)) +
#        theme(plot.title = element_text(face = 'bold', size = 20),
#          axis.title.x = element_text(face = 'bold', size = 20),
#          axis.text.x = element_text(size=16),
#          axis.title.y = element_text(face = 'bold', size = 20),
#          axis.text.y = element_text(size = 16),
#          legend.title = element_text(face = 'bold', size = 16),
#          legend.text = element_text(face = 'bold',size = 14))+
#        theme_bw()
#      }
#  }

## ----init_dataset, eval = execute.vignette------------------------------------
#  # Creation of simulated vs. observed concentration dataset
#  unique_scenarios$RSQD <- 0
#  unique_scenarios$RMSE <- 0
#  unique_scenarios$AIC <- 0
#  simobslist <- list()
#  obvpredlist <- list()

## ----merge_datasets, eval = execute.vignette----------------------------------
#  for(i in 1:length(simlist))
#  {
#    obsdata <- as.data.frame(obslist[[i]])
#    simdata <- as.data.frame(simlist[[i]])
#  # skips over anything for which there was no observed data or
#  # insufficient information to run simulation:
#    if (!is.null(simdata) &
#        !is.null(obsdata) &
#        dim(simdata)[1]>1)
#    {
#  # Make sure we are looking at consistent time points:
#      simobscomb <- simdata[simdata$time %in% signif(obsdata$TIME,4),]
#      obsdata <- subset(obsdata,signif(TIME,4) %in% simobscomb$time)
#  # Merge with obsdata
#      colnames(obsdata)[colnames(obsdata) ==
#        "TIME"] <-
#        "obstime"
#  # Round to match sim time:
#      obsdata$time <- signif(obsdata$obstime,4)
#      colnames(obsdata)[colnames(obsdata) ==
#        "CONCENTRATION"] <-
#        "obsconc"
#      colnames(obsdata)[colnames(obsdata) ==
#        "PREFERRED_NAME"] <-
#        "chem"
#      colnames(obsdata)[colnames(obsdata) ==
#        "DOSE"] <-
#        "dose"
#      colnames(obsdata)[colnames(obsdata) ==
#        "EXP_LENGTH"] <-
#        "explen"
#      colnames(obsdata)[colnames(obsdata) ==
#        "CONC_SPECIES"] <-
#        "species"
#      colnames(obsdata)[colnames(obsdata) ==
#        "SAMPLING_MATRIX"] <-
#        "matrix"
#      colnames(obsdata)[colnames(obsdata) ==
#        "AVERAGE_MASS"] <-
#        "mw"
#      colnames(obsdata)[colnames(obsdata) ==
#        "CONC_U"] <-
#        "CONC_U"
#      simobscomb <- suppressWarnings(merge(obsdata[,c(
#        "time",
#        "obstime",
#        "obsconc",
#        "chem",
#        "dose",
#        "explen",
#        "species",
#        "matrix",
#        "mw",
#        "CONC_U",
#        "ORIGINAL_CONC_U"
#        )], simobscomb, by="time", all.x=TRUE))
#  
#  # Merge with met_data
#      this.met_data <- subset(met_data,
#        PREFERRED_NAME == simobscomb[1,"chem"] &
#        SPECIES == simobscomb[1,"species"])
#      colnames(this.met_data)[colnames(this.met_data)=="CHEM_CLASS"] <-
#        "chemclass"
#      colnames(this.met_data)[colnames(this.met_data) ==
#        "OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"] <-
#        "logp"
#      colnames(this.met_data)[colnames(this.met_data) ==
#        "WATER_SOLUBILITY_MOL.L_OPERA_PRED"] <-
#        "sol"
#      colnames(this.met_data)[colnames(this.met_data) ==
#        "HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED"] <-
#        "henry"
#      colnames(this.met_data)[colnames(this.met_data) ==
#        "VMAX"] <-
#        "vmax"
#      colnames(this.met_data)[colnames(this.met_data) ==
#        "KM"] <-
#        "km"
#      simobscomb <- suppressWarnings(cbind(simobscomb,this.met_data[c(
#        "chemclass",
#        "logp",
#        "sol",
#        "henry",
#        "vmax",
#        "km")]))
#      simobslist[[i]] <- simobscomb
#    }
#  }

## ----time_quartile, eval = execute.vignette-----------------------------------
#  for(i in 1:length(simobslist))
#    if (!is.null(simobslist[[i]]))
#    {
#      simobscomb <- simobslist[[i]]
#      for (j in 1:nrow(simobscomb))
#      {
#        max.time <- max(simobscomb$time,na.rm=TRUE)
#        if (is.na(max.time)) simobscomb$tquart <- NA
#        else if (max.time == 0) simobscomb$tquart <- "1"
#        else if (!is.na(simobscomb$time[j]))
#        {
#          simobscomb$tquart[j] <- as.character(1 +
#            floor(simobscomb$time[j]/max.time/0.25))
#          simobscomb$tquart[simobscomb$tquart=="5"] <-
#            "4"
#        } else simobscomb$tquart[j] >- NA
#      }
#      simobslist[[i]] <- simobscomb
#    }

## ----calc_AUC, eval = execute.vignette----------------------------------------
#  for(i in 1:length(simobslist))
#    if (!is.null(simobslist[[i]]))
#    {
#      simobscomb <- simobslist[[i]]
#  # Calculat the AUC with the trapezoidal rule:
#      if (nrow(simobscomb)>1)
#      {
#        for (k in 2:max(nrow(simobscomb)-1,2,na.rm=TRUE))
#        {
#          simobscomb$obsAUCtrap[1] <- 0
#          simobscomb$simAUCtrap[1] <- 0
#          if (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) &
#              nrow(simobscomb) >=2)
#          {
#            simobscomb$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] +
#              0.5*(simobscomb$time[k] - simobscomb$time[k-1]) *
#              (simobscomb$obsconc[k] + simobscomb$obsconc[k-1])
#            simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] +
#              0.5*(simobscomb$time[k]-simobscomb$time[k-1]) *
#              (simobscomb$simconc[k] + simobscomb$simconc[k-1])
#          } else {
#            simobscomb$obsAUCtrap <- 0
#            simobscomb$simAUCtrap <- 0
#          }
#        }
#      } else {
#        simobscomb$obsAUCtrap <- 0
#        simobscomb$simAUCtrap <- 0
#      }
#      simobscomb$AUCobs <- max(simobscomb$obsAUCtrap)
#      simobscomb$AUCsim <- max(simobscomb$simAUCtrap)
#      simobscomb$calcAUC <- max(simobscomb$AUC)
#      if (min(simobscomb$time) <= simobscomb$explen[1]*1.03)
#      {
#        simobscomb$Cmaxobs <- max(simobscomb$obsconc)
#        simobscomb$Cmaxsim <- max(simobscomb$simconc)
#      } else {
#        simobscomb$Cmaxobs <- 0
#        simobscomb$Cmaxsim <- 0
#      }
#      simobslist[[i]] <- simobscomb
#    }

## ----calc_error, eval = execute.vignette--------------------------------------
#  for(i in 1:length(simobslist))
#    if (!is.null(simobslist[[i]]))
#    {
#      simobscomb <- simobslist[[i]]
#      unique_scenarios$RSQD[i] <- 1 - (
#        sum((simobscomb$obsconc - simobscomb$simconc)^2) /
#        sum((simobscomb$obsconc-mean(simobscomb$obsconc))^2)
#        )
#      unique_scenarios$RMSE[i] <-
#        sqrt(mean((simobscomb$simconc - simobscomb$obsconc)^2))
#      unique_scenarios$AIC[i] <-
#        nrow(simobscomb)*(
#          log(2*pi) + 1 +
#          log((sum((simobscomb$obsconc-simobscomb$simconc)^2) /
#            nrow(simobscomb)))
#        ) + ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
#      simobslist[[i]] <- simobscomb
#    }

## ----combine_studies, eval = execute.vignette---------------------------------
#  obsvpredlist <- list()
#  for(i in 1:length(simobslist))
#    if (!is.null(simobslist[[i]]))
#    {
#      simobscomb <- simobslist[[i]]
#      obsvpredlist[[i]] <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) +
#        geom_point() +
#        geom_abline() +
#        xlab("Simulated Concentrations (uM)") +
#        ylab("Observed Concentrations (uM)") +
#        ggtitle(paste0(
#          unique_scenarios$PREFERRED_NAME[i],
#          " (",
#          unique_scenarios$CONC_SPECIES[i],
#          ", ",
#          round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
#          unique_scenarios$DOSE_U[i],
#          " for ",
#          round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
#          "h in ",
#          unique_scenarios$SAMPLING_MATRIX[i], ")")) +
#        theme_bw() +
#        theme(plot.title = element_text(face = 'bold', size = 20),
#          axis.title.x = element_text(face = 'bold', size = 20),
#          axis.text.x = element_text(size=16),
#          axis.title.y = element_text(face = 'bold', size = 20),
#          axis.text.y = element_text(size = 16),
#          legend.title = element_text(face = 'bold', size = 16),
#          legend.text = element_text(face = 'bold',size = 14))
#    }

## ----obsvspredplots, eval = execute.vignette----------------------------------
#  # Create pdfs of observed vs. predicted concentration plots
#  dir.create("Linakis2020")
#  pdf("Linakis2020/obvpredplots.pdf", width = 10, height = 10)
#  for (i in 1:length(obsvpredlist)) {
#    print(obsvpredlist[[i]])
#  }
#  dev.off()

## ----annotate_study_stats, eval = execute.vignette----------------------------
#  for (i in 1:length(cvtlist))
#    if (!is.null(cvtlist[[i]]))
#  {
#    cvtlist[[i]] <- cvtlist[[i]] +
#      geom_text(
#        x = Inf,
#        y = Inf,
#        hjust = 1.3,
#        vjust = 1.3,
#  #      size = 6,
#        label = paste0(
#          "RMSE: ",
#          round(unique_scenarios$RMSE[i],digits = 2),
#          "\nAIC: ",
#          round(unique_scenarios$AIC[i],digits = 2)))# +
#  #    theme(
#  #      plot.title = element_text(face = 'bold', size = 15),
#  #      axis.title.x = element_text(face = 'bold', size = 20),
#  #      axis.text.x = element_text(size=16),
#  #      axis.title.y = element_text(face = 'bold', size = 20),
#  #      axis.text.y = element_text(size = 16),
#  #      legend.title = element_text(face = 'bold', size = 16),
#  #      legend.text = element_text(face = 'bold',size = 14))
#  }

## ----cvtplots, eval = execute.vignette----------------------------------------
#  # Create pdfs of simulated concentration-time plots against observed c-t values
#  pdf("Linakis2020/simdataplots.pdf")
#  for (i in 1:length(cvtlist)) {
#    print(cvtlist[[i]])
#  }
#  dev.off()

## ----combine_obs_vs_pred, eval = execute.vignette-----------------------------
#  simobsfull <- do.call("rbind",simobslist)
#  simobsfullrat <- subset(simobsfull, simobsfull$species == "Rat")
#  simobsfullhum <- subset(simobsfull, simobsfull$species == "Human")
#  unique_scenarios <- subset(unique_scenarios,!is.na(unique_scenarios$RSQD))

## ----Standardize Units, eval = FALSE------------------------------------------
#  these.chems <- unique(subset(simobsfull,unit=="ppmv")$chem)
#  for (this.chem in these.chems)
#  {
#    # Use HTTK unit conversion:
#    this.factor <- convert_units(
#      input.units="ppmv", output.units="um", chem.name=this.chem, state="gas")
#  
#    # Scale the observation
#    simobsfull[simobsfull$chem==this.chem &
#                simobsfull$unit=="ppmv","obsconc"] <-
#      this.factor * simobsfull[
#        simobsfull$chem==this.chem &
#          simobsfull$unit=="ppmv","obsconc"]
#    # Scale the prediction
#    simobsfull[simobsfull$chem==this.chem &
#                simobsfull$unit=="ppmv","simconc"] <-
#      this.factor * simobsfull[
#        simobsfull$chem==this.chem &
#          simobsfull$unit=="ppmv","simconc"]
#    # Change the reported unit
#    simobsfull[simobsfull$chem==this.chem &
#                simobsfull$unit=="ppmv","unit"] <-
#      "uM"
#  }

## ----regression, eval = execute.vignette--------------------------------------
#  table(unique_scenarios$CONC_SPECIES)
#  nrow(simobsfull) - nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#    simobsfull$simconc > 0 &
#    simobsfull$obsconc > 0,])
#  pmiss <- (nrow(simobsfull) -
#    nrow(simobsfull[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0,])) /
#    nrow(simobsfull) * 100
#  missdata <- (simobsfull[
#    is.na(simobsfull$simconc) |
#    simobsfull$simconc <= 0 |
#    simobsfull$obsconc <= 0,])
#  t0df <- simobsfull[simobsfull$obstime == 0,]
#  lmall <- lm(
#  #log transforms:
#    log10(simobsfull$obsconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0]) ~
#  #log transforms:
#    log10(simobsfull$simconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0]))
#  #Linear binned 1
#  lmsub1 <- lm(
#    simobsfull$obsconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc < 0.1] ~
#    simobsfull$simconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc < 0.1])
#  #Linear binned 2
#  lmsub2 <- lm(
#    simobsfull$obsconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc >= 0.1 &
#      simobsfull$obsconc < 10] ~
#    simobsfull$simconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc >= 0.1 &
#      simobsfull$obsconc < 10])
#  #Linear binned 3
#  lmsub3 <- lm(
#    simobsfull$obsconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc >= 10] ~
#    simobsfull$simconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc >= 10])
#  lmrat <- lm(
#    log10(simobsfullrat$obsconc[
#      !is.na(simobsfullrat$simconc) &
#      simobsfullrat$simconc > 0 &
#      simobsfullrat$obsconc > 0]) ~
#    log10(simobsfullrat$simconc[
#      !is.na(simobsfullrat$simconc) &
#      simobsfullrat$simconc > 0 &
#      simobsfullrat$obsconc > 0]))
#  unique(simobsfullrat$chem)
#  lmhum <- lm(
#    log10(simobsfullhum$obsconc[
#      !is.na(simobsfullhum$simconc) &
#      simobsfullhum$simconc > 0 &
#      simobsfullhum$obsconc > 0]) ~
#    log10(simobsfullhum$simconc[
#      !is.na(simobsfullhum$simconc) &
#      simobsfullhum$simconc > 0 &
#      simobsfullhum$obsconc > 0]))
#  unique(simobsfullhum$chem)
#  concregslope <- summary(lmall)$coef[2,1]
#  concregr2 <- summary(lmall)$r.squared
#  concregrmse <- sqrt(mean(lmall$residuals^2))
#  totalrmse <- sqrt(mean((
#    log10(simobsfull$simconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0]) -
#    log10(simobsfull$obsconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0]))^2,
#     na.rm = TRUE))
#  totalmae <- mean(abs(
#    log10(simobsfull$simconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0]) -
#    log10(simobsfull$obsconc[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0])),
#    na.rm = TRUE)
#  totalaic <- nrow(
#    simobsfull[
#      !is.na(simobsfull$simconc) &
#      simobsfull$simconc >0 &
#      simobsfull$obsconc >
#      0,]) *
#    (log(2*pi) +
#       1 +
#       log((sum(
#         (simobsfull$obsconc[
#           !is.na(simobsfull$simconc) &
#           simobsfull$simconc > 0 &
#           simobsfull$obsconc > 0] -
#         simobsfull$simconc[
#           !is.na(simobsfull$simconc) &
#           simobsfull$simconc > 0 &
#           simobsfull$obsconc > 0])^2,
#         na.rm=TRUE) /
#       nrow(simobsfull[
#         !is.na(simobsfull$simconc) &
#         simobsfull$simconc > 0 &
#         simobsfull$obsconc > 0,])))) +
#    ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
#  mispred <- table(abs(
#    log10(simobsfull$simconc) -
#    log10(simobsfull$obsconc))>2 &
#    simobsfull$simconc>0)
#  mispred[2]
#  mispred[2] / nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#      simobsfull$simconc >0 &
#      simobsfull$obsconc > 0,])*100
#  overpred <- table(
#    log10(simobsfull$simconc) -
#    log10(simobsfull$obsconc)>2 &
#    simobsfull$simconc>0)
#  overpred[2]
#  overpred[2] / nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#    simobsfull$simconc >0 &
#    simobsfull$obsconc > 0,])*100
#  underpred <- table(
#    log10(simobsfull$obsconc) -
#    log10(simobsfull$simconc)>2 &
#    simobsfull$simconc>0)
#  underpred[2]
#  underpred[2] / nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#    simobsfull$simconc >0 &
#    simobsfull$obsconc > 0,])*100
#  mispredhalf <- table(abs(
#    log10(simobsfull$simconc) -
#    log10(simobsfull$obsconc))>0.5 &
#    simobsfull$simconc>0)
#  mispredhalf[2]
#  mispredhalf[2] / nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#    simobsfull$simconc >0 &
#    simobsfull$obsconc > 0,])*100
#  overpredhalf <- table(
#    log10(simobsfull$simconc) -
#    log10(simobsfull$obsconc)>0.5 &
#    simobsfull$simconc>0)
#  overpredhalf[2]
#  overpredhalf[2] / nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#    simobsfull$simconc >0 &
#    simobsfull$obsconc > 0,])*100
#  underpredhalf <- table(
#    log10(simobsfull$obsconc) -
#    log10(simobsfull$simconc)>0.5 &
#    simobsfull$simconc>0)
#  underpredhalf[2]
#  underpredhalf[2] / nrow(simobsfull[
#    !is.na(simobsfull$simconc) &
#    simobsfull$simconc > 0 &
#    simobsfull$obsconc > 0,])*100
#  chemunderpred <- subset(simobsfull,
#    log10(simobsfull$simconc) -
#    log10(simobsfull$obsconc) < 0 &
#    simobsfull$simconc > 0)
#  table(chemunderpred$chemclass) / table(simobsfull$chemclass)*100

## ----obspredFig2, eval = execute.vignette-------------------------------------
#  fig2 <- ggplot(
#    data = simobsfull[
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0,],
#    aes(x = log10(simconc), y = log10(obsconc))) +
#    geom_point(
#      color = ifelse(
#        abs(
#          log10(simobsfull[
#            simobsfull$simconc > 0 &
#            simobsfull$obsconc > 0,]$simconc) -
#          log10(simobsfull[
#            simobsfull$simconc > 0 &
#            simobsfull$obsconc > 0,]$obsconc)) >2,
#        'red',
#        'black')) +
#    geom_abline() +
#    xlab("Log(Simulated Concentrations)") +
#    ylab("Log(Observed Concentrations)") +
#    theme_bw() +
#    geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall')) +
#    geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
#    geom_text(
#      x = Inf,
#      y = -Inf,
#      hjust = 1.05,
#      vjust = -0.15,
#      size = 8,
#      label = paste0(
#    #    "Regression slope: ",
#  #      round(summary(lmall)$coef[2,1],digits = 2),
#        "\nRegression R^2: ",
#        round(summary(lmall)$r.squared,digits = 2),
#        "\nRegression RMSE: ",
#        round(sqrt(mean(lmall$residuals^2)),digits = 2),
#        "\nRMSE (Identity): ",
#        round(totalrmse,digits = 2)
#   #     "\n% Missing:",
#  #      round(pmiss, digits = 2), "%")
#      )) +
#    #geom_text(
#    #  data = simobsfull[
#    #    abs(log10(simobsfull$simconc) - log10(simobsfull$obsconc))>7 &
#    #    simobsfull$simconc>0 & simobsfull$obsconc > 0,],
#    #  aes(label = paste(chem,species,matrix)),
#     ## fontface = 'bold',
#    #  check_overlap = TRUE,
#  #    size = 3.5,
#    #  hjust = 0.5,
#    #  vjust = -0.8) +
#    scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
#    theme(
#      plot.title = element_text(face = 'bold', size = 15),
#      axis.title.x = element_text(face = 'bold', size = 30),
#      axis.text.x = element_text(size=16),
#      axis.title.y = element_text(face = 'bold', size = 30),
#      axis.text.y = element_text(size = 16),
#      legend.title = element_text(face = 'bold', size = 24),
#      legend.text = element_text(face = 'bold',size = 24))
#  fig2 #Display plot in R

## ----make_how_to_add_models_version, eval=execute.vignette--------------------
#  library(scales)
#  # Function for formatting tick labels:
#  scientific_10 <- function(x) {
#    out <- gsub("1e", "10^", scientific_format()(x))
#    out <- gsub("\\+","",out)
#    out <- gsub("10\\^01","10",out)
#    out <- parse(text=gsub("10\\^00","1",out))
#  }
#  
#  font.size.large <- 10
#  font.size.small <- 8
#  
#  figaddmodels <- ggplot(
#    data = simobsfull[
#      simobsfull$simconc > 0 &
#      simobsfull$obsconc > 0,],
#    aes(x = simconc, y = obsconc)) +
#    geom_point(
#      color = ifelse(
#        abs(
#          log10(simobsfull[
#            simobsfull$simconc > 0 &
#            simobsfull$obsconc > 0,]$simconc) -
#          log10(simobsfull[
#            simobsfull$simconc > 0 &
#            simobsfull$obsconc > 0,]$obsconc)) >2,
#        'red',
#        'black'),alpha=0.15) +
#    geom_abline() +
#    scale_y_log10(label=scientific_10,limits=c(1e-4,1e4))+
#    scale_x_log10(label=scientific_10,limits=c(1e-4,1e4))+
#    xlab("Simulated Concentrations") +
#    ylab("Observed Concentrations") +
#    theme_bw() +
#    geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall', linetype="Overall")) +
#    geom_smooth(method = 'lm', se = FALSE, aes(color = species, linetype = species)) +
#    geom_text(
#      x = 2,
#      y = -1,
#      size = 4,
#      label = paste0("RMSLE: ",
#              round(totalrmse,digits = 2)
#              )) +
#    scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
#    scale_linetype_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
#    theme(
#      plot.title = element_text(face = 'bold', size = font.size.small),
#      axis.title.x = element_text(face = 'bold', size = font.size.large),
#      axis.text.x = element_text(size=font.size.small),
#      axis.title.y = element_text(face = 'bold', size = font.size.large),
#      axis.text.y = element_text(size = font.size.small),
#      legend.title = element_text(face = 'bold', size = font.size.large),
#      legend.text = element_text(face = 'bold',size = font.size.large))
#  print(figaddmodels)
#  ggsave("c:/users/jwambaug/AddModelsFig1.tiff", width=6, height=4, dpi=300)
#  
#  counts <- simobsfull[,c("chem","dose","explen","species")]
#  counts <- subset(counts,!duplicated(counts))
#  paste(length(unique(counts$chem)),
#        "chemicals across",dim(counts)[1],
#        "experimental conditions in",
#        length(unique(counts$species)),"species.")

## ----write_figure2, eval = FALSE----------------------------------------------
#  pdf("Linakis2020/Figure2.pdf", width = 10, height = 10)
#  print(fig2)
#  dev.off()

## ----calculations, eval = execute.vignette------------------------------------
#  # Create data and run calculations for populating plots
#  cmaxfull <- subset(simobsfull, !duplicated(simobsfull$AUCobs) & simobsfull$Cmaxobs != 0)
#  cmaxobs <- cmaxfull$Cmaxobs
#  cmaxsim <- cmaxfull$Cmaxsim
#  cmaxobs <- cmaxobs[!is.nan(cmaxsim)]
#  cmaxsim <- cmaxsim[!is.nan(cmaxsim)]
#  cmaxsim[!is.finite(log10(cmaxsim))] <- NA
#  cmaxlm <- lm(log10(cmaxobs)~log10(cmaxsim), na.action = na.exclude)
#  cmaxvcbkg <- subset(cmaxfull,
#    paste(
#      cmaxfull$chem,
#      cmaxfull$dose,
#      cmaxfull$explen,
#      cmaxfull$species,
#      cmaxfull$matrix) %in%
#    paste(
#      t0df$chem,
#      t0df$dose,
#      t0df$explen,
#      t0df$species,
#      t0df$matrix))
#  cmaxvcbkg$cmaxcbkgratio <- cmaxvcbkg$Cmaxobs / cmaxvcbkg$obsconc
#  cmaxvcbkg$adjustedCmaxsim <- cmaxvcbkg$Cmaxsim - cmaxvcbkg$obsconc
#  aucfull <-subset(simobsfull,
#    !duplicated(simobsfull$AUCobs) &
#    simobsfull$AUCobs != 0)
#  aucobs <- aucfull$AUCobs
#  aucsim <- aucfull$AUCsim
#  aucobs <- aucobs[!is.nan(aucsim)]
#  aucsim <- aucsim[!is.nan(aucsim)]
#  aucsim[!is.finite(log10(aucsim))] <- NA
#  auclm <- lm(log10(aucobs)~log10(aucsim), na.action = na.exclude)
#  cmaxslope <- summary(cmaxlm)$coef[2,1]
#  cmaxrsq <- summary(cmaxlm)$r.squared
#  totalrmsecmax <- sqrt(mean((log10(cmaxfull$Cmaxsim) -
#    log10(cmaxfull$Cmaxobs))^2, na.rm = TRUE))
#  cmaxmiss <- nrow(cmaxfull[
#    abs(log10(cmaxfull$Cmaxsim) -
#    log10(cmaxfull$Cmaxobs)) > 1,])
#  cmaxmissp <- nrow(cmaxfull[
#    abs(log10(cmaxfull$Cmaxsim) -
#    log10(cmaxfull$Cmaxobs)) > 1,]) /
#    nrow(cmaxfull) * 100
#  cmaxmisschem <- table(cmaxfull[
#    abs(log10(cmaxfull$Cmaxsim) -
#    log10(cmaxfull$Cmaxobs)) > 1,]$chem)
#  aucslope <- summary(auclm)$coef[2,1]
#  aucrsq <- summary(auclm)$r.squared
#  totalrmseauc <- sqrt(mean((
#    log10(aucfull$AUCsim) -
#    log10(aucfull$AUCobs))^2, na.rm = TRUE))
#  aucmiss <- nrow(aucfull[
#    abs(log10(aucfull$AUCsim) -
#    log10(aucfull$AUCobs)) > 1,])
#  aucmissp <- nrow(aucfull[
#    abs(log10(aucfull$AUCsim) -
#    log10(aucfull$AUCobs)) > 1,]) /
#    nrow(aucfull) * 100
#  aucmisschem <- table(aucfull[
#    abs(log10(aucfull$AUCsim) -
#    log10(aucfull$AUCobs)) > 1,]$chem)

## ----Fig4plot, eval = execute.vignette----------------------------------------
#  cmaxp <- ggplot(data = cmaxfull, aes(x = log10(Cmaxsim), y = log10(Cmaxobs))) +
#    geom_point(color =
#      ifelse(abs(log10(cmaxfull$Cmaxsim)  -log10(cmaxfull$Cmaxobs))>=2, "red","black"))  +
#    geom_abline()  +
#    xlab("Log(Simulated Max Concentration)") +
#    ylab("Log(Observed Max Concentration)") +
#    theme_bw() +
#    geom_smooth(method = 'lm', se = FALSE, aes(color = 'Overall')) +
#    geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
#    geom_text(x = Inf,
#      y = -Inf,
#      hjust = 1.05,
#      vjust = -0.15,
#  #    size = 6,
#      label = paste0("Regression slope: ",
#        round(summary(cmaxlm)$coef[2,1],digits = 2),
#        "\nRegression R^2: ",
#        round(summary(cmaxlm)$r.squared,digits = 2))) +
#    geom_text_repel(
#      data = cmaxfull[
#        (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
#        log10(cmaxfull$Cmaxsim) > 2,],
#      aes(label = paste(chem,species,matrix)),
#      force = 2,
#   #   size = 5.3,
#      fontface = 'bold',
#      color = 'black',
#      hjust = -0.05,
#      vjust = 0.5) +
#    scale_y_continuous(lim = c (-1,5)) +
#    scale_x_continuous(lim = c(-1,5)) +
#    geom_text_repel(
#      data = cmaxfull[
#        (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
#        log10(cmaxfull$Cmaxsim) <= 2,],
#      aes(label = paste(chem,species,matrix)),
#      nudge_x = 0.0,
#      nudge_y = -0.2,
#      force = 2,
#  #    size = 5.3,
#      fontface = 'bold',
#      color = 'black',
#      hjust = -0.05,
#      vjust = 0.5) +
#    geom_text(
#      data = cmaxfull[
#        (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))<=-2,],
#      aes(label = paste(chem,species,matrix)),
#  #    size = 5.3,
#      fontface = 'bold',
#      color = 'black',
#      hjust = 0.5,
#      vjust = -0.7) +
#    scale_color_discrete(
#      name = 'Species',
#      breaks = c("Overall","Human","Rat")) #+
#  #  theme(plot.title = element_text(face = 'bold', size = 10),
#  #    axis.title.x = element_text(face = 'bold', size = 10),
#  #    axis.text.x = element_text(size=8),
#  #    axis.title.y = element_text(face = 'bold', size = 10),
#  #    axis.text.y = element_text(size = 8),
#  #    legend.title = element_text(face = 'bold', size = 8),
#  #    legend.text = element_text(face = 'bold',size = 8))
#  cmaxp
#  aucp <- ggplot(
#    data = aucfull,
#    aes(x = log10(AUCsim), y = log10(AUCobs))) +
#    geom_point(color =
#      ifelse(abs(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2, "red","black"))  +
#    geom_abline()  +
#    xlab("Log(Simulated AUC)") +
#    ylab("Log(Observed AUC)") +
#    theme_bw() +
#    geom_smooth(method = 'lm', se = FALSE, aes(color = "Overall")) +
#    geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
#    geom_text(
#      x = Inf,
#      y = -Inf,
#      hjust = 1.05,
#      vjust = -0.15,
#  #    size = 6,
#      label = paste0(
#        "Regression slope: ",
#        round(summary(auclm)$coef[2,1],digits = 2),
#        "\nRegression R^2: ",
#        round(summary(auclm)$r.squared,digits = 2))) +
#    geom_text_repel(
#      data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2,],
#      aes(label = paste(chem,species,matrix)),
#  #    size = 5.3,
#      fontface = 'bold',
#      color = 'black',
#      hjust = -0.05,
#      vjust = 0.5) +
#    scale_y_continuous(lim = c (-2,4)) +
#    scale_x_continuous(lim = c(-2,4)) +
#    geom_text(
#      data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))<=-2,],
#      aes(label = paste(chem,species,matrix)),
#  #    size = 5.3,
#      fontface = 'bold',
#      color = 'black',
#      hjust = 0.5,
#      vjust = -0.8) +
#    scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+
#  #  theme(
#  #    plot.title = element_text(face = 'bold', size = 15),
#  #    axis.title.x = element_text(face = 'bold', size = 20),
#  #    axis.text.x = element_text(size=16),
#  #    axis.title.y = element_text(face = 'bold', size = 20),
#  #    axis.text.y = element_text(size = 16),
#  #    legend.title = element_text(face = 'bold', size = 16),
#  #    legend.text = element_text(face = 'bold',size = 14))
#  aucp

## ----write_figure4, eval = execute.vignette-----------------------------------
#  pdf("Linakis2020/Figure4.pdf", width = 20, height = 10)
#  plot_grid(cmaxp,aucp,nrow = 1, labels = c('A','B'), label_size = 30)
#  dev.off()

## ----Fig3plot, eval = execute.vignette----------------------------------------
#  simobsfull$aggscen <- as.factor(paste(simobsfull$chem,
#    simobsfull$species,
#    simobsfull$matrix))
#  chem.lm <- lm(
#    log10(simconc) - log10(obsconc) ~
#    aggscen,
#    data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,])
#  chem.res <- resid(chem.lm)
#  # Number of observations per chemical class
#  numpt <- simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,] %>%
#    group_by(chemclass) %>% summarize(n = paste("n =", length(simconc)))
#  fig3 <- ggplot(
#    data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#    aes(x = aggscen, y = log10(simconc)-log10(obsconc), fill = chemclass)) +
#    geom_boxplot() +
#    geom_hline(yintercept = 0) +
#    geom_hline(yintercept = 2, linetype = 2)+
#    geom_hline(yintercept = -2, linetype = 2)+
#    xlab("Exposure Scenario") +
#    ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#    facet_wrap(vars(chemclass), scales = 'free_x', nrow = 1) + #35 by 12 for poster
#    theme_bw() +
#    geom_text(
#      data = numpt,
#      aes(x = Inf, y = -Inf, hjust = 1.05, vjust = -0.5, label = n),
#      size = 10,
#      colour = 'black',
#      inherit.aes = FALSE,
#      parse = FALSE) +
#    theme(
#      axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5,size = 20, face = 'bold'),
#      strip.text.x = element_text(face = 'bold', size = 24),
#      legend.position = 'none',
#      axis.title.x = element_text(face = 'bold', size = 30),
#      axis.title.y = element_text(face = 'bold', size = 30),
#      axis.text.y = element_text(face = 'bold',size = 25, color = 'black'))
#  fig3

## ----write_figure3, eval = execute.vignette-----------------------------------
#  pdf("Linakis2020/Figure3.pdf", width = 40, height = 13)
#  print(fig3)
#  dev.off()

## ----FigS1, eval = execute.vignette-------------------------------------------
#  figs1a <- ggplot(
#      data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#      aes(x = tquart, y = log10(simconc)-log10(obsconc))) +
#    geom_boxplot()+
#    geom_hline(yintercept = 0)+
#    geom_hline(yintercept = 2, linetype = 2)+
#    geom_hline(yintercept = -2, linetype = 2)+
#    xlab("\nTime Quartile\n") +
#    ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#    theme_bw()+
#    theme(
#      axis.text.x = element_text(size = 20, face = 'bold'),
#      strip.text.x = element_text(face = 'bold', size = 20),
#      legend.position = 'none',
#      axis.title.x = element_text(face = 'bold', size = 20),
#      axis.title.y = element_text(face = 'bold', size = 20),
#      axis.text.y = element_text(size = 20, face = 'bold'))
#  figs1a
#  figs1b <- ggplot(
#      data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#      aes(x = mw, y = log10(simconc)-log10(obsconc))) +
#    geom_point()+
#    geom_hline(yintercept = 0)+
#    geom_hline(yintercept = 2, linetype = 2)+
#    geom_hline(yintercept = -2, linetype = 2)+
#    xlab("\nMolecular Weight (g/mol)\n") +
#    ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#    theme_bw()+
#    theme(
#      axis.text.x = element_text(size = 20, face = 'bold'),
#      strip.text.x = element_text(face = 'bold', size = 20),
#      legend.position = 'none',
#      axis.title.x = element_text(face = 'bold', size = 20),
#      axis.title.y = element_text(face = 'bold', size = 20),
#      axis.text.y = element_text(size = 20, face = 'bold'))
#  figs1b
#  figs1c <- ggplot(
#      data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#      aes(x = logp, y = log10(simconc)-log10(obsconc))) +
#    geom_point()+
#    geom_hline(yintercept = 0)+
#    geom_hline(yintercept = 2, linetype = 2)+
#    geom_hline(yintercept = -2, linetype = 2)+
#    xlab("\nLog P") +
#    ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#    theme_bw() +
#    theme(
#      axis.text.x = element_text(size = 20, face = 'bold'),
#      strip.text.x = element_text(face = 'bold', size = 20),
#      legend.position = 'none',
#      axis.title.x = element_text(face = 'bold', size = 20),
#      axis.title.y = element_text(face = 'bold', size = 20),
#      axis.text.y = element_text(size = 20, face = 'bold'))
#  figs1c
#  figs1d <- ggplot(
#      data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
#      aes(x = sol, y = log10(simconc)-log10(obsconc))) +
#    geom_point()+
#    geom_hline(yintercept = 0)+
#    geom_hline(yintercept = 2, linetype = 2)+
#    geom_hline(yintercept = -2, linetype = 2)+
#    xlab("\nSolubility (mol/L)") +
#    ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
#    scale_x_log10()+
#    theme_bw() +
#    theme(
#      axis.text.x = element_text(size = 20, face = 'bold'),
#      strip.text.x = element_text(face = 'bold', size = 20),
#      legend.position = 'none',
#      axis.title.x = element_text(face = 'bold', size = 20),
#      axis.title.y = element_text(face = 'bold', size = 20),
#      axis.text.y = element_text(size = 20, face = 'bold'))
#  figs1d

## ----write_figures1, eval = execute.vignette----------------------------------
#  pdf("Linakis2020/FigureS1.pdf", width = 20, height = 20)
#  plot_grid(figs1a,figs1b,figs1c,figs1d,nrow = 2, labels = c('A','B','C','D'), label_size = 30)
#  dev.off()

## ----SupTable2, eval = execute.vignette---------------------------------------
#  senschem <- list()
#  sensslope <- list()
#  sensrsq <- list()
#  sensregrmse <- list()
#  senstotalrmse <- list()
#  senspmiss <- list()
#  senscmaxslope <- list()
#  senscmaxrsq <- list()
#  senstotalrmsecmax <- list()
#  sensaucslope <- list()
#  sensaucrsq <- list()
#  senstotalrmseauc <- list()
#  for (i in 1:nrow(simobsfull))
#  {
#    simobsfullsens <- subset(simobsfull,simobsfull$chem != simobsfull$chem[i])
#    senschem[i] <- as.character(simobsfull$chem[i])
#    senslm <- lm(
#      log10(simobsfullsens$obsconc[
#        !is.na(simobsfullsens$simconc) &
#        simobsfullsens$simconc > 0 &
#        simobsfullsens$obsconc > 0]) ~
#      log10(simobsfullsens$simconc[
#        !is.na(simobsfullsens$simconc) &
#        simobsfullsens$simconc >0 &
#        simobsfullsens$obsconc > 0]))
#    sensslope[i] <- round(summary(senslm)$coef[2,1],digits = 2)
#    sensrsq[i] <- round(summary(senslm)$r.squared,digits = 2)
#    sensregrmse[i] <- round(sqrt(mean(senslm$residuals^2)),digits = 2)
#    senstotalrmse[i] <- round(sqrt(mean((
#      log10(simobsfullsens$simconc[
#        !is.na(simobsfullsens$simconc) &
#        simobsfullsens$simconc >0 &
#        simobsfullsens$obsconc > 0]) -
#      log10(simobsfullsens$obsconc[
#        !is.na(simobsfullsens$simconc) &
#          simobsfullsens$simconc >0 &
#          simobsfullsens$obsconc > 0]))^2,
#      na.rm = TRUE)), digits = 2)
#    senspmiss[i] <- round((nrow(simobsfullsens) -
#      nrow(simobsfullsens[
#        !is.na(simobsfullsens$simconc) &
#        simobsfullsens$simconc >0 &
#        simobsfullsens$obsconc > 0,])) / nrow(simobsfullsens) * 100,
#      digits = 2)
#    senscmaxfull <- subset(simobsfullsens, !duplicated(simobsfullsens$Cmaxobs))
#    senscmaxlm <- lm(
#      log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]) ~
#      log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]),
#      na.action = na.exclude)
#    sensaucfull <-subset(simobsfullsens, !duplicated(simobsfullsens$AUCobs))
#    sensauclm <- lm(
#      log10(aucfull$AUCobs[aucfull$AUCobs>0]) ~
#      log10(aucfull$AUCsim[aucfull$AUCobs>0]),
#      na.action = na.exclude)
#    senscmaxslope[i] <- round(summary(senscmaxlm)$coef[2,1],digits = 2)
#    senscmaxrsq[i] <- round(summary(senscmaxlm)$r.squared,digits = 2)
#    senstotalrmsecmax[i] <- sqrt(mean((log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]) - log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]))^2, na.rm = TRUE))
#    sensaucslope[i] <- round(summary(sensauclm)$coef[2,1],digits = 2)
#    sensaucrsq[i] <- round(summary(sensauclm)$r.squared,digits = 2)
#    senstotalrmseauc[i] <- sqrt(mean((log10(sensaucfull$AUCsim[sensaucfull$AUCobs>0]) - log10(sensaucfull$AUCobs[sensaucfull$AUCobs>0]))^2, na.rm = TRUE))
#  }
#  sensitivitydf <- data.frame(Chemical <- as.character(senschem),
#                              sensSlope <- as.numeric(sensslope),
#                              sensRsq <- as.numeric(sensrsq),
#                              sensRegrmse <- as.numeric(sensregrmse),
#                              sensTotrmse <- as.numeric(senstotalrmse),
#                              sensPmiss <- as.numeric(senspmiss),
#                              sensCmaxslope <- as.numeric(senscmaxslope),
#                              sensCmaxrsq <- as.numeric(senscmaxrsq),
#                              sensCmaxrmse <- as.numeric(senstotalrmsecmax),
#                              sensAUCslope <- as.numeric(sensaucslope),
#                              sensAUCrsq <- as.numeric(sensaucrsq),
#                              sensAUCrmse <- as.numeric(senstotalrmseauc),
#                              stringsAsFactors=FALSE)
#  sensitivitydf <- subset(sensitivitydf,!duplicated(sensitivitydf$Chemical....as.character.senschem.))
#  names(sensitivitydf) <- c('Chemical Dropped','Regression Slope','Regression R^2','Regression RMSE','Orthogonal RMSE', 'Percent Data Censored','Cmax Regression Slope','Cmax Regression R^2','Cmax Orthogonal RMSE','AUC Regression Slope','AUC Regression R^2','AUC Orthogonal RMSE')
#  notdropped <- c('None',concregslope,concregr2,concregrmse,totalrmse,pmiss,cmaxslope,cmaxrsq,totalrmsecmax,aucslope,aucrsq,totalrmseauc)
#  sensitivitydf <- rbind(notdropped, sensitivitydf)
#  sensitivitydf[,-1] <- sapply(sensitivitydf[,-1],as.numeric)
#  sensitivitydf[,-1] <- round(sensitivitydf[,-1],2)
#    # Clean up and write file
#  rm(chem.lm, obvpredplot, p, pdata1, plot.data, plots, relconc, sensaucfull, sensauclm, sensaucrsq, sensaucslope, senschem, senscmaxfull, senscmaxlm, senscmaxrsq, senscmaxslope, senslm, senspmiss, sensregrmse, sensrsq, sensslope, senstotalrmse, senstotalrmseauc, senstotalrmsecmax, solve, AUCrmse, AUCrsq, AUCslope, chem.res, Chemical, Cmaxrmse, Cmaxrsq, Cmaxslope, colors, count, i, j, k, met_col, name, name1, Pmiss, Regrmse, Rsq, Slope, rem, Totrmse)
#  
#  write.csv(sensitivitydf, 'supptab2.csv',row.names = FALSE)

## ----SupTable1, eval = execute.vignette---------------------------------------
#  supptab1 <- subset(unique_scenarios, !duplicated(unique_scenarios$PREFERRED_NAME) | !duplicated(unique_scenarios$SOURCE_CVT) | !duplicated(unique_scenarios$CONC_SPECIES))
#  for(i in 1:nrow(supptab1)){
#    tryCatch({
#      supptab1$Metabolism_Source[i] <- met_data$SOURCE_MET[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Log_P[i] <- met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Solubility[i] <- met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Blood_Air_Partition_Coefficient[i] <- met_data$CALC_PBA[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Chem_Class[i] <- met_data$CHEM_CLASS[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Species[i] <- met_data$SPECIES[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Vmax[i] <- met_data$VMAX[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#      supptab1$Km[i] <- met_data$KM[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
#    }, error = function(e){})
#  }
#  supptab1 <- supptab1[c('PREFERRED_NAME','DTXSID','CASRN','Chem_Class','AVERAGE_MASS','Log_P','Solubility','Blood_Air_Partition_Coefficient','Species','Vmax','Km','Metabolism_Source','SOURCE_CVT')]
#  names(supptab1) <- c('Chemical','DTXSID','CASRN','CAMEO Chemical Class','Molecular Weight (g/mol)','Log P','Solubility (mol/L)','Blood Air Partition Coefficient','Available Species Data','Vmax (pmol/min/10^6 cells)','KM (uM)','Metabolism Data Source','Concentration-Time Data Source')
#  write.csv(supptab1, "supptab1.csv", row.names = FALSE)

Try the httk package in your browser

Any scripts or data that you put into this service are public.

httk documentation built on Sept. 11, 2024, 9:32 p.m.