inst/doc/Vf_Wambaugh2019.R

## ----configure_knitr, eval = TRUE---------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = '#>', fig.width=5, fig.height=3)

## ----clear_memory, eval = TRUE------------------------------------------------
rm(list=ls()) 

## ----runchunks, eval = TRUE---------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE

## ----setup, eval = execute.vignette-------------------------------------------
#  library(data.table)
#  library(httk)
#  library(ggplot2)
#  library(stringr)
#  library(scales)
#  library(grid)
#  library(reshape)

## ----lmr2, eval = execute.vignette--------------------------------------------
#  lm_R2 <- function(m,prefix=NULL){
#      out <- substitute("RMSE = "~mse*","~~italic(R)^2~"="~r2,
#           list(mse = signif(mean(m$residuals^2)^(1/2),3),
#                r2 = format(summary(m)$adj.r.squared, digits = 2)))
#      paste(prefix,as.character(as.expression(out)))
#  }

## ----multi.plot, eval = execute.vignette--------------------------------------
#  multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL, heights=NULL, widths=unit(rep_len(1, cols), "null")) {
#  
#    # Make a list from the ... arguments and plotlist
#    plots <- c(list(...), plotlist)
#  
#    numPlots = length(plots)
#  
#    # If layout is NULL, then use 'cols' to determine layout
#    if (is.null(layout)) {
#      # Make the panel
#      # ncol: Number of columns of plots
#      # nrow: Number of rows needed, calculated from # of cols
#      layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
#                      ncol = cols, nrow = ceiling(numPlots/cols))
#    }
#  
#   if (numPlots==1) {
#      print(plots[[1]])
#  
#    } else {
#      # Set up the page
#      grid.newpage()
#      if (!is.null(heights))
#      {
#        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout),heights=heights,widths=widths)))
#      } else {
#        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout),widths=widths)))
#      }
#      # Make each plot, in the correct location
#      for (i in 1:numPlots) {
#        # Get the i,j matrix positions of the regions that contain this subplot
#        matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
#  
#        print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
#                                        layout.pos.col = matchidx$col))
#      }
#    }
#  }

## ----scientific.notation, eval = execute.vignette-----------------------------
#  scientific_10 <- function(x) {
#    out <- gsub("1e", "10^", scientific_format()(x))
#    out <- gsub("\\+","",out)
#    out <- gsub("10\\^01","10",out)
#    out <- parse(text=gsub("10\\^00","1",out))
#  }

## ----scientific.notation2, eval = execute.vignette----------------------------
#  scientific_10_skip <- function(x) {
#    out <- gsub("1e", "10^", scientific_format()(x))
#    out <- gsub("\\+","",out)
#    out <- gsub("10\\^01","10",out)
#    out <- gsub("10\\^00","1",out)
#    out <- gsub("10\\^-01",NA,out)
#    return(parse(text=out))
#  }

## ----cssfun.u, eval = execute.vignette----------------------------------------
#  cssfun_u <- function(chemcas,
#                        MW,
#                        hepatic.bioavailability,
#                        Fgutabs=1,
#                        probs=0.95,
#                        minimum.Funbound.plasma=0.0001)
#  {
#    # Create a data table with uncertainty only:
#    pop_u_httk <- as.data.table(parameterize_steadystate(chem.cas=chemcas,
#      clint.pvalue.threshold=1,
#      minimum.Funbound.plasma=0))
#  
#    # Get the physico-chemical properties:
#    params.schmitt <-parameterize_schmitt(chem.cas=chemcas)
#    psd <- params.schmitt$pKa_Donor
#    psa <- params.schmitt$pKa_Accept
#    params.schmitt <- params.schmitt[regexpr("pKa",names(params.schmitt))==-1]
#    pop_u_httk[,names(params.schmitt):=params.schmitt]
#    pop_u_httk[,pKa_Donor := paste(psd, collapse=",")]
#    pop_u_httk[,pKa_Accept := paste(psa, collapse=",")]
#    pop_u_httk <- pop_u_httk[rep(1,1000)]
#  
#    # Calculate the distribution coefficient:
#    ions <- calc_ionization(pH=7.4,parameters=pop_u_httk)
#    pop_u_httk[,Dow74 := Pow * (ions$fraction_neutral +
#      0.001 * ions$fraction_charged + ions$fraction_zwitter)]
#  
#    # Parse the Funbound.plasma.dist into quantiles:
#    Funbound.plasma.dist <- pop_u_httk[1,Funbound.plasma.dist]
#    if (nchar(Funbound.plasma.dist) -
#      nchar(gsub(",","",Funbound.plasma.dist))!=2)
#    {
#      stop("Funbound.plasma distribution should be three values (median,low95th,high95th) separated by commas.")
#    }
#    temp <- strsplit(Funbound.plasma.dist,",")
#    ppb.median <- as.numeric(temp[[1]][1])
#    ppb.low95 <- as.numeric(temp[[1]][2])
#    ppb.high95 <- as.numeric(temp[[1]][3])
#  
#    if (ppb.median>minimum.Funbound.plasma)
#    {
#      # Use optim to estimate alpha and beta for fup such that the median and 95%
#      # credible interval approximate the estimate from MCMC:
#      ppb.fit <- optim(c(2,(1-ppb.median)/ppb.median*2),
#        function(x) (0.95-pbeta(ppb.high95,x[1],x[2])+
#        pbeta(ppb.low95,x[1],x[2]))^2+
#        (ppb.median-qbeta(0.5,x[1],x[2]))^2,
#        method="BFGS")
#    # We are drawing new values for the unadjusted Fup:
#      pop_u_httk[, unadjusted.Funbound.plasma:=rbeta(1000,
#        ppb.fit$par[1],
#        ppb.fit$par[2])]
#    } else if (ppb.high95>minimum.Funbound.plasma)
#    {
#      # Assume that since the median is zero but the u95 is not, that there is
#      # an exponential distribution:
#      # 97.5% of clearance values will be below Funbound.plasma.u95:
#      pop_u_httk[,unadjusted.Funbound.plasma:=runif(1000,
#        minimum.Funbound.plasma,
#        min(1,minimum.Funbound.plasma+
#        2*(ppb.high95-minimum.Funbound.plasma)))]
#      pop_u_httk[as.logical(rbinom(1000,1,.95)),
#        unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#    } else {
#      pop_u_httk[, unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#    }
#  
#  # then we need to adjust for in vitro binding:
#    pop_u_httk[,Flipid:=subset(physiology.data,
#      Parameter=='Plasma Effective Neutral Lipid Volume Fraction')[,
#      which(colnames(physiology.data) == 'Human')]]
#    pop_u_httk[,Funbound.plasma.adjustment:=1 / (Dow74 * Flipid +
#      1 / unadjusted.Funbound.plasma)/unadjusted.Funbound.plasma]
#    pop_u_httk[,
#      Funbound.plasma:=unadjusted.Funbound.plasma*Funbound.plasma.adjustment]
#  
#  # Enforce a minimum ppb:
#    pop_u_httk[Funbound.plasma<minimum.Funbound.plasma,
#      Funbound.plasma:=minimum.Funbound.plasma]
#  
#  # If Fup varies, then Rb2p and hepatic bioavailability also vary:
#    Rblood2plasma <- get_rblood2plasma(chem.cas=chemcas)
#    if (is.na(Rblood2plasma))
#    {
#      pop_u_httk[, Rblood2plasma:=calc_rblood2plasma(params=pop_u_httk)]
#    # Right now we don't similate variability on a known blood:plasma ratio
#    } else pop_u_httk[, Rblood2plasma:=Rblood2plasma]
#  
#    # Parse the clint.dist to retrieve the quantiles:
#    Clint.dist <- pop_u_httk[1,Clint.dist]
#    if (nchar(Clint.dist) -
#      nchar(gsub(",","",Clint.dist))!=3)
#    {
#      stop("Clint distribution should be four values (median,low95th,high95th,pValue) separated by commas.")
#    }
#    temp <- strsplit(Clint.dist,",")
#    Clint.median <- as.numeric(temp[[1]][1])
#    Clint.low95 <- as.numeric(temp[[1]][2])
#    Clint.high95 <- as.numeric(temp[[1]][3])
#    Clint.pvalue <- as.numeric(temp[[1]][4])
#  
#    # Use optim to estimate mean and standard deviation of Clint such that the
#    # median and 95% credible interval approximate the estimate from MCMC:
#    if (Clint.high95>0)
#    {
#      if (Clint.median>0)
#      {
#        clint.fit <- optim(c(log(Clint.median),0.3), function(x)
#          (0.95-plnorm(Clint.high95,x[1],x[2])+plnorm(Clint.low95,x[1],x[2]))^2+
#          (Clint.median-qlnorm(0.5,x[1],x[2]))^2)
#        pop_u_httk[,Clint:=rlnorm(1000,clint.fit$par[1],clint.fit$par[2])]
#      } else {
#        pop_u_httk[,Clint:=exp(runif(1000,log(1),
#        (log(Clint.high95)-log(1))/0.975))]
#      }
#    } else pop_u_httk[,Clint:=0]
#    pop_u_httk[as.logical(rbinom(1000,1,Clint.pvalue)),Clint:=0] # the Bayesian "p-value" here reflects how often there is no clearance
#  
#    pop_u_httk[, CLh:=calc_hepatic_clearance(chem.cas=chemcas,
#      parameters=pop_u_httk,hepatic.model='unscaled',suppress.messages=TRUE,clint.pvalue.threshold=1)]
#    pop_u_httk[, Qliver:=Qtotal.liverc*BW^0.75]
#    pop_u_httk[, hepatic.bioavailability:= Qliver / (Qliver +
#      Funbound.plasma * Clint / Rblood2plasma)]
#    pop_u_httk[, (setdiff(names(pop_u_httk),
#      names(httk::parameterize_steadystate(chem.cas=chemcas)))):=NULL]
#  
#  
#    #compute Css for each individual
#    # calc_analytic_css is vectorized so that this will work.
#    css <- httk::calc_analytic_css(parameters=pop_u_httk,
#                                   clint.pvalue.threshold=1,
#                                   daily.dose=1,
#                                   clint.pop.cv=NULL,
#                                   fup.pop.cv=NULL,
#                                   output.units="uM",
#                                   model="3compartmentss",
#                                   species="Human",
#                                   suppress.messages=TRUE)#,
#                              #     recalc.blood2plasma=TRUE)
#    #get Css95
#    cssquants <- quantile(css, probs=probs)
#    return(cssquants)
#  }

## ----hlfun.u, eval = execute.vignette-----------------------------------------
#  hlfun_u <- function(chemcas,
#                        MW,
#                        probs=0.95,
#                        minimum.Funbound.plasma=0.0001)
#  {
#    # Create a data table with uncertainty only:
#    print(chemcas)
#    pop_u_httk <- as.data.table(parameterize_steadystate(chem.cas=chemcas,
#      clint.pvalue.threshold=1,
#      minimum.Funbound.plasma=0))
#    params.schmitt <-parameterize_schmitt(chem.cas=chemcas)
#    psd <- params.schmitt$pKa_Donor
#    psa <- params.schmitt$pKa_Accept
#    params.schmitt <- params.schmitt[regexpr("pKa",names(params.schmitt))==-1]
#    pop_u_httk[,names(params.schmitt):=params.schmitt]
#    pop_u_httk[,pKa_Donor := paste(psd, collapse=",")]
#    pop_u_httk[,pKa_Accept := paste(psa, collapse=",")]
#    pop_u_httk <- pop_u_httk[rep(1,1000)]
#  
#    ions <- calc_ionization(pH=7.4,parameters=pop_u_httk)
#    pop_u_httk[,Dow74 := Pow * (ions$fraction_neutral + 0.001 * ions$fraction_charged + ions$fraction_zwitter)]
#  
#    # Parse the Funbound.plasma.dist into quantiles:
#    Funbound.plasma.dist <- pop_u_httk[1,Funbound.plasma.dist]
#    print(paste(chemcas,Funbound.plasma.dist))
#    if (nchar(Funbound.plasma.dist) -
#      nchar(gsub(",","",Funbound.plasma.dist))!=2)
#    {
#      stop("Funbound.plasma distribution should be three values (median,low95th,high95th) separated by commas.")
#    }
#    temp <- strsplit(Funbound.plasma.dist,",")
#    ppb.median <- as.numeric(temp[[1]][1])
#    ppb.low95 <- as.numeric(temp[[1]][2])
#    ppb.high95 <- as.numeric(temp[[1]][3])
#  
#    if (ppb.median>minimum.Funbound.plasma)
#    {
#      # Use optim to estimate alpha and beta for fup such that the median and 95%
#      # credible interval approximate the estimate from MCMC:
#      ppb.fit <- optim(c(2,(1-ppb.median)/ppb.median*2),
#        function(x) (0.95-pbeta(ppb.high95,x[1],x[2])+
#        pbeta(ppb.low95,x[1],x[2]))^2+
#        (ppb.median-qbeta(0.5,x[1],x[2]))^2,
#        method="BFGS")
#    # We are drawing new values for the unadjusted Fup:
#      pop_u_httk[, unadjusted.Funbound.plasma:=rbeta(1000,
#        ppb.fit$par[1],
#        ppb.fit$par[2])]
#    } else if (ppb.high95>minimum.Funbound.plasma)
#    {
#      # Assume that since the median is zero but the u95 is not, that there is
#      # an exponential distribution:
#      # 97.5% of clearance values will be below Funbound.plasma.u95:
#      pop_u_httk[, unadjusted.Funbound.plasma:=runif(1000,
#        minimum.Funbound.plasma,
#        min(1,(ppb.high95-minimum.Funbound.plasma)/0.975))]
#      pop_u_httk[as.logical(rbinom(1000,1,.975)),
#        unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#    } else {
#      pop_u_httk[, unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#    }
#  
#  # then we need to adjust for in vitro binding:
#    pop_u_httk[,Flipid:=subset(physiology.data,
#      Parameter=='Plasma Effective Neutral Lipid Volume Fraction')[,
#      which(colnames(physiology.data) == 'Human')]]
#    pop_u_httk[,Funbound.plasma.adjustment:=1 / (Dow74 * Flipid +
#      1 / unadjusted.Funbound.plasma)/unadjusted.Funbound.plasma]
#    pop_u_httk[,
#      Funbound.plasma:=unadjusted.Funbound.plasma*Funbound.plasma.adjustment]
#  
#  # Enforce a minimum ppb:
#    pop_u_httk[Funbound.plasma<minimum.Funbound.plasma,
#      Funbound.plasma:=minimum.Funbound.plasma]
#  
#    Rblood2plasma <- get_rblood2plasma(chem.cas=chemcas)
#    if (is.na(Rblood2plasma))
#    {
#      pop_u_httk[, Rblood2plasma:=calc_rblood2plasma(params=pop_u_httk)]
#    } else pop_u_httk[, Rblood2plasma:=Rblood2plasma] # Right now we don't similate variability on a known blood:plasma ratio
#    # Parse the clint.dist to retrieve the quantiles:
#    Clint.dist <- pop_u_httk[1,Clint.dist]
#    if (nchar(Clint.dist) -
#      nchar(gsub(",","",Clint.dist))!=3)
#    {
#      stop("Clint distribution should be four values (median,low95th,high95th,pValue) separated by commas.")
#    }
#    temp <- strsplit(Clint.dist,",")
#    Clint.median <- as.numeric(temp[[1]][1])
#    Clint.low95 <- as.numeric(temp[[1]][2])
#    Clint.high95 <- as.numeric(temp[[1]][3])
#    Clint.pvalue <- as.numeric(temp[[1]][4])
#  
#    # Use optim to estimate mean and standard deviation of Clint such that the
#    # median and 95% credible interval approximate the estimate from MCMC:
#    if (Clint.high95>0)
#    {
#      if (Clint.median>0)
#      {
#        clint.fit <- optim(c(log(Clint.median),0.3), function(x)
#          (0.95-plnorm(Clint.high95,x[1],x[2])+plnorm(Clint.low95,x[1],x[2]))^2+
#          (Clint.median-qlnorm(0.5,x[1],x[2]))^2)
#        pop_u_httk[,Clint:=rlnorm(1000,clint.fit$par[1],clint.fit$par[2])]
#      } else {
#        pop_u_httk[,Clint:=exp(runif(1000,log(1),
#        (log(Clint.high95)-log(1))/0.975))]
#      }
#    } else pop_u_httk[,Clint:=0]
#    pop_u_httk[as.logical(rbinom(1000,1,Clint.pvalue)),Clint:=0] # the Bayesian "p-value" here reflects how often there is no clearance
#  
#    pop_u_httk[, CLh:=calc_hepatic_clearance(chem.cas=chemcas,parameters=pop_u_httk,hepatic.model='unscaled',suppress.messages=TRUE,clint.pvalue.threshold=1)]
#    pop_u_httk[, Qliver:=Qtotal.liverc*BW^0.75]
#    pop_u_httk[, hepatic.bioavailability:= Qliver / (Qliver + Funbound.plasma * Clint / Rblood2plasma)]
#  
#    PC.names <- names(predict_partitioning_schmitt(chem.cas="80-05-7"))
#    pop_u_httk<-cbind(pop_u_httk,as.data.table(predict_partitioning_schmitt(parameters=pop_u_httk)))
#    pop_u_httk[,hl:=calc_elimination_rate(parameters=pop_u_httk)]
#  
#    return(log(2)/quantile(unlist(pop_u_httk[,"hl",with=FALSE]),probs=probs))
#  }

## ----Wambaugh2019.Fig1, eval = execute.vignette-------------------------------
#  Fig1.table <- httk::wambaugh2019.raw
#  Fig1.table$Error1 <- Fig1.table$Base.Fup.High - Fig1.table$Base.Fup.Low
#  Fig1.table$Error2 <- Fig1.table$Affinity.Fup.High - Fig1.table$Affinity.Fup.Low
#  Fig1.table$Sigma1 <- Fig1.table$Error1/(2*qnorm(0.975))
#  Fig1.table$Mean1 <- (Fig1.table$Base.Fup.High + Fig1.table$Base.Fup.Low)/(qnorm(0.975))
#  Fig1.table$CV1 <- Fig1.table$Sigma1/Fig1.table$Mean1
#  Fig1.table$Sigma2 <- Fig1.table$Error2/(2*qnorm(0.975))
#  Fig1.table$Mean2 <- (Fig1.table$Base.Fup.High + Fig1.table$Base.Fup.Low)/(qnorm(0.975))
#  Fig1.table$CV2 <- Fig1.table$Sigma2/Fig1.table$Mean2
#  Fig1.table$ErrorClint <- Fig1.table$CLint.1uM.High95th - Fig1.table$CLint.1uM.Low95th
#  Fig1.table$SigmaClint <- Fig1.table$ErrorClint/(2*qnorm(0.975))
#  Fig1.table$MeanClint <- (Fig1.table$CLint.1uM.High95th + Fig1.table$CLint.1uM.Low95th)/(qnorm(0.975))
#  Fig1.table$CVClint <- Fig1.table$SigmaClint/Fig1.table$MeanClint
#  
#  Fupmeasured <- subset(Fig1.table,!is.na(Affinity.Fup.Med))
#  Fupmeasured <- subset(Fupmeasured,!duplicated(CAS))
#  CLintmeasured <- subset(Fig1.table,!is.na(CLint.1uM.Median))
#  CLintmeasured <- subset(CLintmeasured,!duplicated(CAS))
#  
#  
#  
#  print(paste("New HTTK experimental measurements were made for",length(unique(Fig1.table$CAS)),"chemicals from the ToxCast HTS library."))
#  
#  print(paste("Fup was successfully measured for",length(unique(subset(Fig1.table,!is.na(Affinity.Fup.Med))$CAS)),"chemicals from the ToxCast HTS library."))
#  
#  print(paste("CLint was successfully measured for",length(unique(subset(Fig1.table,!is.na(CLint.1uM.Median))$CAS)),"chemicals from the ToxCast HTS library."))
#  
#  
#  
#  
#  Fig1a <- ggplot(Fig1.table)+
#     geom_freqpoly(binwidth = 0.5,lwd=1.5,color="Red",aes(Base.Fup.Med))+
#     geom_freqpoly(binwidth = 0.5,lwd=1.5,lty=3,color="Blue",aes(Affinity.Fup.Med))+
#    xlab(expression(paste("Measured ",F[up]))) +
#    ylab("Number of chemicals")+
#     scale_x_log10(label=scientific_10,limits=c(10^-6,1.05))+
#     labs(title=paste(length(unique(subset(Fig1.table,!is.na(Affinity.Fup.Med))$CAS)),"Chemicals Measured"))+
#      theme_bw()+
#      theme( text  = element_text(size=10)) +
#      annotate("text", x=10^-6,y=80,size=8,label="a")
#  
#  print(paste("Median 100%:",signif(median(Fig1.table$Base.Fup.Med,na.rm=TRUE),2)))
#  print(paste("Median Titration:",signif(median(Fig1.table$Affinity.Fup.Med,na.rm=TRUE),2)))
#  
#  
#  
#  Fig1b <- ggplot(Fig1.table)+
#     geom_histogram(binwidth = 0.5,alpha=0.6,fill="green",aes(CLint.1uM.Median))+
#     xlab(expression(paste("Measured ",Cl[int]," (uL/min/",10^6," hepatocytes)"))) +
#    ylab("Number of chemicals")+
#    scale_x_log10(label=scientific_10,limits=c(10^-1,5*10^3))+
#    labs(title=paste(length(unique(subset(Fig1.table,!is.na(CLint.1uM.Median))$CAS)),"Chemicals Measured"))+
#      theme_bw()+
#      theme( text  = element_text(size=10)) +
#      annotate("text", x=2*10^-1,y=80,size=8,label="b")
#  
#  print(paste(sum(Fig1.table$CLint.1uM.Median==0,na.rm=TRUE),"Zero Measurments"))
#  print(paste("Median Non-Zero:",signif(median(Fig1.table$CLint.1uM.Median,na.rm=TRUE),3)))
#  
#  
#  
#  Fig1c <- ggplot(Fig1.table)+
#     geom_histogram(binwidth = 0.1,alpha=0.2,fill="Red",aes(Error1))+
#     geom_histogram(binwidth = 0.1,alpha=0.2,fill="Blue",aes(Error2))+
#    xlab("Width of Credible Interval") +
#    ylab("Number of chemicals")+
#     scale_x_log10(label=scientific_10,limits=c(5*10^-4,1.05))+
#     labs(title=paste("CI:",signif(median(Fig1.table$Error1,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CV1,na.rm=TRUE),1),"/ CI:",signif(median(Fig1.table$Error2,na.rm=TRUE),1),"CV:",signif(median(Fig1.table$CV2,na.rm=TRUE),1)))+
#      theme_bw()+
#      theme( text  = element_text(size=10))+
#      annotate("text", x=5*10^-4,y=42,size=8,label="c")
#  
#  print(paste("Median 100% CI:",signif(median(Fig1.table$Error1,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CV1,na.rm=TRUE),2)))
#  print(paste("Median Titr. CI:",signif(median(Fig1.table$Error2,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CV2,na.rm=TRUE),2)))
#  
#  
#  Fig1d <- ggplot(Fig1.table)+
#     geom_histogram(binwidth = 0.1,alpha=0.6,fill="green",aes(ErrorClint))+
#    xlab("Width of Credible Interval") +
#    ylab("Number of chemicals")+
#    scale_x_log10(label=scientific_10,limits=c(10^0,10^3))+
#     labs(title=paste("Median CI:",signif(median(Fig1.table$ErrorClint,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CVClint,na.rm=TRUE),2)))+
#      theme_bw()+
#      theme( text  = element_text(size=10))+
#      annotate("text", x=1.5,y=25,size=8,label="d")
#  
#  print(paste("Median CI:",signif(median(Fig1.table$ErrorClint,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CVClint,na.rm=TRUE),2)))
#  
#  multiplot(Fig1a,Fig1c,Fig1b,Fig1d,cols=2,widths=c(1.75,1.75))

## ----Wambaugh2019.Fig2, eval = execute.vignette-------------------------------
#  Fig2.table <- httk::wambaugh2019.raw
#  print(paste(sum(!is.na(Fig2.table$Fup.point)),"total successful Fup estimates using point estimation."))
#  print(paste(sum(!is.na(Fig2.table$Base.Fup.Med)),"total successful Fup estimates using BAyesian analysis of top protein concentration."))
#  Fig2.table$Error1 <- Fig2.table$Base.Fup.High - Fig2.table$Base.Fup.Low
#  Fig2.table$Error2 <- Fig2.table$Affinity.Fup.High - Fig2.table$Affinity.Fup.Low
#  Fig2.table$Sigma1 <- Fig2.table$Error1/(2*qnorm(0.975))
#  Fig2.table$Mean1 <- (Fig2.table$Base.Fup.High + Fig2.table$Base.Fup.Low)/(qnorm(0.975))
#  Fig2.table$CV1 <- Fig2.table$Sigma1/Fig2.table$Mean1
#  Fig2.table$Sigma2 <- Fig2.table$Error2/(2*qnorm(0.975))
#  Fig2.table$Mean2 <- (Fig2.table$Base.Fup.High + Fig2.table$Base.Fup.Low)/(qnorm(0.975))
#  Fig2.table$CV2 <- Fig2.table$Sigma2/Fig2.table$Mean2
#  Fig2.table$Point.Estimate <- "Good"
#  Fig2.table[Fig2.table$Fup.point<0&!is.na(Fig2.table$Fup.point),"Point.Estimate"]<-"<0"
#  Fig2.table[Fig2.table$Fup.point>1&!is.na(Fig2.table$Fup.point),"Point.Estimate"]<-">1"
#  Fig2.table[is.na(Fig2.table$Fup.point),"Point.Estimate"]<-"No Value"
#  Fig2.table[Fig2.table$Point.Estimate%in%c("<0","No Value"),"Fup.point"] <-  Fig2.table[Fig2.table$Point.Estimate%in%c("<0","No Value"),"Base.Fup.Med"]
#  Fig2.table$Uncertain<-"Yes"
#  Fig2.table[is.na(Fig2.table$CV1),"CV1"] <- -999
#  Fig2.table[Fig2.table$CV1<0.49,"Uncertain"] <- "No"
#  Fig2.table[Fig2.table$CV1==-999,"CV1"] <- NA
#  
#  Fig2a <- ggplot(subset(Fig2.table,!is.na(Base.Fup.Med)),aes(x=Fup.point,y=Base.Fup.Med,shape=Point.Estimate,alpha=Uncertain,colour=Point.Estimate)) +
#           geom_point(size=3)+
#  #         geom_errorbar(aes(ymax = Base.Fup.High, ymin=Base.Fup.Low))+
#     scale_y_log10(label=scientific_10,limits=c(10^-6,2)) +
#     scale_x_log10(label=scientific_10,limits=c(10^-6,2)) +
#    ylab(expression(paste(F[up]," Bayesian Analysis (Single Conc.)"))) +
#    xlab(expression(paste(F[up]," Point Estimate (Traditional Analysis)"))) +
#    geom_abline(intercept = 0, slope = 1,linetype="dashed", colour="Blue") +
#  #  geom_vline(xintercept = 1, size=2,linetype="dashed", colour="red")+
#    geom_vline(xintercept = 0.01, size=2,linetype="dotted", colour="red")+
#    geom_hline(yintercept = 0.01, size=2,linetype="dotted", colour="red")+
#      scale_colour_discrete(name="Point Estimate")+
#      scale_shape_discrete(name="Point Estimate") +
#      scale_alpha_manual(values=c(1,0.3))+
#          theme_bw() +
#       theme(legend.position="bottom", text  = element_text(size=12))+
#       annotate("text", size=8,x = 10^-6, y = 2, label = "a")+
#      guides(alpha=guide_legend(nrow=2,byrow=TRUE),colour=guide_legend(nrow=2,byrow=TRUE),shape=guide_legend(nrow=2,byrow=TRUE))
#  
#  #print(Fig2a)
#  
#  print(paste(sum(Fig2.table$Uncertain=="Yes"),"uncertain estimates using single conc."))
#  print(paste(sum(Fig2.table$Point.Estimate=="<0"),"chemicals with negative point estimates."))
#  print(paste(sum(Fig2.table$Point.Estimate==">1"),"chemicals with Fup > 1."))
#  
#  
#  
#  summary(lm(data=subset(Fig2.table,!is.na(Fup.point)&Fup.point>0&Base.Fup.Med>0),log(Fup.point)~log(Base.Fup.Med)))
#  
#  
#  
#  
#  Fig2.table$Uncertain2<-"Yes"
#  Fig2.table[is.na(Fig2.table$CV2),"CV2"] <- -999
#  Fig2.table[Fig2.table$CV2<0.49,"Uncertain2"] <- "No"
#  Fig2.table[Fig2.table$CV2==-999,"CV2"] <- NA
#  Fig2.table$TopFail <- ""
#  Fig2.table[!is.na(Fig2.table$Affinity.Fup.Med)&Fig2.table$Uncertain2=="No","TopFail"]<-"No"
#  Fig2.table[Fig2.table$TopFail=="No"&!is.na(Fig2.table$Base.Fup.Med)&Fig2.table$Uncertain=="Yes","TopFail"]<-"Single Conc. Only"
#  Fig2.table[!is.na(Fig2.table$Affinity.Fup.Med)&Fig2.table$Uncertain2=="Yes","TopFail"]<-"Yes"
#  Fig2.table[!is.na(Fig2.table$Affinity.Fup.Med)&is.na(Fig2.table$Base.Fup.Med),"TopFail"] <- "Single Conc. Only"
#  Fig2.table[is.na(Fig2.table$Base.Fup.Med),"Base.Fup.Med"] <-  Fig2.table[is.na(Fig2.table$Base.Fup.Med),"Affinity.Fup.Med"]
#  
#  
#  Fig2b <- ggplot(subset(Fig2.table,!is.na(Affinity.Fup.Med)),aes(x=Base.Fup.Med,y=Affinity.Fup.Med,shape=TopFail,alpha=TopFail,colour=TopFail)) +
#           geom_point(size=3)+
#           geom_point(data=subset(Fig2.table,TopFail=="No"),size=3)+
#  #         geom_errorbar(aes(ymax = Fup.High.x, ymin=Fup.Low.x))+
#  #         geom_errorbarh(aes(xmin=Fup.Low.y,xmax=Fup.High.y))+
#     scale_y_log10(label=scientific_10,limits=c(10^-8,2)) +
#     scale_x_log10(label=scientific_10,limits=c(10^-8,2)) +
#    xlab(expression(paste(F[up]," Bayesian Analysis (Single Conc.)"))) +
#    ylab(expression(paste(F[up]," Bayesian Analysis (Three Conc.)"))) +
#    geom_abline(intercept = 0, slope = 1,linetype="dashed", colour="Blue")+
#    scale_colour_discrete(name="Uncertain")+
#    scale_shape_discrete(name="Uncertain") +
#  #    annotate("text",size=8, x = 10^-4, y = 1, label = "B")+
#     scale_alpha_manual(name="Uncertain",values=c(1,0.6,0.3))+
#       theme_bw() +
#    theme(legend.position="bottom", text  = element_text(size=12))+
#        annotate("text", size=8,x = 10^-8, y = 2, label = "b")+
#      guides(alpha=guide_legend(nrow=2,byrow=TRUE),colour=guide_legend(nrow=2,byrow=TRUE),shape=guide_legend(nrow=2,byrow=TRUE))
#  
#  #print(Fig2b)
#  
#  # Need to reduce to unique chemicals.
#  Fupbaseworked <- subset(Fig2.table,!is.na(Base.Fup.Med)&Uncertain=="No")
#  Fupbasenot <- subset(Fig2.table,!is.na(Base.Fup.Med)&Uncertain=="Yes")
#  Fupbaseworked <- subset(Fupbaseworked,!duplicated(CAS))
#  Fupbasenot <- subset(Fupbasenot,!duplicated(CAS))
#  # Also need to count a chemical as a success if it worked for either duplicate:
#  Fupbasenot <- subset(Fupbasenot,!(CAS%in%Fupbaseworked$CAS))
#  
#  Fupaffinityworked <- subset(Fig2.table,!is.na(Affinity.Fup.Med)&Uncertain2=="No")
#  Fupaffinitynot <- subset(Fig2.table,!is.na(Affinity.Fup.Med)&Uncertain2=="Yes")
#  Fupaffinityworked <- subset(Fupaffinityworked,!duplicated(CAS))
#  Fupaffinitynot <- subset(Fupaffinitynot,!duplicated(CAS))
#  Fupaffinitynot <- subset(Fupaffinitynot,!(CAS%in%Fupaffinityworked$CAS))
#  
#  
#  print(paste(dim(Fupbaseworked)[1],"total successful Fup estimates using single conc."))
#  print(paste(signif(dim(Fupbasenot)[1]/(dim(Fupbaseworked)[1]+dim(Fupbasenot)[1])*100,3),"percent failure rate using single conc."))
#  print(paste(dim(Fupaffinityworked)[1],"total successful Fup estimates using protein titration."))
#  print(paste(signif(dim(Fupaffinitynot)[1]/(dim(Fupaffinityworked)[1]+dim(Fupaffinitynot)[1])*100,3),"percent failure rate using protein titration."))
#  print(paste(dim(Fupbasenot)[1],"uncertain estimates using original protocol."))
#  print(paste(dim(Fupaffinitynot)[1],"uncertain estimates using protein titration."))
#  print(paste(dim(Fupaffinityworked)[1]-dim(Fupbaseworked)[1],"chemicals that could not be measured at 100% plasma protein."))
#  
#  Wetmore.data <- httk:::Wetmore.data
#  W2015 <- subset(Wetmore.data,Reference=="Wetmore 2015")
#  
#  W2012 <- subset(Wetmore.data,Reference=="Wetmore 2012")
#  print(paste("The Fup assay failed for ",signif(sum(W2012$Fub==0.005)/length(W2012$Fub)*100,3),"% of chemicals in {Wetmore, 2012} and ",signif(sum(W2015$Fub==0)/length(W2015$Fub)*100,3),"% of chemicals in {Wetmore, 2015}."))
#  
#  
#  Fuplod <- subset(Fig2.table,TopFail=="Single Conc. Only")
#  print(paste("Among the chemicals that were uncertain using the original protocol but could be measured using the new three concentration protocol, the median estimated Fup was ",signif(median(Fuplod$Affinity.Fup.Med),2),"with values as low as ",signif(min(Fuplod$Affinity.Fup.Med),2),"and as high as",signif(max(Fuplod$Affinity.Fup.Med),2),". Previously, a default of 0.005 has been assumed when Fup could not be measured {Rotroff, 2010}.}"))
#  
#  
#  summary(lm(data=subset(Fig2.table,!is.na(Base.Fup.Med)&Affinity.Fup.Med>0&Base.Fup.Med>0),log(Base.Fup.Med)~log(Affinity.Fup.Med)))
#  
#  #dev.new(width=10,height=6)
#  multiplot(Fig2a,Fig2b,cols=2,widths=c(1.75,1.75))

## ----Wambaugh2019.Fig3, eval = execute.vignette-------------------------------
#  Fig1.table$Class <- "Other"
#  Fig1.table[Fig1.table$DTXSID %in% pharma$DTXSID,"Class"]<-"Pharmaceutical"
#  Fig1.table$Class <- as.factor(Fig1.table$Class)
#  
#    Fig3 <- ggplot(Fig1.table,aes(Affinity.Kd.Med,fill=Class))+
#      geom_histogram(binwidth = 0.5)+
#      xlab(expression(paste("Binding Affinity (", mu,"M)",sep=""))) +
#        ylab("Number of chemicals")+
#      scale_x_log10(label=scientific_10)+
#        scale_fill_discrete(name="Chemical Class")+
#        theme_bw()  +
#      theme(legend.position="bottom", text  = element_text(size=18))
#  
#   # dev.new()
#    print(Fig3)

## ----Wambaugh2019.Fig4, eval = execute.vignette-------------------------------
#  clearance.table <- subset(httk::wambaugh2019.raw,!is.na(CLint.1uM.Median))
#  
#  print(paste(sum(clearance.table$CLint.1uM.Median==0),"zero valued median Bayesian posteriors"))
#  print(paste(sum(clearance.table$CLint.1uM.Point==0&clearance.table$CLint.1uM.Median>0,na.rm=TRUE),"zero valued point estimates where median posterior>0"))
#  print(paste(sum(clearance.table$CLint.1uM.Point>0&clearance.table$CLint.1uM.Median==0,na.rm=TRUE),"zero valued median Bayesian posteriors where point estimate was non-zero"))
#  print(paste(sum(is.na(clearance.table$CLint.1uM.Point)&clearance.table$CLint.1uM.Median==0,na.rm=TRUE),"zero valued median Bayesian posteriors where point estimate was NA"))
#  print(paste(sum(is.na(clearance.table$CLint.1uM.Point)&clearance.table$CLint.1uM.Median>0,na.rm=TRUE),"non-zero valued median Bayesian posteriors where point estimate was NA"))
#  
#  
#  
#  
#  clearance.table$Fit <- "Decreasing"
#  clearance.table[clearance.table$CLint.1uM.Median == 0,"Fit"] <- "Median Zero"
#  #clearance.table[is.na(clearance.table$CLint.1uM.Point == 0),"Fit"] <- "Point Est. Missing"
#  for (i in 1:dim(clearance.table)[1])
#    if (!is.na(clearance.table[i,"CLint.1uM.Point"]))
#    {
#      if (clearance.table[i,"CLint.1uM.Point"] == 0) clearance.table[i,"Fit"] <- "Point Est. Zero"
#    } else clearance.table[i,"Fit"] <- "Point Est. Failed"
#  
#  
#  set.seed(123456)
#  clearance.table[is.na(clearance.table$CLint.1uM.Point),"CLint.1uM.Point"]<-runif(sum(is.na(clearance.table$CLint.1uM.Point)),0.1,0.2)
#  clearance.table[clearance.table$CLint.1uM.Point==0,"CLint.1uM.Point"]<-runif(sum(clearance.table$CLint.1uM.Point==0),0.3,0.8)
#  clearance.table[clearance.table$CLint.1uM.Low95th==0,"CLint.1uM.Low95th"]<-0.1
#  clearance.table[clearance.table$CLint.1uM.Median==0,"CLint.1uM.Median"]<-0.1
#  clearance.table[clearance.table$CLint.1uM.High95th==0,"CLint.1uM.High95th"]<-0.1
#  clearance.table[clearance.table$CLint.1uM.High95th>1000,"CLint.1uM.High95th"]<-1000
#  clearance.table[clearance.table$CLint.1uM.Low95th<0.1,"CLint.1uM.Low95th"]<-0.1
#  
#  clearance.fit <- lm(log10(CLint.1uM.Median)~log10(CLint.1uM.Point),data=subset(clearance.table,!is.na(CLint.1uM.Point)&CLint.1uM.Point>1))
#  Fig4 <- ggplot(clearance.table, aes(x=CLint.1uM.Point,y=CLint.1uM.Median,colour=Fit)) +
#    geom_segment(aes(x=CLint.1uM.Point,xend=CLint.1uM.Point,y=CLint.1uM.Low95th,yend=CLint.1uM.High95th),size=1,alpha=0.5)+
#    geom_point(size=3) +
#    scale_y_log10(label=scientific_10,limits=c(10^-1,1000)) +
#    scale_x_log10(label=scientific_10_skip,limits=c(10^-1,1000)) +
#    xlab(expression(paste(CL[int]," (",mu,"L/min/",10^{6}," Hep.) Point Estimate",sep="")))+
#    ylab(expression(paste(CL[int]," (",mu,"L/min/",10^{6}," Hep.) Bayesian",sep="")))+
#    geom_vline(xintercept = 0.25, size=2,colour="black")+
#    geom_vline(xintercept = 0.9, size=2,colour="black")+
#    geom_abline(intercept = 0, slope = 1,linetype="dashed")+
#    scale_colour_discrete(name="Assay Result")+
#    theme_bw() +
#    theme(legend.position="bottom", text  = element_text(size=14))+
#    guides(colour=guide_legend(nrow=2,byrow=TRUE))+
#    annotate("text",x = 10^2, y = 10^0,size=6, label = lm_R2(clearance.fit,prefix=""),parse=TRUE)
#  
#  #dev.new()
#  print(Fig4)
#  
#  summary(clearance.fit)

## ----Wambaugh2019.Fig5, eval = execute.vignette-------------------------------
#  # This section takes a long time because calc_vdist is not yet data.table compatible:
#  ceetox <- as.data.table(subset(httk::wambaugh2019,!is.na(Human.Funbound.plasma)&!is.na(Human.Clint)&!is.na(logP)))
#  httk_cas <- ceetox$CAS[ceetox$CAS %in% get_cheminfo(model="3compartmentss")]
#  
#  # back up chem.physical_and_invtro.data:
#  HTTK.data <- chem.physical_and_invitro.data
#  
#  # Use the point estimates:
#  chem.physical_and_invitro.data <- add_chemtable(httk::wambaugh2019.raw,current.table=chem.physical_and_invitro.data,data.list=list(Compound="Name",CAS="CAS",Clint="CLint.1uM.Point",Funbound.plasma="Fup.point"),reference="Wambaugh 2019",species="Human",overwrite=TRUE)
#  
#  
#  
#  schmitt_cas <- httk_cas[httk_cas %in% get_cheminfo(model="schmitt")]
#  ceetox[CAS%in%schmitt_cas,
#         hlpoint:=log(2)/httk::calc_elimination_rate(chem.cas=CAS),
#         by=.(CAS)]
#  
#  chem.physical_and_invitro.data <- HTTK.data
#  
#  ceetox[CAS%in%schmitt_cas,
#         hl_975:=hlfun_u(chemcas=CAS,
#                            MW=MW,
#                            probs=0.975),
#         by=.(CAS)]
#  
#  ceetox[CAS%in%schmitt_cas,
#         hl_med:=hlfun_u(chemcas=CAS,
#                            MW=MW,
#                            probs=0.5),
#         by=.(CAS)]
#  
#  ceetox[CAS%in%schmitt_cas,
#         hl_25:=hlfun_u(chemcas=CAS,
#                           MW=MW,
#                            probs=0.025),
#         by=.(CAS)]
#  
#  ceetox[,hlpointEstimated:=TRUE]
#  ceetox[is.na(hlpoint),hlpointEstimated:=FALSE]
#  ceetox[is.na(hlpoint),hlpoint:=hl_med]
#  
#  hlfit <- lm(data=subset(ceetox,hlpointEstimated),log10(hlpoint)~log10(hl_med))
#  Fig5a <- ggplot(data=subset(ceetox,hlpointEstimated),aes(x=hlpoint,y=hl_med)) +
#    geom_point(size=3,alpha=0.5) +
#    geom_errorbar(aes(ymax = hl_975, ymin=hl_25),alpha=0.3)+
#    scale_y_log10(label=scientific_10,limits=c(1,10^6)) +
#    scale_x_log10(label=scientific_10,limits=c(1,10^6)) +
#    xlab("half-life (h) Point Estimate")+
#    ylab("Bayesian half-life (h)")+
#    geom_abline(intercept = 0, slope = 1,linetype="dashed",color="blue")+
#    annotate("text", size=8,x = 1, y = 10^6, label = "A")+
#    theme_bw() +
#    theme(text  = element_text(size=18))+
#    annotate("text",x = 3*10^3, y = 10^0,size=5, label = lm_R2(hlfit,prefix=""),parse=TRUE)
#  
#  
#  cssfit <- lm(data=ceetox,log10(cssmed)~log10(cssmed_u))
#  Fig5b <- ggplot(data=ceetox,aes(x=cssmed,y=cssmed_u)) +
#    geom_point(size=3,alpha=0.5) +
#    geom_errorbar(aes(ymin=css25_u,ymax=css975_u),alpha=0.3)+
#    scale_y_log10(label=scientific_10,limits=c(10^-3,10^5)) +
#    scale_x_log10(label=scientific_10,limits=c(10^-3,10^5)) +
#    xlab(expression(paste(C[ss]," Point Estimate (uM)")))+
#    ylab(expression(paste("Bayesian ",C[ss]," (uM)")))+
#    annotate("text", size=8,x = 10^-3, y = 10^5, label = "B")+
#    theme_bw() +
#    theme(text  = element_text(size=18)) +
#    geom_abline(intercept = 0, slope = 1,linetype="dashed",color="blue")+
#    annotate("text",x = 10^2, y = 10^-3,size=5, label = lm_R2(cssfit,prefix=""),parse=TRUE)
#  
#  
#  #dev.new(width=8,height=5)
#  multiplot(Fig5a,Fig5b,cols=2,widths=c(1.75,1.75))

## ----Wambaugh2019.Fig6, eval = execute.vignette-------------------------------
#  set.seed(42)
#  ceetox[CAS%in%httk_cas,
#         css95_uv:=calc_mc_css(chem.cas=CAS,
#                                   clint.pvalue.threshold=1, #Don't need to use p-values here (Bayesian)
#                                   output.units="uM",
#                                   model="3compartmentss",
#                                   species="Human"),
#         by=.(CAS)]
#  
#  set.seed(42)
#  ceetox[CAS%in%httk_cas,
#         css95_v:=calc_mc_css(chem.cas=CAS,
#                                   clint.pvalue.threshold=1, #Don't need to use p-values here (Bayesian)
#                                   output.units="uM",
#                                   clint.meas.cv=NULL,
#                                   fup.meas.cv=NULL,
#                                   clint.pop.cv=0.3,
#                                   fup.pop.cv=0.3,
#                                   model="3compartmentss",
#                                   species="Human"),
#         by=.(CAS)]
#  
#  
#  
#  set.seed(42)
#  ceetox[CAS%in%httk_cas,
#         css95_u:=cssfun_u(chemcas=CAS,
#                            MW=MW),
#         by=.(CAS)]
#  
#  set.seed(42)
#  ceetox[CAS%in%httk_cas,
#         cssmed_u:=cssfun_u(chemcas=CAS,
#                            MW=MW,
#                            probs=0.5),
#         by=.(CAS)]
#  
#  set.seed(42)
#  ceetox[CAS%in%httk_cas,
#         css25_u:=cssfun_u(chemcas=CAS,
#                            MW=MW,
#                            probs=0.025),
#         by=.(CAS)]
#  
#  set.seed(42)
#  ceetox[CAS%in%httk_cas,
#         css975_u:=cssfun_u(chemcas=CAS,
#                            MW=MW,
#                            probs=0.975),
#         by=.(CAS)]
#  
#  ceetox[CAS%in%httk_cas,
#         cssmed:=httk::calc_analytic_css(chem.cas=CAS,
#                                   clint.pvalue.threshold=1,
#                                   output.units="uM",
#                                   model="3compartmentss",
#                                   species="Human",
#                                   suppress.messages=TRUE),
#         by=.(CAS)]
#  
#  
#  
#  # Go back to Bayes estimates:
#  chem.physical_and_invitro.data <- HTTK.data
#  
#  dt_melt <- data.table::melt(ceetox[, .(Compound, CAS, css95_u, css95_v, css95_uv, cssmed)],
#                              id.vars=c("Compound","CAS", "cssmed"),
#                              variable.name="type",
#                              value.name="css95")
#  
#  dt_melt[, type:=gsub(x=type, pattern="css95_uv", replacement="Both", fixed=TRUE)]
#  dt_melt[, type:=gsub(x=type, pattern="css95_u", replacement="Uncertainty", fixed=TRUE)]
#  dt_melt[, type:=gsub(x=type, pattern="css95_v", replacement="Variability", fixed=TRUE)]
#  
#  
#  #now set the factor levels explicitly
#  dt_melt[, type:=factor(type,
#                         levels=c("Uncertainty", "Variability", "Both"))]
#  
#  
#  dt_melt[, Norm:=css95/cssmed]
#  
#  
#  #now set the factor levels explicitly
#  dt_melt[, Compound:=factor(Compound,
#                         levels=dt_melt[type=="Both",Compound][order(dt_melt[type=="Both",Norm])])]
#  
#  
#  Fig6 <- ggplot(data=dt_melt) +
#    geom_point(size=2,alpha=0.7,aes(x=Compound,
#                  y=Norm,
#                  color=type,
#                  shape=type)) +
#     scale_y_log10(label=scientific_10,limits=c(0.9,500)) +
#    ylab(expression(paste("Ratio of ",C[ss]," ",95^th," Percentile to Median Estimate"))) +
#    xlab("Chemicals")+
#    scale_colour_discrete(name=expression(paste(C[ss]," Varied to Reflect")))+
#    scale_shape_discrete(name=expression(paste(C[ss]," Varied to Reflect"))) +
#    theme_bw() +
#    theme(legend.position="bottom",axis.text.x = element_blank())+
#    annotate("text", size=8,x = levels(dt_melt$Compound)[length(unique(dt_melt$Compound))/2], y = 400, label = paste("Median Ratio for Uncertainty:",signif(median(subset(dt_melt,type=="Uncertainty")$Norm),3)))+
#    annotate("text", size=8,x = levels(dt_melt$Compound)[length(unique(dt_melt$Compound))/2], y = 200, label = paste("Median Ratio for Variability:",signif(median(subset(dt_melt,type=="Variability")$Norm),3)))+
#    annotate("text", size=8,x = levels(dt_melt$Compound)[length(unique(dt_melt$Compound))/2], y = 100, label = paste("Median Ratio for Both:",signif(median(subset(dt_melt,type=="Both")$Norm),3)))
#  
#  #dev.new(width=10,height=6)
#  print(Fig6)

## ----Wambaugh2019.Fig7,  eval = execute.vignette------------------------------
#  # Head to ftp://newftp.epa.gov/COMPTOX/High_Throughput_Screening_Data/Previous_Data/ToxCast_Data_Release_Oct_2015/ to get the ToxCast/Tox21 data:
#  # These are the concentrations that caused activity in exess of the background (ACC) "hits".
#  #
#  # # Load the ACC table into an object Tox21.acc:
#  # Tox21.acc <- read.csv("INVITRODB_V2_LEVEL5/EXPORT_LVL5&6_ASID7_TOX21_151020.csv",stringsAsFactors=FALSE)
#  # # Subset to the chemicals of interest:
#  # Tox21.acc <- subset(Tox21.acc,casn%in%Fig1.table$CAS)
#  # # We only need hits:
#  # Tox21.acc <- subset(Tox21.acc,hitc==1)
#  #
#  # # Pare this down to just the data we need:
#  # Tox21.acc <- Tox21.acc[,c("code","chid","chnm","casn","aenm","modl_acc","flags")]
#  #
#  # # In this vignette we use the precalculated quantiles of the ACC's
#  # # to help keep the HTTK package smaller:
#  #
#  # wambaugh2019.tox21 <- NULL
#  # for (this.chem in sort(unique(Tox21.acc$casn)))
#  # {
#  #   this.subset <- subset(Tox21.acc,casn==this.chem)
#  #   this.row <- data.frame(casn=this.chem,
#  #     med.conc =
#  #       median(10^(this.subset$modl_acc)),
#  #     q10.conc =
#  #       quantile(10^(this.subset$modl_acc),0.1),
#  #     low.conc =
#  #       min(10^(this.subset$modl_acc)),
#  #     q90.conc =
#  #       quantile(10^(this.subset$modl_acc),0.9),
#  #     high.conc =
#  #       max(10^(this.subset$modl_acc)),
#  #     stringsAsFactors = F)
#  #   wambaugh2019.tox21  <- rbind(wambaugh2019.tox21, this.row)
#  # }
#  
#  chem.preds <- httk::wambaugh2019.seem3
#  directnhanes.preds <- httk::wambaugh2019.nhanes
#  Tox21.acc.numeric <- httk::wambaugh2019.tox21
#  
#  #Subset to those chemicals with HTS hits:
#  human.hits <- subset(ceetox,CAS%in%Tox21.acc.numeric$casn)
#  
#  # Now calculate the oral equivalent dose for each chemical:
#  # Must make sure that CSS is in units of uM!!
#  for (this.chem in human.hits$CAS)
#  {
#    med.conc <-
#      Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"med.conc"]
#    q10.conc <-
#      Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"q10.conc"]
#    low.conc <-
#      Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"low.conc"]
#    q90.conc <-
#      Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"q90.conc"]
#    high.conc <-
#      Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"high.conc"]
#    css95.v <- human.hits[human.hits$CAS==this.chem][["css95_v"]]
#    css95.uv <- human.hits[human.hits$CAS==this.chem][["css95_uv"]]
#    human.hits[human.hits$CAS==this.chem,"HTS.Median.ACC"] <- med.conc
#    human.hits[human.hits$CAS==this.chem,"HTS.Q10.ACC"] <- q10.conc
#    human.hits[human.hits$CAS==this.chem,"HTS.Q90.ACC"] <- q90.conc
#    human.hits[human.hits$CAS==this.chem,"HTS.Low.ACC"] <- low.conc
#    human.hits[human.hits$CAS==this.chem,"HTS.High.ACC"] <- high.conc
#    human.hits[human.hits$CAS==this.chem,"HTS.Median.equivdose.v"] <- med.conc/css95.v
#    human.hits[human.hits$CAS==this.chem,"HTS.Q10.equivdose.v"] <- q10.conc/css95.v
#    human.hits[human.hits$CAS==this.chem,"HTS.Q90.equivdose.v"] <- q90.conc/css95.v
#    human.hits[human.hits$CAS==this.chem,"HTS.Low.equivdose.v"] <- low.conc/css95.v
#    human.hits[human.hits$CAS==this.chem,"HTS.High.equivdose.v"] <- high.conc/css95.v
#    human.hits[human.hits$CAS==this.chem,"HTS.Median.equivdose.uv"] <- med.conc/css95.uv
#    human.hits[human.hits$CAS==this.chem,"HTS.Q10.equivdose.uv"] <- q10.conc/css95.uv
#    human.hits[human.hits$CAS==this.chem,"HTS.Q90.equivdose.uv"] <- q90.conc/css95.uv
#    human.hits[human.hits$CAS==this.chem,"HTS.Low.equivdose.uv"] <- low.conc/css95.uv
#    human.hits[human.hits$CAS==this.chem,"HTS.High.equivdose.uv"] <- high.conc/css95.uv
#  }
#  
#  
#  
#  #Add the exposure predictions:
#  for (this.chem in human.hits$CAS)
#  {
#    print(this.chem)
#  # If we have direct inferrences from NHANES, use those exposures:
#    if (this.chem %in% directnhanes.preds$CASRN)
#    {
#      human.hits[human.hits$CAS==this.chem,"Exposure.median"] <- directnhanes.preds[directnhanes.preds$CASRN==this.chem,"lP"]
#      human.hits[human.hits$CAS==this.chem,"Exposure.high"] <- directnhanes.preds[directnhanes.preds$CASRN==this.chem,"lP.max"]
#  # Otherwise use the heuristics model (Wambaugh 2014):
#    } else if (this.chem %in% chem.preds$CAS) {
#      human.hits[human.hits$CAS==this.chem,"Exposure.median"] <- chem.preds[chem.preds$CAS==this.chem,"seem3"]
#      human.hits[human.hits$CAS==this.chem,"Exposure.high"] <- chem.preds[chem.preds$CAS==this.chem,"seem3.u95"]
#    }
#  }
#  
#  # Drop chemicals without exposure predictions:
#  human.hits<- subset(human.hits,!is.na(Exposure.median))
#  
#  
#  # This code puts the chemicals into order by margin between exposure and activity:
#  chem.names <- unique(human.hits$Compound)
#  potency <- rep(Inf,times=length(chem.names))
#  names(potency) <- chem.names
#  potency.u <- potency
#  for (this.chem in chem.names)
#  {
#    this.subset <- subset(human.hits,Compound==this.chem)
#    potency[this.chem] <- this.subset$HTS.Q10.equivdose.v/this.subset$Exposure.high
#    potency.u[this.chem] <- this.subset$HTS.Q10.equivdose.uv/this.subset$Exposure.high
#  }
#  potency <- sort(potency)
#  human.hits$Compound <- factor(human.hits$Compound, levels=names(potency))
#  
#  potency.u <- potency.u[names(potency.u)[potency.u<1]]
#  potency.u <- potency.u[names(potency)[potency>1]]
#  
#  new.overlaps <- names(potency.u[!is.na(potency.u)])
#  first.change <- new.overlaps[1]
#  last.change <- new.overlaps[length(new.overlaps)]
#  window.start <- names(potency)[which(names(potency)==first.change)-5]
#  window.end <- names(potency)[which(names(potency)==last.change)+5]
#  window.names <- names(potency)[(which(names(potency)==first.change)-5):(which(names(potency)==last.change)+5)]
#  # Initialize a new plot:
#  Fig7a <- ggplot(data=human.hits) +
#    geom_segment(aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.v,yend=HTS.Q90.equivdose.v),size=1,color="white",alpha=0.5) +
#    geom_rect(mapping=aes(xmin=window.start,xmax=window.end,ymin=10^-9,ymax=10^3),color="grey",alpha=0.5)+
#    geom_segment(aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.v,yend=HTS.Q90.equivdose.v),size=1,color="red",alpha=0.5) +
#    geom_point(aes(x=Compound,y=HTS.Median.equivdose.v),shape=3) +
#    geom_point(aes(x=Compound,y=HTS.Q10.equivdose.v)) +
#    theme_bw() +
#    theme(axis.text.x = element_blank()) +
#    theme(axis.title.x = element_text(size=16)) +
#    theme(axis.title.y = element_text(size=16)) +
#    scale_y_log10(label=scientific_10,limits = c(10^-9,10^3)) +
#    geom_segment(aes(x=Compound,xend=Compound,y=Exposure.high,yend=Exposure.median),size=1,color="blue",alpha=0.5) +
#    geom_point(aes(x=Compound,y=Exposure.median),shape=2) +
#    ylab("Bioactive Dose & Exposure\nmg/kg BW/day\n(Variability Only)") +
#    xlab("Chemicals Ranked By Ratio Between Bioactive Dose and Exposure")+
#    annotate("text", size=8,x = names(potency)[10], y = 100, label = "a")
#  
#  # Initialize a new plot:
#  Fig7c <- ggplot() +
#    geom_segment(data=human.hits,aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.v,yend=HTS.Q90.equivdose.v),size=3,color="grey") +
#    geom_segment(data=human.hits,aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.uv,yend=HTS.Q90.equivdose.uv),size=3,color="red",alpha=0.5) +
#    geom_point(data=human.hits,aes(x=Compound,y=HTS.Median.equivdose.uv),shape=3,size=2) +
#    geom_point(data=human.hits,aes(x=Compound,y=HTS.Q10.equivdose.uv),size=2) +
#    theme_bw() +
#    theme(axis.text.x = element_text(angle = 60, hjust = 1,size=6)) +
#    theme(axis.text.x = element_blank()) +
#    theme(axis.title.x = element_text(size=16)) +
#    theme(axis.title.y = element_text(size=16)) +
#    xlim(window.names) +
#    scale_y_log10(label=scientific_10,limits = c(10^-8,2*10^2)) +
#    geom_segment(data=human.hits,aes(x=Compound,xend=Compound,y=Exposure.high,yend=Exposure.median),size=3,color="blue",alpha=0.5) +
#    geom_segment(data=subset(human.hits,Compound%in%names(potency.u)),aes(x=Compound,xend=Compound,y=10^-8,yend=Exposure.median/1.5),size=2,arrow=arrow(length=unit(0.5,"cm")))+
#    geom_point(data=human.hits,aes(x=Compound,y=Exposure.median),shape=2,size=2) +
#    xlab("") +
#    ylab("Bioactive Dose & Exposure\nmg/kg BW/day\n(Uncertainty and Variability)") +
#    annotate("text", size=8,x = window.names[1], y = 100, label = "b")
#  
#  
#  #print(Fig10b)
#  #dev.new(width=10,height=6)
#  multiplot(Fig7a,Fig7c,cols=1,heights=c(0.5,0.5))
#  
#  write.csv(subset(human.hits,Exposure.high>HTS.Q10.equivdose.uv)[,c("Compound","DSSTox_Substance_Id","Human.Clint","Human.Funbound.plasma","HTS.Q10.ACC","HTS.Q10.equivdose.v","HTS.Q10.equivdose.uv","Exposure.high")],file=paste("SupTable5-",Sys.Date(),".txt",sep=""),row.names=FALSE)

## ----calc.stats,  eval = execute.vignette-------------------------------------
#  Wetmore.chems <- subset(chem.physical_and_invitro.data,regexpr("Wetmore",Human.Clint.Reference)!=-1)[,c("Compound","CAS","DTXSID","Formula")]
#  Wetmore.chems <- subset(Wetmore.chems,CAS%in%get_cheminfo())
#  
#  for (this.cas in Wetmore.chems$CAS)
#  {
#    Wetmore.chems[Wetmore.chems$CAS==this.cas,"Css.Lit"] <- get_lit_css(
#      chem.cas=this.cas,
#      output.units="uM")
#    set.seed(42)
#    Wetmore.chems[Wetmore.chems$CAS==this.cas,"Css.v192"] <- calc_mc_css(
#      chem.cas=this.cas,
#      fup.meas.cv=NULL,
#      clint.meas.cv=NULL,
#      fup.censored.dist=TRUE,
#      output.units="uM")
#    set.seed(42)
#    Wetmore.chems[Wetmore.chems$CAS==this.cas,"Css.v110"] <- calc_mc_css(
#      chem.cas=this.cas,
#      output.units="uM")
#  }
#  
#  median(Wetmore.chems$Css.v110/Wetmore.chems$Css.Lit,na.rm=TRUE)
#  
#  ceetox$Human.Clint2 <- sapply(ceetox$Human.Clint,function(x) as.numeric(strsplit(x,",")[[1]][1]))
#  ceetox$Human.Fup2 <- sapply(ceetox$Human.Funbound.plasma,function(x) as.numeric(strsplit(x,",")[[1]][1]))
#  write.csv(ceetox[,c(1,2,8,9,16,17,6,7,12,30,10,11,15,31,13,14,18:28)],row.names=FALSE,file=paste("Supplemental-Table4-",Sys.Date(),".txt",sep=""))
#  
#  print(paste("Complete HTTK data on",sum(get_cheminfo()%in%ceetox$CAS),"new chemicals."))

Try the httk package in your browser

Any scripts or data that you put into this service are public.

httk documentation built on March 7, 2023, 7:26 p.m.