Nothing
## ----configure_knitr, eval = TRUE---------------------------------------------
knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
## ----clear_memory, eval = TRUE------------------------------------------------
rm(list=ls())
## ----runchunks, eval = TRUE---------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE
## ----setup, eval = execute.vignette-------------------------------------------
# knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
# library(httk)
# library(ggplot2)
# library(gridExtra)
# library(cowplot)
# library(ggrepel)
# library(dplyr)
# library(stringr)
# library(forcats)
# library(smatr)
## ----data, eval = execute.vignette--------------------------------------------
# met_data <- metabolism_data_Linakis2020
# conc_data <- concentration_data_Linakis2020
# #conc_data <- subset(concentration_data_Linakis2020, !(SAMPLING_MATRIX %in%
# # c("EEB","MEB","EB")))
# #conc_data <- concentration_data_Linakis2020
# # subset(concentration_data_Linakis2020, !(SOURCE_CVT %in% c(
# # "11453305")))
# conc_data[,"DOSE_U"] <- ifelse(conc_data[,"DOSE_U"] == "ppm",
# yes = "ppmv",
# conc_data[,"DOSE_U"])
# conc_data[,"ORIG_CONC_U"] <- ifelse(conc_data[,"ORIG_CONC_U"] == "ppm",
# yes = "ppmv",
# conc_data[,"ORIG_CONC_U"])
# # Not sure what to do with percent:
# conc_data <- subset(conc_data,toupper(ORIG_CONC_U) != "PERCENT")
# # Rename this column:
# colnames(conc_data)[colnames(conc_data)=="ORIG_CONC_U"] <- "CONC_U"
# conc_data$ORIGINAL_CONC_U <- conc_data$CONC_U
# conc_data$ORIGINAL_CONC <- conc_data$CONCENTRATION
# # Maybe Linakis et al translated all concentrations to uM?
# conc_data$CONC_U <- "uM"
## ----convert_to_ppmweight, eval = FALSE---------------------------------------
# gas.media <- c("EB","MEB","EEB","EB (+W)")
# gas.units <- unique(subset(conc_data,
# SAMPLING_MATRIX %in% gas.media)$CONC_U)
# target.unit <- "ppmv"
# for (this.unit in gas.units)
# if (this.unit != target.unit)
# {
# these.chems <- unique(subset(conc_data,
# SAMPLING_MATRIX %in% gas.media &
# CONC_U==this.unit)$DTXSID)
# for (this.chem in these.chems)
# {
# this.factor <- convert_units(
# input.units=this.unit,
# output.units=target.unit,
# dtxsid=this.chem, state="gas")
# print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
#
# # Scale the observation
# conc_data[conc_data$DTXSID==this.chem &
# conc_data$SAMPLING_MATRIX %in% gas.media &
# conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
# this.factor * conc_data[
# conc_data$DTXSID==this.chem &
# conc_data$SAMPLING_MATRIX %in% gas.media &
# conc_data$CONC_U==this.unit,"CONCENTRATION"]
# # Change the reported unit
# conc_data[conc_data$DTXSID==this.chem &
# conc_data$SAMPLING_MATRIX %in% gas.media &
# conc_data$CONC_U==this.unit,"CONC_U"] <-
# target.unit
#
# }
# }
## ----normalize_other_units, eval = FALSE--------------------------------------
# tissue.media <- c("VBL","BL","ABL","PL","BL (+W)")
# tissue.units <- unique(subset(conc_data,
# SAMPLING_MATRIX %in% tissue.media)$CONC_U)
# target.unit <- "uM"
# for (this.unit in tissue.units)
# if (this.unit != target.unit)
# {
# these.chems <- unique(subset(conc_data,
# SAMPLING_MATRIX %in% tissue.media &
# CONC_U==this.unit)$DTXSID)
# for (this.chem in these.chems)
# {
# this.factor <- try(convert_units(
# input.units=this.unit,
# output.units=target.unit,
# dtxsid=this.chem))
# print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
#
# # Scale the observation
# conc_data[conc_data$DTXSID==this.chem &
# conc_data$SAMPLING_MATRIX %in% tissue.media &
# conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
# this.factor * conc_data[
# conc_data$DTXSID==this.chem &
# conc_data$SAMPLING_MATRIX %in% tissue.media &
# conc_data$CONC_U==this.unit,"CONCENTRATION"]
# # Change the reported unit
# conc_data[conc_data$DTXSID==this.chem &
# conc_data$SAMPLING_MATRIX %in% tissue.media &
# conc_data$CONC_U==this.unit,"CONC_U"] <-
# target.unit
#
# }
# }
## ----summary, eval = execute.vignette-----------------------------------------
# # Small molecule chemicals
# summary(met_data$AVERAGE_MASS)
# # Generally more lipophilic chemicals
# summary(met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED)
# # Unsurprisingly then, the chemicals are generally less water-soluble
# summary(met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED)
# # ~60% of samples in humans
# table(conc_data$CONC_SPECIES)/nrow(conc_data)*100
# # ~72% of samples are from blood
# table(conc_data$SAMPLING_MATRIX)/nrow(conc_data)*100
## ----scenarios, eval = execute.vignette---------------------------------------
# # Create a dataframe with 1 row for each unique external exposure scenario
# unique_scenarios <- conc_data[with(conc_data,
# order(PREFERRED_NAME,
# CONC_SPECIES,
# SAMPLING_MATRIX,
# as.numeric(as.character(DOSE)),EXP_LENGTH,-TIME)),] %>%
# distinct(DTXSID,DOSE,DOSE_U,EXP_LENGTH,CONC_SPECIES,SAMPLING_MATRIX, .keep_all = TRUE)
## ----runmodel, eval = execute.vignette----------------------------------------
# # Store the output of each simulation:
# simlist <- list()
# # Store the Cvt data relevant to each simulation
# obslist <- list()
# # Conduct one simulation for each unique combination of chemical, species, dose:
# for (i in 1:nrow(unique_scenarios))
# if (unique_scenarios$CASRN[i] %in% get_cheminfo(model="gas_pbtk",
# suppress.messages = TRUE))
# {
# # Identify relevant Cvt data:
# relconc <- subset(conc_data,conc_data$DTXSID == unique_scenarios$DTXSID[i] &
# conc_data$DOSE == unique_scenarios$DOSE[i] &
# conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] &
# conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] &
# conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i])
# obslist[[i]] <- relconc
# #
# #
# #
# #
# #
# # THE FOLLOWING CODE RUNS solve_gas_pbtk FOR EACH SCENARIO
# # (UNIQUE COMBINATION OF CHEMICAL, SPECIES, DOSE, ETC.)
# #
# #
# #
# #
# #
# solver.out <- try(suppressWarnings(as.data.frame(solve_gas_pbtk(
# chem.cas = unique_scenarios$CASRN[i],
# days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]),
# # Make sure we get predicted conc's at the observed times:
# times=unique(c(0,signif(obslist[[i]]$TIME,4))), # days
# tsteps = 500,
# exp.conc = as.numeric(unique_scenarios$DOSE[i]), # SED (06-22-2021) think this is ppmv for all scenarios
# input.units = unique_scenarios$DOSE_U[i], # specify the units for exp.conc (ppmv)
# exp.duration = unique_scenarios$EXP_LENGTH[i], # days
# period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]), # days
# species = as.character(unique_scenarios$CONC_SPECIES[i]),
# monitor.vars = c(
# "Cven",
# "Clung",
# "Cart",
# "Cplasma",
# "Calvppmv",
# "Cendexhppmv",
# "Cmixexhppmv",
# "Calv",
# "Cendexh",
# "Cmixexh",
# "Cmuc",
# "AUC"),
# vmax.km = FALSE,
# suppress.messages = TRUE))))
# #
# #
# #
# #
# #
# #
# #
# #
# #
# #
# if (class(solver.out) %in% "try-error")
# solver.out <- data.frame(time=NA,Conc=NA)
#
# print(solver.out)
# # Get the blood:plasma ratio:
# this.Rb2p <- suppressWarnings(available_rblood2plasma(
# chem.cas=unique_scenarios$CASRN[i],
# species=as.character(unique_scenarios$CONC_SPECIES[i])))
# solver.out$Rb2p <- this.Rb2p
#
# # The column simconc holds the appropriate prediction for the sampling matrix
# # BL : blood
# # EEB : end-exhaled breath
# # MEB : mixed exhaled breath
# # VBL : venous blood
# # ABL : arterial blood
# # EB : unspecified exhaled breath sample (assumed to be EEB)
# # PL: plasma
# # +W with work/exercise
# #
# # The model outputs are provided in the following units:
# # uM: Cven, Clung, Cart, Cplasma, Calv, Cendexh, Cmixexh, Cmuc
# # ppmv: Calvppmv, Cendexhppmv, Cmixexhppmv
# # uM*days: AUC
# if (unique_scenarios$SAMPLING_MATRIX[i] == "VBL" |
# unique_scenarios$SAMPLING_MATRIX[i] == "BL" |
# unique_scenarios$SAMPLING_MATRIX[i] == "BL (+W)")
# {
# solver.out$simconc <- solver.out$Cven*this.Rb2p
# solver.out$unit <- "uM"
# } else if (unique_scenarios$SAMPLING_MATRIX[i] == "ABL") {
# solver.out$simconc <- solver.out$Cart*this.Rb2p
# solver.out$unit <- "uM"
# } else if (unique_scenarios$SAMPLING_MATRIX[i] == "EB" |
# unique_scenarios$SAMPLING_MATRIX[i] == "EEB" |
# unique_scenarios$SAMPLING_MATRIX[i] == "EB (+W)")
# {
# solver.out$simconc <- solver.out$Cendexh # uM
# solver.out$unit <- "uM"
# } else if (unique_scenarios$SAMPLING_MATRIX[i] == "MEB") {
# solver.out$simconc <- solver.out$Cmixexh # uM
# solver.out$unit <- "uM"
# } else if (unique_scenarios$SAMPLING_MATRIX[i] == "PL") {
# solver.out$simconc <- solver.out$Cplasma
# solver.out$unit <- "uM"
# } else {
# solver.out$simconc <- NA
# solver.out$unit <- NA
# }
# simlist[[i]] <- solver.out
# }
## ----Makestudyplots, eval = execute.vignette----------------------------------
# cvtlist <- list()
# for(i in 1:nrow(unique_scenarios))
# {
# plot.data <- simlist[[i]]
# relconc <- obslist[[i]]
#
# if (!is.null(plot.data))
# {
# #Right now this is only calculating real concentrations according to mg/L in blood
# cvtlist[[i]] <- ggplot(data=plot.data, aes(time*24, simconc)) +
# geom_line() +
# xlab("Time (h)") +
# ylab(paste0("Simulated ",
# unique_scenarios$SAMPLING_MATRIX[i],
# "\nConcentration (" ,
# solver.out$unit, ")")) +
# ggtitle(paste0(
# unique_scenarios$PREFERRED_NAME[i],
# " (",
# unique_scenarios$CONC_SPECIES[i],
# ", ",
# round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
# unique_scenarios$DOSE_U[i],
# " for ",
# round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
# "h in ",
# unique_scenarios$SAMPLING_MATRIX[i], ")")) +
# geom_point(data = relconc, aes(TIME*24,CONCENTRATION)) +
# theme(plot.title = element_text(face = 'bold', size = 20),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))+
# theme_bw()
# }
# }
## ----init_dataset, eval = execute.vignette------------------------------------
# # Creation of simulated vs. observed concentration dataset
# unique_scenarios$RSQD <- 0
# unique_scenarios$RMSE <- 0
# unique_scenarios$AIC <- 0
# simobslist <- list()
# obvpredlist <- list()
## ----merge_datasets, eval = execute.vignette----------------------------------
# for(i in 1:length(simlist))
# {
# obsdata <- as.data.frame(obslist[[i]])
# simdata <- as.data.frame(simlist[[i]])
# # skips over anything for which there was no observed data or
# # insufficient information to run simulation:
# if (!is.null(simdata) &
# !is.null(obsdata) &
# dim(simdata)[1]>1)
# {
# # Make sure we are looking at consistent time points:
# simobscomb <- simdata[simdata$time %in% signif(obsdata$TIME,4),]
# obsdata <- subset(obsdata,signif(TIME,4) %in% simobscomb$time)
# # Merge with obsdata
# colnames(obsdata)[colnames(obsdata) ==
# "TIME"] <-
# "obstime"
# # Round to match sim time:
# obsdata$time <- signif(obsdata$obstime,4)
# colnames(obsdata)[colnames(obsdata) ==
# "CONCENTRATION"] <-
# "obsconc"
# colnames(obsdata)[colnames(obsdata) ==
# "PREFERRED_NAME"] <-
# "chem"
# colnames(obsdata)[colnames(obsdata) ==
# "DOSE"] <-
# "dose"
# colnames(obsdata)[colnames(obsdata) ==
# "EXP_LENGTH"] <-
# "explen"
# colnames(obsdata)[colnames(obsdata) ==
# "CONC_SPECIES"] <-
# "species"
# colnames(obsdata)[colnames(obsdata) ==
# "SAMPLING_MATRIX"] <-
# "matrix"
# colnames(obsdata)[colnames(obsdata) ==
# "AVERAGE_MASS"] <-
# "mw"
# colnames(obsdata)[colnames(obsdata) ==
# "CONC_U"] <-
# "CONC_U"
# simobscomb <- suppressWarnings(merge(obsdata[,c(
# "time",
# "obstime",
# "obsconc",
# "chem",
# "dose",
# "explen",
# "species",
# "matrix",
# "mw",
# "CONC_U",
# "ORIGINAL_CONC_U"
# )], simobscomb, by="time", all.x=TRUE))
#
# # Merge with met_data
# this.met_data <- subset(met_data,
# PREFERRED_NAME == simobscomb[1,"chem"] &
# SPECIES == simobscomb[1,"species"])
# colnames(this.met_data)[colnames(this.met_data)=="CHEM_CLASS"] <-
# "chemclass"
# colnames(this.met_data)[colnames(this.met_data) ==
# "OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"] <-
# "logp"
# colnames(this.met_data)[colnames(this.met_data) ==
# "WATER_SOLUBILITY_MOL.L_OPERA_PRED"] <-
# "sol"
# colnames(this.met_data)[colnames(this.met_data) ==
# "HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED"] <-
# "henry"
# colnames(this.met_data)[colnames(this.met_data) ==
# "VMAX"] <-
# "vmax"
# colnames(this.met_data)[colnames(this.met_data) ==
# "KM"] <-
# "km"
# simobscomb <- suppressWarnings(cbind(simobscomb,this.met_data[c(
# "chemclass",
# "logp",
# "sol",
# "henry",
# "vmax",
# "km")]))
# simobslist[[i]] <- simobscomb
# }
# }
## ----time_quartile, eval = execute.vignette-----------------------------------
# for(i in 1:length(simobslist))
# if (!is.null(simobslist[[i]]))
# {
# simobscomb <- simobslist[[i]]
# for (j in 1:nrow(simobscomb))
# {
# max.time <- max(simobscomb$time,na.rm=TRUE)
# if (is.na(max.time)) simobscomb$tquart <- NA
# else if (max.time == 0) simobscomb$tquart <- "1"
# else if (!is.na(simobscomb$time[j]))
# {
# simobscomb$tquart[j] <- as.character(1 +
# floor(simobscomb$time[j]/max.time/0.25))
# simobscomb$tquart[simobscomb$tquart=="5"] <-
# "4"
# } else simobscomb$tquart[j] >- NA
# }
# simobslist[[i]] <- simobscomb
# }
## ----calc_AUC, eval = execute.vignette----------------------------------------
# for(i in 1:length(simobslist))
# if (!is.null(simobslist[[i]]))
# {
# simobscomb <- simobslist[[i]]
# # Calculat the AUC with the trapezoidal rule:
# if (nrow(simobscomb)>1)
# {
# for (k in 2:max(nrow(simobscomb)-1,2,na.rm=TRUE))
# {
# simobscomb$obsAUCtrap[1] <- 0
# simobscomb$simAUCtrap[1] <- 0
# if (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) &
# nrow(simobscomb) >=2)
# {
# simobscomb$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] +
# 0.5*(simobscomb$time[k] - simobscomb$time[k-1]) *
# (simobscomb$obsconc[k] + simobscomb$obsconc[k-1])
# simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] +
# 0.5*(simobscomb$time[k]-simobscomb$time[k-1]) *
# (simobscomb$simconc[k] + simobscomb$simconc[k-1])
# } else {
# simobscomb$obsAUCtrap <- 0
# simobscomb$simAUCtrap <- 0
# }
# }
# } else {
# simobscomb$obsAUCtrap <- 0
# simobscomb$simAUCtrap <- 0
# }
# simobscomb$AUCobs <- max(simobscomb$obsAUCtrap)
# simobscomb$AUCsim <- max(simobscomb$simAUCtrap)
# simobscomb$calcAUC <- max(simobscomb$AUC)
# if (min(simobscomb$time) <= simobscomb$explen[1]*1.03)
# {
# simobscomb$Cmaxobs <- max(simobscomb$obsconc)
# simobscomb$Cmaxsim <- max(simobscomb$simconc)
# } else {
# simobscomb$Cmaxobs <- 0
# simobscomb$Cmaxsim <- 0
# }
# simobslist[[i]] <- simobscomb
# }
## ----calc_error, eval = execute.vignette--------------------------------------
# for(i in 1:length(simobslist))
# if (!is.null(simobslist[[i]]))
# {
# simobscomb <- simobslist[[i]]
# unique_scenarios$RSQD[i] <- 1 - (
# sum((simobscomb$obsconc - simobscomb$simconc)^2) /
# sum((simobscomb$obsconc-mean(simobscomb$obsconc))^2)
# )
# unique_scenarios$RMSE[i] <-
# sqrt(mean((simobscomb$simconc - simobscomb$obsconc)^2))
# unique_scenarios$AIC[i] <-
# nrow(simobscomb)*(
# log(2*pi) + 1 +
# log((sum((simobscomb$obsconc-simobscomb$simconc)^2) /
# nrow(simobscomb)))
# ) + ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
# simobslist[[i]] <- simobscomb
# }
## ----combine_studies, eval = execute.vignette---------------------------------
# obsvpredlist <- list()
# for(i in 1:length(simobslist))
# if (!is.null(simobslist[[i]]))
# {
# simobscomb <- simobslist[[i]]
# obsvpredlist[[i]] <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) +
# geom_point() +
# geom_abline() +
# xlab("Simulated Concentrations (uM)") +
# ylab("Observed Concentrations (uM)") +
# ggtitle(paste0(
# unique_scenarios$PREFERRED_NAME[i],
# " (",
# unique_scenarios$CONC_SPECIES[i],
# ", ",
# round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
# unique_scenarios$DOSE_U[i],
# " for ",
# round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
# "h in ",
# unique_scenarios$SAMPLING_MATRIX[i], ")")) +
# theme_bw() +
# theme(plot.title = element_text(face = 'bold', size = 20),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))
# }
## ----obsvspredplots, eval = execute.vignette----------------------------------
# # Create pdfs of observed vs. predicted concentration plots
# dir.create("Linakis2020")
# pdf("Linakis2020/obvpredplots.pdf", width = 10, height = 10)
# for (i in 1:length(obsvpredlist)) {
# print(obsvpredlist[[i]])
# }
# dev.off()
## ----annotate_study_stats, eval = execute.vignette----------------------------
# for (i in 1:length(cvtlist))
# if (!is.null(cvtlist[[i]]))
# {
# cvtlist[[i]] <- cvtlist[[i]] +
# geom_text(
# x = Inf,
# y = Inf,
# hjust = 1.3,
# vjust = 1.3,
# # size = 6,
# label = paste0(
# "RMSE: ",
# round(unique_scenarios$RMSE[i],digits = 2),
# "\nAIC: ",
# round(unique_scenarios$AIC[i],digits = 2)))# +
# # theme(
# # plot.title = element_text(face = 'bold', size = 15),
# # axis.title.x = element_text(face = 'bold', size = 20),
# # axis.text.x = element_text(size=16),
# # axis.title.y = element_text(face = 'bold', size = 20),
# # axis.text.y = element_text(size = 16),
# # legend.title = element_text(face = 'bold', size = 16),
# # legend.text = element_text(face = 'bold',size = 14))
# }
## ----cvtplots, eval = execute.vignette----------------------------------------
# # Create pdfs of simulated concentration-time plots against observed c-t values
# pdf("Linakis2020/simdataplots.pdf")
# for (i in 1:length(cvtlist)) {
# print(cvtlist[[i]])
# }
# dev.off()
## ----combine_obs_vs_pred, eval = execute.vignette-----------------------------
# simobsfull <- do.call("rbind",simobslist)
# simobsfullrat <- subset(simobsfull, simobsfull$species == "Rat")
# simobsfullhum <- subset(simobsfull, simobsfull$species == "Human")
# unique_scenarios <- subset(unique_scenarios,!is.na(unique_scenarios$RSQD))
## ----Standardize Units, eval = FALSE------------------------------------------
# these.chems <- unique(subset(simobsfull,unit=="ppmv")$chem)
# for (this.chem in these.chems)
# {
# # Use HTTK unit conversion:
# this.factor <- convert_units(
# input.units="ppmv", output.units="um", chem.name=this.chem, state="gas")
#
# # Scale the observation
# simobsfull[simobsfull$chem==this.chem &
# simobsfull$unit=="ppmv","obsconc"] <-
# this.factor * simobsfull[
# simobsfull$chem==this.chem &
# simobsfull$unit=="ppmv","obsconc"]
# # Scale the prediction
# simobsfull[simobsfull$chem==this.chem &
# simobsfull$unit=="ppmv","simconc"] <-
# this.factor * simobsfull[
# simobsfull$chem==this.chem &
# simobsfull$unit=="ppmv","simconc"]
# # Change the reported unit
# simobsfull[simobsfull$chem==this.chem &
# simobsfull$unit=="ppmv","unit"] <-
# "uM"
# }
## ----regression, eval = execute.vignette--------------------------------------
# table(unique_scenarios$CONC_SPECIES)
# nrow(simobsfull) - nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,])
# pmiss <- (nrow(simobsfull) -
# nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,])) /
# nrow(simobsfull) * 100
# missdata <- (simobsfull[
# is.na(simobsfull$simconc) |
# simobsfull$simconc <= 0 |
# simobsfull$obsconc <= 0,])
# t0df <- simobsfull[simobsfull$obstime == 0,]
# lmall <- lm(
# #log transforms:
# log10(simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0]) ~
# #log transforms:
# log10(simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0]))
# #Linear binned 1
# lmsub1 <- lm(
# simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc < 0.1] ~
# simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc < 0.1])
# #Linear binned 2
# lmsub2 <- lm(
# simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc >= 0.1 &
# simobsfull$obsconc < 10] ~
# simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc >= 0.1 &
# simobsfull$obsconc < 10])
# #Linear binned 3
# lmsub3 <- lm(
# simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc >= 10] ~
# simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc >= 10])
# lmrat <- lm(
# log10(simobsfullrat$obsconc[
# !is.na(simobsfullrat$simconc) &
# simobsfullrat$simconc > 0 &
# simobsfullrat$obsconc > 0]) ~
# log10(simobsfullrat$simconc[
# !is.na(simobsfullrat$simconc) &
# simobsfullrat$simconc > 0 &
# simobsfullrat$obsconc > 0]))
# unique(simobsfullrat$chem)
# lmhum <- lm(
# log10(simobsfullhum$obsconc[
# !is.na(simobsfullhum$simconc) &
# simobsfullhum$simconc > 0 &
# simobsfullhum$obsconc > 0]) ~
# log10(simobsfullhum$simconc[
# !is.na(simobsfullhum$simconc) &
# simobsfullhum$simconc > 0 &
# simobsfullhum$obsconc > 0]))
# unique(simobsfullhum$chem)
# concregslope <- summary(lmall)$coef[2,1]
# concregr2 <- summary(lmall)$r.squared
# concregrmse <- sqrt(mean(lmall$residuals^2))
# totalrmse <- sqrt(mean((
# log10(simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0]) -
# log10(simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0]))^2,
# na.rm = TRUE))
# totalmae <- mean(abs(
# log10(simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0]) -
# log10(simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0])),
# na.rm = TRUE)
# totalaic <- nrow(
# simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc >0 &
# simobsfull$obsconc >
# 0,]) *
# (log(2*pi) +
# 1 +
# log((sum(
# (simobsfull$obsconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0] -
# simobsfull$simconc[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0])^2,
# na.rm=TRUE) /
# nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,])))) +
# ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
# mispred <- table(abs(
# log10(simobsfull$simconc) -
# log10(simobsfull$obsconc))>2 &
# simobsfull$simconc>0)
# mispred[2]
# mispred[2] / nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc >0 &
# simobsfull$obsconc > 0,])*100
# overpred <- table(
# log10(simobsfull$simconc) -
# log10(simobsfull$obsconc)>2 &
# simobsfull$simconc>0)
# overpred[2]
# overpred[2] / nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc >0 &
# simobsfull$obsconc > 0,])*100
# underpred <- table(
# log10(simobsfull$obsconc) -
# log10(simobsfull$simconc)>2 &
# simobsfull$simconc>0)
# underpred[2]
# underpred[2] / nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc >0 &
# simobsfull$obsconc > 0,])*100
# mispredhalf <- table(abs(
# log10(simobsfull$simconc) -
# log10(simobsfull$obsconc))>0.5 &
# simobsfull$simconc>0)
# mispredhalf[2]
# mispredhalf[2] / nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc >0 &
# simobsfull$obsconc > 0,])*100
# overpredhalf <- table(
# log10(simobsfull$simconc) -
# log10(simobsfull$obsconc)>0.5 &
# simobsfull$simconc>0)
# overpredhalf[2]
# overpredhalf[2] / nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc >0 &
# simobsfull$obsconc > 0,])*100
# underpredhalf <- table(
# log10(simobsfull$obsconc) -
# log10(simobsfull$simconc)>0.5 &
# simobsfull$simconc>0)
# underpredhalf[2]
# underpredhalf[2] / nrow(simobsfull[
# !is.na(simobsfull$simconc) &
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,])*100
# chemunderpred <- subset(simobsfull,
# log10(simobsfull$simconc) -
# log10(simobsfull$obsconc) < 0 &
# simobsfull$simconc > 0)
# table(chemunderpred$chemclass) / table(simobsfull$chemclass)*100
## ----obspredFig2, eval = execute.vignette-------------------------------------
# fig2 <- ggplot(
# data = simobsfull[
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,],
# aes(x = log10(simconc), y = log10(obsconc))) +
# geom_point(
# color = ifelse(
# abs(
# log10(simobsfull[
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,]$simconc) -
# log10(simobsfull[
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,]$obsconc)) >2,
# 'red',
# 'black')) +
# geom_abline() +
# xlab("Log(Simulated Concentrations)") +
# ylab("Log(Observed Concentrations)") +
# theme_bw() +
# geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall')) +
# geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
# geom_text(
# x = Inf,
# y = -Inf,
# hjust = 1.05,
# vjust = -0.15,
# size = 8,
# label = paste0(
# # "Regression slope: ",
# # round(summary(lmall)$coef[2,1],digits = 2),
# "\nRegression R^2: ",
# round(summary(lmall)$r.squared,digits = 2),
# "\nRegression RMSE: ",
# round(sqrt(mean(lmall$residuals^2)),digits = 2),
# "\nRMSE (Identity): ",
# round(totalrmse,digits = 2)
# # "\n% Missing:",
# # round(pmiss, digits = 2), "%")
# )) +
# #geom_text(
# # data = simobsfull[
# # abs(log10(simobsfull$simconc) - log10(simobsfull$obsconc))>7 &
# # simobsfull$simconc>0 & simobsfull$obsconc > 0,],
# # aes(label = paste(chem,species,matrix)),
# ## fontface = 'bold',
# # check_overlap = TRUE,
# # size = 3.5,
# # hjust = 0.5,
# # vjust = -0.8) +
# scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
# theme(
# plot.title = element_text(face = 'bold', size = 15),
# axis.title.x = element_text(face = 'bold', size = 30),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 30),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 24),
# legend.text = element_text(face = 'bold',size = 24))
# fig2 #Display plot in R
## ----make_how_to_add_models_version, eval=execute.vignette--------------------
# library(scales)
# # Function for formatting tick labels:
# scientific_10 <- function(x) {
# out <- gsub("1e", "10^", scientific_format()(x))
# out <- gsub("\\+","",out)
# out <- gsub("10\\^01","10",out)
# out <- parse(text=gsub("10\\^00","1",out))
# }
#
# font.size.large <- 10
# font.size.small <- 8
#
# figaddmodels <- ggplot(
# data = simobsfull[
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,],
# aes(x = simconc, y = obsconc)) +
# geom_point(
# color = ifelse(
# abs(
# log10(simobsfull[
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,]$simconc) -
# log10(simobsfull[
# simobsfull$simconc > 0 &
# simobsfull$obsconc > 0,]$obsconc)) >2,
# 'red',
# 'black'),alpha=0.15) +
# geom_abline() +
# scale_y_log10(label=scientific_10,limits=c(1e-4,1e4))+
# scale_x_log10(label=scientific_10,limits=c(1e-4,1e4))+
# xlab("Simulated Concentrations") +
# ylab("Observed Concentrations") +
# theme_bw() +
# geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall', linetype="Overall")) +
# geom_smooth(method = 'lm', se = FALSE, aes(color = species, linetype = species)) +
# geom_text(
# x = 2,
# y = -1,
# size = 4,
# label = paste0("RMSLE: ",
# round(totalrmse,digits = 2)
# )) +
# scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
# scale_linetype_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
# theme(
# plot.title = element_text(face = 'bold', size = font.size.small),
# axis.title.x = element_text(face = 'bold', size = font.size.large),
# axis.text.x = element_text(size=font.size.small),
# axis.title.y = element_text(face = 'bold', size = font.size.large),
# axis.text.y = element_text(size = font.size.small),
# legend.title = element_text(face = 'bold', size = font.size.large),
# legend.text = element_text(face = 'bold',size = font.size.large))
# print(figaddmodels)
# ggsave("c:/users/jwambaug/AddModelsFig1.tiff", width=6, height=4, dpi=300)
#
# counts <- simobsfull[,c("chem","dose","explen","species")]
# counts <- subset(counts,!duplicated(counts))
# paste(length(unique(counts$chem)),
# "chemicals across",dim(counts)[1],
# "experimental conditions in",
# length(unique(counts$species)),"species.")
## ----write_figure2, eval = FALSE----------------------------------------------
# pdf("Linakis2020/Figure2.pdf", width = 10, height = 10)
# print(fig2)
# dev.off()
## ----calculations, eval = execute.vignette------------------------------------
# # Create data and run calculations for populating plots
# cmaxfull <- subset(simobsfull, !duplicated(simobsfull$AUCobs) & simobsfull$Cmaxobs != 0)
# cmaxobs <- cmaxfull$Cmaxobs
# cmaxsim <- cmaxfull$Cmaxsim
# cmaxobs <- cmaxobs[!is.nan(cmaxsim)]
# cmaxsim <- cmaxsim[!is.nan(cmaxsim)]
# cmaxsim[!is.finite(log10(cmaxsim))] <- NA
# cmaxlm <- lm(log10(cmaxobs)~log10(cmaxsim), na.action = na.exclude)
# cmaxvcbkg <- subset(cmaxfull,
# paste(
# cmaxfull$chem,
# cmaxfull$dose,
# cmaxfull$explen,
# cmaxfull$species,
# cmaxfull$matrix) %in%
# paste(
# t0df$chem,
# t0df$dose,
# t0df$explen,
# t0df$species,
# t0df$matrix))
# cmaxvcbkg$cmaxcbkgratio <- cmaxvcbkg$Cmaxobs / cmaxvcbkg$obsconc
# cmaxvcbkg$adjustedCmaxsim <- cmaxvcbkg$Cmaxsim - cmaxvcbkg$obsconc
# aucfull <-subset(simobsfull,
# !duplicated(simobsfull$AUCobs) &
# simobsfull$AUCobs != 0)
# aucobs <- aucfull$AUCobs
# aucsim <- aucfull$AUCsim
# aucobs <- aucobs[!is.nan(aucsim)]
# aucsim <- aucsim[!is.nan(aucsim)]
# aucsim[!is.finite(log10(aucsim))] <- NA
# auclm <- lm(log10(aucobs)~log10(aucsim), na.action = na.exclude)
# cmaxslope <- summary(cmaxlm)$coef[2,1]
# cmaxrsq <- summary(cmaxlm)$r.squared
# totalrmsecmax <- sqrt(mean((log10(cmaxfull$Cmaxsim) -
# log10(cmaxfull$Cmaxobs))^2, na.rm = TRUE))
# cmaxmiss <- nrow(cmaxfull[
# abs(log10(cmaxfull$Cmaxsim) -
# log10(cmaxfull$Cmaxobs)) > 1,])
# cmaxmissp <- nrow(cmaxfull[
# abs(log10(cmaxfull$Cmaxsim) -
# log10(cmaxfull$Cmaxobs)) > 1,]) /
# nrow(cmaxfull) * 100
# cmaxmisschem <- table(cmaxfull[
# abs(log10(cmaxfull$Cmaxsim) -
# log10(cmaxfull$Cmaxobs)) > 1,]$chem)
# aucslope <- summary(auclm)$coef[2,1]
# aucrsq <- summary(auclm)$r.squared
# totalrmseauc <- sqrt(mean((
# log10(aucfull$AUCsim) -
# log10(aucfull$AUCobs))^2, na.rm = TRUE))
# aucmiss <- nrow(aucfull[
# abs(log10(aucfull$AUCsim) -
# log10(aucfull$AUCobs)) > 1,])
# aucmissp <- nrow(aucfull[
# abs(log10(aucfull$AUCsim) -
# log10(aucfull$AUCobs)) > 1,]) /
# nrow(aucfull) * 100
# aucmisschem <- table(aucfull[
# abs(log10(aucfull$AUCsim) -
# log10(aucfull$AUCobs)) > 1,]$chem)
## ----Fig4plot, eval = execute.vignette----------------------------------------
# cmaxp <- ggplot(data = cmaxfull, aes(x = log10(Cmaxsim), y = log10(Cmaxobs))) +
# geom_point(color =
# ifelse(abs(log10(cmaxfull$Cmaxsim) -log10(cmaxfull$Cmaxobs))>=2, "red","black")) +
# geom_abline() +
# xlab("Log(Simulated Max Concentration)") +
# ylab("Log(Observed Max Concentration)") +
# theme_bw() +
# geom_smooth(method = 'lm', se = FALSE, aes(color = 'Overall')) +
# geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
# geom_text(x = Inf,
# y = -Inf,
# hjust = 1.05,
# vjust = -0.15,
# # size = 6,
# label = paste0("Regression slope: ",
# round(summary(cmaxlm)$coef[2,1],digits = 2),
# "\nRegression R^2: ",
# round(summary(cmaxlm)$r.squared,digits = 2))) +
# geom_text_repel(
# data = cmaxfull[
# (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
# log10(cmaxfull$Cmaxsim) > 2,],
# aes(label = paste(chem,species,matrix)),
# force = 2,
# # size = 5.3,
# fontface = 'bold',
# color = 'black',
# hjust = -0.05,
# vjust = 0.5) +
# scale_y_continuous(lim = c (-1,5)) +
# scale_x_continuous(lim = c(-1,5)) +
# geom_text_repel(
# data = cmaxfull[
# (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
# log10(cmaxfull$Cmaxsim) <= 2,],
# aes(label = paste(chem,species,matrix)),
# nudge_x = 0.0,
# nudge_y = -0.2,
# force = 2,
# # size = 5.3,
# fontface = 'bold',
# color = 'black',
# hjust = -0.05,
# vjust = 0.5) +
# geom_text(
# data = cmaxfull[
# (log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))<=-2,],
# aes(label = paste(chem,species,matrix)),
# # size = 5.3,
# fontface = 'bold',
# color = 'black',
# hjust = 0.5,
# vjust = -0.7) +
# scale_color_discrete(
# name = 'Species',
# breaks = c("Overall","Human","Rat")) #+
# # theme(plot.title = element_text(face = 'bold', size = 10),
# # axis.title.x = element_text(face = 'bold', size = 10),
# # axis.text.x = element_text(size=8),
# # axis.title.y = element_text(face = 'bold', size = 10),
# # axis.text.y = element_text(size = 8),
# # legend.title = element_text(face = 'bold', size = 8),
# # legend.text = element_text(face = 'bold',size = 8))
# cmaxp
# aucp <- ggplot(
# data = aucfull,
# aes(x = log10(AUCsim), y = log10(AUCobs))) +
# geom_point(color =
# ifelse(abs(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2, "red","black")) +
# geom_abline() +
# xlab("Log(Simulated AUC)") +
# ylab("Log(Observed AUC)") +
# theme_bw() +
# geom_smooth(method = 'lm', se = FALSE, aes(color = "Overall")) +
# geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
# geom_text(
# x = Inf,
# y = -Inf,
# hjust = 1.05,
# vjust = -0.15,
# # size = 6,
# label = paste0(
# "Regression slope: ",
# round(summary(auclm)$coef[2,1],digits = 2),
# "\nRegression R^2: ",
# round(summary(auclm)$r.squared,digits = 2))) +
# geom_text_repel(
# data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2,],
# aes(label = paste(chem,species,matrix)),
# # size = 5.3,
# fontface = 'bold',
# color = 'black',
# hjust = -0.05,
# vjust = 0.5) +
# scale_y_continuous(lim = c (-2,4)) +
# scale_x_continuous(lim = c(-2,4)) +
# geom_text(
# data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))<=-2,],
# aes(label = paste(chem,species,matrix)),
# # size = 5.3,
# fontface = 'bold',
# color = 'black',
# hjust = 0.5,
# vjust = -0.8) +
# scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+
# # theme(
# # plot.title = element_text(face = 'bold', size = 15),
# # axis.title.x = element_text(face = 'bold', size = 20),
# # axis.text.x = element_text(size=16),
# # axis.title.y = element_text(face = 'bold', size = 20),
# # axis.text.y = element_text(size = 16),
# # legend.title = element_text(face = 'bold', size = 16),
# # legend.text = element_text(face = 'bold',size = 14))
# aucp
## ----write_figure4, eval = execute.vignette-----------------------------------
# pdf("Linakis2020/Figure4.pdf", width = 20, height = 10)
# plot_grid(cmaxp,aucp,nrow = 1, labels = c('A','B'), label_size = 30)
# dev.off()
## ----Fig3plot, eval = execute.vignette----------------------------------------
# simobsfull$aggscen <- as.factor(paste(simobsfull$chem,
# simobsfull$species,
# simobsfull$matrix))
# chem.lm <- lm(
# log10(simconc) - log10(obsconc) ~
# aggscen,
# data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,])
# chem.res <- resid(chem.lm)
# # Number of observations per chemical class
# numpt <- simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,] %>%
# group_by(chemclass) %>% summarize(n = paste("n =", length(simconc)))
# fig3 <- ggplot(
# data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
# aes(x = aggscen, y = log10(simconc)-log10(obsconc), fill = chemclass)) +
# geom_boxplot() +
# geom_hline(yintercept = 0) +
# geom_hline(yintercept = 2, linetype = 2)+
# geom_hline(yintercept = -2, linetype = 2)+
# xlab("Exposure Scenario") +
# ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
# facet_wrap(vars(chemclass), scales = 'free_x', nrow = 1) + #35 by 12 for poster
# theme_bw() +
# geom_text(
# data = numpt,
# aes(x = Inf, y = -Inf, hjust = 1.05, vjust = -0.5, label = n),
# size = 10,
# colour = 'black',
# inherit.aes = FALSE,
# parse = FALSE) +
# theme(
# axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5,size = 20, face = 'bold'),
# strip.text.x = element_text(face = 'bold', size = 24),
# legend.position = 'none',
# axis.title.x = element_text(face = 'bold', size = 30),
# axis.title.y = element_text(face = 'bold', size = 30),
# axis.text.y = element_text(face = 'bold',size = 25, color = 'black'))
# fig3
## ----write_figure3, eval = execute.vignette-----------------------------------
# pdf("Linakis2020/Figure3.pdf", width = 40, height = 13)
# print(fig3)
# dev.off()
## ----FigS1, eval = execute.vignette-------------------------------------------
# figs1a <- ggplot(
# data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
# aes(x = tquart, y = log10(simconc)-log10(obsconc))) +
# geom_boxplot()+
# geom_hline(yintercept = 0)+
# geom_hline(yintercept = 2, linetype = 2)+
# geom_hline(yintercept = -2, linetype = 2)+
# xlab("\nTime Quartile\n") +
# ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
# theme_bw()+
# theme(
# axis.text.x = element_text(size = 20, face = 'bold'),
# strip.text.x = element_text(face = 'bold', size = 20),
# legend.position = 'none',
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 20, face = 'bold'))
# figs1a
# figs1b <- ggplot(
# data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
# aes(x = mw, y = log10(simconc)-log10(obsconc))) +
# geom_point()+
# geom_hline(yintercept = 0)+
# geom_hline(yintercept = 2, linetype = 2)+
# geom_hline(yintercept = -2, linetype = 2)+
# xlab("\nMolecular Weight (g/mol)\n") +
# ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
# theme_bw()+
# theme(
# axis.text.x = element_text(size = 20, face = 'bold'),
# strip.text.x = element_text(face = 'bold', size = 20),
# legend.position = 'none',
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 20, face = 'bold'))
# figs1b
# figs1c <- ggplot(
# data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
# aes(x = logp, y = log10(simconc)-log10(obsconc))) +
# geom_point()+
# geom_hline(yintercept = 0)+
# geom_hline(yintercept = 2, linetype = 2)+
# geom_hline(yintercept = -2, linetype = 2)+
# xlab("\nLog P") +
# ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
# theme_bw() +
# theme(
# axis.text.x = element_text(size = 20, face = 'bold'),
# strip.text.x = element_text(face = 'bold', size = 20),
# legend.position = 'none',
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 20, face = 'bold'))
# figs1c
# figs1d <- ggplot(
# data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
# aes(x = sol, y = log10(simconc)-log10(obsconc))) +
# geom_point()+
# geom_hline(yintercept = 0)+
# geom_hline(yintercept = 2, linetype = 2)+
# geom_hline(yintercept = -2, linetype = 2)+
# xlab("\nSolubility (mol/L)") +
# ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
# scale_x_log10()+
# theme_bw() +
# theme(
# axis.text.x = element_text(size = 20, face = 'bold'),
# strip.text.x = element_text(face = 'bold', size = 20),
# legend.position = 'none',
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 20, face = 'bold'))
# figs1d
## ----write_figures1, eval = execute.vignette----------------------------------
# pdf("Linakis2020/FigureS1.pdf", width = 20, height = 20)
# plot_grid(figs1a,figs1b,figs1c,figs1d,nrow = 2, labels = c('A','B','C','D'), label_size = 30)
# dev.off()
## ----SupTable2, eval = execute.vignette---------------------------------------
# senschem <- list()
# sensslope <- list()
# sensrsq <- list()
# sensregrmse <- list()
# senstotalrmse <- list()
# senspmiss <- list()
# senscmaxslope <- list()
# senscmaxrsq <- list()
# senstotalrmsecmax <- list()
# sensaucslope <- list()
# sensaucrsq <- list()
# senstotalrmseauc <- list()
# for (i in 1:nrow(simobsfull))
# {
# simobsfullsens <- subset(simobsfull,simobsfull$chem != simobsfull$chem[i])
# senschem[i] <- as.character(simobsfull$chem[i])
# senslm <- lm(
# log10(simobsfullsens$obsconc[
# !is.na(simobsfullsens$simconc) &
# simobsfullsens$simconc > 0 &
# simobsfullsens$obsconc > 0]) ~
# log10(simobsfullsens$simconc[
# !is.na(simobsfullsens$simconc) &
# simobsfullsens$simconc >0 &
# simobsfullsens$obsconc > 0]))
# sensslope[i] <- round(summary(senslm)$coef[2,1],digits = 2)
# sensrsq[i] <- round(summary(senslm)$r.squared,digits = 2)
# sensregrmse[i] <- round(sqrt(mean(senslm$residuals^2)),digits = 2)
# senstotalrmse[i] <- round(sqrt(mean((
# log10(simobsfullsens$simconc[
# !is.na(simobsfullsens$simconc) &
# simobsfullsens$simconc >0 &
# simobsfullsens$obsconc > 0]) -
# log10(simobsfullsens$obsconc[
# !is.na(simobsfullsens$simconc) &
# simobsfullsens$simconc >0 &
# simobsfullsens$obsconc > 0]))^2,
# na.rm = TRUE)), digits = 2)
# senspmiss[i] <- round((nrow(simobsfullsens) -
# nrow(simobsfullsens[
# !is.na(simobsfullsens$simconc) &
# simobsfullsens$simconc >0 &
# simobsfullsens$obsconc > 0,])) / nrow(simobsfullsens) * 100,
# digits = 2)
# senscmaxfull <- subset(simobsfullsens, !duplicated(simobsfullsens$Cmaxobs))
# senscmaxlm <- lm(
# log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]) ~
# log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]),
# na.action = na.exclude)
# sensaucfull <-subset(simobsfullsens, !duplicated(simobsfullsens$AUCobs))
# sensauclm <- lm(
# log10(aucfull$AUCobs[aucfull$AUCobs>0]) ~
# log10(aucfull$AUCsim[aucfull$AUCobs>0]),
# na.action = na.exclude)
# senscmaxslope[i] <- round(summary(senscmaxlm)$coef[2,1],digits = 2)
# senscmaxrsq[i] <- round(summary(senscmaxlm)$r.squared,digits = 2)
# senstotalrmsecmax[i] <- sqrt(mean((log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]) - log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]))^2, na.rm = TRUE))
# sensaucslope[i] <- round(summary(sensauclm)$coef[2,1],digits = 2)
# sensaucrsq[i] <- round(summary(sensauclm)$r.squared,digits = 2)
# senstotalrmseauc[i] <- sqrt(mean((log10(sensaucfull$AUCsim[sensaucfull$AUCobs>0]) - log10(sensaucfull$AUCobs[sensaucfull$AUCobs>0]))^2, na.rm = TRUE))
# }
# sensitivitydf <- data.frame(Chemical <- as.character(senschem),
# sensSlope <- as.numeric(sensslope),
# sensRsq <- as.numeric(sensrsq),
# sensRegrmse <- as.numeric(sensregrmse),
# sensTotrmse <- as.numeric(senstotalrmse),
# sensPmiss <- as.numeric(senspmiss),
# sensCmaxslope <- as.numeric(senscmaxslope),
# sensCmaxrsq <- as.numeric(senscmaxrsq),
# sensCmaxrmse <- as.numeric(senstotalrmsecmax),
# sensAUCslope <- as.numeric(sensaucslope),
# sensAUCrsq <- as.numeric(sensaucrsq),
# sensAUCrmse <- as.numeric(senstotalrmseauc),
# stringsAsFactors=FALSE)
# sensitivitydf <- subset(sensitivitydf,!duplicated(sensitivitydf$Chemical....as.character.senschem.))
# names(sensitivitydf) <- c('Chemical Dropped','Regression Slope','Regression R^2','Regression RMSE','Orthogonal RMSE', 'Percent Data Censored','Cmax Regression Slope','Cmax Regression R^2','Cmax Orthogonal RMSE','AUC Regression Slope','AUC Regression R^2','AUC Orthogonal RMSE')
# notdropped <- c('None',concregslope,concregr2,concregrmse,totalrmse,pmiss,cmaxslope,cmaxrsq,totalrmsecmax,aucslope,aucrsq,totalrmseauc)
# sensitivitydf <- rbind(notdropped, sensitivitydf)
# sensitivitydf[,-1] <- sapply(sensitivitydf[,-1],as.numeric)
# sensitivitydf[,-1] <- round(sensitivitydf[,-1],2)
# # Clean up and write file
# rm(chem.lm, obvpredplot, p, pdata1, plot.data, plots, relconc, sensaucfull, sensauclm, sensaucrsq, sensaucslope, senschem, senscmaxfull, senscmaxlm, senscmaxrsq, senscmaxslope, senslm, senspmiss, sensregrmse, sensrsq, sensslope, senstotalrmse, senstotalrmseauc, senstotalrmsecmax, solve, AUCrmse, AUCrsq, AUCslope, chem.res, Chemical, Cmaxrmse, Cmaxrsq, Cmaxslope, colors, count, i, j, k, met_col, name, name1, Pmiss, Regrmse, Rsq, Slope, rem, Totrmse)
#
# write.csv(sensitivitydf, 'supptab2.csv',row.names = FALSE)
## ----SupTable1, eval = execute.vignette---------------------------------------
# supptab1 <- subset(unique_scenarios, !duplicated(unique_scenarios$PREFERRED_NAME) | !duplicated(unique_scenarios$SOURCE_CVT) | !duplicated(unique_scenarios$CONC_SPECIES))
# for(i in 1:nrow(supptab1)){
# tryCatch({
# supptab1$Metabolism_Source[i] <- met_data$SOURCE_MET[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Log_P[i] <- met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Solubility[i] <- met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Blood_Air_Partition_Coefficient[i] <- met_data$CALC_PBA[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Chem_Class[i] <- met_data$CHEM_CLASS[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Species[i] <- met_data$SPECIES[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Vmax[i] <- met_data$VMAX[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# supptab1$Km[i] <- met_data$KM[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
# }, error = function(e){})
# }
# supptab1 <- supptab1[c('PREFERRED_NAME','DTXSID','CASRN','Chem_Class','AVERAGE_MASS','Log_P','Solubility','Blood_Air_Partition_Coefficient','Species','Vmax','Km','Metabolism_Source','SOURCE_CVT')]
# names(supptab1) <- c('Chemical','DTXSID','CASRN','CAMEO Chemical Class','Molecular Weight (g/mol)','Log P','Solubility (mol/L)','Blood Air Partition Coefficient','Available Species Data','Vmax (pmol/min/10^6 cells)','KM (uM)','Metabolism Data Source','Concentration-Time Data Source')
# write.csv(supptab1, "supptab1.csv", row.names = FALSE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.