Nothing
## ----configure_knitr, eval = TRUE---------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = '#>')
options(rmarkdown.html_vignette.check_title = FALSE)
## ----clear_memory, eval = TRUE------------------------------------------------
rm(list=ls())
## ----runchunks, eval = TRUE---------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE
## ----load_packages, eval = execute.vignette-----------------------------------
# #
# #
# # Setup
# #
# #
# library(httk) # run Armitage.R and Kramer.R in vitro distribution models
# library(tidyverse) # data wrangling
# library(data.table) # data table functionality
# library(ggplot2) # plotting
# library(ggrepel) # jitter labels in plots
# library(ggpubr) # arrange plots for publication
# library(viridis) # colorblind palette
# library(scales) # required for label_log
#
# #save figures and tables to working directory: yes/no
# save_output = FALSE
#
# #for saving outputs
# data.date <- "16JUL2025"
# path_out <- "C:/Users/mscherer/OneDrive - Environmental Protection Agency (EPA)
# /Profile/Desktop/IVDManuscriptData/manuscript figures 16JUL25/"
#
## ----rmsle_func, eval = execute.vignette--------------------------------------
# #function to calculate RMSLE
# sle <- function (actual, predicted)
# {
# log.act <- log10(actual)
# log.pred <- log10(predicted)
# good <- !is.na(log.act) & is.finite(log.act) &
# !is.na(log.pred) & is.finite(log.pred)
# log.act <- log.act[good]
# log.pred <- log.pred[good]
# return((log.act - log.pred)^2)
# }
# msle <- function (actual, predicted)
# {
# mean(sle(actual, predicted))
# }
# rmsle <-function (actual, predicted)
# {
# sqrt(msle(actual, predicted))
# }
## ----load_IVD_Manuscript_data, eval = execute.vignette------------------------
#
# IVD_Manuscript_data <- httk::Scherer2025.IVD
#
# cat(paste("Summarized data from ",
# length(unique(IVD_Manuscript_data$Citation)),
# " studies covering ",
# length(unique(IVD_Manuscript_data$ChemicalName)),
# " unique chemicals ",
# "with ",
# length(IVD_Manuscript_data$CellConcentration_uM),
# " observations total.",
# sep="")
# )
#
# #14 studies, 43 chemicals, 153 observations
#
# #number each row
# IVD_Manuscript_data[,X:=1:length(IVD_Manuscript_data$ChemicalName)]
#
## ----invitrodescriptors, eval = execute.vignette------------------------------
#
# ### Create assay_component_endpoint_name columns for assay parameters ###
#
# #create assay_name column
# IVD_Manuscript_data[, assay_name :=
# gsub(", ", "", IVD_Manuscript_data$Citation)]
#
# #create the assay_component_endpoint_name column
# IVD_Manuscript_data[, assay_component_endpoint_name :=
# paste("IVD", gsub(", ", "", assay_name), sep="")]
#
# #tack on the "interesting" part for each assay to create the endpoint_name
# IVD_Manuscript_data[assay_component_endpoint_name=="IVDBellwon2015b" |
# assay_component_endpoint_name == "IVDBroeders2013" |
# assay_component_endpoint_name == "IVDWei2010",
# assay_component_endpoint_name :=
# paste(assay_component_endpoint_name, CellType, sep="_")]
# #these two have multiple concentrations or time points
# IVD_Manuscript_data[assay_component_endpoint_name=="IVDMundy2004" |
# assay_component_endpoint_name=="IVDLundquist2014",
# assay_component_endpoint_name :=
# paste(paste(assay_component_endpoint_name,
# paste(ExposureDuration_hrs, "hr", sep = ""),
# sep="_"), paste(FBSf, "FBSf", sep = ""),
# sep = "_")]
#
# #create a copy with the assay parameters for the options table (Table S7)
# options.IVD_Manuscript_data<-data.table::copy(IVD_Manuscript_data)
#
# ### Pull in httk assay parameter table ###
# invitro.assay.params <- data.table::copy(invitro.assay.params)
#
# #copy the experimental data frame to pull out the parameter lines
# parameters_only<-data.table::copy(IVD_Manuscript_data)
#
# ### Set up assay parameters to match with invitro.assay.params.RData ###
#
# #rename columns to align with the names in invitro.assay.params.RData
# parameters_only[, timepoint_hr := ExposureDuration_hrs] %>%
# .[, media_serum := SerumType] %>%
# .[, cell_type := CellType]
#
# # get the list of columns we need
# ivpnames<-names(invitro.assay.params)
#
# #add on the extra columns to match invitro.assay.params and parameters_only
# if(!all(ivpnames%in%names(parameters_only))){
# parameters_only[,ivpnames[!(ivpnames %in% names(parameters_only))]] <-
# as.double(NA)}
#
# #filter parameters_only to match up with ivpnames and only the unique rows
# parameters_only<-parameters_only %>% select(as.character(ivpnames)) %>% unique()
#
# # reassign own parameters table to pass to armitage and kramer functions
# user_assay_parameters<-parameters_only
# # should have 29 + 1649 = 1678 rows
#
# #remove parameter columns since we will be pulling from "user_assay_parameters"
# ivpnames[2]<-NA #remove endpoint_name bc we need to be able to reference it
# IVD_Manuscript_data[,ivpnames[(ivpnames %in% names(IVD_Manuscript_data))]]<-
# NULL
#
## ----run_IVD_models, eval = execute.vignette----------------------------------
#
# # run Armitage model, output concentrations in umol/L (e.g. uM)
# armitageOutput_data.dt <- armitage_eval(tcdata=IVD_Manuscript_data,
# surface.area.switch = TRUE,
# user_assay_parameters =
# user_assay_parameters,
# restrict.ion.partitioning = TRUE)
#
#
#
# # run Kramer model, output concentrations in umol/L (e.g. uM)
# kramerOutput_data.dt <- kramer_eval(tcdata=IVD_Manuscript_data,
# surface.area.switch = TRUE,
# user_assay_parameters =
# user_assay_parameters,
# restrict.ion.partitioning = TRUE)
#
## ----manipulate_for_plotting, eval = execute.vignette-------------------------
#
# #delete nomconc column
# Armitage.dt<-subset(armitageOutput_data.dt, select=-c(nomconc))
#
# #pull ioc types from armitage since kramer doesn't generate them
# IOC_Type_output<-Armitage.dt[,.(ChemicalName, IOC_Type)] %>% distinct()
#
# #delete nomconc column and add in the IOC type column for Kramer
# Kramer.dt_subset<-subset(kramerOutput_data.dt, select=-c(nomconc))
# Kramer.dt<-Kramer.dt_subset[IOC_Type_output, on = "ChemicalName"]
#
# #delete cellconc column and add in the IOC type column for Nominal
# Nominal.dt_subset<-subset(kramerOutput_data.dt,
# select = -c(concentration_cells))
# Nominal.dt<-Nominal.dt_subset[IOC_Type_output, on = "ChemicalName"]
#
# #add tags
# Armitage.dt[,label:= "Armitage"]
# Kramer.dt[,label:= "Kramer"]
# Nominal.dt[,label:= "Nominal"]
#
# #combine data for plotting (datasets are side by side)
# combodata<-merge(Armitage.dt, kramerOutput_data.dt, by = "X", all.x = TRUE)
# #kramer output still has nomconc values
#
# if(save_output){
# write.csv(combodata, file = paste0(path_out,"ManuscriptOutput_",
# data.date,"_",Sys.Date(),".csv"))
# }
#
## ----ggplot_set_up, eval = execute.vignette-----------------------------------
#
# #set default theme elements for visually consistent manuscript plots
# personal_theme <- function(){
# theme_bw() +
# theme(axis.line = element_blank(),
# panel.border = element_rect(colour = "black", linewidth=1),
# axis.title = element_text(size = rel(1.25)),
# axis.text = element_text(size = rel(1.15)),
# plot.title = element_text(size = rel(1.5),
# hjust = 0.5),
# panel.grid = element_blank(),
# legend.position = "none")
# }
#
# #set plot colors to correspond with each chemica
# plotcolors<-scales::viridis_pal()(length(
# unique(armitageOutput_data.dt$ChemicalName)))
# names(plotcolors)<-armitageOutput_data.dt$ChemicalName %>% unique() %>% sort()
#
## ----joint_intracel_plot_fig2, eval = execute.vignette------------------------
#
# #ARMITAGE:
#
# #Armitage RMSLE:
# arm_rmsle_long<-rmsle((Armitage.dt$CellConcentration_uM),(Armitage.dt$ccells))
# arm_rmsle<-round(arm_rmsle_long, 2)
# #1.12
#
# #Armitage r^2:
# ml_arm_overall = lm(CellConcentration_uM~ccells, data = Armitage.dt)
# summary(ml_arm_overall)$r.squared
# #0.33
#
# #plot set up
# yrng <- range(Armitage.dt$CellConcentration_uM)
# xrng <- range(Armitage.dt$ccells)
# n_chems<-n_distinct(Nominal.dt$ChemicalName)
# n_data<-nrow(Nominal.dt)
#
# # now plot
# Armitage_ccell<-ggplot(Armitage.dt) +
# geom_point(aes(ccells, CellConcentration_uM, color = ChemicalName)) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# xlab(expression("Armitage Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
# ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
# scale_x_log10(lim = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# scale_y_log10(lim = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# annotate(geom = "text", x = xrng[2], y = yrng[1], label = parse(text = paste(
# "N[data] ", "~'='~", n_data)), hjust = 1, vjust = 0, size = 4) +
# annotate(geom = "text", x = xrng[2], y = yrng[1], label = parse(text = paste(
# "N[chemicals] ", "~'='~", n_chems)), hjust = 1, vjust = 1.2, size = 4)+
# annotate(geom = "text", x = xrng[2], y = yrng[1],
# label = paste("RMSLE =", arm_rmsle),
# hjust = 1, vjust = 3, size = 4) +
# personal_theme() +
# scale_color_manual(values=plotcolors)
#
# #KRAMER:
#
# #Kramer RMSLE:
# kram_rmsle_long<-
# rmsle((Kramer.dt$CellConcentration_uM),(Kramer.dt$concentration_cells))
# kram_rmsle<-format(round(kram_rmsle_long, digits=2), nsmall = 2)
# #1.30
#
# #Kramer r^2:
# ml_kram_overall = lm(CellConcentration_uM~concentration_cells, data = Kramer.dt)
# summary(ml_kram_overall)$r.squared
# #0.03
#
# #plot
# Kramer_ccell<-ggplot(Kramer.dt)+
# geom_point(aes(concentration_cells, CellConcentration_uM,
# colour = ChemicalName)) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# scale_x_log10(lim = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# scale_y_log10(lim = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# xlab(expression("Kramer Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
# ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
# annotate(geom = "text", x = xrng[2], y = yrng[1], label = parse(text = paste(
# "N[data] ", "~'='~", n_data)), hjust = 1, vjust = 0, size = 4) +
# annotate(geom = "text", x = xrng[2], y = yrng[1], label = parse(text = paste(
# "N[chemicals] ", "~'='~", n_chems)), hjust = 1, vjust = 1.2, size = 4)+
# annotate(geom = "text", x = xrng[2], y = yrng[1],
# label = paste("RMSLE =", kram_rmsle), hjust = 1, vjust = 3, size = 4) +
# personal_theme() +
# scale_color_manual(values=plotcolors)
#
# #NOMINAL:
#
# #Nominal RMSLE:
# nom_rmsle_long<-rmsle((Nominal.dt$CellConcentration_uM),(Nominal.dt$nomconc))
# nom_rmsle<-round(nom_rmsle_long, 2)
# #1.45
#
# Nominal_ccell<-ggplot(Nominal.dt) +
# geom_point(aes(nomconc, CellConcentration_uM, colour = ChemicalName))+
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# scale_x_log10(lim = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# scale_y_log10(lim = c(0.00001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# xlab(expression(italic("C"["nominal"])~"(\u03BCM)")) +
# ylab(expression("Experimental "~italic("C"["cell"])~"(\u03BCM)")) +
# labs(colour = "Chemical")+
# annotate(geom = "text", x = xrng[2], y = yrng[1], label = parse(text = paste(
# "N[data] ", "~'='~", n_data)), hjust = 1, vjust = 0, size = 4) +
# annotate(geom = "text", x = xrng[2], y = yrng[1], label = parse(text = paste(
# "N[chemicals] ", "~'='~", n_chems)), hjust = 1, vjust = 1.2, size = 4) +
# annotate(geom = "text", x = xrng[2], y = yrng[1],
# label = paste("RMSLE =", nom_rmsle),
# hjust = 1, vjust = 3, size = 4) +
# personal_theme() +
# theme(legend.position = "bottom",
# legend.key.size = unit(2, 'mm'),
# legend.spacing = unit(10, "pt"))+
# # legend.text = element_text(size = 8))+
# guides(col = guide_legend(ncol = 5)) +
# scale_color_manual(values=plotcolors)
#
# #Combine the three plots w RMSLE
# Ccells_comboplot<-ggarrange(Nominal_ccell, Armitage_ccell, Kramer_ccell,
# ncol=3 , legend = "bottom", common.legend = TRUE)
#
# #save
# if(save_output){
# ggsave(file = paste0("Figure2_",data.date,"_",Sys.Date(),".png"),
# path = path_out,
# Ccells_comboplot, width = 10, height = 5.5, dpi = 300)
# }
#
# print(Ccells_comboplot)
#
## ----multiobs_intracel_plot_fig3, eval = execute.vignette---------------------
#
# #Armitage.multobs.dt %>% count(ChemicalName, NominalDose_uM, Citation)
# #some of these have super close nomconcs because they are experimentally
# #measured - not really relevant for this comparison because they are replicates
# #chemicals to remove:
# #Hexachlorobenzene, Malathion, Pentachlorophenol, Propiconazole
# #(all of the chemicals from Stadnicka, 2014)
#
# multiObsChemnameList<-Armitage.dt %>%
# dplyr::filter(Citation != "Stadnicka, 2014") %>%
# count(ChemicalName, NominalDose_uM) %>%
# count(ChemicalName) %>% dplyr::filter(n>=2) %>%
# select(ChemicalName) %>% distinct()
# #10 chemicals with multiple nominal doses measured
#
# #filter measured data to just these chemicals
# Armitage.multobs.dt_og <- Armitage.dt %>%
# dplyr::filter(ChemicalName %in% multiObsChemnameList$ChemicalName)
#
# Kramer.multobs.dt_og <- Kramer.dt %>%
# dplyr::filter(ChemicalName %in% multiObsChemnameList$ChemicalName)
#
# Nominal.multobs.dt_og <- Nominal.dt %>%
# dplyr::filter(ChemicalName %in% multiObsChemnameList$ChemicalName)
# #each have 89 observations
#
# #merge the lines with multiple observations for the same nomconc (ie tox21)
# Armitage.multobs.dt<-Armitage.multobs.dt_og %>%
# group_by(ChemicalName, NominalDose_uM) %>%
# mutate(mean_measuredccell = mean(CellConcentration_uM),
# mean_predictedccell = mean(ccells)) %>%
# select(ChemicalName, NominalDose_uM, mean_measuredccell,
# mean_predictedccell) %>% distinct()
#
# Kramer.multobs.dt<-Kramer.multobs.dt_og %>%
# group_by(ChemicalName, NominalDose_uM) %>%
# mutate(mean_measuredccell = mean(CellConcentration_uM),
# mean_predictedccell = mean(concentration_cells)) %>%
# select(ChemicalName, NominalDose_uM, mean_measuredccell,
# mean_predictedccell) %>% distinct()
#
# Nominal.multobs.dt<-Nominal.multobs.dt_og %>%
# group_by(ChemicalName, NominalDose_uM) %>%
# mutate(mean_measuredccell = mean(CellConcentration_uM),
# mean_predictedccell = mean(NominalDose_uM)) %>%
# select(ChemicalName, NominalDose_uM, mean_measuredccell,
# mean_predictedccell) %>% distinct()
# #down to 50 observations because the duplicates have been averaged
#
# ### Plotting ###
#
# #plot set up
# yrng_mult <- range(Armitage.multobs.dt$mean_measuredccell)
# xrng_mult <- range(Armitage.multobs.dt$mean_predictedccell)
# n_chems_mult<-n_distinct(Armitage.multobs.dt$ChemicalName)
# n_data_mult<-nrow(Armitage.multobs.dt)
#
# #ARMITAGE
#
# #Armitage RMSLE:
# arm_mult_rmsle_long<-rmsle((Armitage.multobs.dt$mean_measuredccell),
# (Armitage.multobs.dt$mean_predictedccell))
# arm_mult_rmsle<-round(arm_mult_rmsle_long, 2)
# #1.27
#
# #r squared
# ml_arm <- lm(mean_measuredccell~mean_predictedccell, data = Armitage.multobs.dt)
# rsq_armitage_mult <- summary(ml_arm)$r.squared
# #0.87
#
# # now plot
# Armitage_ccell_mult<-ggplot(Armitage.multobs.dt) +
# geom_point(aes(mean_predictedccell, mean_measuredccell,
# colour = ChemicalName)) +
# geom_smooth(aes(mean_predictedccell, mean_measuredccell,
# colour = ChemicalName), method=lm, se = FALSE) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# scale_x_log10(lim = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# scale_y_log10(lim = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# xlab(expression("Armitage Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
# ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
# labs(colour = "Chemical")+
# annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
# label = parse(text = paste("N[data] ", "~'='~", n_data_mult)),
# hjust = 1, vjust = 0, size = 4) +
# annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
# label = parse(text = paste("N[chemicals] ", "~'='~", n_chems_mult)),
# hjust = 1, vjust = 1.2, size = 4)+
# annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
# label = paste("RMSLE =", arm_mult_rmsle), hjust = 1, vjust = 3, size = 4) +
# personal_theme() +
# scale_color_manual(values=plotcolors)
#
# #KRAMER
#
# #Kramer RMSLE:
# kram_mult_rmsle_long<-rmsle((Kramer.multobs.dt$mean_measuredccell),
# (Kramer.multobs.dt$mean_predictedccell))
# kram_mult_rmsle<-round(kram_mult_rmsle_long, 2)
# # 1.58
#
# #r squared
# ml_kram <- lm(mean_measuredccell~mean_predictedccell, data = Kramer.multobs.dt)
# rsq_kramer_mult <- summary(ml_kram)$r.squared
# #0.87
#
# # now plot
# Kramer_ccell_mult<-ggplot(Kramer.multobs.dt) +
# geom_point(aes(mean_predictedccell, mean_measuredccell,
# colour = ChemicalName)) +
# geom_smooth(aes(mean_predictedccell, mean_measuredccell,
# colour = ChemicalName), method=lm, se = FALSE) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# scale_x_log10(lim = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# scale_y_log10(lim = c(0.0001, 400000), breaks = c(10^-4, 10^0, 10^4),
# labels = label_log(digits = 2)) +
# xlab(expression("Kramer Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
# ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
# labs(colour = "Chemical")+
# annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
# label = parse(text = paste("N[data] ", "~'='~", n_data_mult)),
# hjust = 1, vjust = 0, size = 4) +
# annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
# label = parse(text = paste("N[chemicals] ", "~'='~", n_chems_mult)),
# hjust = 1, vjust = 1.2, size = 4) +
# annotate(geom = "text", x = xrng_mult[2], y = yrng_mult[1],
# label = paste("RMSLE =", kram_mult_rmsle), hjust = 1, vjust = 3, size = 4) +
# personal_theme() +
# scale_color_manual(values=plotcolors) +
# theme(legend.key.size = unit(2, 'mm'),
# legend.text = element_text(size = 6))
#
#
# #arrange
# Ccells_comboplot_mult<-ggarrange(Armitage_ccell_mult, Kramer_ccell_mult, ncol=2,
# legend = "bottom", common.legend = TRUE)
#
# if(save_output){
# ggsave(file = paste0("Figure3_",data.date,"_",Sys.Date(),".png"),
# path = path_out,
# Ccells_comboplot_mult, width = 8, height = 4, dpi = 300)
# }
#
# print(Ccells_comboplot_mult)
#
## ----chemicalPart_intracel_plot_fig4, eval = execute.vignette-----------------
#
# #Cwat_uM -------- Cwat_R
# Cwater_comparison<-ggplot(combodata) +
# geom_point(aes(concentration_medium, cwat_s, colour = ChemicalName.x)) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# ggtitle("Medium Concentration") +
# xlab(expression("Kramer Predicted "*italic("C"["medium"])~"(\u03BCM)")) +
# ylab(expression("Armitage Predicted "*italic(" C"["medium"])~"(\u03BCM)")) +
# scale_x_log10(labels = label_log(digits = 2)) +
# scale_y_log10(labels = label_log(digits = 2)) +
# personal_theme() +
# scale_color_manual(values=plotcolors)
#
# #Cair_uM -------- Cair_R
# Cair_comparison<-ggplot(combodata) +
# geom_point(aes(concentration_air, cair, colour = ChemicalName.x)) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# ggtitle("Air Concentration") +
# xlab(expression("Kramer Predicted "*italic("C"["air"])~"(\u03BCM)")) +
# ylab(expression("Armitage Predicted "*italic(" C"["air"])~"(\u03BCM)")) +
# scale_x_log10(labels = label_log(digits = 2)) +
# scale_y_log10(labels = label_log(digits = 2)) +
# personal_theme() +
# scale_color_manual(values=plotcolors)
#
# #C_cells_uM ----- Ccells
# Ccells_comparison<-ggplot(combodata) +
# geom_point(aes(concentration_cells, ccells, colour = ChemicalName.x)) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# ggtitle("Cell Concentration") +
# xlab(expression("Kramer Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
# ylab(expression("Armitage Predicted "*italic(" C"["cell"])~"(\u03BCM)")) +
# scale_x_log10(lim = c(10^-4, 10^5), labels = label_log(digits = 2),
# breaks = c(10^-2, 10^0, 10^2, 10^4)) +
# scale_y_log10(lim = c(10^-4, 10^5), labels = label_log(digits = 2),
# breaks = c(10^-2, 10^0, 10^2, 10^4)) +
# personal_theme() +
# scale_color_manual(values=plotcolors)
#
# #Aplastic_uM_m2 - (Cplastic_R / Sarea_R)
# Aplastic_comparison<-ggplot(combodata) +
# geom_point(aes(concentration_plastic, cplastic, colour = ChemicalName.x)) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# ggtitle("Plastic Concentration") +
# xlab(expression("Kramer Predicted "*italic("C"["plastic"])~"(\u03BCM)")) +
# ylab(expression("Armitage Predicted "*italic(" C"["plastic"])~"(\u03BCM)")) +
# labs(colour = "Chemical")+
# scale_x_log10(labels = label_log(digits = 2),
# breaks = c(10^-6, 10^-4, 10^-2, 10^0)) +
# scale_y_log10(labels = label_log(digits = 2)) +
# personal_theme() +
# scale_color_manual(values=plotcolors) +
# theme(legend.key.size = unit(1, 'mm'),
# legend.spacing = unit(2, "pt"),
# legend.text = element_text(size = 6))
#
#
# compartment_plots<-ggarrange(Ccells_comparison, Aplastic_comparison,
# Cwater_comparison, Cair_comparison,
# common.legend = TRUE, legend = "none")
#
# if(save_output){
# ggsave(file = paste0("Figure4_",data.date,"_",Sys.Date(),".png"),
# path = path_out,
# compartment_plots, width = 8, height = 6, dpi= 300)
# }
#
# print(compartment_plots)
#
## ----curated_values_plot_fig5, eval = execute.vignette------------------------
#
# #load curated data info
# original_curated.data.dt <- httk::Dimitrijevic.IVD
#
# #first - run using the default/dashboard values
# default.dt<-data.table::copy(original_curated.data.dt %>%
# select(-c( "Arnot_pka", "Arnot_pkb", "log KOW,N", "log KAW,N",
# "Predicted_Ccell_µM", "FoA")))
#
# # run the model, output concentrations in umol/L (e.g. uM)
# armitageOutput_default.dt <- armitage_eval(tcdata=default.dt,
# restrict.ion.partitioning = TRUE,
# surface.area.switch = FALSE)
#
# #next- run using curated values (EAS-E Suite)
# curated.data.dt<-data.table::copy(original_curated.data.dt)
#
# #overwrite using curated physchem properties
# curated.data.dt[, pKa_Donor := as.character(Arnot_pka)] %>% #acidic
# .[, pKa_Accept := as.character(Arnot_pkb)] %>% #basic
# .[, gkow := `log KOW,N`] %>%
# .[, gkaw_n := `log KAW,N`]
#
# # run the model, output concentrations in umol/L (e.g. uM)
# armitageOutput_curated.data.dt <- armitage_eval(tcdata=curated.data.dt,
# restrict.ion.partitioning = TRUE,
# surface.area.switch = FALSE)
#
# #save output with these values (Supplemental Materials T9)
# defaultvals<- armitageOutput_default.dt %>%
# select(Name, casrn, gkow_n, gkaw_n, pKa_Donor, pKa_Accept)
# curatedvals<-armitageOutput_curated.data.dt %>%
# select(Name, casrn, gkow_n, gkaw_n, pKa_Donor, pKa_Accept)
# #tie the two tables together for the supplement
# s9_table<-rbind(curatedvals, defaultvals)
#
# #calculate rmsle
#
# #default values
#
# #individual
# armitageOutput_default.dt[,rmsle_ccell_default:=
# rmsle(Reported_Ccell_µM, ccells), by = Name]
#
# #total
# RMSLE_default.dt<-rmsle(armitageOutput_default.dt$Reported_Ccell_µM,
# armitageOutput_default.dt$ccells)
# RMSLE_default.dt<-round(RMSLE_default.dt, 3)
# #0.586
#
# #curated values
#
# #individual
# armitageOutput_curated.data.dt[,rmsle_ccell_curated:=
# rmsle(Reported_Ccell_µM, ccells), by = Name]
#
# #total
# RMSLE_curated.data.dt<-rmsle(armitageOutput_curated.data.dt$Reported_Ccell_µM,
# armitageOutput_curated.data.dt$ccells)
# RMSLE_curated.data.dt<-round(RMSLE_curated.data.dt, 3)
# #0.570
#
# #improvement when using curated data:
# RMSLE_default.dt-RMSLE_curated.data.dt
# #0.016
#
# ## difference between the two on a per-chemical basis ##
# curated_data.dt<-merge(armitageOutput_default.dt,
# armitageOutput_curated.data.dt, by = "Name")
#
# #calculate rmsle difference for each chemical
# curated_data.dt[,rmsle_difference:=
# rmsle_ccell_curated-rmsle_ccell_default, by = Name]
#
# #which chemicals had the greatest magnitude of difference
# #largest improvement when using curated data
# head(curated_data.dt %>%
# select(rmsle_difference, Name) %>%
# arrange(rmsle_difference), 2)
#
# #largest improvement when using default data
# tail(curated_data.dt %>%
# select(rmsle_difference, Name) %>%
# arrange(rmsle_difference), 2)
#
# #set custom colors and shapes for plot below
# #tricky to tell apart with just colors
# custom_shapes <- c(22, 23, 24, 25, 22, 23,
# 24, 25, 22, 23, 24, 25) # 12 distinct shapes
# custom_colors <- scales::viridis_pal()(length(
# unique(curated_data.dt$Name))) # 12 distinct colors
#
# #plot
# curated_difference_httk_plot<-ggplot(curated_data.dt) +
# geom_point(aes(ccells.y, Reported_Ccell_µM.y, fill = Name, shape = Name), size = 4) +
# geom_label_repel(aes(ccells.y, Reported_Ccell_µM.y,
# label = round(rmsle_difference, 2))) +
# geom_abline(intercept=0,slope=1, linetype = "dashed") +
# xlab(expression("Armitage Predicted "*italic("C"["cell"])~"(\u03BCM)")) +
# ylab(expression("Experimental "*italic(" C"["cell"])~"(\u03BCM)")) +
# labs(fill = "Chemical", shape = "Chemical")+
# scale_x_log10(lim = c(10, 10000), labels = label_log(digits = 2)) +
# scale_y_log10(lim = c(10, 10000), labels = label_log(digits = 2)) +
# personal_theme() +
# scale_shape_manual(values = custom_shapes) +
# scale_fill_manual(values = custom_colors) +
# theme(legend.position = "bottom",
# legend.key.size = unit(2, 'mm'),
# legend.spacing = unit(10, "pt"),
# legend.text = element_text(size = 10))
#
# if(save_output){
# ggsave(file = paste0("Figure5_",data.date,"_",Sys.Date(),".png"),
# path = path_out,
# curated_difference_httk_plot, width = 8, height = 5, dpi = 300)
#
# write.csv(s9_table, file = paste0(path_out,"table_s9_",data.date,"_",
# Sys.Date(),".csv"))
# }
#
## ----fold_change_calculation, eval = execute.vignette-------------------------
#
# # The Armitage model resulted in a slight over prediction of Ccell whereas
# # the Kramer model had the greatest spread but most evenly over and
# # underpredicted Ccell (Figure 2).
# range(Armitage.dt$ccells)
# #1.555754e-02 4.845393e+04
# range(Kramer.dt$concentration_cells)
# #5.838149e-04 2.663947e+04
#
# #Kramer has a larger spread of predictions
#
# ##create datatable with both outputs stacked
# stacked.data<-rbind(Armitage.dt, Kramer.dt, fill=TRUE)
#
# #overpredict or underpredict for each model compared to experimental
# stacked.data_HL<-data.table::copy(stacked.data)
# stacked.data_HL[ccells>CellConcentration_uM,
# ARMITAGE_ccellcomp:="ARMITAGE higher"] %>%
# .[ccells<CellConcentration_uM,
# ARMITAGE_ccellcomp:="ARMITAGE lower"] %>%
# .[concentration_cells>CellConcentration_uM,
# KRAMER_ccellcomp:="KRAMER higher"] %>%
# .[concentration_cells<CellConcentration_uM,
# KRAMER_ccellcomp:="KRAMER lower"]
#
# stacked.data_HL %>% count(ARMITAGE_ccellcomp, KRAMER_ccellcomp)
# #Armitage model more often over predicts
# #Kramer is pretty even split of over and under predictions
#
# #pick out where armitage prediction or kramer prediction is larger
# sidebyside.data_HL<-data.table::copy(combodata)
# sidebyside.data_HL[ccells>concentration_cells, ARMITAGE:="ARMITAGE higher"] %>%
# .[ccells<concentration_cells, KRAMER:="KRAMER higher"]
#
# #count which is higher or lower by chemical
# sidebyside.data_HL %>% select(ChemicalName.x, ARMITAGE, KRAMER) %>%
# distinct() %>% count(ARMITAGE, KRAMER)
# #armitage: higher for 38 chemicals
# #kramer: higher for 5 chemicals
# #38+5 = 43 chemicals overall
#
## ----water_solublity, eval = execute.vignette---------------------------------
#
# #Calculating water solubility using OPERA gswat
# #pull cyc a values from input data
# cycA.dt<-data.table::copy(IVD_Manuscript_data %>%
# dplyr::filter(ChemicalName == "Cyclosporin A"))
#
# #using regular gswat value:
# #run armitage
# armitageOutput_cycA.dt_orig <- armitage_eval(tcdata=cycA.dt,
# surface.area.switch = TRUE,
# user_assay_parameters =
# user_assay_parameters,
# restrict.ion.partitioning = TRUE)
#
# #regular gswat value is 2.341266 mg/L
# #armitageOutput_cycA.dt_orig$gswat_n
#
# #check gswat value:
# # 1. pull from data.table
# chem.physical_and_invitro.data %>%
# dplyr::filter(CAS == "59865-13-3") %>%
# select(MW, logWSol, logWSol.Reference)
#
# # 2. convert mol/L to mg/L which is what armitage uses
# #from chem.physical_and_invitro.data table:
# -3.739 #(mol/L) log10 water solubility
# 10^-3.739 # un-log = 0.0001823896 (mol/L)
# #convert to mg/L
# 0.0001823896*(1203*convert_units("g", "mg")) #1203 is MW in g/mol
# #output: 219.4147 mg/L = swat
# log10(219.4147)
# #output: 2.341266 mg/L = gswat
#
# #calculate RMSLE
# cycA.dt_rmsle_orig<-round(rmsle((
# armitageOutput_cycA.dt_orig$CellConcentration_uM),
# (armitageOutput_cycA.dt_orig$ccells)),2)
# #2.89 - overall rmsle
#
# #Calculating water solubility using experimental gswat
# #pull cyc a values from input data
# cycA.dt<-data.table::copy(IVD_Manuscript_data %>% dplyr::filter(ChemicalName == "Cyclosporin A"))
#
# #using measured gswat value: mg/L
# cycA.dt[, gswat_n := log10(0.0073)]
#
# #run armitage
# armitageOutput_cycA.dt_meas <- armitage_eval(tcdata=cycA.dt,
# surface.area.switch = TRUE,
# user_assay_parameters =
# user_assay_parameters,
# restrict.ion.partitioning = TRUE)
#
# #measured gswat value is -2.136677 mg/L
# #armitageOutput_cycA.dt_meas$gswat_n
#
# #calculate RMSLE
# cycA.dt_rmsle_meas<-round(rmsle((
# armitageOutput_cycA.dt_meas$CellConcentration_uM),
# (armitageOutput_cycA.dt_meas$ccells)),2)
# #1.47 - overall rmsle
#
## ----experimental_nomconc, eval = execute.vignette----------------------------
#
# #pull observations that did experimentally measure the nominal dose
# #exp_nom.dt<-data.table::copy(IVD_Manuscript_data[!(is.na(ExperimentalDose_uM)),])
# #6 citations did measure experimental dose
# #42 observations
#
# #BUT - Stadnicka 2014 did not report a nominal dose to begin with,
# #only the experimentally measured values
# exp_nom.dt<-data.table::copy(IVD_Manuscript_data[!(is.na(ExperimentalDose_uM)) &
# Citation != "Stadnicka, 2014",])
# #unique(exp_nom.dt$Citation)
# #5 citations measured experimental dose AND reported nominal dose
# #27 observations
#
# #run armitage with nomconc as NominalDose_uM.x (not experimentally measured)
# arm_nomconc<-data.table::copy(exp_nom.dt[, nomconc := NominalDose_uM])
#
# # run Armitage model, output concentrations in umol/L (e.g. uM)
# armitageOutput_arm_nomconc <- armitage_eval(tcdata=arm_nomconc,
# surface.area.switch = TRUE,
# user_assay_parameters =
# user_assay_parameters,
# restrict.ion.partitioning = TRUE)
#
# #run armitage with nomconc as ExperimentalDose_uM.x (experimentally measured)
# arm_expconc<-data.table::copy(exp_nom.dt[, nomconc := ExperimentalDose_uM])
#
# armitageOutput_arm_expconc <- armitage_eval(tcdata=arm_expconc,
# surface.area.switch = TRUE,
# user_assay_parameters =
# user_assay_parameters,
# restrict.ion.partitioning = TRUE)
#
# #Armitage NOMCONC RMSLE:
# arm_nomconc_rmsle_long<-rmsle((armitageOutput_arm_nomconc$CellConcentration_uM),
# (armitageOutput_arm_nomconc$ccells))
# arm_nomconc_rmsle<-round(arm_nomconc_rmsle_long, 2)
# #1.87
#
# #Armitage EXPCONC RMSLE:
# arm_expconc_rmsle_long<-rmsle((armitageOutput_arm_expconc$CellConcentration_uM),
# (armitageOutput_arm_expconc$ccells))
# arm_expconc_rmsle<-round(arm_expconc_rmsle_long, 2)
# #1.75
#
# #improvement between the experimental and nomconc rmsles
# arm_nomconc_rmsle- arm_expconc_rmsle
# #0.12
#
## ----physchem_data_tableS1, eval = execute.vignette---------------------------
#
# #create datatable to hold the info
# table1.dt<-data.table::copy(Nominal.dt)
#
# # pull out the columns we need
# data.dt<-table1.dt[, c("Citation", "casrn", "ChemicalName", "IOC_Type",
# "nomconc", "CellConcentration_uM"), with = FALSE]
#
#
# # pull dtxsids, log P, henry's law, and pka:
#
# #dtxsids
# cheminfo.dt<-as.data.table(get_chem_id(chem.cas=unique(data.dt$casrn)))
#
# cheminfo.dt[, casrn:=chem.cas] %>% #rename
# .[,chem.cas:=NULL] %>% #delete
# .[,chem.name:=NULL] #delete
#
# #add dtxsid info to the storage file
# data.dt2<-data.dt[cheminfo.dt,on="casrn"]
#
# #collect log P, henry's law, and pka values
# data.dt2[, c("logP","logHenry", "pKa_Donor", "pKa_Accept") :=
# as.data.frame(get_physchem_param(param = c("logP","logHenry",
# "pKa_Donor",
# "pKa_Accept"),
# chem.cas = casrn))]
#
#
# # calculate median CellConcentration_uM:cnom ratio
# # (for each citation if multiple) and number of observations per chemical
#
# #get ccell : cnom ratio
# data.dt2[,cellconc_cnom_ratio:=CellConcentration_uM/nomconc]
#
# #for each chemical/citation combination, calculate the median ccell:cnom ratio
# data.dt2[, median_ratio := median(cellconc_cnom_ratio),
# by=list(Citation, ChemicalName)]
#
# #count the number of observations by citation and chemname
# data.dt2[, number_of_obs:=.N, by=list(Citation, ChemicalName)]
#
# #filter down to one observation per citation/casrn combo
# final_output<-data.dt2 %>%
# mutate(logP=round(logP, 2),
# logHenry=round(logHenry, 2),
# median_ratio=round(median_ratio, 3)) %>%
# select(-c("casrn", "nomconc", "CellConcentration_uM",
# "cellconc_cnom_ratio")) %>% #get rid of extra rows
# distinct(Citation, ChemicalName, .keep_all = TRUE)
# #1 obs per citation/chem pair
#
# #save the output
# if(save_output){
# write.csv(final_output, file = paste0(path_out,"table_s1_",data.date,"_",
# Sys.Date(),".csv"))
# }
#
## ----assay_descriptors_tableS2, eval = execute.vignette-----------------------
#
# #copy table to manipulate
# parameter_table<-data.table::copy(Nominal.dt)
#
# table_s2<-parameter_table[, c("Citation", "ChemicalName", "nomconc",
# "NominalDose_uM", "v_total", "v_working",
# "sarea", "well_number", "cell_yield", "FBSf",
# "FigureNumber", "CellType")] #subset columns
#
# #save the output
# if(save_output){
# write.csv(table_s2,
# file = paste0(path_out,"table_s2_raw_",data.date,"_",
# Sys.Date(),".csv"))
# }
#
## ----ionization_RMSLE_tableS4, eval = execute.vignette------------------------
#
# #copy for editing
# ionization_rmsle<-data.table::copy(stacked.data)
#
# #calculate RMSLSE and r^2 for each model and ionization type
# ionization_rmsle_output<- data.table::copy(ionization_rmsle[label=="Armitage",
# RMSLE_armitage_ionization:=
# rmsle(CellConcentration_uM, ccells),
# by = "IOC_Type"] %>% #RMSLE
# .[label=="Armitage", rsq_armitage_ionization:=
# summary(lm(CellConcentration_uM~ccells))$r.squared,
# by = "IOC_Type"] %>% #r^2
# .[label=="Kramer", RMSLE_kramer_ionization:=
# rmsle(CellConcentration_uM, concentration_cells),
# by = "IOC_Type"] %>% #RMSLE
# .[label=="Kramer", rsq_kramer_ionization:=
# summary(lm(CellConcentration_uM~concentration_cells))$r.squared,
# by = "IOC_Type"] %>% #r^2
# .[,numberofpoints:= .N/2, by = "IOC_Type"] %>%
# .[, c("IOC_Type", "RMSLE_armitage_ionization",
# "rsq_armitage_ionization", "RMSLE_kramer_ionization",
# "rsq_kramer_ionization", "numberofpoints")] %>% #select columns we want
# distinct())
#
# print(ionization_rmsle_output)
# #number of points is overall, not for each calculation
#
# #save the output
# if(save_output){
# write.csv(ionization_rmsle_output,
# file = paste0(path_out,"table_s4_",data.date,"_",Sys.Date(),".csv"))
# }
#
## ----logP_RMSLE_tableS5, eval = execute.vignette------------------------------
#
# #cut up into chunks
# gkowbins<-stacked.data %>% mutate(LogP_bins =
# cut(gkow, breaks = c(-Inf, 1.30, 3.36, Inf),
# labels = c("Hydrophilic", "Moderate Lipophilic", "Lipophilic")))
#
# #calculate rmsle for each model and logP bin combo
# gkowbins_rmsle_output<-suppressWarnings(data.table::copy(
# gkowbins[label == "Armitage", Armitage_RMSLE := rmsle(
# CellConcentration_uM, ccells), by = LogP_bins] %>%
# .[label == "Kramer", Kramer_RMSLE := rmsle(
# CellConcentration_uM, concentration_cells), by = LogP_bins] %>%
# .[, numberofpoints := .N, by = list(LogP_bins, label)] %>%
# .[, c("LogP_bins", "Armitage_RMSLE", "Kramer_RMSLE",
# "numberofpoints", "label")] %>%
# distinct()))
#
# print(gkowbins_rmsle_output)
# #number of points is overall, not for each calculation
#
# #save the output
# if(save_output){
# write.csv(gkowbins_rmsle_output,
# file = paste0(path_out,"table_s5_",data.date,"_",Sys.Date(),".csv"))
# }
#
## ----kaw_RMSLE_tableS6, eval = execute.vignette-------------------------------
#
# #source for chunks is table 2 Volatility class in water:
# #https://www.epa.gov/pesticide-science-and-assessing-pesticide-risks/
# #guidance-reporting-environmental-fate-and-transport#:~:text=is%20very%20low.-,
# #Volatility%20from%20Water,the%20reciprocal%20of%20KAW.&text=GMW%20=%20gram%20
# #molecular%20weight%20in%20g/mole.&text=This%20value%20is%20the%20inverse,as%20
# #Cwater/Cair.&text=Note%20that%20all%20chemicals%20may,volatility%20potential%20
# #is%20very%20low.
#
# #cut up kaw into relevant bins
# kawbins<-stacked.data %>%
# mutate(Kaw_bins = cut(DR_kaw, breaks = c(Inf, 10^-2, 10^-3, 10^-5, -Inf),
# labels = c("Rapidly lost from a water surface",
# "Volatile from a water surface",
# "Slightly volatile from a water surface", "Non-volatile")))
#
# #calculate rmsle for each model and logP bin combo
# kaw_rmsle_output<-suppressWarnings(kawbins[label == "Armitage",
# Armitage_RMSLE := rmsle(
# CellConcentration_uM, ccells), by = Kaw_bins] %>%
# .[label == "Kramer", Kramer_RMSLE := rmsle(
# CellConcentration_uM, concentration_cells), by = Kaw_bins] %>%
# .[, numberofpoints := .N, by = list(Kaw_bins, label)] %>%
# .[, c("Kaw_bins", "Armitage_RMSLE", "Kramer_RMSLE",
# "numberofpoints", "label")] %>%
# distinct())
#
#
# #save the output
# if(save_output){
# write.csv(kaw_rmsle_output,
# file = paste0(path_out,"table_s6_",data.date,"_",Sys.Date(),".csv"))
# }
#
#
## ----multobs_RMSLE_tableS7, eval = execute.vignette---------------------------
#
# #Armitage
#
# #copy for editing
# multobs_rmsle_armitage<-data.table::copy(as.data.table(Armitage.multobs.dt))
#
# #group by chemname, calc RMSLE, r^2, and number of obs
# multobs_rmsle_armitage[, RMSLE_armitage_mult:=
# rmsle(mean_measuredccell, mean_predictedccell),
# by = "ChemicalName"] %>% #rmsle
# .[, rsq_armitage_mult:=
# summary(lm(mean_measuredccell~mean_predictedccell))$r.squared,
# by = "ChemicalName"] %>% #r^2
# .[, numberofpoints:= .N, by = "ChemicalName"]
#
# #numberofpoints = number of datapoints considered (includes averaged points)
#
# #repeat for kramer
#
# #copy for editing
# multobs_rmsle_kramer<-data.table::copy(as.data.table(Kramer.multobs.dt))
#
# #group by chemname, calc RMSLE, r^2, and number of obs
# multobs_rmsle_kramer[, RMSLE_kramer_mult:=
# rmsle(mean_measuredccell, mean_predictedccell),
# by = "ChemicalName"] %>% #rmsle
# .[, rsq_kramer_mult:=
# summary(lm(mean_measuredccell~mean_predictedccell))$r.squared,
# by = "ChemicalName"] %>% #r^2
# .[, numberofpoints:= .N, by = "ChemicalName"]
#
#
# #cut down both to the columns we need and only 1 obs per chemical
# mult.dt<-merge(multobs_rmsle_armitage[, -c("NominalDose_uM",
# "mean_measuredccell",
# "mean_predictedccell")] %>%
# distinct() ,
# multobs_rmsle_kramer[, -c("NominalDose_uM",
# "mean_measuredccell",
# "mean_predictedccell")] %>%
# distinct())
#
# #for higher/lower rmsle comparison
# mult_HL<-data.table::copy(mult.dt[RMSLE_kramer_mult>RMSLE_armitage_mult,
# higherorlower:="Kramer higher"] %>%
# .[RMSLE_kramer_mult<RMSLE_armitage_mult,
# higherorlower:="kramer lower"])
#
# mult_HL %>% count(higherorlower)
# #armitage predictions have lower error for 6 chemicals
# #kramer predictions are lower error for the other 4
#
# #save the output
# if(save_output){
# write.csv(mult.dt,
# file = paste0(path_out,"table_s3_",data.date,"_",Sys.Date(),".csv"))
# }
#
## ----options_tableS8, eval = execute.vignette---------------------------------
#
# #list of options that can be changed in the armitage model:
#
# # - this.option.kbsa2 Use alternative bovine-serum-albumin partitioning
# # - this.option.swat2 Use alternative water solubility correction
# # - this.option.kpl2 Use alternative plastic-water partitioning model
# # - option.plastic == TRUE (default) gives nonzero surface area
# # - option.bottom == TRUE (default) includes surface area of the bottom of
# # the well in determining sarea
# # - restrict.ion.partitioning
# # § False (default): Treat entire chemical as neutral and allow it all
# # to partition normally as such
# # § True: Restrict so only the neutral fraction of the chemical is
# # available to partition normally but the ionized portion is scaled
#
#
# ##filter to just the ones we can run with the surface area calculation since
# #option.plastic and option.bottom require that function:
# options.IVD_Manuscript_data_2<-
# options.IVD_Manuscript_data[!is.na(well_number), ] %>%
# .[,sarea:=NULL] #delete sarea so the surface area function will run
# #126 out of the original 153 observations
# # length(options.IVD_Manuscript_data$Citation %>% unique()) #14 citations
# # length(options.IVD_Manuscript_data_2$Citation %>% unique()) #12 citations
# # 153-126 data points; 27 data points total removed
#
# # create table to append the results to
# options.table <- NULL
#
# # run basic armitage, only setting the surface area switch
# option.basic <- armitage_eval(tcdata=options.IVD_Manuscript_data_2,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.basic_RMSLE<-rmsle((option.basic$CellConcentration_uM),
# (option.basic$ccells))
# #1.148
#
# #append to table
# options.table<-rbind(options.table, c(round(option.basic_RMSLE ,3),
# "All Default"))
#
# # run with this.option.kbsa2 == TRUE (default is false)
# option.kbsa <- armitage_eval(tcdata=options.IVD_Manuscript_data_2,
# this.option.kbsa2 = TRUE,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.kbsa_RMSLE<-rmsle((option.kbsa$CellConcentration_uM),
# (option.kbsa$ccells))
# #1.150
#
# #append to table
# options.table<-rbind(options.table, c(round(option.kbsa_RMSLE, 3),
# "option.kbsa=TRUE"))
#
#
# # run with this.option.swat2 == TRUE (default is false)
# option.swat <- armitage_eval(tcdata=options.IVD_Manuscript_data_2,
# this.option.swat2 = TRUE,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.swat_RMSLE<-rmsle((option.swat$CellConcentration_uM),
# (option.swat$ccells))
# #3.585
#
# #append to table
# options.table<-rbind(options.table, c(round(option.swat_RMSLE, 3),
# "option.swat2=TRUE"))
#
# # run with this.option.kpl2 == TRUE (Fischer) (default is false/kramer)
# option.kpl <- armitage_eval(tcdata=options.IVD_Manuscript_data_2,
# this.option.kpl2 = TRUE,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.kpl_RMSLE<-rmsle((option.kpl$CellConcentration_uM),(option.kpl$ccells))
# #1.101
#
# #append to table
# options.table<-rbind(options.table, c(round(option.kpl_RMSLE, 3),
# "option.kpl2=TRUE"))
#
# # run with option.plastic == FALSE (default is true)
# opt.plast_input<- data.table::copy(options.IVD_Manuscript_data_2)
# opt.plast_input[,option.plastic := FALSE]
#
# option.plast <- armitage_eval(tcdata=opt.plast_input,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.plast_RMSLE<-rmsle((option.plast$CellConcentration_uM),
# (option.plast$ccells))
# #1.178
#
# #append to table
# options.table<-rbind(options.table, c(round(option.plast_RMSLE, 3),
# "option.plastic=FALSE"))
#
# # run with option.bottom == FALSE (default is true)
# option.bottom_input<- data.table::copy(options.IVD_Manuscript_data_2)
# option.bottom_input[,option.bottom := FALSE]
#
# option.bottom <- armitage_eval(tcdata=option.bottom_input,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.bottom_RMSLE<-rmsle((option.bottom$CellConcentration_uM),
# (option.bottom$ccells))
# #1.157
#
# #append to table
# options.table<-rbind(options.table, c(round(option.bottom_RMSLE,3),
# "option.bottom=FALSE"))
#
# # run with restrict.ion.partitioning == TRUE (default is FALSE)
# option.restion <- armitage_eval(tcdata=options.IVD_Manuscript_data_2,
# restrict.ion.partitioning = TRUE,
# surface.area.switch = TRUE,
# user_assay_parameters = user_assay_parameters)
#
# option.restion_RMSLE<-rmsle((option.restion$CellConcentration_uM),
# (option.restion$ccells))
# #1.152
#
# #append to table
# options.table<-rbind(options.table, c(round(option.restion_RMSLE,3),
# "restrict.ion.partitioning = TRUE"))
#
# #save the output
# if(save_output){
# write.csv(options.table,
# file = paste0(path_out,"table_s7_",data.date,"_",Sys.Date(),".csv"))
# }
#
## ----cell_type_tableS11, eval = execute.vignette------------------------------
#
# #copy for editing
# celltype_rmsle<-data.table::copy(stacked.data)
#
# #one set doesnt have cell type annotated, add here:
# celltype_rmsle[assay_name == "Tox21 Experiment", cell_type := "MCF-7"]
#
# #check all assays have cell type added (no nas)
# unique(celltype_rmsle$cell_type)
#
# #create bins for the cell types
# celltype_rmsle[cell_type == "PRH" |
# cell_type == "PHH" |
# cell_type == "HepaRG" |
# cell_type == "HepG2"
# , cell_type_class:="Liver"] %>%
# .[cell_type == "Neocortical" |
# cell_type == "PFC" |
# cell_type == "Caco-2" |
# cell_type == "LS180" |
# cell_type == "MCF-7" |
# cell_type == "H295R" |
# cell_type == "PC12" |
# cell_type == "RTgill W1" |
# cell_type == "HEK293" |
# cell_type == "Balb/c 3T3", cell_type_class:="Non-Liver"]
#
# #check all are labeled
# celltype_rmsle %>% select(cell_type, cell_type_class) %>% distinct()
#
# #calculate RMSLSE and r^2 for each model and ionization type
# celltype_rmsle_output<- data.table::copy(celltype_rmsle[label=="Armitage",
# RMSLE_armitage_cell_type:=
# rmsle(CellConcentration_uM, ccells),
# by = "cell_type_class"] %>% #RMSLE
# .[label=="Armitage", rsq_armitage_cell_type:=
# summary(lm(CellConcentration_uM~ccells))$r.squared,
# by = "cell_type_class"] %>% #r^2
# .[label=="Kramer", RMSLE_kramer_cell_type:=
# rmsle(CellConcentration_uM, concentration_cells),
# by = "cell_type_class"] %>% #RMSLE
# .[label=="Kramer", rsq_kramer_cell_type:=
# summary(lm(CellConcentration_uM~concentration_cells))$r.squared,
# by = "cell_type_class"] %>% #r^2
# .[,numberofpoints:= .N/2, by = "cell_type_class"] %>%
# .[, c("cell_type_class", "RMSLE_armitage_cell_type",
# "rsq_armitage_cell_type", "RMSLE_kramer_cell_type",
# "rsq_kramer_cell_type", "numberofpoints")] %>% #select columns we want
# distinct())
#
# print(celltype_rmsle_output)
# #number of points is overall, not for each calculation
#
## ----intracellular_histogram_plotS1, eval = execute.vignette------------------
#
# #calculate RMSLE for each chemical
# combodata[, RMSLE_armitage_by_chem:= rmsle(CellConcentration_uM.x, ccells),
# by = "ChemicalName.x"]
# combodata[, RMSLE_kramer_by_chem:=
# rmsle(CellConcentration_uM.x, concentration_cells),
# by = "ChemicalName.x"]
# combodata[, RMSLE_nominal_by_chem:= rmsle(CellConcentration_uM.x, nomconc),
# by = "ChemicalName.x"]
#
# #cut down to one observation per chemical
# histo.data<-data.table::copy(combodata %>%
# select(ChemicalName.x, RMSLE_armitage_by_chem,
# RMSLE_kramer_by_chem, RMSLE_nominal_by_chem) %>%
# unique())
#
# #histo.data %>% arrange(desc(RMSLE_kramer_by_chem))
#
# #plot histograms
# nominal_histogram_by_chem <-ggplot(histo.data, aes(x=RMSLE_nominal_by_chem)) +
# geom_histogram(binwidth = 0.5, boundary = 0) +
# labs(x = "Nominal RMSLE",
# y = "Number of Observations") +
# xlim(0, 5) + ylim(0, 17) +
# theme_classic() +
# theme(axis.text=element_text(size=12),
# axis.title=element_text(size=14))
#
# armitage_histogram_by_chem <-ggplot(histo.data, aes(x=RMSLE_armitage_by_chem)) +
# geom_histogram(binwidth = 0.5, boundary = 0) +
# labs(x = "Armitage RMSLE",
# y = "Number of Observations") +
# xlim(0, 5) + ylim(0, 17) +
# theme_classic() +
# theme(axis.text=element_text(size=12),
# axis.title=element_text(size=14))
#
# kramer_histogram_by_chem <- ggplot(histo.data, aes(x=RMSLE_kramer_by_chem)) +
# geom_histogram(binwidth = 0.5, boundary = 0) +
# labs(x = "Kramer RMSLE",
# y = "Number of Observations") +
# xlim(0, 5) + ylim(0, 17) +
# theme_classic()+
# theme(axis.text=element_text(size=12),
# axis.title=element_text(size=14))
#
# histogram_comboplot_by_chem<-ggarrange(nominal_histogram_by_chem +
# theme(axis.ticks.y = element_blank(),
# plot.margin = margin(r = 1)),
# armitage_histogram_by_chem +
# theme(axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
# axis.title.y = element_blank(),
# plot.margin = margin(r = 1,
# l = 1)),
# kramer_histogram_by_chem +
# theme(axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
# axis.title.y = element_blank(),
# plot.margin = margin(r = 1,
# l = 1)),
# ncol=3)
#
# if(save_output){
# ggsave(file = paste0("Figure_S1_by_chem_",data.date,"_",Sys.Date(),".png"),
# path = path_out,
# histogram_comboplot_by_chem, width = 10, height = 5, dpi = 300)
# }
#
# print(histogram_comboplot_by_chem)
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.