Corton et al. "Biological Thresholds Derived from Common Measures in Rat Studies are Predictive of Liver Tumorigenic Chemicals."

Supplemental File 1: The R code used to extract information from ToxRefDB version 2.0. Katie Paul Friedman (paul-friedman.katie@epa.gov) and Jason Brown (brown.jason@epa.gov)

knitr::opts_chunk$set(echo = TRUE)
library(RSQLite)
library(tidyr)
library(dplyr)
library(DT)
# build all queries and extract info from toxref database
con <- dbConnect(SQLite(), dbname="data/tox_ref.sqlite")

#all doses for specific effects
q1 <- "
SELECT 
    dose.study_id,
    dtg.dose_adjusted,
    dtg.dose_adjusted_unit,
    tg_effect.tg_id,
    dose.dose_level,
    effect_val,
    effect_val_unit,
    endpoint.endpoint_target,
    effect.effect_desc
FROM
    dose
        INNER JOIN
    dtg ON dose.dose_id = dtg.dose_id
        INNER JOIN
    dtg_effect ON dtg_effect.dtg_id = dtg.dtg_id
        INNER JOIN
    tg_effect ON tg_effect.tg_effect_id = dtg_effect.tg_effect_id
        INNER JOIN
    effect ON effect.effect_id = tg_effect.effect_id
        INNER JOIN
    endpoint ON endpoint.endpoint_id = effect.endpoint_id
WHERE
    effect_desc IN ('relative to body weight' , 'alanine aminotransferase (alt/sgpt)',
        'aspartate aminotransferase (ast/sgot)')
        AND endpoint_target IN ('alanine aminotransferase (alt/sgpt)' , 'aspartate aminotransferase (ast/sgot)',
        'liver')
        AND effect_val IS NOT NULL
        AND dose.study_id IS NOT NULL
ORDER BY dose.study_id , effect_desc , dose.dose_level;
"

# long format pod info 
q2 <- "
SELECT 
    pod.study_id,tg_id,pod_type,pod_value,pod_unit,endpoint_target,effect_desc,study_type,admin_route,chemical.chemical_id,casrn,preferred_name
FROM
    endpoint
        INNER JOIN
    effect ON endpoint.endpoint_id = effect.endpoint_id
        INNER JOIN
    tg_effect ON effect.effect_id = tg_effect.effect_id
        INNER JOIN
    pod_tg_effect ON tg_effect.tg_effect_id = pod_tg_effect.tg_effect_id
        INNER JOIN
    pod ON pod_tg_effect.pod_id = pod.pod_id
        LEFT JOIN
    study ON pod.study_id = study.study_id
        INNER JOIN
    chemical ON study.chemical_id = chemical.chemical_id
WHERE
    endpoint_target IN ('alanine aminotransferase (alt/sgpt)' , 'aspartate aminotransferase (ast/sgot)',
        'liver')
        AND pod_type IN ('lel' , 'loael')
        AND effect_desc IN ('relative to body weight' , 'alanine aminotransferase (alt/sgpt)',
        'aspartate aminotransferase (ast/sgot)')
        and study.study_id is not null
ORDER BY study.study_id, effect_desc;"

# Study citation information
q3 <- "
SELECT 
    study.*
FROM
    endpoint
        INNER JOIN
    effect ON endpoint.endpoint_id = effect.endpoint_id
        INNER JOIN
    tg_effect ON effect.effect_id = tg_effect.effect_id
        INNER JOIN
    pod_tg_effect ON tg_effect.tg_effect_id = pod_tg_effect.tg_effect_id
        INNER JOIN
    pod ON pod_tg_effect.pod_id = pod.pod_id
        LEFT JOIN
    study ON pod.study_id = study.study_id
        INNER JOIN
    chemical ON study.chemical_id = chemical.chemical_id
WHERE
    endpoint_target IN ('alanine aminotransferase (alt/sgpt)' , 'aspartate aminotransferase (ast/sgot)',
        'liver')
        AND pod_type IN ('lel' , 'loael')
        AND effect_desc IN ('relative to body weight' , 'alanine aminotransferase (alt/sgpt)',
        'aspartate aminotransferase (ast/sgot)')
        AND study.study_id IS NOT NULL
ORDER BY study.study_id , effect_desc;"

#load all tables into memory
t1 <- dbGetQuery(con,q1)
t2 <- dbGetQuery(con,q2)
t3 <- dbGetQuery(con,q3)
dbDisconnect(con)
#remove duplicates
t3 <- unique(t3)
t3 <- t3[-1,]
#select relevant columns
t3 <- t3 %>% select(study_id,study_citation,study_year,study_source,study_type:study_comment)

Study information/citations for associated studies

datatable(t3)
#go from long format to wide format for the LEL and LOAEL values
t2 <- unique(t2)
t2 <- t2 %>% spread(pod_type,pod_value)
#join both tables
res <- t1 %>% left_join(t2, by = c("study_id","endpoint_target","effect_desc","tg_id"))
#sort by study_id and tg_id
res <- res %>% arrange(study_id,tg_id)
#extract the control data
control <- res %>% filter(dose_level == 0) %>% mutate(control = effect_val) %>% select(-dose_level,-effect_val,-dose_adjusted)

#join back to results table, control is NA where no dose_level 0
res <- res %>% left_join(control)

#calculate fold change
res <- res %>% mutate(fold_change = ifelse(grepl("control",effect_val_unit),effect_val/100,(effect_val - control)/control))

#rearrange table for easier view
res <- res %>% select(study_id:effect_val_unit,fold_change,lel,loael,pod_unit,everything()) %>% mutate(control_unit = effect_val_unit)

Unfiltered fold change values for all dose levels (not just LEL and/or LOAELS

datatable(res)

Fold change values for associated LELS and LOAELS based on the effect value at dose level 0. Those studies that did not report control values will not have associated fold change data

#pod table
res_pod <- res %>% filter(dose_adjusted==lel | dose_adjusted==loael)
datatable(res_pod)


brown-jason/threshold_II documentation built on Nov. 4, 2019, 8:15 a.m.