knitr::opts_chunk$set(echo = TRUE, eval = FALSE, warning = FALSE, message = FALSE) library(tcpl)
toxEval
includes ToxCast data to help simplify EAR calculations. Here are the instructions for getting the necessary columns from the ToxCast database using the tcpl
package. This is NOT necessary for all toxEval
users. However, if you are interested in extending the data or exploring additional fields, this might be useful. It is also a way to remember how to update the toxEval
package when there are new ToxCast database updates.
First you would need to download the database and set up a MySQL server/database. This is beyond the scope of this vigentte right now.
Next, you need to connect to that local database:
library(tcpl) library(toxEval) library(tidyverse) tcplConf(drvr = "MySQL", user = "root", pass = "my_super_secret_password", host = "localhost", db = "prod_internal_invitrodb_v4_1")
First, we'll need to load several tables to get our necessary columns:
mc5_raw <- tcplQuery("SELECT * FROM mc5") mc4_raw <- tcplQuery("SELECT * FROM mc4") mc5_chid <- tcplQuery("SELECT * FROM mc5_chid") mc6_raw <- tcplQuery("SELECT * FROM mc6") sample_raw <- tcplQuery("SELECT * FROM sample") chemical_raw <- tcplQuery("SELECT * FROM chemical") mc5_param <- tcplQuery("SELECT * FROM mc5_param")
Next, we'll use dplyr
to join:
ToxCast_ACC <- mc5_raw |> filter(hitc >= 0.9) |> select(-created_date, -modified_date, -modified_by) |> left_join(mc4_raw |> select(-created_date, -modified_date, -modified_by), by = join_by(m4id, aeid)) |> left_join(mc5_chid, by = join_by(m5id)) |> filter(chid_rep == 1) |> left_join(mc6_raw |> select(-created_date, -modified_date, -modified_by), by = join_by(m5id, m4id, aeid)) |> left_join(sample_raw, by = join_by(spid)) |> left_join(chemical_raw, by = join_by(chid)) |> left_join(mc5_param |> filter(hit_param == "acc"), by = join_by(m5id, aeid)) |> select(casn, hit_val, aeid, mc6_mthd_id) |> group_by(casn, hit_val, aeid) |> # 1-to-many flags summarise(flags = list(mc6_mthd_id)) |> ungroup() names(ToxCast_ACC)
[1] "casn" "hit_val" "aeid" "flags"
Next, we'll populate the end_point_info
file:
assay_component_endpoint <- tcplQuery("SELECT * FROM assay_component_endpoint;") assay <- tcplQuery("SELECT * FROM assay;") assay_component <- tcplQuery("SELECT * FROM assay_component;") assay_source <- tcplQuery("SELECT * FROM assay_source;") gene <- tcplQuery("SELECT * FROM gene;") intended_target <- tcplQuery("SELECT * FROM intended_target;") end_point_info_41 <- assay_component_endpoint |> left_join(assay_component, by = join_by(acid)) |> left_join(assay, by = join_by(aid)) |> left_join(assay_source, by = join_by(asid)) |> left_join(intended_target, by = join_by(aeid)) |> left_join(gene, by = c("target_id" = "gene_id")) |> filter(!is.na(aeid)) |> select(aeid, acid, assay_component_endpoint_name, assay_component_endpoint_desc, intended_target_type, intended_target_family_sub, intended_target_family, biological_process_target, tissue, gene_symbol, assay_source_name) |> group_by(across(c(-gene_symbol))) |> # 1-to-many genes summarise(gene_symbol = paste(gene_symbol, collapse = ", ")) |> ungroup()
tox_chemicals41 <- mc5_raw |> select(-created_date, -modified_date, -modified_by) |> left_join(mc4_raw |> select(-created_date, -modified_date, -modified_by), by = join_by(m4id, aeid)) |> left_join(mc5_chid, by = join_by(m5id)) |> filter(chid_rep == 1) |> left_join(mc6_raw |> select(-created_date, -modified_date, -modified_by), by = join_by(m5id, m4id, aeid)) |> left_join(sample_raw, by = join_by(spid)) |> left_join(chemical_raw, by = join_by(chid)) |> left_join(mc5_param |> filter(hit_param == "acc"), by = join_by(m5id, aeid)) |> group_by(casn, chnm, dsstox_substance_id) |> summarise(Total_tested = length(unique(aeid)), Active = length(unique(aeid[hitc >= 0.9]))) |> ungroup() |> left_join(tox_chemicals_35 |> select(casn = Substance_CASRN, Structure_MolWt), by = "casn")
We need molecular weights to convert the ACC values to concentrations. We can either join previous versions of the toxEval package, and/or get new values from the CompTox Dashboard via the batch search:
https://comptox.epa.gov/dashboard/batch-search/
To create a list of chemicals to input to the dashboard:
need_mlwt <- tox_chemicals41$dsstox_substance_id[is.na(tox_chemicals41$Structure_MolWt)] #Bring this to Comptox: data.table::fwrite(data.frame(need_mlwt), "need_mlwt.csv")
# Import the file from Comptox: more_mlwts <- data.table::fread("CCD-Batch-Search.csv", data.table = FALSE) |> mutate(new_Structure_MolWt = as.numeric(AVERAGE_MASS)) |> select(casn = CASRN, dsstox_substance_id = DTXSID, new_Structure_MolWt) tox_chemicals41 <- tox_chemicals41 |> left_join(more_mlwts, by = c("casn", "dsstox_substance_id")) |> mutate(Structure_MolWt = if_else(is.na(Structure_MolWt), new_Structure_MolWt, Structure_MolWt)) |> select(-new_Structure_MolWt)
A small table to describe ToxCast flags is also included:
flags <- mc6_raw |> select(flag_id = mc6_mthd_id, flag_full = flag) |> distinct()
Finally we can save that data in the package:
ToxCast_ACC <- ToxCast_ACC_41 tox_chemicals <- tox_chemicals41 end_point_info <- end_point_info_41_rel save(ToxCast_ACC, tox_chemicals, end_point_info, flags, file = "sysdata.rda", compress = "xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.