```r
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```
This package includes the example dataset `geo_tox_data`. Below is a description of the data and example code showing how it was gathered.
NOTE: FIPS codes can change. Since data is being pulled from various sources, ensure that the FIPS values can be used to connect data across these sources. For example, in 2022 Connecticut began the process of going from 8 legacy counties to 9 planning regions.
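As a minimal sketch of such a check (the function name and data frames are placeholders, not part of the workflow below), county FIPS codes from any two sources can be compared before joining:

```r
# A minimal sketch: list FIPS codes present in one county-level table but not
# the other. `df_a` and `df_b` are placeholder data frames, each assumed to
# have a character FIPS column.
check_fips <- function(df_a, df_b) {
  list(
    only_in_a = setdiff(df_a$FIPS, df_b$FIPS),
    only_in_b = setdiff(df_b$FIPS, df_a$FIPS)
  )
}
```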
```r
library(dplyr)
library(sf)
library(tidyr)
library(readr)
library(stringr)
library(purrr)
library(readxl)
library(httk)
library(httr2)

geo_tox_data <- list()
```
Download modeled exposure data from AirToxScreen. Results from AirToxScreen 2019 for a subset of chemicals in North Carolina counties are included in the package data as `geo_tox_data$exposure`.
NOTE: The 2020 release does not currently provide a single file of national exposure concentration summaries. The 2019 data can be found by following the "Previous air toxics assessments" link.
```r
filename <- "2019_Toxics_Exposure_Concentrations.xlsx"
tmp <- tempfile(filename)
download.file(
  paste0("https://www.epa.gov/system/files/documents/2022-12/", filename),
  tmp
)
exposure <- read_xlsx(tmp)

# Normalization function
min_max_norm <- function(x) {
  min_x <- min(x, na.rm = TRUE)
  max_x <- max(x, na.rm = TRUE)
  if (min_x == max_x) {
    rep(0, length(x))
  } else {
    (x - min_x) / (max_x - min_x)
  }
}

geo_tox_data$exposure <- exposure |>
  # North Carolina counties
  filter(State == "NC", !grepl("0$", FIPS)) |>
  # Aggregate chemicals by county
  summarize(across(-c(State:Tract), c(mean, sd)), .by = FIPS) |>
  pivot_longer(-FIPS, names_to = "chemical") |>
  mutate(
    stat = if_else(grepl("_1$", chemical), "mean", "sd"),
    chemical = gsub(".{2}$", "", chemical)
  ) |>
  pivot_wider(names_from = stat) |>
  # Normalize concentrations
  mutate(norm = min_max_norm(mean), .by = chemical)
```
```r
# Copy/paste the chemical names into the batch search field
# https://comptox.epa.gov/dashboard/batch-search
cat(geo_tox_data$exposure |> distinct(chemical) |> pull(), sep = "\n")

# Export results with "CAS-RN" identifiers as a csv file, then process in R
exposure_casrn <- read_csv("CCD-Batch-Search.csv", show_col_types = FALSE) |>
  filter(DTXSID != "N/A") |>
  # Prioritize results based on FOUND_BY status
  arrange(INPUT, grepl("Approved Name", FOUND_BY), grepl("^Synonym", FOUND_BY)) |>
  # Keep one result per INPUT
  group_by(INPUT) |>
  slice(1) |>
  ungroup()

# Update exposure data with CompTox Dashboard data
geo_tox_data$exposure <- geo_tox_data$exposure |>
  inner_join(exposure_casrn, by = join_by(chemical == INPUT)) |>
  select(FIPS, casn = CASRN, chnm = PREFERRED_NAME, mean, sd, norm)
```
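A quick sanity check, not part of the original workflow, is to confirm that the join left no missing CASRNs and that each chemical name maps to a single CASRN:

```r
# Hypothetical sanity check: no missing CASRNs and a one-to-one mapping
# between preferred chemical names and CASRNs after the join.
stopifnot(
  !anyNA(geo_tox_data$exposure$casn),
  nrow(distinct(geo_tox_data$exposure, chnm, casn)) ==
    n_distinct(geo_tox_data$exposure$chnm)
)
```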
Use ICE curated high-throughput screening (cHTS) data to identify active chemicals for a given set of assays.
```r
get_cHTS_hits <- function(assays = NULL, chemids = NULL) {
  if (is.null(assays) & is.null(chemids)) {
    stop("Must provide at least one of 'assays' or 'chemids'")
  }

  # Format query parameters
  req_params <- list()
  if (!is.null(assays)) {
    if (!is.list(assays)) assays <- as.list(assays)
    req_params$assays <- assays
  }
  if (!is.null(chemids)) {
    if (!is.list(chemids)) chemids <- as.list(chemids)
    req_params$chemids <- chemids
  }

  # Query ICE API
  resp <- request("https://ice.ntp.niehs.nih.gov/api/v1/search") |>
    req_body_json(req_params) |>
    req_perform()
  if (resp$status_code != 200) {
    stop("Failed to retrieve data from ICE API")
  }

  # Return active chemicals
  result <- resp |> resp_body_json() |> pluck("endPoints")
  fields <- c("assay", "casrn", "dtxsid", "substanceName", "endpoint", "value")
  map(fields, \(x) map_chr(result, x)) |>
    set_names(fields) |>
    bind_cols() |>
    filter(endpoint == "Call", value == "Active") |>
    select(-c(endpoint, value)) |>
    distinct()
}

assays <- c(
  "APR_HepG2_p53Act_1h_dn", "APR_HepG2_p53Act_1h_up",
  "APR_HepG2_p53Act_24h_dn", "APR_HepG2_p53Act_24h_up",
  "APR_HepG2_p53Act_72h_dn", "APR_HepG2_p53Act_72h_up",
  "ATG_p53_CIS_up", "TOX21_DT40", "TOX21_DT40_100", "TOX21_DT40_657",
  "TOX21_ELG1_LUC_Agonist", "TOX21_H2AX_HTRF_CHO_Agonist_ratio",
  "TOX21_p53_BLA_p1_ratio", "TOX21_p53_BLA_p2_ratio",
  "TOX21_p53_BLA_p3_ratio", "TOX21_p53_BLA_p4_ratio",
  "TOX21_p53_BLA_p5_ratio"
)
chemids <- unique(geo_tox_data$exposure$casn)

cHTS_hits_API <- get_cHTS_hits(assays = assays, chemids = chemids)
```
Use the ICE API to retrieve dose-response data for selected assays and chemicals.
```r
get_ICE_dose_resp <- function(assays = NULL, chemids = NULL) {
  if (is.null(assays) & is.null(chemids)) {
    stop("Must provide at least one of 'assays' or 'chemids'")
  }

  # Format query parameters
  req_params <- list()
  if (!is.null(assays)) {
    if (!is.list(assays)) assays <- as.list(assays)
    req_params$assays <- assays
  }
  if (!is.null(chemids)) {
    if (!is.list(chemids)) chemids <- as.list(chemids)
    req_params$chemids <- chemids
  }

  # Query ICE API
  resp <- request("https://ice.ntp.niehs.nih.gov/api/v1/curves") |>
    req_body_json(req_params) |>
    req_perform()
  if (resp$status_code != 200) {
    stop("Failed to retrieve data from ICE API")
  }

  # Return dose-response data
  result <- resp |> resp_body_json() |> pluck("curves")
  map(result, function(x) {
    tibble(
      endp = x[["assay"]],
      casn = x[["casrn"]],
      dtxsid = x[["dsstoxsid"]],
      chnm = x[["substance"]],
      call = x[["call"]],
      logc = map_dbl(x[["concentrationResponses"]], "concentration") |> log10(),
      resp = map_dbl(x[["concentrationResponses"]], "response")
    )
  }) |>
    bind_rows()
}

assays <- unique(cHTS_hits_API$assay)
chemids <- intersect(cHTS_hits_API$casrn, geo_tox_data$exposure$casn)

dose_response <- get_ICE_dose_resp(assays = assays, chemids = chemids)

# Only keep active calls for assay/chemical combinations
geo_tox_data$dose_response <- dose_response |>
  filter(call == "Active") |>
  select(-call)

# Update dose-response data with CompTox Dashboard data
geo_tox_data$dose_response <- geo_tox_data$dose_response |>
  inner_join(exposure_casrn, by = join_by(casn == CASRN)) |>
  select(endp, casn, chnm = PREFERRED_NAME, logc, resp)
```
Download age data from the U.S. Census Bureau by searching for "County Population by Characteristics". A subset of data for North Carolina from 2019 is included in the package data as `geo_tox_data$age`.
```r
# Data for North Carolina
url <- paste0("https://www2.census.gov/programs-surveys/popest/datasets/",
              "2010-2019/counties/asrh/cc-est2019-alldata-37.csv")
age <- read_csv(url, show_col_types = FALSE)

geo_tox_data$age <- age |>
  # 7/1/2019 population estimate
  filter(YEAR == 12) |>
  # Create FIPS
  mutate(FIPS = str_c(STATE, COUNTY)) |>
  # Keep selected columns
  select(FIPS, AGEGRP, TOT_POP)
```
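In the Census `cc-est2019-alldata` files, `AGEGRP` 0 is the county total and codes 1 through 18 are 5-year bands ending in 85+. The snippet below is a small illustrative sketch (not part of the stored data) that attaches readable labels; verify the coding against the documentation for the vintage you download.

```r
# Illustrative only: label the 5-year AGEGRP bands (1 = 0-4, ..., 18 = 85+).
age_labels <- c(paste(seq(0, 80, by = 5), seq(4, 84, by = 5), sep = "-"), "85+")
geo_tox_data$age |>
  filter(AGEGRP > 0) |>
  mutate(age_band = age_labels[AGEGRP])
```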
Follow the "Data Portal" link from CDC PLACES and search for "places county data". Go to the desired dataset webpage, for example 2020 county data, and download the data by selecting Actions → API → Download file. A subset of data for North Carolina is included in the package data as geo_tox_data$obesity
.
```r
places <- read_csv("PLACES__County_Data__GIS_Friendly_Format___2020_release.csv",
                   show_col_types = FALSE)

# Convert confidence interval to standard deviation
extract_SD <- function(x) {
  range <- as.numeric(str_split_1(str_sub(x, 2, -2), ","))
  diff(range) / 3.92
}

geo_tox_data$obesity <- places |>
  # North Carolina counties
  filter(StateAbbr == "NC") |>
  # Select obesity data
  select(FIPS = CountyFIPS, OBESITY_CrudePrev, OBESITY_Crude95CI) |>
  # Change confidence interval to standard deviation
  rowwise() |>
  mutate(OBESITY_SD = extract_SD(OBESITY_Crude95CI)) |>
  ungroup() |>
  select(-OBESITY_Crude95CI)
```
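The divisor 3.92 is twice the 1.96 normal quantile, so the full width of a 95% confidence interval is treated as 3.92 standard deviations. For example, with a made-up interval string in the same "(lower, upper)" format:

```r
# Made-up 95% CI string for illustration:
extract_SD("(30.1, 34.5)")
#> [1] 1.122449   # (34.5 - 30.1) / 3.92
```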
Use `httk` to generate steady-state plasma concentration (`C_ss`) values for combinations of age group and weight status for each chemical. Generating these values is time-intensive, so one approach is to generate populations of `C_ss` values up front and then sample from them later.
```r
set.seed(2345)
n_samples <- 500

# Get CASN for which httk simulation is possible. Try using load_dawson2021,
# load_sipes2017, or load_pradeep2020 to increase availability.
load_sipes2017()
casn <- intersect(
  unique(geo_tox_data$dose_response$casn),
  get_cheminfo(suppress.messages = TRUE)
)

# Define population demographics for httk simulation
pop_demo <- cross_join(
  tibble(age_group = list(
    c(0, 2), c(3, 5), c(6, 10), c(11, 15), c(16, 20), c(21, 30),
    c(31, 40), c(41, 50), c(51, 60), c(61, 70), c(71, 100)
  )),
  tibble(weight = c("Normal", "Obese"))
) |>
  # Create column of lower age_group values
  rowwise() |>
  mutate(age_min = age_group[1]) |>
  ungroup()

# Create wrapper function around httk steps
simulate_css <- function(chem.cas, agelim_years, weight_category, samples,
                         verbose = TRUE) {
  if (verbose) {
    cat(
      chem.cas,
      paste0("(", paste(agelim_years, collapse = ", "), ")"),
      weight_category, "\n"
    )
  }

  httkpop <- list(
    method = "vi",
    gendernum = NULL,
    agelim_years = agelim_years,
    agelim_months = NULL,
    weight_category = weight_category,
    reths = c(
      "Mexican American", "Other Hispanic", "Non-Hispanic White",
      "Non-Hispanic Black", "Other"
    )
  )

  css <- try(
    suppressWarnings({
      mcs <- create_mc_samples(
        chem.cas = chem.cas,
        samples = samples,
        httkpop.generate.arg.list = httkpop,
        suppress.messages = TRUE
      )
      calc_analytic_css(
        chem.cas = chem.cas,
        parameters = mcs,
        model = "3compartmentss",
        suppress.messages = TRUE
      )
    }),
    silent = TRUE
  )

  # Return
  if (is(css, "try-error")) {
    warning(paste0("simulate_css failed to generate data for CASN ", chem.cas))
    list(NA)
  } else {
    list(css)
  }
}

# Simulate `C_ss` values
simulated_css <- map(casn, function(chem.cas) {
  pop_demo |>
    rowwise() |>
    mutate(
      css = simulate_css(.env$chem.cas, age_group, weight, .env$n_samples)
    ) |>
    ungroup()
})
simulated_css <- setNames(simulated_css, casn)

# Remove CASN that failed simulate_css
casn_keep <- map_lgl(simulated_css, function(df) {
  !(length(df$css[[1]]) == 1 && is.na(df$css[[1]]))
})
simulated_css <- simulated_css[casn_keep]

# Get median `C_ss` values for each age_group
simulated_css <- map(
  simulated_css,
  function(cas_df) {
    cas_df |>
      nest(.by = age_group) |>
      mutate(
        age_median_css = map_dbl(data, function(df) median(unlist(df$css), na.rm = TRUE))
      ) |>
      unnest(data)
  }
)

# Get median `C_ss` values for each weight
simulated_css <- map(
  simulated_css,
  function(cas_df) {
    cas_df |>
      nest(.by = weight) |>
      mutate(
        weight_median_css = map_dbl(data, function(df) median(unlist(df$css), na.rm = TRUE))
      ) |>
      unnest(data) |>
      arrange(age_min, weight)
  }
)

geo_tox_data$simulated_css <- simulated_css
```
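Because `geo_tox_data$simulated_css` keeps the full vector of simulated values for every chemical, age group, and weight status, later analyses can resample those stored populations instead of rerunning `httk`. A minimal sketch of that idea (the chemical index, age group, and sample size are arbitrary choices for illustration):

```r
# Sketch of the "sample later" idea: resample stored C_ss values for one
# arbitrary chemical, age group, and weight status.
one_chem <- geo_tox_data$simulated_css[[1]]
css_pop <- one_chem |>
  filter(age_min == 31, weight == "Normal") |>
  pull(css) |>
  unlist()
sampled_css <- sample(css_pop, size = 100, replace = TRUE)
```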
Retain only those chemicals found in the exposure, dose-response, and `C_ss` datasets.
```r
casn_keep <- names(geo_tox_data$simulated_css)

geo_tox_data$exposure <- geo_tox_data$exposure |>
  filter(casn %in% casn_keep)

geo_tox_data$dose_response <- geo_tox_data$dose_response |>
  filter(casn %in% casn_keep)
```
Download cartographic boundary files for counties and states from the U.S. Census Bureau. The geometry data for North Carolina counties and the state are included in the package data as `geo_tox_data$boundaries`.
```r
county <- st_read("cb_2019_us_county_5m/cb_2019_us_county_5m.shp")
state <- st_read("cb_2019_us_state_5m/cb_2019_us_state_5m.shp")

geo_tox_data$boundaries <- list(
  county = county |>
    filter(STATEFP == 37) |>
    select(FIPS = GEOID, geometry),
  state = state |>
    filter(STATEFP == 37) |>
    select(geometry)
)
```
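A quick visual check that the geometries were read and filtered correctly (a sketch using base `sf` plotting; `ggplot2` with `geom_sf()` works equally well):

```r
# Plot county outlines with the state boundary overlaid.
plot(st_geometry(geo_tox_data$boundaries$county))
plot(st_geometry(geo_tox_data$boundaries$state), border = "red", lwd = 2, add = TRUE)
```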