knitr::opts_chunk$set( collapse = TRUE, cache = FALSE, comment = "#>" )
library(elktoeChemistry) library(dplyr) valve_data <- readRDS(params$inputs$valve_data) element_info <- readRDS(params$inputs$element_info) lod <- readRDS(params$inputs$lod)
A list containing the chosen signal filters used as sensitivity analyses:
signal_filters <- list( base = function(x, ds) identity(x), mad_10 = function(x, ds) { pracma::hampel(x, k = 10, t0 = 3)$y }, avg5_trunc_3sd = function(x, ds){ rm <- zoo::rollmean(c(rep(0, 4), x), k = 5) rs <- zoo::rollapply(c(rep(0, 4), x), width = 5, FUN = sd) outliers <- abs(x - rm) > 3*sd(x) x[outliers] <- rm[outliers] x }, gam = function(x, ds){ ds$y <- x p <- mgcv::gam(log(y) ~ s(distance, bs = "ts") + layer, data = ds) as.numeric(exp(predict(p))) } )
The output of the following workflow produces a function
that returns data ready to be analyzed. This function has four arguments:
contrast
determines how the experimental units are groups. Options are:"all"
: each site is an exposure level"nobaseline"
: each site is an exposure level, baseline units are removed"baseline_v_sites"
: baseline units are compared to sites as a single unit"sites"
: compares Little Tennessee vs Tuckasegee, grouping sites within each river"litn"
: compares sites within the Little Tennessee only"tuck"
: compares sites within the Tuckasegee onlygroup_by_valve
: an indicator of whether to group observations by valve (i.e. by specimen) (TRUE
) or by transect (FALSE
). Defaults to FALSE
.test_data_FUN
: a function(data){...}
applied within the pipeline used to create test statistic data. See example....
: dots are passed to filter_analysis_data
to filter observations by specimens, elements, analysis_group, transect layers, or signal (see above).valve_data %>% dplyr::ungroup() %>% ## Keep only necessary variables #### dplyr::select( # identifiers id, transect, river, site, site_num, species, # baseline covariates starts_with("baseline"), n_annuli, drawer, # outcomes dead, prop_weight_lost, moribund, final_status, # analysis groupings starts_with("agrp"), # chemistry and measurements chemistry, distance, lod ) %>% ## Make character variables easier to type #### dplyr::mutate( species = if_else(species == "A. raveneliana", "Arav", "Lfas") ) %>% ## Censor concentrations at lower limit of detection #### # Drop unneeded variables (e.g. censoring) dplyr::mutate( chemistry = purrr::map2( .x = chemistry, .y = lod, .f = ~ { apply_lod(.x, .y) %>% dplyr::select(-ends_with("_censor")) }) ) %>% ## Clean up distance data #### # Keep only needed variables dplyr::mutate( distance = purrr::map( .x = distance, .f = ~ dplyr::select(.x, distance, layer, annuli, obs) ) ) %>% ## Apply signal filters #### dplyr::mutate( chemistry = purrr::map2( .x = chemistry, .y = distance, .f = function(elements, distance){ purrr::map( .x = elements[-1], .f = function(el){ purrr::map( .x = signal_filters, .f = ~ .x(el, distance)) } ) } ) ) %>% ## Append LOD data to chemistry data #### # used for IPCW estimators # Note that at this stage, this simply creates a copy of the LOD data for # each observation; however, at the next stage (conversion to different scale), # the lod will vary depending on the layer. dplyr::mutate( chemistry = purrr::map2( .x = chemistry, .y = lod, .f = function(ch, ld){ purrr::imap( .x = ch, .f = ~ { lod_dat <- rep(ld[ld$element == .y, "lod", drop = TRUE], length(ch[[1]][[1]])) c(.x, list(lod = lod_dat)) } ) } ) ) %>% ## Convert values to mmmol per Ca mol #### dplyr::mutate( chemistry = purrr::map2( .x = chemistry, .y = distance, .f = function(ch, ds){ cawtpct <- dplyr::if_else(ds[["layer"]] == "ncr", 39.547395, 40.078) # apply to each element purrr::imap( .x = ch, .f = function(vals, element){ # apply to each signal mass <- element_info[element_info[["element"]] == element, "mass", drop = TRUE] purrr::map( .x = vals, .f = ~ ppm_to_mmol_camol(ppm = .x, gmol = mass, ca_wt_pct = cawtpct) ) } ) } ) ) %>% ## Add the transect filter functions #### dplyr::mutate( transect_filter = purrr::map(distance, ~ create_transect_filter(.x)) ) %>% create_analysis_data_preparer() -> analysis_data saveRDS(analysis_data, file = params$outputs$analysis)
function
res <- analysis_data( contrast = "all", test_data_FUN = function(data) data, agrp = "all", elements = "Ba_ppm_m138", # "all" returns all elements signals = c("base", "gam"), group_by_valve = TRUE, transect_opts = list( .layers = "psm", .min_n_obs = 15L ) )
head(res)
str(res$data[[1]])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.