knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = FALSE, message = FALSE )
library(elktoeChemistry) library(dplyr) library(gt) library(ggplot2) analysis_data <- readRDS(params$inputs$analysis)
dt <- analysis_data( contrast = "all", test_data_FUN = function(data) data, elements = "all", signals = c("mad_10"), group_by_valve = TRUE, transect_opts = list( .layers = c("ipx", "ncr", "psm", "pio", "opx"), .min_n_obs = 5L ) )
layer_modifier <- function(layer, annuli){ case_when( layer == "ncr" & annuli == "A" ~ "ncr_new", layer == "ncr" & annuli != "A" ~ "ncr_old", TRUE ~ layer ) }
make_summary_table_data <- function(specimen_summaries){ specimen_summaries %>% group_by(element, species, site, layer) %>% mutate( iv = if_else(v == 0, 1, 1/v) ) %>% # Choose specimen-level summary statistic mutate( value = l1 # value = md ) %>% summarise( mean = mean(value), sd = sd(value), median = median(value), iqr = IQR(value), ivm = mean(value*iv)/sum(iv), cv = mean/sd ) } make_summary_table <- function(table_data){ # browser() table_data %>% mutate( mdvalue = sprintf("%.02g (%.02g)", median, iqr), # if_else(median <= 9.99, # sprintf("%.02g (%.03g)", median, iqr), # sprintf("%.1f (%.1f)", median, iqr)), mnvalue = if_else(mean <= 9.99, sprintf("%.02g (%.03g)", mean, sd), sprintf("%.1f (%.1f)", mean, sd)), layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"), ordered = TRUE), species = factor( species, levels = c("Arav", "Lfas"), ordered = TRUE ), species_layer = interaction(layer, species, sep = "_") ) %>% ungroup() %>% select( element, species_layer, site, l_value = median, s_value = iqr ) %>% tidyr::pivot_wider( # names_from = c("layer"), names_from = c("species_layer"), names_sort = TRUE, values_from = c("l_value", "s_value") ) } cols_merge_ls <- function(tbl, layer, species){ vs <- paste(c("l", "s"), "value", layer, species, sep = "_") cols_merge( data = tbl, columns = vs, hide_columns = vs[2], pattern = "{1} ({2})" ) } make_gt_table <- function(table_data){ table_data %>% gt( rowname_col = "site", groupname_col = c("element") ) %>% tab_header( title = "Average mol/Ca mmol concentrations by layer", subtitle = "Values are site-level median (interquartile range) of specimen-level probability of censoring weighted L-moment location parameter for each layer." ) %>% fmt_number( columns = starts_with("l_value"), n_sigfig = 2 ) %>% fmt_number( columns = starts_with("s_value"), n_sigfig = 2 ) %>% cols_merge_ls("ncr_new", "Arav") %>% cols_merge_ls("ncr_new", "Lfas") %>% cols_merge_ls("ncr_old", "Arav") %>% cols_merge_ls("ncr_old", "Lfas") %>% cols_merge_ls("psm", "Arav") %>% cols_merge_ls("psm", "Lfas") %>% cols_merge_ls("pio", "Arav") %>% cols_merge_ls("pio", "Lfas") %>% cols_merge_ls("ipx", "Arav") %>% cols_merge_ls("ipx", "Lfas") %>% cols_merge_ls("opx", "Arav") %>% cols_merge_ls("opx", "Lfas") %>% cols_label( l_value_ipx_Arav = "inner epoxy", l_value_ncr_new_Arav = "nacre (youngest annuli)", l_value_ncr_old_Arav = "nacre (older annuli)", l_value_psm_Arav = "prismatic", l_value_pio_Arav = "periostracum", l_value_opx_Arav = "outer epoxy", l_value_ipx_Lfas = "inner epoxy", l_value_ncr_new_Lfas = "nacre (youngest annuli)", l_value_ncr_old_Lfas = "nacre (older annuli)", l_value_psm_Lfas = "prismatic", l_value_pio_Lfas = "periostracum", l_value_opx_Lfas = "outer epoxy" ) %>% tab_spanner( "A. raveneliana", columns = ends_with("Arav"), ) %>% tab_spanner( "L. fasciola", columns = ends_with("Lfas") ) }
specimen_summaries <- summarize_specimens(dt, modify_layer = layer_modifier)
tab <- specimen_summaries %>% make_summary_table_data() %>% make_summary_table() %>% mutate( element = case_when( stringr::str_detect(element, "^(Zn|Cu|Mg)") ~ paste0(stringr::str_replace(element, "_ppm_m", "["), "]"), TRUE ~ stringr::str_remove(element, "_ppm_m.*") ) ) %>% make_gt_table() %>% tab_options( data_row.padding = 2, table.font.size = 8 ) %>% cols_width( vars(site) ~ px(50), everything() ~ px(50) ) tab
tab %>% gtsave("element_summary.tex", here::here("manuscript", "figures"))
create_pvals <- function(summarydt, filterFUN = identity){ summarydt %>% group_by(element, species, layer) %>% filterFUN() %>% summarise( k_pvalue_unadj_site = kruskal.test(x = l1, g = site)[["p.value"]], k_pvalue_adj_site = { x <- residuals(lm(l1 ~ -1 + factor(drawer))) kruskal.test(x = x, g = site)[["p.value"]] } , k_pvalue_unadj_river = kruskal.test(x = l1, g = river)[["p.value"]], k_pvalue_adj_river = { x <- residuals(lm(l1 ~ -1 + factor(drawer))) kruskal.test(x = x, g = river)[["p.value"]] } ) }
kruskal_pvalues_A <- kruskal_pvalues_B <- specimen_summaries %>% filter(site != "Baseline") %>% group_by(element, species, layer) %>% summarise( k_pvalue_unadj_site = kruskal.test(x = l1, g = site)[["p.value"]], k_pvalue_adj_site = { x <- residuals(lm(l1 ~ -1 + factor(drawer))) kruskal.test(x = x, g = site)[["p.value"]] }, k_pvalue_unadj_river = kruskal.test(x = l1, g = river)[["p.value"]], k_pvalue_adj_river = { x <- residuals(lm(l1 ~ -1 + factor(drawer))) kruskal.test(x = x, g = river)[["p.value"]] } ) pvals <- bind_rows( kruskal_pvalues_A %>% mutate(test = "A"), kruskal_pvalues_B %>% mutate(test = "B") ) %>% tidyr::pivot_longer( cols = c("k_pvalue_unadj_site", "k_pvalue_adj_site", "k_pvalue_unadj_river", "k_pvalue_adj_river") ) %>% mutate( name = gsub("k_pvalue_", "", name) ) %>% tidyr::separate( col = "name", into = c("adjustment", "grouping"), sep = "_" ) %>% mutate( p_value = k_pvalue_adj_site, layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"), ordered = TRUE), element2 = case_when( stringr::str_detect(element, "^(Zn|Cu|Mg)") ~ paste0(stringr::str_replace(element, "_ppm_m", "["), "]"), TRUE ~ stringr::str_remove(element, "_ppm_m.*") ), p_flag = p_value < 0.1, p_small = (p_value < 0.001), p_value2 = if_else(p_small, 0.001, p_value), test = if_else( test == "A", "including baseline site", "experimental sites only", ), species = if_else( species == "Arav", "A. raveneliana", "L. fasciola" ) # layer = case_when( # layer == "ncr_new" ~ "nacre (youngest)", # layer == "ncr_old" ~ "nacre (older)", # layer == "psm" ~ "prismatic", # layer == "pio" ~ "periostracum", # layer == "epx" ~ "epoxy", # ) ) p <- ggplot( data = pvals, aes(y = -log10(k_pvalue_adj), x = element2, shape = layer, color = layer) ) + geom_point(size = 1) + coord_flip() + scale_y_continuous( name = expression(-log[10]~(p)), limits = -log10(c(1.1, 0.0000001)), breaks = -log10(c(1, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001)), labels = c("1", "0.1", "", "0.01", "0.001", "0.0001", "0.00001"), expand = c(0, 0) ) + scale_color_manual( values = c(ipx = "black", ncr_new = "grey75", ncr_old = "grey50", psm = "blue", pio = "green", opx = "black") ) + facet_grid(test ~ species) + theme_classic() + theme( strip.background = element_blank(), legend.position = "bottom", panel.grid.major.x = element_line(color = "grey50", size = 0.2, linetype = "dotted"), panel.grid.major.y = element_line(color = "grey90", size = 0.2), axis.text.x = element_text(size = 8), axis.line.x = element_line(color = "grey50"), axis.ticks.x = element_line(color = "grey50"), axis.title.x = element_blank(), axis.text.y = element_text(size = 8), axis.line.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y = element_blank(), ) ggsave(here::here("manuscript", "figures", "pvalues.pdf"), p, width = 7, height =5)
site_colors <- c("LiTN 1" = "#99d8c9", "LiTN 2" = "#41ae76", "LiTN 3" = "#005824", "Tuck 1" = "#fdae6b", "Tuck 2" = "#f16913", "Tuck 3" = "#8c2d04", "Baseline" = "#525252") make_plot_data <- function(dt, species){ summarize_specimens(dt, modify_layer = layer_modifier) %>% group_by(species, element, layer) %>% mutate( l1 = log10(l1), value = (l1 - mean(l1))/sd(l1), layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"), labels = c("Inner~Epoxy", "Young~nacre", "Older~nacre", "Prismatic", "Periostracum", "Outer~Epoxy"), ordered = TRUE), element2 = case_when( stringr::str_detect(element, "^(Zn|Cu|Mg)") ~ paste0(stringr::str_replace(element, "_ppm_m", "["), "]"), TRUE ~ stringr::str_remove(element, "_ppm_m.*")), site2 = case_when( site == "Baseline" ~ 0, site == "LiTN 1" ~ 1, site == "LiTN 2" ~ 1.5, site == "LiTN 3" ~ 2, site == "Tuck 1" ~ 3, site == "Tuck 2" ~ 3.5, site == "Tuck 3" ~ 4, ) ) } make_plot_summary_data <- function(plotdt){ plotdt %>% group_by(species, element2, layer, site, site2) %>% summarise( value = median(value) ) } create_summary_plot <- function(plotdt, plot_summary){ plotdt %>% # filter(species == !!.species) %>% filter((mean(value) - 3 * sd(value)) > value | value < mean(value) + 3 * sd(value)) %>% ggplot( aes(x = site2, y = value) ) + geom_point( aes(color = site), alpha = 0.75, size = 0.1, shape = 20 ) + geom_point( data = plot_summary, size = 1, shape = 18, fill = "grey10", color = "grey10" ) + scale_color_manual( values = site_colors, guide = FALSE ) + scale_x_continuous( breaks = c(0, 1, 1.5, 2, 3, 3.5, 4), labels = c("B", "L1", "L2", "L3", "T1", "T2", "T3") ) + facet_grid( layer ~ element2, labeller = label_parsed, switch = "y", scales = "free" ) + # theme_classic() + theme( # strip.text = element_text(angle = 180), panel.grid = element_blank(), panel.background = element_rect(fill = "grey95"), # strip.text = element_text(angle = 40), strip.text.y = element_text(angle = 40), axis.title = element_blank(), axis.ticks.y = element_blank(), axis.ticks.x = element_line(color = "grey80"), axis.text.y = element_blank(), axis.text.x = element_text(size = 6) ) } plotpipe <- . %>% make_plot_data() %>% { x <- . y <- make_plot_summary_data(x) list(x, y) } p1 <- dt %>% plotpipe() p2 <- dt %>% mutate( data = purrr::map( .x = data, .f = ~ .x %>% dplyr::group_by(id, layer) %>% dplyr::filter( distance >= (min(distance) + 15), distance <= (max(distance) - 15) ) ) ) %>% plotpipe() p2 %>% purrr::map( ~ filter(.x, species == "Arav")) %>% do.call(create_summary_plot, args = .) ggsave(here::here("manuscript", "figures", "element_summaries_Arav.pdf"), p, width = 10, height = 6.5)
specimen_summaries <- dt %>% summarize_specimens(modify_layer = layer_modifier) kruskal_pvalues_A <- create_pvals(specimen_summaries) kruskal_pvalues_B <- create_pvals(specimen_summaries, function(x) filter(x, site != "Baseline")) plot_pvals <- bind_rows( kruskal_pvalues_A %>% mutate(test = "A"), kruskal_pvalues_B %>% mutate(test = "B") ) %>% tidyr::pivot_longer( cols = c("k_pvalue_unadj_site", "k_pvalue_adj_site", "k_pvalue_unadj_river", "k_pvalue_adj_river") ) %>% mutate( name = gsub("k_pvalue_", "", name) ) %>% tidyr::separate( col = "name", into = c("adjustment", "grouping"), sep = "_" ) %>% mutate( layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"), labels = c("Inner~Epoxy", "Young~nacre", "Older~nacre", "Prismatic", "Periostracum", "Outer~Epoxy"), ordered = TRUE), element2 = case_when( stringr::str_detect(element, "^(Zn|Cu|Mg)") ~ paste0(stringr::str_replace(element, "_ppm_m", "["), "]"), TRUE ~ stringr::str_remove(element, "_ppm_m.*") ), ) %>% mutate( value = if_else(value > 0.1, 1, value), value = log10(value), value = if_else(species == "Lfas", abs(value), value), layer_modifier = as.integer(layer) + (0.5*(species == "Lfas")), grouping = case_when( grouping == "river" ~ "Difference between rivers?", grouping == "site" ~ "Difference between sites?" ), test = case_when( test == "A" ~ "Including baseline site", test == "B" ~ "Excluding baseline site" ) ) create_pval_plot <- function(pvals){ ggplot( data = pvals, aes(x = layer_modifier, y = element2, fill = value) ) + geom_tile() + geom_vline( xintercept = 1:5+0.75, color = "grey25", size = 0.5 ) + scale_x_continuous( breaks = 1:6+0.25, labels = c("Inner Epoxy", "Young nacre", "Older nacre", "Prismatic", "Periostracum", "Outer Epoxy") ) + scale_fill_gradient2( "p-values", breaks = c(3, 0, -3), labels = c("0.001 L. fas", "1", "0.001 A. rav") # breaks = c(1, 0.1, 0.01, 0.001, 0.0001), # low="blue", high="white" ) + facet_grid(test ~ grouping) + theme( plot.background = element_blank(), panel.background = element_blank(), axis.title.y = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank(), axis.text.x = element_text(angle = 45, size = 8, hjust = 1, vjust = 1) ) } create_pval_plot(pvals %>% filter(adjustment == "adj")) ggsave(here::here("manuscript", "figures", "pval_summaries_1.pdf"), p, width = 6.5, height = 6.5)
pvalues_tab <- pvals %>% mutate( k_pvalue_unadj = case_when( k_pvalue_unadj < 0.001 ~ "<0.001", k_pvalue_unadj < 0.01 ~ sprintf("%.03f", k_pvalue_unadj), TRUE ~ sprintf("%.02f", k_pvalue_unadj)), k_pvalue_adj = case_when( k_pvalue_adj < 0.001 ~ "<0.001", k_pvalue_adj < 0.01 ~ sprintf("%.03f", k_pvalue_adj), TRUE ~ sprintf("%.02f", k_pvalue_adj)), ) %>% mutate( k_pvalue = sprintf("%s (a: %s)", k_pvalue_unadj, k_pvalue_adj), layer = factor(layer, levels = c("ncr_new", "ncr_old", "psm", "pio", "epx"), ordered = TRUE), species = factor( species, levels = c("Arav", "Lfas"), ordered = TRUE ), species_layer = interaction(layer, species, sep = "_") ) %>% ungroup() %>% select( test, element, species_layer, value = k_pvalue ) %>% tidyr::pivot_wider( names_from = c("species_layer"), names_sort = TRUE, values_from = "value" ) # "'Test' row p-values are from Kruskal-Wallis test of a null hypothesis that the location parameters are the same across sites. P-values are presented as unadjusted (adjusted for LA-ICP-MS batch). The Test A includes the baseline site in the comparison, while Test excludes this site. A small p-value indicates that the distribution of L-moment measures of location may be different at least one site."
Within specimen differences of inner and outer epoxy L$_1$ moments reveals a trend toward larger values in the outer epoxy, as most the distributions in the figure below skew to the left.
specimen_summaries %>% filter(layer %in% c("ipx", "opx")) %>% group_by(element, id) %>% summarise(d = diff(l1)) %>% ggplot(aes(x = d)) + geom_histogram() + facet_wrap(element ~ ., scales = 'free')
What if we don't group transects within a specimen?
dt <- analysis_data( contrast = "all", test_data_FUN = function(data) data, elements = "all", signals = c("base", "mad_10"), group_by_valve = FALSE, transect_opts = list( .layers = c("ipx", "opx"), .min_n_obs = 5L ) ) dt %>% select(species, element, signal, data) %>% tidyr::unnest(data) %>% group_by(species, element, signal, id, transect, site, layer) %>% summarise( m = mean(value) ) %>% group_by(species, element, signal, id, transect, site) %>% summarise( d = diff(m) ) %>% filter(signal == "mad_10") %>% ggplot( aes(x = d) ) + geom_histogram() + facet_wrap(element ~ ., scales = 'free')
dt %>% filter(signal == "mad_10") %>% select(species, element, data) %>% tidyr::unnest(data) %>% group_by(id, transect, site, layer) %>% mutate(ds = (distance - max(distance)) ) %>% mutate(ds = if_else(layer == "ipx", abs(ds), ds)) %>% ggplot( aes(x = ds, y = value, group = id) ) + geom_line() + facet_wrap(element ~ ., scales = 'free')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.