knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo    = FALSE,
  message = FALSE
)
library(elktoeChemistry)
library(dplyr)
library(gt)
library(ggplot2)
analysis_data  <- readRDS(params$inputs$analysis)
dt <-
  analysis_data(
    contrast = "all",
    test_data_FUN = function(data) data,
    elements  = "all", 
    signals   = c("mad_10"),
    group_by_valve = TRUE,
    transect_opts  = list(
      .layers    = c("ipx", "ncr", "psm", "pio", "opx"),
      .min_n_obs = 5L
    )
  ) 
layer_modifier <- function(layer, annuli){
   case_when(
      layer == "ncr" & annuli == "A" ~ "ncr_new",
      layer == "ncr" & annuli != "A" ~ "ncr_old",
      TRUE ~ layer
  )
}
make_summary_table_data <- function(specimen_summaries){
  specimen_summaries %>%
  group_by(element, species, site, layer) %>%
  mutate(
    iv = if_else(v == 0, 1, 1/v)
  ) %>%
  # Choose specimen-level summary statistic
  mutate(
    value = l1
    # value = md
  ) %>%
  summarise(
    mean   = mean(value),
    sd     = sd(value),
    median = median(value),
    iqr    = IQR(value),
    ivm    = mean(value*iv)/sum(iv),
    cv     = mean/sd
  ) 
}

make_summary_table <- function(table_data){
  # browser()
  table_data %>%
  mutate(
    mdvalue = sprintf("%.02g (%.02g)", median, iqr),
      # if_else(median <= 9.99, 
      #   sprintf("%.02g (%.03g)", median, iqr),
      #   sprintf("%.1f (%.1f)", median, iqr)),
    mnvalue = 
      if_else(mean <= 9.99, 
              sprintf("%.02g (%.03g)", mean, sd),
              sprintf("%.1f (%.1f)", mean, sd)),
    layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"),
                   ordered =  TRUE),
    species = factor(
      species, levels = c("Arav", "Lfas"), ordered = TRUE
    ),
    species_layer = interaction(layer, species, sep = "_")
  ) %>%
  ungroup() %>%
  select(
    element, species_layer, site, 
    l_value = median,
    s_value = iqr
  ) %>%
  tidyr::pivot_wider(
    # names_from = c("layer"),
    names_from = c("species_layer"),
    names_sort = TRUE,
    values_from = c("l_value", "s_value")
  ) 
}

cols_merge_ls <- function(tbl, layer, species){
  vs <- paste(c("l", "s"), "value", layer, species, sep = "_")

  cols_merge(
     data = tbl,
     columns = vs,
     hide_columns = vs[2],
     pattern      = "{1} ({2})"
   ) 
}

make_gt_table <- function(table_data){

  table_data %>%
  gt(
    rowname_col = "site",
    groupname_col = c("element")
  ) %>%
  tab_header(
    title = "Average mol/Ca mmol concentrations by layer",
    subtitle = "Values are site-level median (interquartile range) of specimen-level probability of censoring weighted L-moment location parameter for each layer."
  ) %>%
  fmt_number(
    columns  = starts_with("l_value"),
    n_sigfig = 2
  ) %>%
  fmt_number(
    columns  = starts_with("s_value"),
    n_sigfig = 2
  ) %>%
  cols_merge_ls("ncr_new", "Arav") %>%
  cols_merge_ls("ncr_new", "Lfas") %>%
  cols_merge_ls("ncr_old", "Arav") %>%
  cols_merge_ls("ncr_old", "Lfas") %>%
  cols_merge_ls("psm", "Arav") %>%
  cols_merge_ls("psm", "Lfas") %>%
  cols_merge_ls("pio", "Arav") %>%
  cols_merge_ls("pio", "Lfas") %>%
  cols_merge_ls("ipx", "Arav") %>%
  cols_merge_ls("ipx", "Lfas") %>%
  cols_merge_ls("opx", "Arav") %>%
  cols_merge_ls("opx", "Lfas") %>%
  cols_label(
    l_value_ipx_Arav = "inner epoxy",
    l_value_ncr_new_Arav = "nacre (youngest annuli)",
    l_value_ncr_old_Arav = "nacre (older annuli)",
    l_value_psm_Arav = "prismatic",
    l_value_pio_Arav = "periostracum",
    l_value_opx_Arav = "outer epoxy",
    l_value_ipx_Lfas = "inner epoxy",
    l_value_ncr_new_Lfas = "nacre (youngest annuli)",
    l_value_ncr_old_Lfas = "nacre (older annuli)",
    l_value_psm_Lfas = "prismatic",
    l_value_pio_Lfas = "periostracum",
    l_value_opx_Lfas = "outer epoxy"
  ) %>%
  tab_spanner(
    "A. raveneliana",
    columns = ends_with("Arav"), 
  )  %>%
  tab_spanner(
    "L. fasciola", 
    columns = ends_with("Lfas")
  )
}

Summary of elements by species/site/layer

specimen_summaries <- 
  summarize_specimens(dt, modify_layer = layer_modifier)
tab <-
  specimen_summaries %>%
  make_summary_table_data() %>%
  make_summary_table() %>%
  mutate(
    element =  case_when(
      stringr::str_detect(element, "^(Zn|Cu|Mg)") ~
        paste0(stringr::str_replace(element, "_ppm_m", "["), "]"),
      TRUE ~ stringr::str_remove(element, "_ppm_m.*")
    )
  ) %>%
  make_gt_table() %>%
  tab_options(
    data_row.padding = 2,
    table.font.size = 8
  ) %>%
  cols_width(
    vars(site) ~ px(50),
    everything() ~ px(50)
  )


tab
tab %>% 
  gtsave("element_summary.tex", here::here("manuscript", "figures"))
create_pvals <- function(summarydt, filterFUN = identity){
  summarydt %>%
  group_by(element, species, layer) %>%
  filterFUN() %>%
  summarise(
    k_pvalue_unadj_site = kruskal.test(x = l1, g = site)[["p.value"]],
    k_pvalue_adj_site = {
      x <- residuals(lm(l1 ~ -1 + factor(drawer)))
      kruskal.test(x = x, g = site)[["p.value"]]
    } ,
    k_pvalue_unadj_river = kruskal.test(x = l1, g = river)[["p.value"]],
    k_pvalue_adj_river = {
      x <- residuals(lm(l1 ~ -1 + factor(drawer)))
      kruskal.test(x = x, g = river)[["p.value"]]
    } 
  )
}
kruskal_pvalues_A <- 


kruskal_pvalues_B <- 
  specimen_summaries %>%
  filter(site != "Baseline") %>%
  group_by(element, species, layer) %>%
  summarise(
    k_pvalue_unadj_site = kruskal.test(x = l1, g = site)[["p.value"]],
    k_pvalue_adj_site = {
      x <- residuals(lm(l1 ~ -1 + factor(drawer)))
      kruskal.test(x = x, g = site)[["p.value"]]
    },
    k_pvalue_unadj_river = kruskal.test(x = l1, g = river)[["p.value"]],
    k_pvalue_adj_river = {
      x <- residuals(lm(l1 ~ -1 + factor(drawer)))
      kruskal.test(x = x, g = river)[["p.value"]]
    } 
  )

pvals <- 
  bind_rows(
    kruskal_pvalues_A %>% mutate(test = "A"),
    kruskal_pvalues_B %>% mutate(test = "B")
  ) %>% 
  tidyr::pivot_longer(
    cols = c("k_pvalue_unadj_site", "k_pvalue_adj_site",
             "k_pvalue_unadj_river", "k_pvalue_adj_river")
  ) %>%
  mutate(
    name = gsub("k_pvalue_", "", name)
  ) %>%
  tidyr::separate(
    col = "name",
    into = c("adjustment", "grouping"),
    sep = "_"
  ) %>%
  mutate(
    p_value = k_pvalue_adj_site,
    layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"),
               ordered =  TRUE),
    element2 = case_when(
      stringr::str_detect(element, "^(Zn|Cu|Mg)") ~
        paste0(stringr::str_replace(element, "_ppm_m", "["), "]"),
      TRUE ~ stringr::str_remove(element, "_ppm_m.*")
    ),
    p_flag  = p_value < 0.1,
    p_small = (p_value < 0.001),
    p_value2 = if_else(p_small, 0.001, p_value),
    test = if_else(
      test == "A",
      "including baseline site",
      "experimental sites only",
    ),
    species = if_else(
      species == "Arav",
      "A. raveneliana",
      "L. fasciola"
    )
    # layer = case_when(
    #       layer == "ncr_new" ~ "nacre (youngest)",
    #       layer == "ncr_old" ~ "nacre (older)",
    #       layer == "psm" ~ "prismatic",
    #       layer == "pio" ~ "periostracum",
    #       layer == "epx" ~ "epoxy",
    # )
  )

p <- 
  ggplot(
    data = pvals,
    aes(y = -log10(k_pvalue_adj), x = element2, shape = layer, color = layer)
  ) + 
  geom_point(size = 1) + 
  coord_flip() + 
  scale_y_continuous(
    name   = expression(-log[10]~(p)),
    limits = -log10(c(1.1, 0.0000001)),
    breaks =  -log10(c(1, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001)),
    labels = c("1", "0.1", "", "0.01", "0.001", "0.0001", "0.00001"),
    expand = c(0, 0)
  ) +
  scale_color_manual(
    values = c(ipx = "black", ncr_new = "grey75", ncr_old = "grey50",
               psm = "blue", pio = "green", opx = "black")
  ) +
  facet_grid(test ~ species) +
  theme_classic() +
  theme(
      strip.background = element_blank(),
      legend.position = "bottom",
      panel.grid.major.x = element_line(color = "grey50", size = 0.2,
                                        linetype = "dotted"),
      panel.grid.major.y = element_line(color = "grey90", size = 0.2),
      axis.text.x  = element_text(size = 8),
      axis.line.x  = element_line(color = "grey50"),
      axis.ticks.x = element_line(color = "grey50"),
      axis.title.x = element_blank(),
      axis.text.y  = element_text(size = 8),
      axis.line.y  = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
    )
ggsave(here::here("manuscript", "figures", "pvalues.pdf"), p, width = 7, height =5)

site_colors <- 
  c("LiTN 1" = "#99d8c9",
    "LiTN 2" = "#41ae76",
    "LiTN 3" = "#005824",
    "Tuck 1" = "#fdae6b",
    "Tuck 2" = "#f16913",
    "Tuck 3" = "#8c2d04",
    "Baseline" = "#525252")

make_plot_data <- function(dt, species){
  summarize_specimens(dt, modify_layer = layer_modifier) %>%
  group_by(species, element, layer) %>%
  mutate(
    l1 = log10(l1),
    value = (l1 - mean(l1))/sd(l1),
    layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"),
                   labels = c("Inner~Epoxy", "Young~nacre", "Older~nacre", "Prismatic", 
                              "Periostracum", "Outer~Epoxy"),
               ordered =  TRUE),
    element2 = case_when(
      stringr::str_detect(element, "^(Zn|Cu|Mg)") ~
        paste0(stringr::str_replace(element, "_ppm_m", "["), "]"),
      TRUE ~ stringr::str_remove(element, "_ppm_m.*")),
    site2 = case_when(
      site == "Baseline" ~ 0,
      site == "LiTN 1"   ~ 1,
      site == "LiTN 2"   ~ 1.5,
      site == "LiTN 3"   ~ 2,
      site == "Tuck 1"   ~ 3,
      site == "Tuck 2"   ~ 3.5,
      site == "Tuck 3"   ~ 4,
    )
  )
}

make_plot_summary_data <- function(plotdt){
  plotdt %>%  
  group_by(species, element2, layer, site, site2) %>%
  summarise(
    value = median(value)
  )
}

create_summary_plot <- function(plotdt, plot_summary){
  plotdt %>%
  # filter(species == !!.species) %>%
  filter((mean(value) - 3 * sd(value)) > value | value < mean(value) + 3 * sd(value)) %>%
  ggplot(
    aes(x = site2, y = value)
  ) + 
  geom_point(
    aes(color = site),
    alpha = 0.75, 
    size = 0.1,
    shape = 20
  ) +
  geom_point(
    data = plot_summary,
    size = 1,
    shape = 18,
    fill = "grey10",
    color = "grey10"
  )  +
  scale_color_manual(
    values = site_colors,
    guide = FALSE
  ) +
  scale_x_continuous(
    breaks = c(0, 1, 1.5, 2, 3, 3.5, 4),
    labels = c("B", "L1", "L2", "L3", "T1", "T2", "T3")
  )  +
  facet_grid(
    layer ~ element2,
    labeller = label_parsed,
    switch = "y",
    scales = "free"
  ) +
  # theme_classic() +
  theme(
    # strip.text =  element_text(angle = 180),
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "grey95"),
    # strip.text = element_text(angle = 40),
    strip.text.y = element_text(angle = 40),
    axis.title   = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(color = "grey80"),
    axis.text.y  = element_blank(),
    axis.text.x  = element_text(size = 6)
  )

}

plotpipe <- .  %>%
  make_plot_data() %>%
  {
    x <- .
    y <- make_plot_summary_data(x)
    list(x, y)
  }

p1 <- dt %>% plotpipe()

p2 <- dt %>%
  mutate(
    data = purrr::map(
      .x = data,
      .f = ~ .x %>%
          dplyr::group_by(id, layer) %>%
          dplyr::filter(
            distance >= (min(distance) + 15), 
            distance <= (max(distance) - 15)
          )
    )
  ) %>% plotpipe()

p2 %>% 
  purrr::map( ~ filter(.x, species == "Arav")) %>%
  do.call(create_summary_plot, args = .)


ggsave(here::here("manuscript", "figures", "element_summaries_Arav.pdf"), p, 
       width = 10, height = 6.5)
specimen_summaries <- 
  dt %>%
  summarize_specimens(modify_layer = layer_modifier)
kruskal_pvalues_A <- create_pvals(specimen_summaries)
kruskal_pvalues_B <- create_pvals(specimen_summaries,
                                  function(x) filter(x, site != "Baseline"))

plot_pvals <- 
  bind_rows(
    kruskal_pvalues_A %>% mutate(test = "A"),
    kruskal_pvalues_B %>% mutate(test = "B")
  ) %>% 
  tidyr::pivot_longer(
    cols = c("k_pvalue_unadj_site", "k_pvalue_adj_site",
             "k_pvalue_unadj_river", "k_pvalue_adj_river")
  ) %>%
  mutate(
    name = gsub("k_pvalue_", "", name)
  ) %>%
  tidyr::separate(
    col = "name",
    into = c("adjustment", "grouping"),
    sep = "_"
  ) %>%
  mutate(
    layer = factor(layer, levels = c("ipx", "ncr_new", "ncr_old", "psm", "pio", "opx"),
                   labels = c("Inner~Epoxy", "Young~nacre", "Older~nacre", "Prismatic", 
                              "Periostracum", "Outer~Epoxy"),
                   ordered =  TRUE),
    element2 = case_when(
      stringr::str_detect(element, "^(Zn|Cu|Mg)") ~
        paste0(stringr::str_replace(element, "_ppm_m", "["), "]"),
      TRUE ~ stringr::str_remove(element, "_ppm_m.*")
    ),
  ) %>%
  mutate(
    value = if_else(value > 0.1, 1, value),
    value = log10(value),
    value = if_else(species == "Lfas", abs(value), value),
    layer_modifier = as.integer(layer) + (0.5*(species == "Lfas")),
    grouping = case_when(
      grouping == "river" ~ "Difference between rivers?",
      grouping == "site" ~ "Difference between sites?"
    ),
    test = case_when(
      test == "A" ~ "Including baseline site",
      test == "B" ~ "Excluding baseline site"
    )
  ) 


create_pval_plot <- function(pvals){
  ggplot(
    data = pvals,
    aes(x = layer_modifier, y = element2, fill = value)
  ) +
  geom_tile() +
  geom_vline(
    xintercept = 1:5+0.75,
    color = "grey25",
    size = 0.5
  ) +
  scale_x_continuous(
    breaks = 1:6+0.25,
    labels = c("Inner Epoxy", "Young nacre", "Older nacre", 
               "Prismatic", "Periostracum", "Outer Epoxy")
  ) +
  scale_fill_gradient2(
    "p-values",
    breaks = c(3, 0, -3),
    labels = c("0.001 L. fas", "1", "0.001 A. rav")
    # breaks = c(1, 0.1, 0.01, 0.001, 0.0001),
    # low="blue", high="white"
  ) +
  facet_grid(test ~ grouping) +
  theme(
    plot.background = element_blank(),
    panel.background = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x  = element_text(angle = 45, size = 8, hjust = 1, vjust = 1)
  )
}

create_pval_plot(pvals %>% filter(adjustment == "adj"))

ggsave(here::here("manuscript", "figures", "pval_summaries_1.pdf"), p, 
       width = 6.5, height = 6.5)
pvalues_tab <- 
  pvals %>%
  mutate(
    k_pvalue_unadj =  case_when(
            k_pvalue_unadj < 0.001 ~  "<0.001",
            k_pvalue_unadj < 0.01  ~  sprintf("%.03f", k_pvalue_unadj),
            TRUE ~ sprintf("%.02f", k_pvalue_unadj)),
    k_pvalue_adj =  case_when(
            k_pvalue_adj < 0.001 ~  "<0.001",
            k_pvalue_adj < 0.01  ~  sprintf("%.03f", k_pvalue_adj),
            TRUE ~ sprintf("%.02f", k_pvalue_adj)),
  ) %>%
  mutate(
    k_pvalue = sprintf("%s (a: %s)", k_pvalue_unadj, k_pvalue_adj),
    layer = factor(layer, levels = c("ncr_new", "ncr_old", "psm", "pio", "epx"),
                   ordered =  TRUE),
    species = factor(
      species, levels = c("Arav", "Lfas"), ordered = TRUE
    ),
    species_layer = interaction(layer, species, sep = "_")
  ) %>%
  ungroup() %>%
  select(
    test, element, species_layer, value = k_pvalue
  ) %>%
  tidyr::pivot_wider(
    names_from = c("species_layer"),
    names_sort = TRUE,
    values_from = "value"
  ) 
# "'Test' row p-values are from Kruskal-Wallis test of a null hypothesis that the location parameters are the same across sites. P-values are presented as unadjusted (adjusted for LA-ICP-MS batch). The Test A includes the baseline site in the comparison, while Test excludes this site. A small p-value indicates that the distribution of L-moment measures of location may be different at least one site."

Comparison of inner and outer epoxy

Within specimen differences of inner and outer epoxy L$_1$ moments reveals a trend toward larger values in the outer epoxy, as most the distributions in the figure below skew to the left.

specimen_summaries %>% 
  filter(layer %in% c("ipx", "opx")) %>% 
  group_by(element, id) %>% summarise(d = diff(l1)) %>% 
  ggplot(aes(x = d)) + 
  geom_histogram() + 
  facet_wrap(element ~ ., scales = 'free')

What if we don't group transects within a specimen?

dt <-
  analysis_data(
    contrast = "all",
    test_data_FUN = function(data) data,
    elements  = "all", 
    signals   = c("base", "mad_10"),
    group_by_valve = FALSE,
    transect_opts  = list(
      .layers    = c("ipx", "opx"),
      .min_n_obs = 5L
    )
  ) 

dt %>% 
  select(species, element, signal, data) %>% 
  tidyr::unnest(data) %>%
  group_by(species, element, signal, id, transect, site, layer) %>%
  summarise(
    m = mean(value)
  ) %>%
  group_by(species, element, signal, id, transect, site) %>%
  summarise(
    d = diff(m)
  ) %>%
  filter(signal == "mad_10") %>%
  ggplot(
    aes(x = d)
  ) + 
  geom_histogram() +
  facet_wrap(element ~ ., scales = 'free')
dt %>% 
  filter(signal == "mad_10") %>%
  select(species, element, data) %>% 
  tidyr::unnest(data) %>%
  group_by(id, transect, site, layer) %>%
  mutate(ds = (distance - max(distance)) ) %>%
  mutate(ds = if_else(layer == "ipx", abs(ds), ds)) %>%
  ggplot(
    aes(x = ds, y = value, group = id)
  ) +
  geom_line() + 
  facet_wrap(element ~ ., scales = 'free')


bsaul/elktoeChemistry documentation built on Nov. 17, 2022, 8:10 a.m.