This document does a preliminary, ad-hoc analysis of the data in
"extdata/Data/Mussel Shell Edge Net Intensities.xlsx"
,
which contains:
library(dplyr) library(tidyr) library(ggplot2) library(grid) library(gridExtra) library(gt)
xldt <- readxl::read_excel( path = here::here("extdata/Data/Mussel Shell Edge Net Intensities.xlsx") )
TODO: resolve questions about data
sample
column has inconsistencies.
I (BS) understand the common pattern to be "{valve_id}.{transect_id}{n|p}
.
But, for example, A3.1na
, A5.1p_1
and P111.1P
do not conform to this pattern.Bring in the valve_data
, which contains information on sites and outcomes.
valve_data <- readRDS(here::here("data/valve_data.rds")) %>% select(id, species, river, site, site_num, dead, moribund) %>% distinct()
# Extract the mean value and reshape the data to make it easier to analyze. dt <- xldt %>% select( drawer = Drawer , sample = Sample , duration = `Duration (s)` , width = starts_with("Width") , ends_with("_mean") ) %>% # HARDCODES mutate( sample = case_when( sample == "P111.1P" ~ "P111.1p" , TRUE ~ sample ) ) %>% filter( # Don't know what layer this is !(sample %in% c("P164.2")) ) %>% mutate( id = stringr::str_extract(sample, "^[A|C|P]\\d{1,3}") , transect = stringr::str_extract(sample, "(?<=\\.)[\\d]") , layer = tolower(stringr::str_extract(sample, "[n|p]")) ) %>% pivot_longer( cols = ends_with("_mean") , names_to = "element" , values_to = "value" ) %>% mutate( element = stringr::str_replace(element, "_CPS_mean", "") ) %>% left_join(valve_data, by = "id")
TotalBeam
tdt <- dt %>% filter(site != "Baseline", element != "TotalBeam") %>% group_by(species, river, site, element, id, layer) %>% summarise( # TODO: weight by duration (?) value = mean(value), .groups = "drop" ) %>% group_by(species, element, layer) make_pvalue_table <- function(x){ x %>% tidyr::pivot_wider( names_from = c(species, layer), values_from = p ) %>% ungroup() %>% gt() %>% cols_label( `A. raveneliana_n` = "nacre", `A. raveneliana_p` = "perio", `L. fasciola_n` = "nacre", `L. fasciola_p` = "perio" ) %>% fmt_number( columns = matches("_p|_n"), decimals = 4, ) %>% tab_spanner( label = "A. raveneliana", columns = c("A. raveneliana_n", "A. raveneliana_p") ) %>% tab_spanner( label = "L. fasciola", columns = c("L. fasciola_n", "L. fasciola_p") ) }
The values in the table are p-values from a Kruskal-Wallis test, a rank-based ANOVA-like test.
tdt %>% summarise( p = kruskal.test(value ~ site)$p.value , .groups = "drop" ) %>% make_pvalue_table()
The values in the table are p-values from a Kruskal-Wallis test, a rank-based ANOVA-like test.
tdt %>% summarise( p = kruskal.test(value ~ (site == "Tuck 1"))$p.value , .groups = "drop" ) %>% make_pvalue_table()
median
values of each site.make_element_plot <- function(x, element = NULL){ if (is.null(element)) { x <- x } else { x <- x %>% filter(element == !! element) } ggplot( data = x, aes(x = site, y = value, group = river) ) + geom_point(size = 0.25) + geom_line( data = x %>% group_by(species, river, site, layer, element) %>% summarise( value = median(value), .groups = "drop"), size = 0.1) }
element_plot_data <- dt %>% group_by(species, river, site, element, id, layer) %>% summarise( # TODO: weight by duration (?) # Take mean across transects within a valve value = mean(value), .groups = "drop" ) %>% filter( site != "Baseline" ) %>% mutate( site = factor( site, levels = c("Tuck 1", "Tuck 2", "Tuck 3", "LiTN 1", "LiTN 2", "LiTN 3")) )
make_single_element_plot <- function(element){ f <- function(species, element){ element_plot_data %>% filter(species == !! species) %>% make_element_plot(element = element) + facet_grid(layer ~ ., scales = "free") + ggtitle(paste(species, "-", element)) + theme( axis.text.y = element_blank(), axis.text.x = element_text(angle = 0), strip.text.y = element_text(angle = 0), axis.title.x = element_blank(), axis.title.y = element_blank() ) } grid.arrange( f("A. raveneliana", element), f("L. fasciola", element), nrow = 1 ) }
elements <- dt %>% distinct(element) %>% filter(element != "TotalBeam") %>% pull() %>% sort() for(element in elements){ cat("\n") cat("###", element, "\n") make_single_element_plot(element) cat("\n") }
do_princ <- function(dt) { fdt <- dt %>% distinct(id, site, river) dt %>% select(id, element, value) %>% pivot_wider( id_cols = "id" , names_from = "element" , values_from = "value") %>% .[, -1] %>% as.matrix() %>% prcomp(center = TRUE, scale = TRUE) -> pr temp <- hclust(dist(pr$x), method = "ward.D") mojema <- mean(temp$height) + 3.75 * sd(temp$height) g <- length(temp$height[temp$height>mojema]) + 1 if(g == 1){ return(1) } list( dt = fdt %>% mutate(PC1 = pr$x[, 1], PC2 = pr$x[, 2]) , clust = cutree(temp, k=g)) }
element_plot_data %>% ungroup() %>% filter( layer == "p" , element != "TotalBeam" , species == "A. raveneliana" , site != "Baseline" ) %>% do_princ() %>% .[["dt"]] %>% ggplot( aes(x = PC1, y = PC2, color = site) ) + geom_point()
element_plot_data %>% ungroup() %>% filter( layer == "n" , element != "TotalBeam" , species == "A. raveneliana" , site != "Baseline" ) %>% do_princ() %>% .[["dt"]] %>% ggplot( aes(x = PC1, y = PC2, color = site) ) + geom_point()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.