In bsaul/elktoeChemistry: Data and code for Elktoe Chemistry project

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo    = FALSE,
  message = FALSE,
  warning = FALSE
)

library(elktoeChemistry)
library(dplyr)
library(entropart)
library(ggplot2)

cdt <- readRDS("data/valve_chemistry.rds")
mdt <- readRDS("data/valve_measurements.rds")
lod <- readRDS('data/lower_detection_limits.rds')
vdt <- readRDS('data/valve_data.rds')

chemistry <- 
  cdt %>%
  left_join(distinct(mdt, drawer, id, transect), by = c("id", "transect")) %>%
  tidyr::pivot_longer(
    cols = !one_of("id", "transect", "drawer", "distance"),
    names_to = "element"
  ) %>%
  left_join(lod, by = c("drawer", "element")) %>%
  mutate(
    value = if_else(
      is.na(lod),
      pmax(0, value),
      pmax(lod, value)
    )
  ) %>%
  select(-lod) %>%
  tidyr::pivot_wider(
    names_from = "element",
    values_from = "value"
  ) %>%
  group_by(id, transect, drawer) %>%
  tidyr::nest(
    dst = distance,
    cps = ends_with("CPS"),
    els = matches("m[0-9]{2,3}$")
  ) %>%
  mutate(
    range = purrr::map(dst, range),
    ruler = purrr::map(range, ~ function(x) (x * 2.881) + min(.x)),
    cps   = purrr::map(cps, as.matrix),
    els   = purrr::map(els, as.matrix)
  ) 

measurements <- 
  mdt %>%
  group_by(drawer, id, transect) %>%
  group_nest(.key = "meas")

adt <- 
  left_join(
    chemistry,
    measurements,
    by = c("id", "transect", "drawer")
  )

#' Create a beta function
#' 
#' @param trFUNs a `list` of functions which transforms the matrix `x`, 
#'    Defaults to `list(identity)`.
#' @param cpFUN a changepoint function 
make_beta <- function(cpFUN, trFUNs = list(identity)){
  beta <- purrr::compose(cpFUN, purrr::compose(!!!trFUNs))
  function(x){
    purrr::safely(beta)(x)
  }
}

#' View changepoints from a "changepoint" object
view_cp <- function(x){
  UseMethod("view_cp")
}

view_cp.cpt.range <- function(x){
  x@cpts
}

view_cp.ocp <- function(x){
  x[["changepoint_lists"]][["maxCPs"]][[1]]
}

view_cp.wbs <- function(x){
  x[["cpt.aft"]]
}

#' 
make_comparator <- 
  function(feature_measurements, ruler){
    force(ruler)
    function(changepoints){
      cpd   <- ruler(changepoints)
      diffs <- outer(cpd, feature_measurements, "-")
      apply(diffs, 2, function(z) {
        i <- which.min(abs(z))
        list(
          i = i,
          e = z[i], 
          d = cpd[i]
        ) 
      })
    }
  }

compute_alignment_error <- function(comparison, pos, .f = identity){
  picks <- comparison[pos]
  sum(.f(purrr::map_dbl(picks, "e")))
}

is_error <- function(l){
  rlang::is_empty(l[["result"]])
}

continue_safely <- function(f, l){
  `if`(is_error(l), 
       list(error = l[["error"]], result = NULL),
       list(error = NULL,         result = f(l[["result"]])))
}

sum_safely <- function(l){
  purrr::reduce(
    .x = l,
    .f = function(x, y) {
      `if`(is_error(y), 
           purrr::modify_at(x, "errors", ~ .x + 1),
           purrr::modify_at(x, "sum",    ~ .x + y$result)
           ) 
    },
    .init = list(errors = 0, sum = 0)
  )
}

make_methods_applicator <- function(methods) {
  function(x) { purrr::map(methods, ~ .x(x)) }
}

div_q2 <- function(x){
  apply(x, 1, function(r) entropart::Diversity(r, q = 2, Correction = "None"))
}

butter_2_5_low <- function(x) { signal::filter(signal::butter(2, 1/5, "low"), x) }
butter_2_5_low_sweep <- function(x) { apply(x, 2, function(c) butter_2_5_low(c)) }

mad_10 <- function(x) { pracma::hampel(x, k = 10)[["y"]] }
mad_20 <- function(x) { pracma::hampel(x, k = 20)[["y"]] }
mad_30 <- function(x) { pracma::hampel(x, k = 30)[["y"]] }

mad_10_sweep <- function(x) { apply(x, 2, function(c) mad_10(c) ) }
mad_20_sweep <- function(x) { apply(x, 2, function(c) mad_20(c) ) }
mad_30_sweep <- function(x) { apply(x, 2, function(c) mad_30(c) ) }

methods_els <- list(
  list(
    tr = list(function(x) c(0, diff(x)), div_q2),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_10_sweep),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_20_sweep),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_30_sweep),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2, mad_10_sweep),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2, mad_20_sweep),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ), 
  list(
    tr = list(div_q2, mad_30_sweep),
    cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
  ),

  list(
    tr = list(function(x) c(0, diff(x)), div_q2),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_10_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_20_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_30_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2, mad_10_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2, mad_20_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ), 
  list(
    tr = list(div_q2, mad_30_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2, butter_2_5_low_sweep),
    cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
  ),  

  list(
    tr = list(function(x) c(0, diff(x)), div_q2),
    cp = function(x){ changepoint::cpt.mean(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_10_sweep),
    cp = function(x){ changepoint::cpt.mean(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_20_sweep),
    cp = function(x){ changepoint::cpt.mean(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2, mad_30_sweep),
    cp = function(x){ changepoint::cpt.mean(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(div_q2),
    cp = function(x){ changepoint::cpt.mean(x, method = "BinSeg", Q = 15) }
  ),
  list(
    tr = list(log1p, div_q2),
    cp = function(x){ ocp::onlineCPD(x) }
  ),
  list(
    tr = list(function(x) c(0, diff(x)), div_q2),
    cp = function(x){ ocp::onlineCPD(x) }
  ),
  list(
    tr = list(log1p),
    cp = function(x){ ocp::onlineCPD(x, multivariate = TRUE) }
  )
)

methods_cps <- 
  list(
    list(
      tr = list(function(x){ x[ , 1] }),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_10_sweep),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_20_sweep),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_30_sweep),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_10_sweep),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_20_sweep),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_30_sweep),
      cp = function(x){ changepoint::cpt.meanvar(x, method = "BinSeg", Q = 15) }
    ),


    list(
      tr = list(function(x){ x[ , 1] }),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_10_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_20_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_30_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, butter_2_5_low_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_10_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_20_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_30_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    list(
      tr = list(function(x){ exp(log1p(x[ , 2]) - log1p(x[ , 1])) }, butter_2_5_low_sweep),
      cp = function(x){ changepoint::cpt.var(x, method = "BinSeg", Q = 15) }
    ),
    # list(
    #   tr = list(function(x){ x[ , 1] }),
    #   cp = function(x){ ocp::onlineCPD(x) }
    # ),
    list(
      tr = list(function(x){ x[ , 2] }),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_10_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, mad_20_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ x[ , 2] }, butter_2_5_low_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(log1p),
      cp = function(x){ ocp::onlineCPD(x, multivariate = TRUE) }
    ),
    list(
      tr = list(mad_10, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(mad_20, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(mad_30, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(butter_2_5_low, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_10_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_20_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_30_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, butter_2_5_low_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(mad_10, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_10_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(mad_20, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_20_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(mad_30, function(x){ expm1(log1p(x[ , 2]) - log1p(x[ , 1])) }, mad_30_sweep),
      cp = function(x){ ocp::onlineCPD(x) }
    ),
    list(
      tr = list(function(x) x[ , 2]),
      cp = function(x) { r <- wbsts::wbs.lsw(x, M = 0); class(r) <- "wbs"; r}
    )

  )

fels <- 
  purrr::map(
    .x = methods_els,
    .f = ~ make_beta(cpFUN = .x$cp, trFUNs = .x$tr)
  ) %>%
  make_methods_applicator()

fcps <- 
  purrr::map(
    .x = methods_cps,
    .f = ~ make_beta(cpFUN = .x$cp, trFUNs = .x$tr)
  ) %>%
  make_methods_applicator()

res <-
  adt %>%
  ungroup() %>%
  # filter(row_number() < 5) %>%
  mutate(
    comparator = purrr::map2(
      .x = meas,
      .y = ruler,
      .f = ~ make_comparator(.x[["distance"]], .y)),
    changepoints = purrr::map2(els, cps, ~ c(fels(.x), fcps(.y))),
    res = purrr::map2(
      .x = comparator, 
      .y = changepoints,
      .f = function(f, y) { 
        purrr::map(
          .x = y,
          .f = ~ continue_safely(purrr::compose(f, view_cp), .x)
        ) 
      }),
    meas = purrr::map(
      .x = meas,
      .f = ~ .x %>%
        mutate(transition = if_else(is_layer, layer_transition, annuli_transition))
    ),
    res = purrr::map2(
      .x = res,
      .y = meas,
      .f = function(r, m){
        purrr::map(
          .x = r,
          .f = ~ if (!is_error(.x)) {
            purrr::modify_at(.x, "result", ~ purrr::set_names(.x, nm = m[["transition"]]) )
          } ) }
        ),
    results = purrr::map(
      .x = res,
      .f = ~ purrr::map_dfr(
          .x = .x,
          .f = ~ `if`(is_error(.x),
                      tibble(error = TRUE),
                      purrr::map_dfr(.x[["result"]], as_tibble, .id = "transition") %>%
                        mutate(error = FALSE)
                      ),
          .id = "method"
        )
    )
  )

res3 <- 
  res %>%
  select(id, transect, drawer, results) %>%
  tidyr::unnest(cols = results)

res3 %>%
  group_by(id, transect, method) %>%
  filter(transition %in% c("ipx_ncr", "ipx_psm", "ncr_psm", "psm_pio", "pio_opx")) %>%
  summarise(
    u = length(unique(i)) == n()
  ) %>%
  left_join(
    vdt %>% select(id, transect, species, dead),
    by = c("id", "transect")
  ) %>%
  group_by(method) %>%
  summarize(
    n = n(),
    u = sum(u),
    up = u/n()
  ) %>%
  View()

results <- 
res3 %>%
  group_by(method, transition) %>%
  summarize(
    n = n(),
    errors = sum(error),
    mean_e   = mean(e, na.rm = TRUE),
    median_e = median(e, na.rm = TRUE),
    mean_abs_e = mean(abs(e), na.rm = TRUE),
    median_abs_e = median(abs(e), na.rm = TRUE),
    var_e = var(e, na.rm = TRUE),
    sd_e  = sd(e, na.rm = TRUE)
  ) 


ggplot(
  filter(results, errors == 0, mean_abs_e < 80) %>%
    # filter(transition %in% LETTERS[1:5]),
    filter(transition %in% c("ipx_ncr", "ipx_psm", "ncr_psm", "psm_pio", "pio_opx")), 
  aes(x = mean_e, y = sd_e)
) +
  geom_point() +
  # geom_label(
  #   aes(label = method)
  # ) +
  facet_grid(~ transition)

results %>%
  filter(errors == 0) %>%
  group_by(method) %>%
  filter(transition %in% c("ipx_ncr", "ipx_psm", "ncr_psm", "psm_pio", "pio_opx")) %>%
  summarise(m = abs(mean(mean_e)),
            sd = mean(sd_e)) %>%
  View()
measurements %>%
  tidyr::unnest(cols = meas) %>%
  filter(layer_transition %in% c("pio_opx", "psm_pio")) %>%
  group_by(id, transect) %>%
  summarise(d = abs(diff(distance))) %>%
  # filter(d > 200)
  pull(d) %>%
  hist(breaks = 30)


ggplot(
  filter(results, errors == 0, mean_abs_e < 80) %>%
    # filter(transition %in% LETTERS[1:5]),
    filter(transition %in% c("ipx_ncr", "ipx_psm", "ncr_psm", "psm_pio", "pio_opx")), 
  aes(x = mean_abs_e, y = sd_e)
) +
  geom_point() +
  facet_wrap(~ method, ncol = 5)
c(methods_els, methods_cps)[57]
c(methods_els, methods_cps)[33]