knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(odbc)
library(bulletsamplr)
library(bulletxtrctr)
library(ggplot2)
library(stringr)
library(tidyr)
library(furrr)
plan(multicore, workers = 20)

con <- dbConnect(odbc::odbc(), "bullets", timeout = 10)

The peak db is made up of peaks from three Hamby sets. Thus, the random match probability isn't as simple as it may appear: Not only is there the simple probability of the same cycle being drawn for multiple chunks, there is also the probability that two cycles from different lands are drawn, but those lands happen to be a match.

barrel10 <- tbl(con, "bullet.crosssection") %>%
  filter(Barrel == "10") %>%
  collect()

barrel10 <- barrel10 %>% group_by(Study, Barrel, Bullet, Land, id, source) %>%
  nest(x, y, value, sig, .key = 'ccdata') %>%
  ungroup()

ccdata_align <- function(ccd1, ccd2) {
  sig_align(ccd1$sig, ccd2$sig)
}

barrel10combo <- tidyr::crossing(barrel10, barrel10) %>%
  # filter(Study != Study1) %>%
  mutate(align = purrr::map2(ccdata, ccdata1, ccdata_align))

barrel10combo <- barrel10combo %>%
  mutate(ccf = sapply(align, function(.) .$ccf),
         lag = sapply(align, function(.) .$lag))
barrel10combo %>%
  mutate_at(vars(matches("id")), ~str_remove(., "10-\\d-")) %>%
ggplot(aes(x = id, y = id1, fill = ccf)) + 
  geom_tile() + facet_grid(Study1 + Bullet1 ~ Study + Bullet) + 
  scale_fill_gradient2(low = "gray", high = "darkorange", mid = "white", midpoint = .5) + 
  ggtitle("Barrel 10, Bullet 1 & 2, across 3 Hamby studies") + 
  coord_fixed() + 
  theme(axis.title = element_blank())

Examining all sequences which align with Barrel 10, Bullet 1, Land 1 of Hamby 224 (10-2-3 from Hamby 224; 10-1-4, 10-2-4 from Hamby 36; 10-1-5, 10-2-5 from Hamby44), we see:

db <- tbl(con, "bullet.slice") %>%
  collect() %>%
  filter(str_detect(id, "(.*)_10-.-._..")) 

db10 <- db %>%
  mutate(chunk = str_extract(id, "\\d{2}$") %>% as.numeric(),
         study = str_extract(id, "Hamby\\d{1,}"),
         barrel = str_extract(id, "_\\d{1,}-") %>% str_remove_all("[[:punct:]]"),
         bullet = str_extract(id, "-\\d-") %>% str_remove_all("-"),
         land = str_extract(id, "-\\d_") %>% str_remove_all("[[:punct:]]")) %>%
  select(study, barrel, bullet, land, chunk, sig) %>%
  tidyr::nest(sig)

## Define sig_align_abs to be sig_align but to take the maximum absolute ccf

sig_align_abs <- function(sig1, sig2) {
  assertthat::assert_that(is.numeric(sig1), is.numeric(sig2))

  sig1 <- zoo::na.trim(sig1)
  sig2 <- zoo::na.trim(sig2)

  n1 <- length(sig1)
  n2 <- length(sig2)

  # assume y is the long vector, x is the short vector. If not, switch the
  # vectors around
  if (n1 < n2) {
    x <- sig1
    y <- sig2
  } else {
    x <- sig2
    y <- sig1
  }

  cors <- get_ccf(x, y, round(0.75 * min(length(sig1), length(sig2))))
  # do some padding at the front
  lag <- cors$lag[which.max(abs(cors$ccf))]
  if (lag < 0) {
    x <- c(rep(NA, abs(lag)), x)
  }
  if (lag > 0) {
    y <- c(rep(NA, lag), y)
  }

  # at the back
  delta <- length(x) - length(y)
  if (delta < 0) x <- c(x, rep(NA, abs(delta)))
  if (delta > 0) y <- c(y, rep(NA, delta))

  # switch back
  if (n1 < n2) {
    dframe0 <- data.frame(x = 1:length(x), sig1 = x, sig2 = y)
  } else {
    dframe0 <- data.frame(x = 1:length(x), sig1 = y, sig2 = x)
  }

  maxcor <- max(cors$ccf, na.rm = TRUE)

  # dfcor <- cor(dframe0$sig1, dframe0$sig2, use = "pairwise")
  # if (maxcor != dfcor) browser()

  list(ccf = maxcor, lag = lag, lands = dframe0)
}

db10combo <- tidyr::crossing(db10, db10) %>%
  mutate(ccf = furrr::future_map2(data, data1, ~sig_align_abs(.x$sig, .y$sig)))

db10combo$ccf_only <- sapply(db10combo$ccf, function(x) x$ccf)
db10combo$ccf_lag <- sapply(db10combo$ccf, function(x) x$lag)

db10combo <- db10combo %>%
  mutate(id = sprintf("%s_%s-%s-%s_%02s", study, barrel, bullet, land, chunk),
         id1 = sprintf("%s_%s-%s-%s_%02s", study1, barrel1, bullet1, land1, chunk1))

db10combo %>%
  filter(id != id1) %>%
  ggplot(aes(x = id, y = id1, fill = ccf_only)) + geom_tile() + 
  scale_fill_gradient2()


srvanderplas/bulletsamplr documentation built on June 10, 2021, 7:55 a.m.