knitr::opts_chunk$set(echo = TRUE) library(dplyr) library(odbc) library(bulletsamplr) library(bulletxtrctr) library(ggplot2) library(stringr) library(tidyr) library(furrr) plan(multicore, workers = 20) con <- dbConnect(odbc::odbc(), "bullets", timeout = 10)
The peak db is made up of peaks from three Hamby sets. Thus, the random match probability isn't as simple as it may appear: Not only is there the simple probability of the same cycle being drawn for multiple chunks, there is also the probability that two cycles from different lands are drawn, but those lands happen to be a match.
barrel10 <- tbl(con, "bullet.crosssection") %>% filter(Barrel == "10") %>% collect() barrel10 <- barrel10 %>% group_by(Study, Barrel, Bullet, Land, id, source) %>% nest(x, y, value, sig, .key = 'ccdata') %>% ungroup() ccdata_align <- function(ccd1, ccd2) { sig_align(ccd1$sig, ccd2$sig) } barrel10combo <- tidyr::crossing(barrel10, barrel10) %>% # filter(Study != Study1) %>% mutate(align = purrr::map2(ccdata, ccdata1, ccdata_align)) barrel10combo <- barrel10combo %>% mutate(ccf = sapply(align, function(.) .$ccf), lag = sapply(align, function(.) .$lag)) barrel10combo %>% mutate_at(vars(matches("id")), ~str_remove(., "10-\\d-")) %>% ggplot(aes(x = id, y = id1, fill = ccf)) + geom_tile() + facet_grid(Study1 + Bullet1 ~ Study + Bullet) + scale_fill_gradient2(low = "gray", high = "darkorange", mid = "white", midpoint = .5) + ggtitle("Barrel 10, Bullet 1 & 2, across 3 Hamby studies") + coord_fixed() + theme(axis.title = element_blank())
Examining all sequences which align with Barrel 10, Bullet 1, Land 1 of Hamby 224 (10-2-3 from Hamby 224; 10-1-4, 10-2-4 from Hamby 36; 10-1-5, 10-2-5 from Hamby44), we see:
db <- tbl(con, "bullet.slice") %>% collect() %>% filter(str_detect(id, "(.*)_10-.-._..")) db10 <- db %>% mutate(chunk = str_extract(id, "\\d{2}$") %>% as.numeric(), study = str_extract(id, "Hamby\\d{1,}"), barrel = str_extract(id, "_\\d{1,}-") %>% str_remove_all("[[:punct:]]"), bullet = str_extract(id, "-\\d-") %>% str_remove_all("-"), land = str_extract(id, "-\\d_") %>% str_remove_all("[[:punct:]]")) %>% select(study, barrel, bullet, land, chunk, sig) %>% tidyr::nest(sig) ## Define sig_align_abs to be sig_align but to take the maximum absolute ccf sig_align_abs <- function(sig1, sig2) { assertthat::assert_that(is.numeric(sig1), is.numeric(sig2)) sig1 <- zoo::na.trim(sig1) sig2 <- zoo::na.trim(sig2) n1 <- length(sig1) n2 <- length(sig2) # assume y is the long vector, x is the short vector. If not, switch the # vectors around if (n1 < n2) { x <- sig1 y <- sig2 } else { x <- sig2 y <- sig1 } cors <- get_ccf(x, y, round(0.75 * min(length(sig1), length(sig2)))) # do some padding at the front lag <- cors$lag[which.max(abs(cors$ccf))] if (lag < 0) { x <- c(rep(NA, abs(lag)), x) } if (lag > 0) { y <- c(rep(NA, lag), y) } # at the back delta <- length(x) - length(y) if (delta < 0) x <- c(x, rep(NA, abs(delta))) if (delta > 0) y <- c(y, rep(NA, delta)) # switch back if (n1 < n2) { dframe0 <- data.frame(x = 1:length(x), sig1 = x, sig2 = y) } else { dframe0 <- data.frame(x = 1:length(x), sig1 = y, sig2 = x) } maxcor <- max(cors$ccf, na.rm = TRUE) # dfcor <- cor(dframe0$sig1, dframe0$sig2, use = "pairwise") # if (maxcor != dfcor) browser() list(ccf = maxcor, lag = lag, lands = dframe0) } db10combo <- tidyr::crossing(db10, db10) %>% mutate(ccf = furrr::future_map2(data, data1, ~sig_align_abs(.x$sig, .y$sig))) db10combo$ccf_only <- sapply(db10combo$ccf, function(x) x$ccf) db10combo$ccf_lag <- sapply(db10combo$ccf, function(x) x$lag) db10combo <- db10combo %>% mutate(id = sprintf("%s_%s-%s-%s_%02s", study, barrel, bullet, land, chunk), id1 = sprintf("%s_%s-%s-%s_%02s", study1, barrel1, bullet1, land1, chunk1)) db10combo %>% filter(id != id1) %>% ggplot(aes(x = id, y = id1, fill = ccf_only)) + geom_tile() + scale_fill_gradient2()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.