inst/doc/cmpsR-vignette.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "figures/vignette-",
  echo=FALSE,
  warning = FALSE,
  message = FALSE,
  fig.width = 7,
  fig.height = 5,
  out.width = "80%"
)

library(tidyverse)
theme_set(theme_bw())

## ---- eval=FALSE, echo=TRUE---------------------------------------------------
#  install.packages("cmpsR")

## ---- eval=FALSE, echo=TRUE---------------------------------------------------
#  # install.packages("devtools")
#  devtools::install_github("willju-wangqian/cmpsR")

## ---- echo=TRUE---------------------------------------------------------------
library(cmpsR)
data("bullets")

## ----signature_plot-----------------------------------------------------------
signatures <- bullets %>% unnest(sigs)
signatures <- signatures %>% mutate(
  bulletland = factor(bulletland, levels=c(paste(rep(c(1,2), each=6), c(1:6,2:6,1), sep="-")))
)
signatures %>% ggplot(aes(x = x/1000, y = sig)) + geom_line() + facet_wrap(~bulletland, ncol=6) +
  theme_bw() +
  xlab("Length in mm") +
  ylab("Relative height in micron")

## ---- echo=TRUE---------------------------------------------------------------
library(cmpsR)
data("bullets")

x <- bullets$sigs[bullets$bulletland == "2-3"][[1]]$sig
y <- bullets$sigs[bullets$bulletland == "1-2"][[1]]$sig

cmps <- extract_feature_cmps(x, y, include = "full_result")
cmps$CMPS_score

## ----plot_example, fig.cap="A KM Comparison, x and y", fig.align='center'-----
# knitr::include_graphics("man/figures/step0.png")
signatures %>% filter(bulletland %in% c("2-3", "1-2")) %>% 
  ggplot(aes(x = x/1000, y = sig)) + geom_line(aes(color = bulletland)) +
  theme_bw() +
  xlab("Length in mm") +
  ylab("Relative height in micron") +
  labs(color = "bullet land")

## ----plot_cut_x, fig.cap="Cut x into consecutive and non-overlapping basis segments of the same length. Only 4 basis segments are shown here", out.width="80%", fig.keep="hold", fig.align='center'----
segments <- get_segs(x, len = 50)
nseg <- length(segments$segs)
df <- data.frame(value = unlist(segments$segs),
                 segs = rep(1:nseg, each = 50),
                 index = unlist(segments$index))
df$segs_tag <- paste("seg", df$segs)
df.partx <- data.frame(value = unlist(segments$segs),
                       segs = 0,
                       index = unlist(segments$index),
                       segs_tag = "x")
df.party <- data.frame(value = y,
                       segs = 0,
                       index = 1:length(y),
                       segs_tag = "y")
cutt <- sapply(segments$index, function(idx) {idx[1]})
rbind(df.partx, df) %>% filter(segs <= 4) %>% 
  ggplot() +
  geom_line(aes(x = index, y = value)) +
  geom_vline(xintercept = cutt, color = "red") +
  facet_grid(segs_tag ~ .) +
  xlab("position") +
  ylab("signature") +
  ggtitle("Cut Comparison Signature x into Consecutive and Non-overlapping Basis Segments")

## ----plot_y_and_seg, fig.cap="y and 7th basis segment", out.width="80%", fig.keep="hold", fig.align='center'----
seg3 <- df %>% filter(segs == 7)
seg3$segs <- "segment 7"
df.y <- data.frame(value = y, segs = "y", index = 1:length(y))
df.y <- rbind(df.y, seg3[,-4])
df.y$segs <- factor(df.y$segs, levels = c("y", "segment 7"))
df.y %>% filter(!is.na(value)) %>% 
  ggplot() +
  geom_line(aes(x = index, y = value, color = segs)) +
  labs(x = "position", y = "signature", color = "label") +
  ggtitle("Reference Signature y and the 7th Segment") +
  scale_color_manual(values = c("black", "red"))

## ----plot_ccf_y_seg, fig.cap="the cross-correlation function (ccf) between y and segment 7", out.width="80%", fig.keep="hold", fig.align='center'----
ccrpeaks <- get_ccr_peaks(y, segments = segments, 50, nseg = 7, npeaks = 1)
df.ccf <- data.frame(value = ccrpeaks$ccr$ccf, index = ccrpeaks$adj_pos)

df.ccf %>% ggplot() +
  geom_line(aes(index, value)) + 
  geom_vline(xintercept = ccrpeaks$peaks_pos, color = "red") +
  xlab("position") +
  ylab("ccf") +
  ggtitle("CCF of y and the 7th Basis Segment")

## ----plot_x_itself, fig.cap="ideal case: compare x to itself. The highest peak has value 1 and is marked by the blue dot", out.width="80%", fig.keep="hold", fig.align='center'----
comp <- x
npeaks <- 1
seg_outlength <- 50

ccf.df <- do.call(rbind, lapply(6:12, function(nss) {
  ccrpeaks <- get_ccr_peaks(comp, segments = segments, seg_outlength = seg_outlength, nseg = nss, npeaks = npeaks)
  df.ccf <- data.frame(value = ccrpeaks$ccr$ccf, index = ccrpeaks$adj_pos)
  df.ccf$segs <- nss
  df.ccf
}))
peak.df <- do.call(rbind, lapply(6:12, function(nss) {
  ccrpeaks <- get_ccr_peaks(comp, segments = segments, seg_outlength = seg_outlength, nseg = nss, npeaks = npeaks)
  df.ccf <- data.frame(value = ccrpeaks$peaks_heights, index = ccrpeaks$peaks_pos)
  df.ccf$segs <- nss
  df.ccf
}))

ccf.df %>% 
  ggplot() +
  geom_line(aes(x = index, y = value)) +
  geom_point(data = peak.df, aes(x = index, y = value),
             color = "blue") +
  geom_vline(xintercept = 0, color = "red") +
  facet_grid(segs ~ .) +
  # ggtitle("Real Case: x compares to y") +
  ggtitle("Ideal Case: x Compares to Itself") +
  xlab("position") +
  ylab("ccf")

## ----plot_real_xy, fig.cap="real case: compare x to y. The 5 highest peaks are marked by the blue dots", out.width="80%", fig.keep="hold", fig.align='center'----
comp <- y
npeaks <- 5
seg_outlength <- 50

ccf.df <- do.call(rbind, lapply(6:12, function(nss) {
  ccrpeaks <- get_ccr_peaks(comp, segments = segments, seg_outlength = seg_outlength, nseg = nss, npeaks = npeaks)
  df.ccf <- data.frame(value = ccrpeaks$ccr$ccf, index = ccrpeaks$adj_pos)
  df.ccf$segs <- nss
  df.ccf
}))
peak.df <- do.call(rbind, lapply(6:12, function(nss) {
  ccrpeaks <- get_ccr_peaks(comp, segments = segments, seg_outlength = seg_outlength, nseg = nss, npeaks = npeaks)
  df.ccf <- data.frame(value = ccrpeaks$peaks_heights, index = ccrpeaks$peaks_pos)
  df.ccf$segs <- nss
  df.ccf
}))

ccf.df %>% 
  ggplot() +
  geom_line(aes(x = index, y = value)) +
  geom_point(data = peak.df, aes(x = index, y = value),
             color = "blue") +
  geom_vline(xintercept = -6, color = "red") +
  facet_grid(segs ~ .) +
  ggtitle("Real Case: x compares to y") +
  # ggtitle("Ideal Case: x compares to itself") +
  xlab("position") +
  ylab("ccf")

## ----plot_false_positive, fig.cap="Multi Segment Lengths Strategy - increasing the segment length could decrease the number of false positive peaks in ccf curves", out.width="80%", fig.keep="hold", fig.align='center'----
comp <- y
nseg <- 7
npeaks_set <- c(5, 3, 1)

multi.seg <- do.call(rbind, lapply(c(50, 100, 200), function(scale) {
  tt <- get_seg_scale(segments, nseg, out_length = scale)
  df.tmp <- data.frame(value = tt$aug_seg, index=tt$aug_idx, scale=paste("length of", scale))
}))

multi.seg$scale <- factor(multi.seg$scale, 
                          levels = c("length of 50", "length of 100", "length of 200"))

# rbind(data.frame(value = x, index = 1:length(x), scale = "x"), multi.seg) 
p1 <- multi.seg %>% filter(!is.na(value)) %>% 
  ggplot() +
  geom_line(aes(index, value)) +
  facet_grid(scale ~ .) + 
  ylab("signature") +
  xlab("position") +
  ggtitle("Segment 7 in 3 Different Scale Levels")

out_length <- c(50, 100, 200)

multi.df <- do.call(rbind, lapply(1:3, function(scale) {
  ccrpeaks <- get_ccr_peaks(comp, segments = segments,
                            seg_outlength = out_length[scale],
                            nseg = nseg, npeaks = npeaks_set[scale])
  df.tmp <- data.frame(value = ccrpeaks$ccr$ccf, index = ccrpeaks$adj_pos)
  df.tmp$scale <- paste("scale level", scale)
  df.tmp
}))
multi.peak.df <- do.call(rbind, lapply(1:3, function(scale) {
  ccrpeaks <- get_ccr_peaks(comp, segments = segments, seg_outlength = out_length[scale],
                            nseg = nseg, npeaks = npeaks_set[scale])
  df.tmp <- data.frame(value = ccrpeaks$peaks_heights, index = ccrpeaks$peaks_pos)
  df.tmp$scale <- paste("scale level", scale)
  df.tmp
}))
p2 <- multi.df %>% ggplot() +
  geom_line(aes(x=index, y=value)) +
  geom_point(data = multi.peak.df, aes(x = index, y = value),
           color = "blue") +
  geom_vline(xintercept = -7, color = "red") +
  facet_grid(scale ~ .) +
  scale_x_continuous(breaks=seq(-400,800,100)) +
  ylab("ccf") +
  xlab("position") +
  ggtitle("ccf of segment 7 and y in 3 different scales")

p1
p2

## ----plot_ccf_seg7, echo=TRUE-------------------------------------------------
cmps <- extract_feature_cmps(x, y, include = "full_result")
cmps_plot_list <- cmpsR::cmps_segment_plot(cmps, seg_idx = 7)
ggpubr::ggarrange(plotlist = unlist(cmps_plot_list, recursive = FALSE),
                  nrow = 3, ncol = 2)

## ---- echo=TRUE, eval=TRUE----------------------------------------------------
cmps <- extract_feature_cmps(x, y, seg_length = 50, Tx = 25, 
                     npeaks_set = c(5, 3, 1), include = "full_result")
cmps$CMPS_score

## ----plot_ccf_seg6, echo=TRUE, eval=TRUE--------------------------------------
cmps_plot_list <- cmpsR::cmps_segment_plot(cmps, seg_idx = 6)
ggpubr::ggarrange(plotlist = unlist(cmps_plot_list, recursive = FALSE),
                  nrow = 3, ncol = 2)

## ---- eval=TRUE, echo=TRUE----------------------------------------------------
land23 <- bullets$sigs[bullets$bulletland == "2-3"][[1]]
land13 <- bullets$sigs[bullets$bulletland == "1-3"][[1]]

cmps_knm <- extract_feature_cmps(land23$sig, land13$sig, seg_length = 50, Tx = 25, 
                     npeaks_set = c(5, 3, 1), include="full_result")
cmps_knm$CMPS_score

## ----eval = TRUE, echo=TRUE---------------------------------------------------
library(tidyverse)
library(cmpsR)

data("bullets")

lands <- unique(bullets$bulletland)

comparisons <- data.frame(expand.grid(land1 = lands[1:6], land2 = lands[7:12]),
                          stringsAsFactors = FALSE)

comparisons <- comparisons %>%
  left_join(bullets %>% select(bulletland, sig1=sigs),
            by = c("land1" = "bulletland")) %>%
  left_join(bullets %>% select(bulletland, sig2=sigs),
            by = c("land2" = "bulletland"))

comparisons <- comparisons %>% mutate(
  cmps = purrr::map2(sig1, sig2, .f = function(x, y) {
    extract_feature_cmps(x$sig, y$sig, include = "full_result")
  })
)

comparisons <- comparisons %>%
  mutate(
    cmps_score = sapply(comparisons$cmps, function(x) x$CMPS_score),
    cmps_nseg = sapply(comparisons$cmps, function(x) x$nseg)
  )
  
cp1 <- comparisons %>% select(land1, land2, cmps_score, cmps_nseg)
cp1  

## ----plot_all_pairwise--------------------------------------------------------
comparisons <- comparisons %>%
  mutate(
    bulletA = gsub("(\\d)-\\d", "\\1", land1),
    landA = gsub("\\d-(\\d)", "\\1", land1),
    bulletB = gsub("(\\d)-\\d", "\\1", land2),
    landB = gsub("\\d-(\\d)", "\\1", land2)
  )

dframe <- comparisons %>% select(-sig1, -sig2)

cc.idx <- c(6,7, 14, 21, 28, 35)
dframe$samesource <- FALSE
dframe$samesource[cc.idx] <- TRUE

dframe <- dframe %>% mutate(
  landA = paste0("L", landA),
  landB = paste0("L", landB),
  landB = factor(landB, levels = paste0("L", c(2:6,1))),
  bulletA = paste0("Bullet ", bulletA),
  bulletB = paste0("Bullet ", bulletB)
)

dframe %>% ggplot(aes(x = landA, y = landB, fill = cmps_score)) + 
  geom_tile() + 
  geom_tile(aes(colour="same land"), fill=NA, data = dframe %>% filter(samesource), size=1) + 
  scale_fill_gradient2("CMPS score", low = "gray80", high = "darkorange", midpoint = 6) + 
  scale_colour_manual("Source", values="darkorange") +
  facet_grid(bulletB ~ bulletA) + xlab("Bullet1 Lands") + 
  ylab("Bullet2 Lands") + 
  geom_text(aes(label=cmps_score)) +
  theme_bw() +
  theme(aspect.ratio = 1)

Try the cmpsR package in your browser

Any scripts or data that you put into this service are public.

cmpsR documentation built on July 18, 2022, 9:07 a.m.