library(loxcoder)
library(ggplot2)
library(dplyr)
library(ggbeeswarm)
library(circlize)
library(VennDiagram)
#load('/home/zsteve/loxcoder/analysis/data.RData')

# run this first
loxcoder::load_origin_distmaps('/run/media/zsteve/ssd_ext/maps/origin')
loxcoder::load_pair_distmaps('/run/media/zsteve/ssd_ext/maps/pair')
fnames <- c('100k_1_2___S47',
  '100k_2_2___S1',
  '100k_3_2___S2',
  '12k_1_1__A_S33',
  '12k_1_1__B_S34',
  '12k_1_2__A_S27',
  '12k_1_2__B_S28',
  '12k_2_1__A_S35',
  '12k_2_1__B_S36',
  '12k_2_2__A_S29',
  '12k_2_2__B_S30',
  '12k_3_1__A_S37',
  '12k_3_1__B_S38',
  '12k_3_2__A_S31',
  '12k_3_2__B_S32',
  '25k_1_2__A_S15',
  '25k_2_1__B_S24',
  '25k_2_2__A_S17',
  '25k_2_2__B_S18',
  '25k_3_1__A_S25',
  '25k_3_1__B_S26',
  '25k_3_2__A_S19',
  '50k_1_1__A_S9',
  '50k_1_1__B_S10',
  '50k_1_2__A_S3',
  '50k_1_2__B_S4',
  '50k_2_1__A_S11',
  '50k_2_1__B_S12',
  '50k_2_2__A_S5',
  '50k_2_2__B_S6',
  '50k_3_1__A_S13',
  '50k_3_1__B_S14',
  '50k_3_2__A_S7',
  '50k_3_2__B_S8')

# fnames <- fnames[-c(1, 2, 3)]
fnames_no_samp <- matrix(lapply(fnames, function(x) paste0(strsplit(x, split = '_S')[[1]][1])))

files <-t(sapply(fnames, function(x) return(strsplit(x, split = '_')[[1]][c(1, 2, 3, 5, 6)])))
files <- data.frame(files)
names(files) <- c('cellcount', 'sample', 'pulse', 'rep', 'sample_n')
rownames(files) <- fnames_no_samp

fastq_dir <- '/run/media/zsteve/ssd_ext/renamed/'

files$R1 <- paste(fastq_dir, fnames, '_R1_001.fastq', sep = '')
files$R2 <- paste(fastq_dir, fnames, '_R2_001.fastq', sep = '')

samples <- as.list(rownames(files))
names(samples) <- fnames_no_samp
samples_data <- lapply(samples, function(x){
      print(paste("Sample", x))
      loxcoder::decode(c(files[x, 'R1'], files[x, 'R2']),
      name = x,
      meta = data.frame(sample = x),
      min_r1_len = 300, min_r2_len = 280)})
names(samples_data) <- fnames_no_samp # change names 

# load read stats
files$raw_read_count <- as.matrix(lapply(samples_data, function(x) x@decode_stats$tot_reads))
samples_data <- lapply(samples_data, function(t){
  t <- loxcoder::impute(t)
  t <- loxcoder::validate(t)
  #t <- loxcoder::makeid(t)
  t <- loxcoder::get_origin_dist(t)
  return(t)
})
samp_12k <- fnames_no_samp[grep('12k', fnames_no_samp)]
samp_50k <- fnames_no_samp[grep('50k', fnames_no_samp)]
samp_100k <- fnames_no_samp[grep('100k', fnames_no_samp)]

plots_correl_12k <- lapply(samp_12k, function(x) lapply(samp_12k, function(y) loxcoder::pair_comparison_plot(samples_data[[y]], samples_data[[x]])))
plots_correl_50k <- lapply(samp_50k, function(x) lapply(samp_50k, function(y) loxcoder::pair_comparison_plot(samples_data[[y]], samples_data[[x]])))
plots_correl_100k <- lapply(samp_100k, function(x) lapply(samp_100k, function(y) loxcoder::pair_comparison_plot(samples_data[[y]], samples_data[[x]])))

grid.arrange(grobs = plots_correl_12k[[1]][1:8], ncol = 4)
grid.arrange(grobs = plots_correl_50k[[1]][1:8], ncol = 4)
grid.arrange(grobs = plots_correl_100k[[1]][1:3], ncol = 4)
samp_names <- c('50k_1_2__A',
                '50k_2_2__A',
                '50k_3_2__A')
filter_size = 9
samp <- as.list(samp_names)
names(samp) <- samp_names
samp <- lapply(samp, function(x) loxcoder::valid(samples_data[[x]]) %>% filter(size == filter_size && dist_orig > 4))
pairs <- list("n12" = c(1, 2), "n23" = c(2, 3), "n13" = c(1, 3));

n_pairs <- lapply(pairs, function(x){
  length(intersect(samp[[x[1]]]$code, samp[[x[2]]]$code))
})

n_all <- length(intersect(samp[[1]]$code, intersect(samp[[2]]$code, samp[[3]]$code)))

v <- draw.triple.venn(area1 = nrow(samp[[1]]), 
                 area2 = nrow(samp[[2]]),
                 area3 = nrow(samp[[3]]),
                 n12 = n_pairs$n12,
                 n23 = n_pairs$n23,
                 n13 = n_pairs$n13,
                 n123 = n_all,
                 category = samp_names, 
                 fontfamily = rep('sans', 7), 
                 cat.fontfamily = rep('sans', 3),
                 fill = c('red', 'blue', 'green'))
grid.newpage()
grid.draw(v)
# now look at size and distance
samp <- samp_50k

size_plots <- lapply(samp, function(x) loxcoder::size_plot(samples_data[[x]]))
grid.arrange(grobs = size_plots, ncol = 4)

# size 9 distances
dist_plot_9 <- lapply(samp, function(x) loxcoder::dist_orig_plot(samples_data[[x]], 9))
grid.arrange(grobs = dist_plot_9, ncol = 4)

# rank count plot
rank_count_plot <- lapply(samp, function(x) loxcoder::rank_count_plot(samples_data[[x]], 9))
grid.arrange(grobs = rank_count_plot, ncol = 4)

swarm_plots <- lapply(samp, function(x) loxcoder::dist_count_beeswarm_plot(samples_data[[x]], 100))
grid.arrange(grobs = swarm_plots, ncol = 4)
sample_a <- samples_data['50k_3_2__A'][[1]]
sample_b <- samples_data['50k_3_2__A'][[1]]

loxcoder::pair_comparison_plot(sample_a, sample_b)

loxcoder::valid(sample_a) %>% filter(size == 9 & count < 10) -> sample_a_codes
loxcoder::valid(sample_b) %>% filter(size == 9 & count < 10) -> sample_b_codes

codes_a <- loxcoder::get_cass_vec(sample_a_codes$code)
codes_b <- loxcoder::get_cass_vec(sample_b_codes$code)

loxcoder::retrieve_dist_pair(codes_a, codes_b) -> m

library(ComplexHeatmap)
h <- Heatmap(m, cluster_rows = T, cluster_columns = T, col = colorRamp2(c(0, max(m)/2, max(m)), c("blue", "white", "red")),
        row_title = 'sample A', column_title = 'sample A')

codes_dist <- sample_a_codes$dist_orig
order <- column_order(h)
ggplot() + geom_point(aes(x = order, y = codes_dist))
sample_a <- samples_data['50k_1_1__A'][[1]]
sample_b <- samples_data['50k_2_2__A'][[1]]

loxcoder::pair_comparison_plot(sample_a, sample_b)

loxcoder::valid(sample_a) %>% filter(size == 9 & count < 10 & dist_orig > 4 & dist_orig < 6) -> sample_a_codes
loxcoder::valid(sample_b) %>% filter(size == 9 & count < 10 & dist_orig > 6) -> sample_b_codes

codes_a <- loxcoder::get_cass_vec(sample_a_codes$code)
codes_b <- loxcoder::get_cass_vec(sample_b_codes$code)

loxcoder::retrieve_dist_pair(codes_a, codes_b) -> m

library(ComplexHeatmap)
Heatmap(m, cluster_rows = T, cluster_columns = T, col = colorRamp2(c(0, 5, 10), c("blue", "white", "red")),
        row_title = 'sample A', column_title = 'sample B')
sample_a2 <- samples_data['50k_2_2__A'][[1]]
sample_a1 <- samples_data['50k_1_2__A'][[1]]
rbind(loxcoder::valid(sample_a1), loxcoder::valid(sample_a2)) %>% filter(size == 9) %>% filter(dist_orig == 5) -> sample_a_codes
# rownames(sample_a_codes) <- sample_a_codes$id
sample_a_codes_vec <- loxcoder::get_cass_vec(sample_a_codes$code)
loxcoder::retrieve_dist_pair(sample_a_codes_vec, sample_a_codes_vec) -> n
rownames(n) <- sample_a_codes$dist_orig
colnames(n) <- sample_a_codes$dist_orig
Heatmap(n, column_names_gp = gpar(fontsize = 8), column_names_rot = 90)
Heatmap(n, cluster_rows = F, cluster_columns = F)
# get sample-sample matrix
sample_50k <- samples[grep('50k', samples)]
sample_50k_valid <- lapply(sample_50k, function(x){
  loxcoder::valid(samples_data[[x]]) %>% filter(size == 9) -> y
  return(loxcoder::get_cass_vec(y$code))
})

sample_50k_1pulse1_1pulse2 <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_1_1__A']], sample_50k_valid[['50k_1_2__A']])
sample_50k_1pulse1_2pulse2 <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_1_1__A']], sample_50k_valid[['50k_2_2__A']])

Heatmap(sample_50k_1pulse1_1pulse2)
Heatmap(sample_50k_1pulse1_2pulse2)

sample_50k_1pulse2_2pulse2 <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_1_2__A']], sample_50k_valid[['50k_2_2__A']])
sample_50k_2pulse2a_2pulse2b <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_2_2__A']], sample_50k_valid[['50k_2_2__B']]) 
h <- Heatmap(sample_50k_1pulse2_2pulse2)
h

# show that the heatmap clustering tends to cluster together according to complexity
ro <- row_order(h)
u <- valid(samples_data[['50k_1_2__A']]) %>% filter(size == 9)
u <- u$dist_orig
plot(u[ro])

h2 <- Heatmap(sample_50k_2pulse2a_2pulse2b)
h2

# show that the heatmap clustering tends to cluster together according to complexity
ro2 <- row_order(h2)
u2 <- valid(samples_data[['50k_2_2__A']]) %>% filter(size == 9)
u2 <- u2$dist_orig
plot(u2[ro2])
u1 <- valid(samples_data[['12k_1_2__B']]) %>% filter(dist_orig >= 4 & count > 1 & size == 9)
c <- loxcoder::get_cass_vec(u1$code)
m <- loxcoder::retrieve_dist_pair(c, c)
colnames(m) <- u1$code
rownames(m) <- u1$code
h <- Heatmap(m)
ro <- row_order(h)
co <- column_order(h)
m <- m[ro, co]
Heatmap(m, cluster_rows = F, cluster_columns = F)

m_same <- m
# m_diff <- m

plot_me <- function(x, k, m){
  z <- match(x, rownames(m))
  m_temp = m;
  m_temp[z, z] = 10
  # Heatmap(m_temp)
  Heatmap(m_temp[(z-k):(z+k),(z-k):(z+k)], cluster_rows = F, cluster_columns = F)
}


jngwehi/loxcodeR documentation built on March 17, 2020, 5:32 p.m.