library(loxcoder) library(ggplot2) library(dplyr) library(ggbeeswarm) library(circlize) library(VennDiagram) #load('/home/zsteve/loxcoder/analysis/data.RData') # run this first loxcoder::load_origin_distmaps('/run/media/zsteve/ssd_ext/maps/origin') loxcoder::load_pair_distmaps('/run/media/zsteve/ssd_ext/maps/pair')
fnames <- c('100k_1_2___S47', '100k_2_2___S1', '100k_3_2___S2', '12k_1_1__A_S33', '12k_1_1__B_S34', '12k_1_2__A_S27', '12k_1_2__B_S28', '12k_2_1__A_S35', '12k_2_1__B_S36', '12k_2_2__A_S29', '12k_2_2__B_S30', '12k_3_1__A_S37', '12k_3_1__B_S38', '12k_3_2__A_S31', '12k_3_2__B_S32', '25k_1_2__A_S15', '25k_2_1__B_S24', '25k_2_2__A_S17', '25k_2_2__B_S18', '25k_3_1__A_S25', '25k_3_1__B_S26', '25k_3_2__A_S19', '50k_1_1__A_S9', '50k_1_1__B_S10', '50k_1_2__A_S3', '50k_1_2__B_S4', '50k_2_1__A_S11', '50k_2_1__B_S12', '50k_2_2__A_S5', '50k_2_2__B_S6', '50k_3_1__A_S13', '50k_3_1__B_S14', '50k_3_2__A_S7', '50k_3_2__B_S8') # fnames <- fnames[-c(1, 2, 3)] fnames_no_samp <- matrix(lapply(fnames, function(x) paste0(strsplit(x, split = '_S')[[1]][1]))) files <-t(sapply(fnames, function(x) return(strsplit(x, split = '_')[[1]][c(1, 2, 3, 5, 6)]))) files <- data.frame(files) names(files) <- c('cellcount', 'sample', 'pulse', 'rep', 'sample_n') rownames(files) <- fnames_no_samp fastq_dir <- '/run/media/zsteve/ssd_ext/renamed/' files$R1 <- paste(fastq_dir, fnames, '_R1_001.fastq', sep = '') files$R2 <- paste(fastq_dir, fnames, '_R2_001.fastq', sep = '') samples <- as.list(rownames(files)) names(samples) <- fnames_no_samp samples_data <- lapply(samples, function(x){ print(paste("Sample", x)) loxcoder::decode(c(files[x, 'R1'], files[x, 'R2']), name = x, meta = data.frame(sample = x), min_r1_len = 300, min_r2_len = 280)}) names(samples_data) <- fnames_no_samp # change names # load read stats files$raw_read_count <- as.matrix(lapply(samples_data, function(x) x@decode_stats$tot_reads))
samples_data <- lapply(samples_data, function(t){ t <- loxcoder::impute(t) t <- loxcoder::validate(t) #t <- loxcoder::makeid(t) t <- loxcoder::get_origin_dist(t) return(t) })
samp_12k <- fnames_no_samp[grep('12k', fnames_no_samp)] samp_50k <- fnames_no_samp[grep('50k', fnames_no_samp)] samp_100k <- fnames_no_samp[grep('100k', fnames_no_samp)] plots_correl_12k <- lapply(samp_12k, function(x) lapply(samp_12k, function(y) loxcoder::pair_comparison_plot(samples_data[[y]], samples_data[[x]]))) plots_correl_50k <- lapply(samp_50k, function(x) lapply(samp_50k, function(y) loxcoder::pair_comparison_plot(samples_data[[y]], samples_data[[x]]))) plots_correl_100k <- lapply(samp_100k, function(x) lapply(samp_100k, function(y) loxcoder::pair_comparison_plot(samples_data[[y]], samples_data[[x]]))) grid.arrange(grobs = plots_correl_12k[[1]][1:8], ncol = 4) grid.arrange(grobs = plots_correl_50k[[1]][1:8], ncol = 4) grid.arrange(grobs = plots_correl_100k[[1]][1:3], ncol = 4)
samp_names <- c('50k_1_2__A', '50k_2_2__A', '50k_3_2__A') filter_size = 9 samp <- as.list(samp_names) names(samp) <- samp_names samp <- lapply(samp, function(x) loxcoder::valid(samples_data[[x]]) %>% filter(size == filter_size && dist_orig > 4)) pairs <- list("n12" = c(1, 2), "n23" = c(2, 3), "n13" = c(1, 3)); n_pairs <- lapply(pairs, function(x){ length(intersect(samp[[x[1]]]$code, samp[[x[2]]]$code)) }) n_all <- length(intersect(samp[[1]]$code, intersect(samp[[2]]$code, samp[[3]]$code))) v <- draw.triple.venn(area1 = nrow(samp[[1]]), area2 = nrow(samp[[2]]), area3 = nrow(samp[[3]]), n12 = n_pairs$n12, n23 = n_pairs$n23, n13 = n_pairs$n13, n123 = n_all, category = samp_names, fontfamily = rep('sans', 7), cat.fontfamily = rep('sans', 3), fill = c('red', 'blue', 'green')) grid.newpage() grid.draw(v)
# now look at size and distance samp <- samp_50k size_plots <- lapply(samp, function(x) loxcoder::size_plot(samples_data[[x]])) grid.arrange(grobs = size_plots, ncol = 4) # size 9 distances dist_plot_9 <- lapply(samp, function(x) loxcoder::dist_orig_plot(samples_data[[x]], 9)) grid.arrange(grobs = dist_plot_9, ncol = 4) # rank count plot rank_count_plot <- lapply(samp, function(x) loxcoder::rank_count_plot(samples_data[[x]], 9)) grid.arrange(grobs = rank_count_plot, ncol = 4) swarm_plots <- lapply(samp, function(x) loxcoder::dist_count_beeswarm_plot(samples_data[[x]], 100)) grid.arrange(grobs = swarm_plots, ncol = 4)
sample_a <- samples_data['50k_3_2__A'][[1]] sample_b <- samples_data['50k_3_2__A'][[1]] loxcoder::pair_comparison_plot(sample_a, sample_b) loxcoder::valid(sample_a) %>% filter(size == 9 & count < 10) -> sample_a_codes loxcoder::valid(sample_b) %>% filter(size == 9 & count < 10) -> sample_b_codes codes_a <- loxcoder::get_cass_vec(sample_a_codes$code) codes_b <- loxcoder::get_cass_vec(sample_b_codes$code) loxcoder::retrieve_dist_pair(codes_a, codes_b) -> m library(ComplexHeatmap) h <- Heatmap(m, cluster_rows = T, cluster_columns = T, col = colorRamp2(c(0, max(m)/2, max(m)), c("blue", "white", "red")), row_title = 'sample A', column_title = 'sample A') codes_dist <- sample_a_codes$dist_orig order <- column_order(h) ggplot() + geom_point(aes(x = order, y = codes_dist))
sample_a <- samples_data['50k_1_1__A'][[1]] sample_b <- samples_data['50k_2_2__A'][[1]] loxcoder::pair_comparison_plot(sample_a, sample_b) loxcoder::valid(sample_a) %>% filter(size == 9 & count < 10 & dist_orig > 4 & dist_orig < 6) -> sample_a_codes loxcoder::valid(sample_b) %>% filter(size == 9 & count < 10 & dist_orig > 6) -> sample_b_codes codes_a <- loxcoder::get_cass_vec(sample_a_codes$code) codes_b <- loxcoder::get_cass_vec(sample_b_codes$code) loxcoder::retrieve_dist_pair(codes_a, codes_b) -> m library(ComplexHeatmap) Heatmap(m, cluster_rows = T, cluster_columns = T, col = colorRamp2(c(0, 5, 10), c("blue", "white", "red")), row_title = 'sample A', column_title = 'sample B')
sample_a2 <- samples_data['50k_2_2__A'][[1]] sample_a1 <- samples_data['50k_1_2__A'][[1]] rbind(loxcoder::valid(sample_a1), loxcoder::valid(sample_a2)) %>% filter(size == 9) %>% filter(dist_orig == 5) -> sample_a_codes # rownames(sample_a_codes) <- sample_a_codes$id sample_a_codes_vec <- loxcoder::get_cass_vec(sample_a_codes$code) loxcoder::retrieve_dist_pair(sample_a_codes_vec, sample_a_codes_vec) -> n rownames(n) <- sample_a_codes$dist_orig colnames(n) <- sample_a_codes$dist_orig Heatmap(n, column_names_gp = gpar(fontsize = 8), column_names_rot = 90) Heatmap(n, cluster_rows = F, cluster_columns = F)
# get sample-sample matrix sample_50k <- samples[grep('50k', samples)] sample_50k_valid <- lapply(sample_50k, function(x){ loxcoder::valid(samples_data[[x]]) %>% filter(size == 9) -> y return(loxcoder::get_cass_vec(y$code)) }) sample_50k_1pulse1_1pulse2 <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_1_1__A']], sample_50k_valid[['50k_1_2__A']]) sample_50k_1pulse1_2pulse2 <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_1_1__A']], sample_50k_valid[['50k_2_2__A']]) Heatmap(sample_50k_1pulse1_1pulse2) Heatmap(sample_50k_1pulse1_2pulse2) sample_50k_1pulse2_2pulse2 <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_1_2__A']], sample_50k_valid[['50k_2_2__A']]) sample_50k_2pulse2a_2pulse2b <- loxcoder::retrieve_dist_pair(sample_50k_valid[['50k_2_2__A']], sample_50k_valid[['50k_2_2__B']]) h <- Heatmap(sample_50k_1pulse2_2pulse2) h # show that the heatmap clustering tends to cluster together according to complexity ro <- row_order(h) u <- valid(samples_data[['50k_1_2__A']]) %>% filter(size == 9) u <- u$dist_orig plot(u[ro]) h2 <- Heatmap(sample_50k_2pulse2a_2pulse2b) h2 # show that the heatmap clustering tends to cluster together according to complexity ro2 <- row_order(h2) u2 <- valid(samples_data[['50k_2_2__A']]) %>% filter(size == 9) u2 <- u2$dist_orig plot(u2[ro2])
u1 <- valid(samples_data[['12k_1_2__B']]) %>% filter(dist_orig >= 4 & count > 1 & size == 9) c <- loxcoder::get_cass_vec(u1$code) m <- loxcoder::retrieve_dist_pair(c, c) colnames(m) <- u1$code rownames(m) <- u1$code h <- Heatmap(m) ro <- row_order(h) co <- column_order(h) m <- m[ro, co] Heatmap(m, cluster_rows = F, cluster_columns = F) m_same <- m # m_diff <- m plot_me <- function(x, k, m){ z <- match(x, rownames(m)) m_temp = m; m_temp[z, z] = 10 # Heatmap(m_temp) Heatmap(m_temp[(z-k):(z+k),(z-k):(z+k)], cluster_rows = F, cluster_columns = F) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.