## Analysis of copy number variations across genome.
## Codes mainly from R package Alleloscope
#' Generate Alleloscope object for analysis
#'
#' @param raw_mat A binned coverage matrix (m1 bin by n1 cell) with
#' values being read counts for scATAC-seq of tumor sample (with some
#' normal cells).The matrix can be generated using
#' https://github.com/seasoncloud/Basic_CNV_SNV/blob/main/scripts/Gen_bin_cell_atac.R
#' @param cell_type A matrix with two columns: COL1- cell barcodes;
#' COL2- cell types (Tumor cells should be labeled with "tumor1,
#' tumor2 and etc.").
#' @param normal_lab Character(s) indicating the cell types considered
#' as normal cells. If not specify, "normal" cell type should exist in
#' the cell_type dataframe.
#' @param size A matrix with two columns: col1: different chromosome;
#' col2: for the size (bp) of different chromosomes (eq.1-22).
#' @param window_w window size for signal pooling in individual cells.
#' @param window_step step size for signal smoothing in individual cells.
#' @param plot_path Path to plot the heatmap.
#' @param nclust Integer. Number of clusters the rows are divided into for visualization and clustering.
#' @param var_filter Logical (TRUE/FALSE) Whether or not to filter our highly variable features.
#' @param ... parameters for pheatmap::pheatmap
#' @return A vector indicating the ordered cluster number (from hierarchical clustering) of each cell and a heatmap saved.
#' @export
plot_scATAC_cnv = function(raw_mat = NULL, cell_type = NULL, normal_lab = "normal", size = NULL,
window_w = 10000000, window_step = 2000000, plot_path = NULL, var_filter = FALSE, ...) {
if (is.null(plot_path)) {
plot_path = paste0("./plots/CNV_cov_w", window_w, "_s", window_step, "_sub.png")
dir.create("./plots/")
}
## normlize by cell size
if (var_filter == TRUE) {
vars = apply(raw_mat, 1, stats::var)
cellsize = colSums(raw_mat[which(vars < stats::quantile(vars, 0.99)), ])
} else {
cellsize = colSums(raw_mat)
}
cellsize = matrix(rep(cellsize, nrow(raw_mat)), byrow = T, ncol = ncol(raw_mat))
raw_mat = raw_mat / cellsize
if (grepl("chr", size[1, 1])) {
size = size
} else {
size[, 1] = paste0("chr", size[, 1])
}
size = size[(size[, 1] %in% paste0('chr', 1:22)), ]
cnv_bin0 = GenomicRanges::GRanges(size[, 1], IRanges(1, as.numeric(size[, 2])))
sw = IRanges::slidingWindows(x = cnv_bin0, width = window_w, step = window_step)
# width(sw)
cnv_bin = sw@unlistData
subject = GenomicRanges::GRanges(sapply(strsplit(rownames(raw_mat), ":|_|-"), '[', 1),
IRanges(as.numeric(sapply(strsplit(rownames(raw_mat), ":|_|-"), '[', 2)) + 1,
as.numeric(sapply(strsplit(rownames(raw_mat), ":|_|-"), '[', 3))))
ov = IRanges::findOverlaps(cnv_bin, subject)
ov = as.matrix(ov)
smooth_mat = matrix(ncol = ncol(raw_mat), nrow = length(cnv_bin))
for (ii in 1:length(cnv_bin)) {
ri = colSums(raw_mat[ov[which(ov[, 1] == ii), 2], , drop = F])
smooth_mat[ii, ] = ri
}
colnames(smooth_mat) = colnames(raw_mat)
rownames(smooth_mat) = paste0(as.character(GenomicRanges::seqnames(cnv_bin)), '-',
stats::start(cnv_bin), '-', stats::end(cnv_bin))
rownames(cell_type) = cell_type[, 1]
mat_celltype = cell_type[match(colnames(smooth_mat), rownames(cell_type)), 2]
# tumor_mat=smooth_mat[,which(!grepl('tumor',mat_celltype))]###
nontumor_mat = smooth_mat[, which((mat_celltype %in% normal_lab))] ###
# tumor_mat=smooth_mat[,which(mat_celltype=='tumor')]
# nontumor_mat=smooth_mat[,which(mat_celltype!='tumor')]
bin_ind = which(Matrix::rowMeans(nontumor_mat) != 0)
norm_matrix = matrix(rep(Matrix::rowMeans(nontumor_mat), ncol(smooth_mat)), ncol = ncol(smooth_mat), byrow = F)
smooth_mat_norm = smooth_mat[bin_ind, ] / (norm_matrix[bin_ind, ])
smooth_mat_norm = apply(smooth_mat_norm, c(1, 2), function(x) min(x, 5))
plot_matrix = t(smooth_mat_norm)
chrgap = (table(sapply(strsplit(rownames(smooth_mat[bin_ind, ]), '-'), '[', 1))[paste0('chr', 1:nrow(size))])
chrgap[is.na(chrgap)] = 0
col_lab = rep(" ", ncol(plot_matrix))
# col_lab[c(0, cumsum(chrgap)[1:(length(chrgap)-1)])+chrgap/2]=paste0("chr",as.character(1:nrow(size)))
col_lab[c(0, cumsum(chrgap)[which(chrgap != 0)])[1:length(which(chrgap != 0))] + chrgap[which(chrgap != 0)] / 2] = names(chrgap)[!is.na(names(chrgap))]
celltype = cell_type
# rownames(celltype)=celltype[,1]
# celltype=celltype[,-1, drop=F]
celltype = as.data.frame(cell_type)
rownames(celltype) = celltype[, 1]
## break for coloring
plot_matrix = plot_matrix * 2
plot_matrix = apply(plot_matrix, c(1, 2), function(x) if (x <= 2.5 & x >= 1.5) {x = 2} else {x = x})
breaklength = 50
setcolor = grDevices::colorRampPalette(c("blue", "white", "red"))(breaklength)
setbreaks = c(seq(min(plot_matrix), 1.7, length.out = ceiling(breaklength / 2) + 1),
c(2.3, seq((max(plot_matrix) - 2.3) / breaklength + 2.3, max(plot_matrix),
length.out = floor(breaklength / 2)))[1:(breaklength / 2)])
# pdf(plot_path, width=16, height=9)
grDevices::png(plot_path, width = 800, height = 450)
tmp = pheatmap::pheatmap(plot_matrix,
color = setcolor,
breaks = setbreaks,
labels_col = col_lab,
clustering_distance_rows = "correlation",
clustering_method = "ward.D2",
gaps_col = cumsum(chrgap),
annotation_row = celltype[, ncol(celltype), drop = F],
...)
grDevices::dev.off()
message("Plot successfully generated!")
cat("Path: ", plot_path, "\n")
## od = tmp$tree_row$order
## clust = stats::cutree(tmp$tree_row, k = nclust)
## clust_order = clust[od]
## cov_obj = list(clust_order = clust_order, plot_matrix = plot_matrix)
return(0)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.