#' Normalize coverage using identified/ specified normal cells and one normal region and generate a table with (rho_hat) of each cell for all regions.
#'
#' rho_hat: Relative coverage change for each cell in a region
#'
#' @param Obj_filtered An Alleloscope object with segments, specified normal cells and a normal region
#' @param type Specify whether the sample is a "tumor" or "cellline". If "type" is a "cellline", param "ref_counts" needs to be specified for normal sample.
#' @param raw_counts (required) A large binned coverage matrix (m1 bin by n1 cell) with values being read counts for all chromosomal regions of tumor sample.
#' @param ref_counts (required only when type = "cellline") A binned coverage matrix (m2 bin by n2 cell) with values being read counts for all chromosomal regions of normal sample. n2 can be 1 for bulk sample.
#' @param cov_adj An integer for coverage adjustment for tumor cells.
#' @param qt_filter Logical (TRUE/ FALSE). Whether or not to exclude cells with rho_hat>0.99 or <0.01 for each region.
#' @param refr Logical (TRUE/ FALSE). Whether or not to use diplid region for normalization (otherwise, cell size is used).
#'
#' @return (rho_hat) of each cell for all region in the "cov_values".
#' Every column in the cov_values is (rho_hat) of each region. Each row is a cell.
#'
#' @import rtracklayer
#' @import pheatmap
#' @export
Cov_value=function(Obj_filtered=NULL, type="tumor", raw_counts=NULL, ref_counts=NULL, cov_adj=1, qt_filter=FALSE, refr=TRUE, plot_path=NULL){
## check parameters
if(is.null(Obj_filtered)){
stop("Please provide a valid Alleloscope object for Obj_filttered.")
}else if(!(type=="tumor" | type=="cellline")){
stop("type should be either tumor or cellline.")
}else if(length(unlist(strsplit(rownames(raw_counts)[2],'-')))!=3){
stop("The rownames for the raw_counts matrix should be formatted as: chr1-1-20000.")
}else if(!(nrow(raw_counts)>0 & ncol(raw_counts)>0)){
stop("raw_counts matrix is not valid.")
}
if(!dir.exists(paste0(Obj_filtered$dir_path,"/rds"))){
dir.create(paste0(Obj_filtered$dir_path,"/rds"))
}
# set values
samplename=Obj_filtered$samplename
dir_path=Obj_filtered$dir_path
assay=Obj_filtered$assay
barcode_normal=Obj_filtered$select_normal$barcode_normal
if(refr==TRUE){
ref=Obj_filtered$ref
refn=sapply((strsplit(ref,":")),'[',1)
}else{
ref="cellsize"
refn="0"
}
seg_table_filtered=Obj_filtered$seg_table_filtered
#rds_list=Obj_filtered$rds_list
cell_barcodes=Obj_filtered$barcodes
ncell=length(Obj_filtered$barcodes)
theta_N_nr_nc=list()
##ref
# if(!is.null(ref_gtv)){
# dna_gt=ref_gtv
# dna_gt=dna_gt[,which(sapply(strsplit(colnames(dna_gt),"_"),'[',1)=='rho')]
# }
#
# if(is.null(mincell)){
# mincell=round(ncol(Obj_filtered$total_all)*0.9)
# }
# if(mincell<0 | mincell> length(Obj_filtered$barcodes)){
# stop("mincell should be in the range [0, ncell].")
# }
## raw/ref count matrix info
raw_chr=sapply(strsplit(rownames(raw_counts),'-'),'[',1)
raw_start=as.numeric(sapply(strsplit(rownames(raw_counts),'-'),'[',2))
raw_end=as.numeric(sapply(strsplit(rownames(raw_counts),'-'),'[',3))
#if(is.null(ref_gtv) & type=='cellline'){
if( type=='cellline'){
if(refr==TRUE){
ref_chr=sapply(strsplit(rownames(ref_counts),'-'),'[',1)
ref_start=as.numeric(sapply(strsplit(rownames(ref_counts),'-'),'[',2))
ref_end=as.numeric(sapply(strsplit(rownames(ref_counts),'-'),'[',3))}
}
N0_all=colSums(raw_counts) ## for p and q #for cytoarm
message("Start estimating cell specific (rho_hat) for each region.")
####
if(!is.null(plot_path)){
#regions=gsub('chr','',names(Obj_filtered$rds_list))
regions=gsub('chr','',seg_table_filtered$chrr)
for(chrr in regions){
chrrn=unlist(strsplit(chrr,':'))[1]
#result=rds_list[[paste0('chr',chrr)]]
#theta_hat=result$theta_hat
#names(theta_hat)=result$barcodes
raw_counts_chr=raw_counts[which(raw_chr %in% paste0('chr', as.character(chrrn))),]
raw_chr_sub=raw_chr[which(raw_chr %in% paste0('chr', as.character(chrrn)))]
raw_start_sub=raw_start[which(raw_chr %in% paste0('chr', as.character(chrrn)))]
raw_end_sub=raw_end[which(raw_chr %in% paste0('chr', as.character(chrrn)))]
## subsetting region
query=GenomicRanges::GRanges(paste0('chr',chrrn),IRanges::IRanges(as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == chrr),2]), as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == chrr),3])))
subject=GenomicRanges::GRanges(raw_chr_sub, IRanges::IRanges(as.numeric(raw_start_sub),as.numeric(raw_end_sub))) ## cytoband 1-based start and 1-based end
ov=findOverlaps(query, subject)
ov=as.matrix(ov)
bin_start=min(ov[,2])
bin_end=max(ov[,2])
#raw_counts_chr=raw_counts_chr[bin_start:bin_end,match(result$barcodes, cell_barcodes)] ## for p and q #for cytoarm
raw_counts_chr=raw_counts_chr[bin_start:bin_end,] ## for p and q #for cytoarm
Nr=colSums(raw_counts_chr)
#names(Nr)=result$barcodes
names(Nr)=cell_barcodes
# subsetting normal
if(refr==TRUE){
raw_counts_ref=raw_counts[which(raw_chr %in% paste0('chr', as.character(refn))),]
raw_ref_chr_sub=raw_chr[which(raw_chr %in% paste0('chr', as.character(refn)))]
raw_ref_start_sub=raw_start[which(raw_chr %in% paste0('chr', as.character(refn)))]
raw_ref_end_sub=raw_end[which(raw_chr %in% paste0('chr', as.character(refn)))]
query=GenomicRanges::GRanges(paste0('chr',refn),IRanges::IRanges(as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == ref),2]), as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == ref),3])))
subject=GenomicRanges::GRanges(raw_ref_chr_sub, IRanges::IRanges(as.numeric(raw_ref_start_sub),as.numeric(raw_ref_end_sub))) ## cytoband 1-based start and 1-based end
ov=findOverlaps(query, subject)
ov=as.matrix(ov)
bin_start=min(ov[,2])
bin_end=max(ov[,2])
#raw_counts_ref=raw_counts_ref[bin_start:bin_end,match(result$barcodes, cell_barcodes)] ## for p and q #for cytoarm
raw_counts_ref=raw_counts_ref[bin_start:bin_end,]
N0=colSums(raw_counts_ref)
}else{
#raw_counts_ref=raw_counts[,match(result$barcodes, cell_barcodes)] ## for p and q #for cytoarm
#N0=N0_all[match(result$barcodes, cell_barcodes)]
N0=N0_all
ref="cell_size"
Obj_filtered$ref=ref
}
## non_noisy
barcodes_non_noisy=cell_barcodes#cell_info$barcode[which(cell_info$is_noisy==0)]
#barcodes_non_noisy=intersect(barcodes_non_noisy, result$barcodes)
#Nr=Nr[match(barcodes_non_noisy, result$barcodes)]
#N0=N0[match(barcodes_non_noisy, result$barcodes)]
Ni=Nr/N0
names(Ni)=barcodes_non_noisy
#if(is.null(ref_gtv)){
# if(length(result$barcodes)<mincell){
# cat(paste0("Exclude ",chrr," region:<",mincell," cells\n"))
# next
# }
if(type=='tumor'){
ref_ncell=length(barcode_normal)
Nrref=Nr[which(names(Nr) %in% barcode_normal)]
N0ref=N0[which(names(Nr) %in% barcode_normal)]
Ni_ref=median(Nrref/N0ref)
}else if(type=='cellline'){
#normal sample from other normal dataset
ref_counts_chr=ref_counts[which(ref_chr %in% paste0('chr', as.character(chrrn))),]
ref_counts_ref=ref_counts[which(ref_chr %in% paste0('chr', as.character(refn))),]
ref_chr_sub=ref_chr[which(ref_chr %in% paste0('chr', as.character(chrrn)))]
ref_start_sub=ref_start[which(ref_chr %in% paste0('chr', as.character(chrrn)))]
ref_end_sub=ref_end[which(ref_chr %in% paste0('chr', as.character(chrrn)))]
ref_ref_chr_sub=ref_chr[which(ref_chr %in% paste0('chr', as.character(refn)))]
ref_ref_start_sub=ref_start[which(ref_chr %in% paste0('chr', as.character(refn)))]
ref_ref_end_sub=ref_end[which(ref_chr %in% paste0('chr', as.character(refn)))]
# ref chr
query=GenomicRanges::GRanges(paste0('chr',chrrn),IRanges::IRanges(as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == chrr),2]), as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == chrr),3])))
subject=GenomicRanges::GRanges(ref_chr_sub, IRanges::IRanges(as.numeric(ref_start_sub),as.numeric(ref_end_sub))) ## cytoband 1-based start and 1-based end
ov=findOverlaps(query, subject)
ov=as.matrix(ov)
bin_start=min(ov[,2])
bin_end=max(ov[,2])
ref_counts_chr=ref_counts_chr[bin_start:bin_end,] ## for p and q #for cytoarm
## ref ref
query=GenomicRanges::GRanges(paste0('chr',refn),IRanges::IRanges(as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == ref),2]), as.numeric(seg_table_filtered[which(seg_table_filtered$chrr == ref),3])))
subject=GenomicRanges::GRanges(ref_ref_chr_sub, IRanges::IRanges(as.numeric(ref_ref_start_sub),as.numeric(ref_ref_end_sub))) ## cytoband 1-based start and 1-based end
ov=findOverlaps(query, subject)
ov=as.matrix(ov)
bin_start=min(ov[,2])
bin_end=max(ov[,2])
ref_counts_ref=ref_counts_ref[bin_start:bin_end,] ## for p and q #for cytoarm
Nrref=colSums(ref_counts_chr)
N0ref=colSums(ref_counts_ref)
#
Ni_ref=Nrref/N0ref
Ni_ref[is.na(Ni_ref)]=0
Ni_ref=median(Ni_ref)
}
if(type=='cellline'){
Ni=Ni*cov_adj
}else{
Ni[which(! names(Nr) %in% barcode_normal)]=Ni[which(! names(Nr) %in% barcode_normal)]*cov_adj
}
Ni=Ni/Ni_ref
Ni[is.na(Ni)]=0
Ni[which(Ni==Inf)]=0
if(qt_filter==TRUE){
Niq=Ni[which(Ni<=quantile(Ni, 0.99) & Ni>=quantile(Ni, 0.01))] ##
}else{
Niq=Ni
Niq=pmin(Niq, quantile(Ni, 0.99))
Niq=pmax(Niq, quantile(Ni, 0.01))
}
barcodes_nn_q=names(Niq)
# }else{
# gt=as.numeric(dna_gt[,which(colnames(dna_gt)==paste0('rho_',chrr))])
# if(length(Ni)>(ncell/2)){
# md=median(gt)
# Ni=md/median(Ni, na.rm = TRUE)*Ni
# }else{
# md=quantile(gt,0.2)
# Ni=md/quantile(Ni,0.2, na.rm=TRUE)*Ni}
# Niq=Ni
# barcodes_nn_q=names(Niq)
#
#
# }
#theta_hat=theta_hat[match(barcodes_nn_q, names(theta_hat))]
#w1=result$w1[match(names(Ni), names(result$w1))]
#w2=result$w2[match(names(Ni), names(result$w2))]
theta_N_nr_nc[[paste0("rho_",as.character(chrr))]]=Niq
#theta_N_nr_nc[[paste0("theta_",as.character(chrr))]]=theta_hat
#theta_N_nr_nc[[paste0("h1_",as.character(chrr))]]=w1
#theta_N_nr_nc[[paste0("h2_",as.character(chrr))]]=w2
cat(paste0(chrr," "))
}
cat('\n')
cell_list<-lapply(theta_N_nr_nc, function(x) {
names(x)
})
# if(!is.null(ref_gtv)){
# cell_intersect <- Reduce(union, cell_list)
# }else{
# if(cell_filter==TRUE){
cell_intersect <- Reduce(intersect, cell_list)
# }else{
# cell_intersect <- Reduce(union, cell_list)
# }
#}
theta_hat_cbn <- sapply(theta_N_nr_nc,function(x){
x[match(cell_intersect, names(x))]
})
rownames(theta_hat_cbn) <- cell_intersect
saveRDS(theta_hat_cbn, paste0(Obj_filtered$dir_path, "/rds/cov_values.rds"))
Obj_filtered$cov_values=theta_hat_cbn
message("\"cov_values\" is added to the Obj_filtered object.")
cat(paste0("Matrix for cell specific (rho) for each region is stored as cov_values.rds in the path:",dir_path,"\n"))
#### plotting
message("Plotting the coverage values for each segment...")
cluster_cbn=theta_hat_cbn
cluster_cbn=cluster_cbn*2
cluster_cbn=apply(cluster_cbn, c(1,2), function(x) if(x<=2.5 & x>=1.5){x=2}else{x=x})
#cluster_cbn=apply(cluster_cbn, c(1,2), function(x) if(x<=2.3 & x>=1.7){x=2}else{x=x})
#cluster_cbn=apply(cluster_cbn, c(1,2), function(x){pmin(x,5)})
segmentation=Obj_filtered$seg_table_filtered
nrep=round(as.numeric(segmentation[,5])/5000000)
cnrep=c(0,cumsum(nrep))
cluster_cbn2_all=matrix(ncol=sum(nrep), nrow=nrow(cluster_cbn))
for(ii in 1:ncol(cluster_cbn)){
if(nrep[ii]>0){
rr=(replicate(nrep[ii],cluster_cbn[,ii]))
cluster_cbn2_all[,(cnrep[ii]+1):(cnrep[ii+1])]=rr
}
}
rownames(cluster_cbn2_all)=rownames(cluster_cbn)
chrgap=c()
for(ii in 1:22){
chrgap=c(chrgap,sum(nrep[which(segmentation$chr==ii)]))
}
nclust=3
celltype=as.data.frame(celltypes)
rownames(celltype)=celltype[,1]
plot_matrix<- cluster_cbn2_all
breaklength = 50
setcolor = colorRampPalette(c("blue", "white", "red"))(breaklength)
setbreaks = c(seq(min(plot_matrix), 1.7, length.out=ceiling(breaklength/2) + 1),
c(2.3,seq((max(plot_matrix)-2.3)/breaklength+2.3, max(plot_matrix),
length.out=floor(breaklength/2)))[1:(breaklength/2)])
col_lab=rep(" ", ncol(cluster_cbn2_all))
col_lab[c(0, cumsum(chrgap)[1:(length(chrgap)-1)])+chrgap/2]=paste0("chr",as.character(1:22))
png(plot_path,width=800, height=450)
tmp=pheatmap::pheatmap(plot_matrix,
cluster_cols = F, cluster_rows = TRUE,
show_rownames = F,
show_colnames = T,
color=setcolor,
breaks = setbreaks,
labels_col=col_lab,
clustering_distance_rows = "correlation",
clustering_method = "ward.D2",
gaps_col=cumsum(chrgap),
cutree_rows = nclust,
annotation_row=celltype[, ncol(celltype),drop=F])
dev.off()
message(paste0("Plot is successfully saved in the path:",plot_path))
}
return(Obj_filtered)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.