##' Calls CNA using a HMM approach.
##'
##' @title Call CNA
##' @param z_df the Z-scores, from \code{\link{zscore}}.
##' @param trans_prob the transition probability for the HMM.
##' @param nb_cores the number of processor to use.
##' @param mc_info the information about the metacells, if relevant. Default is NULL.
##' @return a data.frame with the CNA calls.
##' @author Jean Monlong
##' @importFrom magrittr %>%
##' @export
call_cna <- function(z_df, trans_prob=1e-4, nb_cores=1, mc_info=NULL){
. = V1 = end = queryHits = subjectHits = NULL
options('dplyr.show_progress'=FALSE)
freq.range <- function(df, nb.samp){
gr.all = GenomicRanges::GRanges(df$chr,
IRanges::IRanges(df$start, df$end),
cell=df$cell)
gr.d = GenomicRanges::disjoin(gr.all)
fr.df = GenomicRanges::as.data.frame(gr.d)[, 1:3]
colnames(fr.df)[1] = "chr"
fr.df$chr = as.character(fr.df$chr)
ol = as.data.frame(GenomicRanges::findOverlaps(gr.d,gr.all))
ol = ol %>% dplyr::group_by(queryHits) %>% dplyr::summarize(n=length(unique(gr.all$cell[subjectHits])))
fr.df$nb = 0
fr.df$nb[ol$queryHits] = ol$n
fr.df$prop = fr.df$nb/nb.samp
fr.df
}
cells = setdiff(colnames(z_df), c("chr","start","end", 'symbol'))
## Call CNV in each cell
hmm.o = parallel::mclapply(cells, function(cell){
cn.df = cnaHMM(z_df[,c('chr','start','end',cell)], trans.prob=trans_prob)
rle.o = rle(paste(cn.df$chr, cn.df$CN))
## Merge into segments
cn.df$seg = rep(1:length(rle.o$lengths), rle.o$lengths)
seg.df = dplyr::ungroup(cn.df) %>%
dplyr::group_by(.data$chr, .data$CN, .data$seg) %>%
dplyr::summarize(start=min(.data$start), end=max(.data$end),
mean=mean(.data$mean), length=dplyr::n())
seg.df$seg = NULL
seg.df = seg.df[order(seg.df$chr, seg.df$start),]
seg.df$cell = as.character(cell)
cn.df$cell = as.character(cell)
list(seg=seg.df, cn=cn.df)
}, mc.cores=nb_cores)
seg.df = do.call(rbind, lapply(hmm.o, function(e)e$seg))
cn.df = do.call(rbind, lapply(hmm.o, function(e)e$cn))
seg.df$pass.filter = seg.df$length>5
if(!is.null(mc_info)){
seg.df = seg.df[which(seg.df$pass.filter),]
seg.df = merge(seg.df, mc_info)
nb_metacells = unique(table(mc_info$community))
if(length(nb_metacells)>1){
warning('Different number of metacells per community. Normal?')
}
seg.df = seg.df %>% dplyr::group_by(.data$community, .data$CN) %>%
dplyr::do(freq.range(.data, nb_metacells))
}
return(list(seg.df=seg.df, hmm.df=cn.df))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.