#' @title HostGeneCount
#'
#' @description
#' Count circRNA host gene expression from given BED6 format CircCoordinates.
#'
#' @param CircCoordinates CircCoordinates output of DCC, or any other BED6 format files, input as data.frame
#' @param bamfiles A iterable list of bamfiles with full path.
#' @param ncore Number of core to use.
#' @param stranded Whether the library is prepared strand specific, default "unstranded". Accepted values are "unstranded","fs","ss", meaning "unstranded library", "first strand library" and "second strand library" respectively.
#' @param splicedOnly Only count reads that are spliced at the circRNA junction, default TRUE.
#' @export HostGeneCount
#'
HostGeneCount <- function(CircCoordinates,bamfiles,ncore=2,stranded='unstranded',splicedOnly=TRUE){
require(Rsamtools)
require(BiocParallel)
register(MulticoreParam(ncore),default=FALSE)
if(stranded=='fs'){
strand_info <- CircCoordinates[,6]
message('First strand library.')
}else{
if(stranded=='ss'){
strand_info <- CircCoordinates[,6]
strand_info <- ifelse(strand_info=='+','-',strand_info)
message('Second strand library.')
}else{
strand_info <- '*'
message('Unstranded library.')
}
}
CircCoordinates <- data.frame(CircCoordinates)
if(is.character(bamfiles)) bamfiles = c(bamfiles)
if(any(!sapply(bamfiles, file.exists))){
stop('One or more bamfile do not exist!')
}
if(any(!sapply(paste0(bamfiles,'.bai'), file.exists))){
stop('One or more bamfile index do not exist!')
}
# setup the criteria/filtering for host gene reads counting
flag <- scanBamFlag(isNotPassingQualityControls=FALSE, isDuplicate=FALSE,isSecondaryAlignment=FALSE,isUnmappedQuery=FALSE)
left <- CircCoordinates[,c(1,2,2,4,5,6)]
colnames(left) <- c('chr','start','end','Gene','strand')
which_left <- as(left,'GRanges')
right <- CircCoordinates[,c(1,3,3,4,5,6)]
colnames(right) <- c('chr','start','end','Gene','strand')
which_right <- as(right,'GRanges')
## Do reads counting
# Count junction
left_count <- bplapply(bamfiles,function(bamfile) scanBam(bamfile,param=ScanBamParam(flag=flag,which=which_left,what=c('strand','cigar'))))
right_count <- bplapply(bamfiles,function(bamfile) scanBam(bamfile,param=ScanBamParam(flag=flag,which=which_right,what=c('strand','cigar'))))
left_counts = bplapply(left_count, function(one_left_count){
one_left_count <- read_scanBam(one_left_count,strand=strand_info)
})
left_counts <- do.call(cbind,left_counts)
right_counts = bplapply(right_count, function(one_right_count){
one_right_count <- read_scanBam(one_right_count,strand=strand_info)
})
right_counts <- do.call(cbind,right_counts)
counts <- ceiling((left_counts+right_counts)/2)
colnames(counts) <- sapply(basename(bamfiles),function(x) strsplit(x,'\\.')[[1]][1])
counts <- cbind(CircCoordinates,counts)
return(counts)
}
#' Read scanBam ouput list object.
#' @param scanBam_result the result list object generated by Rsamtools::scanBam function
#' @param strand a vector of strand information, need to be the same length as the "which" input option of sanBam (the number of circRNA subjected to host gene counting).
#' CircTest needs this information to determine which strand of the reads should be counted.
#' @param splicedOnly Only count reads that are spliced at the circRNA junction. Default to be TRUE.
#'
read_scanBam <- function(scanBam_result,strand=strands,splicedOnly=TRUE){
scanBam_result <- mapply(function(x,y) c(x,gene_strand=y),scanBam_result,as.list(as.character(strand)),SIMPLIFY=F)
counts <- lapply(
scanBam_result,function(x) {
read_one_scanBam(x,strand=x$gene_strand,splicedOnly=splicedOnly)
})
counts <- unlist(counts)
return(counts)
}
##' Read one element of the result of sanBam function from Rsamtools. This function Called by read_scanBam.
#' @param scanBam_one_result one element of the sanBam result, corresponding to one genomic region.
#' @param strand of the region, "+", "-" or "*".
#' @param splicedOnly Only count reads that are spliced at the circRNA junction. Default to be TRUE.
#'
read_one_scanBam <- function(scanBam_one_result,strand='+',splicedOnly=TRUE){
# Only count spliced reads
spliced <- grepl('N',scanBam_one_result$cigar)
if(strand=='*'){
strandMatched <- TRUE
}else{
strandMatched <- scanBam_one_result$strand==strand
}
count <- sum(spliced & strandMatched)
return(count)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.