#' generate.peaks.from.gmm is a function that takes the output of a GMM and generates peaks
#'
#' @param dp_data Results from fitting a Dirichlet Process GMM model
#' @param PARAMETERS A PARAMETERS list with the parameters indicated in the DPDE4PM function
#' @param GENEINFO A list of gene attributes generated by the get.gene.anno function
#'
#' @return
#' \describe{
#' \item{merged.peaks.filtered.rna}{A merged peaks GenomicRanges object in RNA coordinates}
#' \item{merged.peaks.filtered.genome}{A merged peaks GenomicRanges object in genomic coordinates}
#' }
.generate.peaks.from.gmm = function(
dp_data,
PARAMETERS,
GENEINFO
){
# Filtering by Threshold
dp_data = dp_data[dp_data$Weights > PARAMETERS$WEIGHT.THRESHOLD,]
dp_data = dp_data[complete.cases(dp_data),]
if(nrow(dp_data) == 0){
warning("No Peaks Survive Past The Weight Threshold or DP could not fit reasonable Gaussians. Consider Lowering Requirements or Tuning Parameters.",
call. = TRUE, domain = NULL)
return(list(GenomicRanges::GRanges(), GenomicRanges::GRanges()))
}
dp_data = dp_data[order(-dp_data$Weights),]
# Creating Peaks
merged.peaks = data.frame(
"chr" = GENEINFO$chr,
"start" = round(dp_data$Mu - PARAMETERS$N.SD*dp_data$Sigma),
"end" = round(dp_data$Mu + PARAMETERS$N.SD*dp_data$Sigma),
"name" = GENEINFO$gene,
"strand" = GENEINFO$strand,
"weights" = dp_data$Weights,
"i" = dp_data$i,
"j" = seq(1, nrow(dp_data), 1),
stringsAsFactors = F
)
merged.peaks$start = ifelse(merged.peaks$start < 1, 1, merged.peaks$start)
merged.peaks$end = ifelse(merged.peaks$end > GENEINFO$exome_length, GENEINFO$exome_length, merged.peaks$end)
merged.peaks = merged.peaks[!duplicated(merged.peaks[,c("chr", "start", "end", "name", "strand")]),]
# Filtering peaks where 1 peak is within the other peak
merged.peaks.gr = GenomicRanges::makeGRangesFromDataFrame(merged.peaks, keep.extra.columns = T)
within.peaks = GenomicRanges::findOverlaps(merged.peaks.gr, merged.peaks.gr, type = "within")
within.peaks = within.peaks[S4Vectors::subjectHits(within.peaks) != S4Vectors::queryHits(within.peaks)]
if(length(within.peaks) > 0){
wid = IRanges::width(merged.peaks.gr)
remove.elements = ifelse(wid[S4Vectors::queryHits(within.peaks)] > wid[S4Vectors::subjectHits(within.peaks)], S4Vectors::subjectHits(within.peaks), S4Vectors::queryHits(within.peaks))
merged.peaks.gr = merged.peaks.gr[!1:length(merged.peaks.gr) %in% remove.elements]
}
# Subtracting Overlapping Regions
merged.peaks.filtered.rna = GenomicRanges::GRanges()
for ( i in 1:length(merged.peaks.gr)){
if(i == 1){
tmp.gr = merged.peaks.gr[1]
}else{
tmp.gr = GenomicRanges::setdiff(merged.peaks.gr[i], merged.peaks.gr[1:(i-1)])
if(length(tmp.gr) > 0){S4Vectors::mcols(tmp.gr) = S4Vectors::mcols(merged.peaks.gr[i])}
}
merged.peaks.filtered.rna = c(merged.peaks.filtered.rna, tmp.gr)
}
# Removing peaks that are below resolution
# wid = IRanges::width(merged.peaks.filtered.rna)
# remove.elements = which(wid < PARAMETERS$RESOLUTION)
# merged.peaks.filtered.rna = merged.peaks.filtered.rna[!1:length(merged.peaks.filtered.rna) %in% remove.elements]
# if(length(merged.peaks.filtered.rna) == 0){
# warning("No Peaks Survive After The Width Filter",
# call. = TRUE, domain = NULL)
# return(list(GenomicRanges::GRanges(), GenomicRanges::GRanges()))
# }
# Transferring to Genomic Coordinates
merged.peaks.genome = merged.peaks.filtered.rna
GenomicRanges::end(merged.peaks.genome) = GENEINFO$RNA2DNA[GenomicRanges::end(merged.peaks.genome)]
GenomicRanges::start(merged.peaks.genome) = GENEINFO$RNA2DNA[GenomicRanges::start(merged.peaks.genome)]
anno_gr = GenomicRanges::makeGRangesFromDataFrame(GENEINFO$anno)
# Filtering Out Introns
merged.peaks.filtered.genome = GenomicRanges::GRanges()
for(i in 1:length(merged.peaks.genome)){
tmp.gr = GenomicRanges::intersect(merged.peaks.genome[i], anno_gr)
if(length(tmp.gr) > 0){S4Vectors::mcols(tmp.gr) = S4Vectors::mcols(merged.peaks.genome[i])}
merged.peaks.filtered.genome = c(merged.peaks.filtered.genome, tmp.gr)
}
return(list(merged.peaks.filtered.rna, merged.peaks.filtered.genome))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.