#' Extract high confident transcript start or end site
#' @param site_stats A DataFrame of TSS or TES statistics generated by \code{SummaryTranSite}.
#' @param cut_off A numeric value of BH adjsuted p-value (fdr), p-value or probablity cut-off.
#' @param cut_at The \code{cut_off} is applied to "fdr" (adjustd p-value), "pval" (p-value) or "prob" (probability).
#' @param site_region An integer of enriched site region. TSSs/TESs locate within enriched
#' site+/-site_region are all treated as high confident sites.
#' @param min_read The minimum reads in a bin to support high confident sites.
#' @param type "TSS" for transcript start site and "TES" for transcript end site.
#' @return A DataFrame of enriched TSS or TES sites.
#'
HCsite <- function(site_stats,
cut_off=0.05,
cut_at=c('fdr','pval','prob'),
site_region=50,
min_read=2,
type=c('TSS','TES')){
type <- match.arg(arg = type,choices = c('TSS','TES'))
cut_at <- match.arg(arg = cut_at,choices = c('fdr','pval','prob'))
if(!(type %in% colnames(site_stats)))
stop('Please provide valid type: "TSS" or "TES"')
#############################################################
###---> high abundance
site_stats_enriched <- site_stats[site_stats[,cut_at] < cut_off &
site_stats$read > site_stats$gene_read_mean,]
site_stats_enriched$label2 <- paste0(site_stats_enriched$seqnames,';',site_stats_enriched$strand)
site_stats_enriched_split <- GenomicRanges::split(site_stats_enriched,site_stats_enriched$label2)
label_enriched <- lapply(names(site_stats_enriched_split), function(i){
# print(i)
site <- site_stats_enriched_split[[i]][,type]
n <- rep((2*site_region+1),length(site))
insite_region <- sequence(n,from = pmax(rep(0,length(site)),site-site_region))
# insite_region <- insite_region[insite_region %in% site_stats[,type]]
label <- paste0(i,';',insite_region)
label <- label[which(label %in% site_stats$label)]
label
})
label_enriched <- unique(unlist(label_enriched))
site_high <- site_stats[site_stats$label %in% label_enriched,]
#############################################################
###---> low abundance
genes <- setdiff(site_stats$gene_id,site_high$gene_id)
site_low <- site_stats[site_stats$gene_id %in% genes,]
site_low <- site_low[site_low$read_in_bin >= min_read,]
site_high$enriched <-'Yes'
site_low$enriched <- 'No'
result <- rbind(site_high,site_low)
result <- sort(result,by=as.formula(paste0('~seqnames+',type)))
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.