#'
#' gr_candidate_filter
#'
#' This function takes GRangesList with 'epigenome' and 'grammar' meta columns
#' and filter the results based on these two columns.
#'
#' @param gr_candidate
#' A \code{\link[GenomicRanges]{GRangesList}} object with 'epigenome' and 'grammar' meta columns.
#' @param best_k
#' Select the best k regions for each element of gr_candidate. Default is 1L.
#' @param b_intercept
#' The regression coefficient of intercept. Default is -4.1626668.
#' @param b_epigenome
#' The regression coefficient of epigenome similarity. Default is 5.4030573.
#' @param b_grammar
#' The regression coefficient of grammar similarity. Default is 2.0649039.
#' @param b_interaction
#' The regression coefficient of the interaction term. Default is NULL.
#' @param threshold
#' Discard the genomic regions with score below this threshold. Default is 0.
#' @param random
#' Pick best_k regions randomly for each element of gr_candidate. Default is FALSE.
#' @param verbose
#' Print messages or not. Default is TRUE.
#'
#' @return
#' A \code{\link[GenomicRanges]{GRangesList}} class the same structure as gr_candidate
#' with an extra meta column named "score" measuring the overall similarity score.
#' Any genomic regions with score below the threshold will be filtered away.
#'
#'
#' @examples
#'
#' data('data_example')
#' gr_list_filtered <- gr_candidate_filter(gr_list_score, threshold = 0.5)
#'
#'
#' @author Chenyang Dong \email{cdong@stat.wisc.edu}
#' @import GenomicRanges
#' @rawNamespace import(data.table, except = shift)
#' @export
gr_candidate_filter <- function(gr_candidate,
best_k = 1L,
b_intercept = -4.1626668,
b_epigenome = 5.4030573,
b_grammar = 2.0649039,
b_interaction = NULL,
threshold = 0,
random = FALSE,
verbose = TRUE) {
stopifnot(class(gr_candidate) %in% c('CompressedGRangesList', 'GrangesList'),
best_k >= 1)
best_k <- as.integer(best_k)
gr_candidate_dt <- as.data.table(gr_candidate)
gr_candidate_dt$`group_name` <- NULL
stopifnot(
'epigenome' %in% colnames(gr_candidate_dt),
'grammar' %in% colnames(gr_candidate_dt)
)
if (verbose) {
message('Filtering syntenic regions based on epigenome and grammar similarities.')
}
gr_candidate_dt[is.na(gr_candidate_dt)] <- 0
score <-
b_intercept + b_epigenome * gr_candidate_dt$epigenome + b_grammar * gr_candidate_dt$grammar
if (!is.null(b_interaction)) {
score <-
score + b_interaction * gr_candidate_dt$epigenome * gr_candidate_dt$grammar
}
gr_candidate_dt[, score := 1 / (1 + exp(-score))]
gr_candidate_dt <- gr_candidate_dt[score >= threshold]
if (random) {
gr_candidate_dt <-
gr_candidate_dt[, .SD[sample(1:nrow(.SD), min(nrow(.SD), best_k))], by = group]
}
else {
gr_candidate_dt <-
gr_candidate_dt[, .SD[order(-score)][1:min(nrow(.SD), best_k)], by = group]
}
gr_candidate_dt$group <-
factor(gr_candidate_dt$group, levels = 1:length(gr_candidate))
gr_candidate <- makeGRangesListFromDataFrame(gr_candidate_dt,
split.field = 'group',
keep.extra.columns = TRUE)
names(gr_candidate) <- NULL
return(gr_candidate)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.