#' @title Sort dNdS Values Into Divergence Strata
#' @description This function takes a data.table returned by dNdS
#' and sorts the corresponding dNdS value into divergence strata (10-quantile or deciles by default).
#' @param dNdS_tbl a data.table object returned by \code{\link{dNdS}}.
#' @param subject.id a logical value indicating whether \code{query_id} AND \code{subject_id} should be returned.
#' @param n_quantile a numeric value specifying the number of quantiles that should be returned.
#' @details
#'
#' Divergence Strata are typically decile (10-quantile) values of corresponding \code{\link{dNdS}} values.
#' The \code{\link{dNdS}} function returns dNdS values for orthologous genes
#' of a query species (versus subject species). These dNdS values are then
#' sorted into deciles and each orthologous protein coding gene of the
#' query species receives a corresponding decile value instead of the initial dNdS value.
#'
#' This allows a better comparison between Phylostrata and Divergence Strata (for more details see package: \pkg{myTAI}).
#'
#' It is also possible to specify other number of quantile (N-quantile) value such as quintile (5-quantile)
#' rather than decile values.
#'
#' @author Hajk-Georg Drost
#' @examples \dontrun{
#'
#' # get a divergence map of example sequences
#' dNdS_tbl <- dNdS(
#' query_file = system.file('seqs/ortho_thal_cds.fasta', package = 'orthologr'),
#' subject_file = system.file('seqs/ortho_lyra_cds.fasta', package = 'orthologr'),
#' ortho_detection = "RBH",
#' aa_aln_type = "pairwise",
#' aa_aln_tool = "NW",
#' codon_aln_tool = "pal2nal",
#' dnds_est.method = "Comeron",
#' comp_cores = 1 )
#'
#' divMap <- divergence_map( dNdS_tbl )
#'
#' # in case you want the subject_id as well, you can set
#' # the argument subject.id = TRUE
#' divMap <- divergence_map( dNdS_tbl = dNdS_tbl, subject.id = TRUE)
#'
#' # in case you want a divergence map with divergence stratum as quintile (5-quantile) values
#' # or any other N-quantile values rather than the default decile (10-quantile) values,
#' # you can specify this with n_quantile.
#' divMap <- divergence_map( dNdS_tbl = dNdS_tbl, subject.id = TRUE, n_quantile = 5)
#'
#' }
#' @seealso \code{\link{divergence_stratigraphy}}
#' @return a data.table storing a standard divergence map.
#' @import data.table
#' @export
divergence_map <- function(dNdS_tbl, subject.id = FALSE, n_quantile = 10){
# due to the discussion of no visible binding for global variable for
# data.table objects see:
# http://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check?lq=1
query_id <- subject_id <- divergence_strata <- NULL
data.table::setDT(dNdS_tbl)
data.table::setkeyv(dNdS_tbl, c("query_id","subject_id"))
dNdS_tbl_divMap <-
data.table::as.data.table(dplyr::select(dtplyr::lazy_dt(dNdS_tbl), dNdS, query_id, subject_id))
QuantileValues <-
stats::quantile(dNdS_tbl_divMap[ , dNdS],
probs = seq(0.0, 1, (1/{{n_quantile}})),
na.rm = TRUE)
if (any(duplicated(QuantileValues))) {
stop("ERROR: there is at least one duplicate threshold in the quantile analysis. Please select a lower value as n_quantile.")
} else {
# print("All values in the vector are unique.")
}
dNdS_tbl_divMap$dNdS <- base::cut(dNdS_tbl_divMap[ , dNdS], breaks = QuantileValues, include.lowest = TRUE, labels = FALSE)
data.table::setnames(dNdS_tbl_divMap, old = "dNdS", new = "divergence_strata")
if (!subject.id)
return(dNdS_tbl_divMap[ , list(divergence_strata, query_id)])
if (subject.id)
return(dNdS_tbl_divMap[ , list(divergence_strata, query_id, subject_id)])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.