philentropy: Similarity and Distance Quantification Between Probability Functions

Documented in additive_symm_chi_sq avg bhattacharyya canberra chebyshev clark_sq cosine_dist czekanowski dice_dist dist_many_many dist_one_many dist_one_one divergence_sq euclidean fidelity gower harmonic_mean_dist hellinger inner_product intersection_dist jaccard jeffreys jensen_difference jensen_shannon k_divergence kulczynski_d kullback_leibler_distance kumar_hassebrook kumar_johnson lorentzian manhattan matusita minkowski motyka neyman_chi_sq pearson_chi_sq prob_symm_chi_sq ruzicka soergel sorensen squared_chi_sq squared_chord squared_euclidean taneja tanimoto topsoe wave_hedges

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

Ecpp <- function(P, unit) {
    # Thin R-side wrapper: hands `P` and `unit`, unchanged and in this order,
    # to the compiled routine registered as `_philentropy_Ecpp` and returns
    # whatever that routine yields.
    out <- .Call(`_philentropy_Ecpp`, P, unit)
    out
}

JEcpp <- function(JointProbabilities, unit) {
    # Forwards the joint probabilities and the log unit to the compiled
    # routine `_philentropy_JEcpp`; the routine's result is returned as is.
    out <- .Call(`_philentropy_JEcpp`, JointProbabilities, unit)
    out
}

CEcpp <- function(JointProbabilities, Probabilities, unit) {
    # Passes all three arguments, in declaration order, to the compiled
    # routine `_philentropy_CEcpp` and returns its result untouched.
    out <- .Call(
        `_philentropy_CEcpp`,
        JointProbabilities,
        Probabilities,
        unit
    )
    out
}

MIcpp <- function(X, Y, XY, unit) {
    # Delegates to the compiled routine `_philentropy_MIcpp`, handing over
    # `X`, `Y`, `XY` and `unit` exactly as received.
    out <- .Call(`_philentropy_MIcpp`, X, Y, XY, unit)
    out
}

pearson_corr_centred <- function(x, y, testNA) {
    # Wrapper around the compiled routine `_philentropy_pearson_corr_centred`;
    # `x`, `y` and `testNA` are forwarded unchanged.
    corr_value <- .Call(`_philentropy_pearson_corr_centred`, x, y, testNA)
    corr_value
}

pearson_corr_uncentred <- function(x, y, testNA) {
    # Wrapper around the compiled routine
    # `_philentropy_pearson_corr_uncentred`; arguments are forwarded unchanged.
    corr_value <- .Call(`_philentropy_pearson_corr_uncentred`, x, y, testNA)
    corr_value
}

squared_pearson_corr <- function(x, y, testNA) {
    # Wrapper around the compiled routine `_philentropy_squared_pearson_corr`;
    # `x`, `y` and `testNA` are forwarded unchanged.
    corr_value <- .Call(`_philentropy_squared_pearson_corr`, x, y, testNA)
    corr_value
}

DistMatrixWithoutUnitDF <- function(distsDF, DistFunc, testNA) {
    # Forwards `distsDF`, `DistFunc` and `testNA` to the compiled routine
    # `_philentropy_DistMatrixWithoutUnitDF` and returns its result.
    # NOTE(review): presumably `distsDF` is a data.frame of distributions and
    # the result a pairwise distance matrix - confirm against the C++ source.
    dist_mat <- .Call(
        `_philentropy_DistMatrixWithoutUnitDF`,
        distsDF,
        DistFunc,
        testNA
    )
    dist_mat
}

DistMatrixMinkowskiMAT <- function(dists, p, testNA) {
    # Forwards `dists`, the exponent `p` and `testNA` to the compiled routine
    # `_philentropy_DistMatrixMinkowskiMAT` and returns its result.
    # NOTE(review): presumably `dists` is a numeric matrix - confirm.
    dist_mat <- .Call(`_philentropy_DistMatrixMinkowskiMAT`, dists, p, testNA)
    dist_mat
}

DistMatrixWithoutUnitMAT <- function(dists, DistFunc, testNA) {
    # Forwards `dists`, `DistFunc` and `testNA` to the compiled routine
    # `_philentropy_DistMatrixWithoutUnitMAT` and returns its result.
    # NOTE(review): presumably the matrix counterpart of the `...DF` variant
    # - confirm against the C++ source.
    dist_mat <- .Call(
        `_philentropy_DistMatrixWithoutUnitMAT`,
        dists,
        DistFunc,
        testNA
    )
    dist_mat
}

DistMatrixWithUnitDF <- function(distsDF, DistFunc, testNA, unit) {
    # Forwards `distsDF`, `DistFunc`, `testNA` and the log `unit` to the
    # compiled routine `_philentropy_DistMatrixWithUnitDF`; its result is
    # returned unchanged.
    dist_mat <- .Call(
        `_philentropy_DistMatrixWithUnitDF`,
        distsDF,
        DistFunc,
        testNA,
        unit
    )
    dist_mat
}

DistMatrixWithUnitMAT <- function(dists, DistFunc, testNA, unit) {
    # Forwards `dists`, `DistFunc`, `testNA` and the log `unit` to the
    # compiled routine `_philentropy_DistMatrixWithUnitMAT`; its result is
    # returned unchanged.
    dist_mat <- .Call(
        `_philentropy_DistMatrixWithUnitMAT`,
        dists,
        DistFunc,
        testNA,
        unit
    )
    dist_mat
}

#' @title Distances and Similarities between Two Probability Density Functions
#' @description This function computes the distance/dissimilarity between two
#' probability density functions.
#' @param P a numeric vector storing the first distribution.
#' @param Q a numeric vector storing the second distribution.
#' @param method a character string naming the distance measure that should
#' be computed.
#' @param p power of the Minkowski distance (default \code{NA_real_}).
#' @param testNA a logical value indicating whether or not distributions shall
#' be checked for \code{NA} values (default \code{TRUE}).
#' @param unit type of \code{log} function (default \code{"log"}). Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. The default is \code{epsilon = 0.00001}. However, we
#' recommend choosing a custom \code{epsilon} value depending on the size of
#' the input vectors, the expected similarity between the compared probability
#' density functions, and whether or not many 0 values are present within the
#' compared vectors. As a rough rule of thumb: when dealing with very large
#' input vectors which are very similar and contain many \code{0} values, the
#' \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return A single distance value.
#' @examples
#' P <- 1:10 / sum(1:10)
#' Q <- 20:29 / sum(20:29)
#' dist_one_one(P, Q, method = "euclidean", testNA = FALSE)
#' @export
dist_one_one <- function(P,
                         Q,
                         method,
                         p = NA_real_,
                         testNA = TRUE,
                         unit = "log",
                         epsilon = 0.00001) {
    # All seven arguments are handed to the compiled routine in declaration
    # order; its result is returned unchanged.
    dist_value <- .Call(
        `_philentropy_dist_one_one`,
        P, Q, method, p, testNA, unit, epsilon
    )
    dist_value
}

#' @title Distances and Similarities between One and Many Probability Density Functions
#' @description This function computes the distance/dissimilarity between one
#' probability density function and a set of probability density functions.
#' @param P a numeric vector storing the first distribution.
#' @param dists a numeric matrix storing distributions in its rows.
#' @param method a character string naming the distance measure that should
#' be computed.
#' @param p power of the Minkowski distance (default \code{NA_real_}).
#' @param testNA a logical value indicating whether or not distributions shall
#' be checked for \code{NA} values (default \code{TRUE}).
#' @param unit type of \code{log} function (default \code{"log"}). Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. The default is \code{epsilon = 0.00001}. However, we
#' recommend choosing a custom \code{epsilon} value depending on the size of
#' the input vectors, the expected similarity between the compared probability
#' density functions, and whether or not many 0 values are present within the
#' compared vectors. As a rough rule of thumb: when dealing with very large
#' input vectors which are very similar and contain many \code{0} values, the
#' \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return A vector of distance values.
#' @examples
#' set.seed(2020-08-20)
#' P <- 1:10 / sum(1:10)
#' M <- t(replicate(100, sample(1:10, size = 10) / 55))
#' dist_one_many(P, M, method = "euclidean", testNA = FALSE)
#' @export
dist_one_many <- function(P,
                          dists,
                          method,
                          p = NA_real_,
                          testNA = TRUE,
                          unit = "log",
                          epsilon = 0.00001) {
    # All seven arguments are handed to the compiled routine in declaration
    # order; its result is returned unchanged.
    dist_values <- .Call(
        `_philentropy_dist_one_many`,
        P, dists, method, p, testNA, unit, epsilon
    )
    dist_values
}

#' @title Distances and Similarities between Many Probability Density Functions
#' @description This function computes the distance/dissimilarity between two
#' sets of probability density functions.
#' @param dists1 a numeric matrix storing distributions in its rows.
#' @param dists2 a numeric matrix storing distributions in its rows.
#' @param method a character string naming the distance measure that should
#' be computed.
#' @param p power of the Minkowski distance (default \code{NA_real_}).
#' @param testNA a logical value indicating whether or not distributions shall
#' be checked for \code{NA} values (default \code{TRUE}).
#' @param unit type of \code{log} function (default \code{"log"}). Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. The default is \code{epsilon = 0.00001}. However, we
#' recommend choosing a custom \code{epsilon} value depending on the size of
#' the input vectors, the expected similarity between the compared probability
#' density functions, and whether or not many 0 values are present within the
#' compared vectors. As a rough rule of thumb: when dealing with very large
#' input vectors which are very similar and contain many \code{0} values, the
#' \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return A matrix of distance values.
#' @examples
#'   set.seed(2020-08-20)
#'   M1 <- t(replicate(10, sample(1:10, size = 10) / 55))
#'   M2 <- t(replicate(10, sample(1:10, size = 10) / 55))
#'   result <- dist_many_many(M1, M2, method = "euclidean", testNA = FALSE)
#' @export
dist_many_many <- function(dists1,
                           dists2,
                           method,
                           p = NA_real_,
                           testNA = TRUE,
                           unit = "log",
                           epsilon = 0.00001) {
    # All seven arguments are handed to the compiled routine in declaration
    # order; its result is returned unchanged.
    dist_mat <- .Call(
        `_philentropy_dist_many_many`,
        dists1, dists2, method, p, testNA, unit, epsilon
    )
    dist_mat
}

custom_log2 <- function(x) {
    # Passes `x` to the compiled routine `_philentropy_custom_log2` and
    # returns the routine's result.
    log_value <- .Call(`_philentropy_custom_log2`, x)
    log_value
}

custom_log10 <- function(x) {
    # Passes `x` to the compiled routine `_philentropy_custom_log10` and
    # returns the routine's result.
    log_value <- .Call(`_philentropy_custom_log10`, x)
    log_value
}

#' @title Euclidean distance (lowlevel function)
#' @description Low-level routine that computes the euclidean distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The euclidean distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' euclidean(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
euclidean <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_euclidean`, P, Q, testNA)
    dist_value
}

#' @title Manhattan distance (lowlevel function)
#' @description Low-level routine that computes the manhattan distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The manhattan distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' manhattan(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
manhattan <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_manhattan`, P, Q, testNA)
    dist_value
}

#' @title Minkowski distance (lowlevel function)
#' @description Low-level routine that computes the minkowski distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param n index for the minkowski exponent.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The minkowski distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' minkowski(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), n = 2, testNA = FALSE)
#' @export
minkowski <- function(P, Q, n, testNA) {
    dist_value <- .Call(`_philentropy_minkowski`, P, Q, n, testNA)
    dist_value
}

#' @title Chebyshev distance (lowlevel function)
#' @description Low-level routine that computes the chebyshev distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The chebyshev distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' chebyshev(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
chebyshev <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_chebyshev`, P, Q, testNA)
    dist_value
}

#' @title Sorensen distance (lowlevel function)
#' @description Low-level routine that computes the sorensen distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The sorensen distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' sorensen(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
sorensen <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_sorensen`, P, Q, testNA)
    dist_value
}

#' @title Gower distance (lowlevel function)
#' @description Low-level routine that computes the gower distance for a pair
#' of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The gower distance between \code{P} and \code{Q}, as returned by
#' the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' gower(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
gower <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_gower`, P, Q, testNA)
    dist_value
}

#' @title Soergel distance (lowlevel function)
#' @description Low-level routine that computes the soergel distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The soergel distance between \code{P} and \code{Q}, as returned by
#' the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' soergel(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
soergel <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_soergel`, P, Q, testNA)
    dist_value
}

#' @title Kulczynski_d distance (lowlevel function)
#' @description Low-level routine that computes the kulczynski_d distance for
#' a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. This low-level function defines no default for
#' \code{epsilon}, so it must be supplied explicitly (the example below uses
#' \code{epsilon = 0.00001}). We recommend choosing the value depending on the
#' size of the input vectors, the expected similarity between the compared
#' probability density functions, and whether or not many 0 values are present
#' within the compared vectors. As a rough rule of thumb: when dealing with
#' very large input vectors which are very similar and contain many \code{0}
#' values, the \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return The kulczynski_d distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' kulczynski_d(P = 1:10/sum(1:10), Q = 20:29/sum(20:29),
#'     testNA = FALSE, epsilon = 0.00001)
#' @export
kulczynski_d <- function(P, Q, testNA, epsilon) {
    dist_value <- .Call(`_philentropy_kulczynski_d`, P, Q, testNA, epsilon)
    dist_value
}

#' @title Canberra distance (lowlevel function)
#' @description Low-level routine that computes the canberra distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The canberra distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' canberra(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
canberra <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_canberra`, P, Q, testNA)
    dist_value
}

#' @title Lorentzian distance (lowlevel function)
#' @description Low-level routine that computes the lorentzian distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @param unit type of \code{log} function. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @return The lorentzian distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' lorentzian(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE, unit = "log2")
#' @export
lorentzian <- function(P, Q, testNA, unit) {
    dist_value <- .Call(`_philentropy_lorentzian`, P, Q, testNA, unit)
    dist_value
}

#' @title Intersection distance (lowlevel function)
#' @description Low-level routine that computes the intersection_dist
#' distance for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The intersection_dist distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' intersection_dist(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
intersection_dist <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_intersection_dist`, P, Q, testNA)
    dist_value
}

#' @title Wave hedges distance (lowlevel function)
#' @description Low-level routine that computes the wave_hedges distance for
#' a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The wave_hedges distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' wave_hedges(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
wave_hedges <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_wave_hedges`, P, Q, testNA)
    dist_value
}

#' @title Czekanowski distance (lowlevel function)
#' @description Low-level routine that computes the czekanowski distance for
#' a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The czekanowski distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' czekanowski(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
czekanowski <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_czekanowski`, P, Q, testNA)
    dist_value
}

#' @title Motyka distance (lowlevel function)
#' @description Low-level routine that computes the motyka distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The motyka distance between \code{P} and \code{Q}, as returned by
#' the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' motyka(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
motyka <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_motyka`, P, Q, testNA)
    dist_value
}

#' @title Tanimoto distance (lowlevel function)
#' @description Low-level routine that computes the tanimoto distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The tanimoto distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' tanimoto(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
tanimoto <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_tanimoto`, P, Q, testNA)
    dist_value
}

#' @title Ruzicka distance (lowlevel function)
#' @description Low-level routine that computes the ruzicka distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The ruzicka distance between \code{P} and \code{Q}, as returned by
#' the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' ruzicka(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
ruzicka <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_ruzicka`, P, Q, testNA)
    dist_value
}

#' @title Inner product distance (lowlevel function)
#' @description Low-level routine that computes the inner_product distance
#' for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The inner_product distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' inner_product(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
inner_product <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_inner_product`, P, Q, testNA)
    dist_value
}

#' @title Harmonic mean distance (lowlevel function)
#' @description Low-level routine that computes the harmonic_mean_dist
#' distance for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The harmonic_mean_dist distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' harmonic_mean_dist(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
harmonic_mean_dist <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_harmonic_mean_dist`, P, Q, testNA)
    dist_value
}

#' @title Cosine distance (lowlevel function)
#' @description Low-level routine that computes the cosine_dist distance for
#' a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The cosine_dist distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' cosine_dist(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
cosine_dist <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_cosine_dist`, P, Q, testNA)
    dist_value
}

#' @title Kumar hassebrook distance (lowlevel function)
#' @description Low-level routine that computes the kumar_hassebrook distance
#' for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The kumar_hassebrook distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' kumar_hassebrook(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
kumar_hassebrook <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_kumar_hassebrook`, P, Q, testNA)
    dist_value
}

#' @title Jaccard distance (lowlevel function)
#' @description Low-level routine that computes the jaccard distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The jaccard distance between \code{P} and \code{Q}, as returned by
#' the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' jaccard(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
jaccard <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_jaccard`, P, Q, testNA)
    dist_value
}

#' @title Dice distance (lowlevel function)
#' @description Low-level routine that computes the dice_dist distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The dice_dist distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' dice_dist(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
dice_dist <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_dice_dist`, P, Q, testNA)
    dist_value
}

#' @title Fidelity distance (lowlevel function)
#' @description Low-level routine that computes the fidelity distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The fidelity distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' fidelity(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
fidelity <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_fidelity`, P, Q, testNA)
    dist_value
}

#' @title Bhattacharyya distance (lowlevel function)
#' @description Low-level routine that computes the bhattacharyya distance
#' for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @param unit type of \code{log} function. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. This low-level function defines no default for
#' \code{epsilon}, so it must be supplied explicitly (the example below uses
#' \code{epsilon = 0.00001}). We recommend choosing the value depending on the
#' size of the input vectors, the expected similarity between the compared
#' probability density functions, and whether or not many 0 values are present
#' within the compared vectors. As a rough rule of thumb: when dealing with
#' very large input vectors which are very similar and contain many \code{0}
#' values, the \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return The bhattacharyya distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' bhattacharyya(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE,
#'  unit = "log2", epsilon = 0.00001)
#' @export
bhattacharyya <- function(P, Q, testNA, unit, epsilon) {
    dist_value <- .Call(
        `_philentropy_bhattacharyya`,
        P, Q, testNA, unit, epsilon
    )
    dist_value
}

#' @title Hellinger distance (lowlevel function)
#' @description Low-level routine that computes the hellinger distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The hellinger distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' hellinger(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
hellinger <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_hellinger`, P, Q, testNA)
    dist_value
}

#' @title Matusita distance (lowlevel function)
#' @description Low-level routine that computes the matusita distance for a
#' pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The matusita distance between \code{P} and \code{Q}, as returned
#' by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' matusita(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
matusita <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_matusita`, P, Q, testNA)
    dist_value
}

#' @title Squared chord distance (lowlevel function)
#' @description Low-level routine that computes the squared_chord distance
#' for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The squared_chord distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' squared_chord(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
squared_chord <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_squared_chord`, P, Q, testNA)
    dist_value
}

#' @title Squared euclidean distance (lowlevel function)
#' @description Low-level routine that computes the squared_euclidean
#' distance for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @return The squared_euclidean distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' squared_euclidean(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
squared_euclidean <- function(P, Q, testNA) {
    dist_value <- .Call(`_philentropy_squared_euclidean`, P, Q, testNA)
    dist_value
}

#' @title Pearson chi-squared distance (lowlevel function)
#' @description Low-level routine that computes the pearson_chi_sq distance
#' for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. This low-level function defines no default for
#' \code{epsilon}, so it must be supplied explicitly (the example below uses
#' \code{epsilon = 0.00001}). We recommend choosing the value depending on the
#' size of the input vectors, the expected similarity between the compared
#' probability density functions, and whether or not many 0 values are present
#' within the compared vectors. As a rough rule of thumb: when dealing with
#' very large input vectors which are very similar and contain many \code{0}
#' values, the \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return The pearson_chi_sq distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' pearson_chi_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29),
#'  testNA = FALSE, epsilon = 0.00001)
#' @export
pearson_chi_sq <- function(P, Q, testNA, epsilon) {
    dist_value <- .Call(`_philentropy_pearson_chi_sq`, P, Q, testNA, epsilon)
    dist_value
}

#' @title Neyman chi-squared distance (lowlevel function)
#' @description Low-level routine that computes the neyman_chi_sq distance
#' for a pair of distributions.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical value stating whether the distributions shall be
#' checked for \code{NA} values.
#' @param epsilon a small value to address cases in the distance computation
#' where division by zero occurs: x / 0 or 0 / 0 will be replaced by
#' \code{epsilon}. This low-level function defines no default for
#' \code{epsilon}, so it must be supplied explicitly (the example below uses
#' \code{epsilon = 0.00001}). We recommend choosing the value depending on the
#' size of the input vectors, the expected similarity between the compared
#' probability density functions, and whether or not many 0 values are present
#' within the compared vectors. As a rough rule of thumb: when dealing with
#' very large input vectors which are very similar and contain many \code{0}
#' values, the \code{epsilon} value should be set even smaller
#' (e.g. \code{epsilon = 0.000000001}), whereas when vector sizes are small or
#' distributions very divergent, higher \code{epsilon} values may also be
#' appropriate (e.g. \code{epsilon = 0.01}). Addressing this \code{epsilon}
#' issue is important to avoid cases where distance metrics return negative
#' values, which are not defined and only occur due to the technical issues of
#' computing x / 0 or 0 / 0 cases.
#' @return The neyman_chi_sq distance between \code{P} and \code{Q}, as
#' returned by the compiled routine.
#' @author Hajk-Georg Drost
#' @examples
#' neyman_chi_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29),
#'  testNA = FALSE, epsilon = 0.00001)
#' @export
neyman_chi_sq <- function(P, Q, testNA, epsilon) {
    dist_value <- .Call(`_philentropy_neyman_chi_sq`, P, Q, testNA, epsilon)
    dist_value
}

#' @title Squared chi-squared distance (lowlevel function)
#' @description Lowlevel interface that computes the squared_chi_sq distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' squared_chi_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
squared_chi_sq <- function(P, Q, testNA) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_squared_chi_sq`, P, Q, testNA)
  dist_value
}

#' @title Probability symmetric chi-squared distance (lowlevel function)
#' @description Lowlevel interface that computes the prob_symm_chi_sq distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' prob_symm_chi_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
prob_symm_chi_sq <- function(P, Q, testNA) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_prob_symm_chi_sq`, P, Q, testNA)
  dist_value
}

#' @title Divergence squared distance (lowlevel function)
#' @description Lowlevel interface that computes the divergence_sq distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' divergence_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
divergence_sq <- function(P, Q, testNA) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_divergence_sq`, P, Q, testNA)
  dist_value
}

#' @title Clark squared distance (lowlevel function)
#' @description Lowlevel interface that computes the clark_sq distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' clark_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
clark_sq <- function(P, Q, testNA) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_clark_sq`, P, Q, testNA)
  dist_value
}

#' @title Additive symmetric chi-squared distance (lowlevel function)
#' @description Lowlevel interface that computes the additive_symm_chi_sq
#' distance between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' additive_symm_chi_sq(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
additive_symm_chi_sq <- function(P, Q, testNA) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_additive_symm_chi_sq`, P, Q, testNA)
  dist_value
}

#' @title Kullback-Leibler distance (lowlevel function)
#' @description Lowlevel interface that computes the kullback_leibler_distance
#' distance between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value substituted for terms in which a division by
#' zero would occur (\code{x / 0} or \code{0 / 0}). A common starting point is
#' \code{epsilon = 0.00001}; note that this lowlevel function declares no
#' default, so the value has to be passed explicitly.
#' The best choice depends on the length of the input vectors, on how similar
#' the compared probability density functions are expected to be, and on how
#' many \code{0} entries they contain. As a rough rule of thumb, very large,
#' very similar vectors with many \code{0} values call for a smaller value
#' (e.g. \code{epsilon = 0.000000001}), whereas short or strongly divergent
#' vectors may tolerate a larger one (e.g. \code{epsilon = 0.01}).
#' Choosing \code{epsilon} carefully avoids undefined negative distance values
#' that arise purely from the technical handling of \code{x / 0} or
#' \code{0 / 0} cases.
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' kullback_leibler_distance(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE,
#'  unit = "log2", epsilon = 0.00001)
#' @export
kullback_leibler_distance <- function(P, Q, testNA, unit, epsilon) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(
    `_philentropy_kullback_leibler_distance`,
    P, Q, testNA, unit, epsilon
  )
  dist_value
}

#' @title Jeffreys distance (lowlevel function)
#' @description Lowlevel interface that computes the jeffreys distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value substituted for terms in which a division by
#' zero would occur (\code{x / 0} or \code{0 / 0}). A common starting point is
#' \code{epsilon = 0.00001}; note that this lowlevel function declares no
#' default, so the value has to be passed explicitly.
#' The best choice depends on the length of the input vectors, on how similar
#' the compared probability density functions are expected to be, and on how
#' many \code{0} entries they contain. As a rough rule of thumb, very large,
#' very similar vectors with many \code{0} values call for a smaller value
#' (e.g. \code{epsilon = 0.000000001}), whereas short or strongly divergent
#' vectors may tolerate a larger one (e.g. \code{epsilon = 0.01}).
#' Choosing \code{epsilon} carefully avoids undefined negative distance values
#' that arise purely from the technical handling of \code{x / 0} or
#' \code{0 / 0} cases.
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' jeffreys(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE,
#'  unit = "log2", epsilon = 0.00001)
#' @export
jeffreys <- function(P, Q, testNA, unit, epsilon) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_jeffreys`, P, Q, testNA, unit, epsilon)
  dist_value
}

#' @title K-Divergence (lowlevel function)
#' @description Lowlevel interface that computes the k_divergence distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' k_divergence(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE, unit = "log2")
#' @export
k_divergence <- function(P, Q, testNA, unit) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_k_divergence`, P, Q, testNA, unit)
  dist_value
}

#' @title Topsoe distance (lowlevel function)
#' @description Lowlevel interface that computes the topsoe distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' topsoe(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE, unit = "log2")
#' @export
topsoe <- function(P, Q, testNA, unit) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_topsoe`, P, Q, testNA, unit)
  dist_value
}

#' @title Jensen-Shannon distance (lowlevel function)
#' @description Lowlevel interface that computes the jensen_shannon distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' jensen_shannon(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE, unit = "log2")
#' @export
jensen_shannon <- function(P, Q, testNA, unit) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_jensen_shannon`, P, Q, testNA, unit)
  dist_value
}

#' @title Jensen difference (lowlevel function)
#' @description Lowlevel interface that computes the jensen_difference distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' jensen_difference(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE, unit = "log2")
#' @export
jensen_difference <- function(P, Q, testNA, unit) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_jensen_difference`, P, Q, testNA, unit)
  dist_value
}

#' @title Taneja difference (lowlevel function)
#' @description Lowlevel interface that computes the taneja distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param unit the \code{log} function used in the computation. Options are
#' \itemize{
#' \item \code{unit = "log"}
#' \item \code{unit = "log2"}
#' \item \code{unit = "log10"}
#' }
#' @param epsilon a small value substituted for terms in which a division by
#' zero would occur (\code{x / 0} or \code{0 / 0}). A common starting point is
#' \code{epsilon = 0.00001}; note that this lowlevel function declares no
#' default, so the value has to be passed explicitly.
#' The best choice depends on the length of the input vectors, on how similar
#' the compared probability density functions are expected to be, and on how
#' many \code{0} entries they contain. As a rough rule of thumb, very large,
#' very similar vectors with many \code{0} values call for a smaller value
#' (e.g. \code{epsilon = 0.000000001}), whereas short or strongly divergent
#' vectors may tolerate a larger one (e.g. \code{epsilon = 0.01}).
#' Choosing \code{epsilon} carefully avoids undefined negative distance values
#' that arise purely from the technical handling of \code{x / 0} or
#' \code{0 / 0} cases.
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' taneja(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE,
#'  unit = "log2", epsilon = 0.00001)
#' @export
taneja <- function(P, Q, testNA, unit, epsilon) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_taneja`, P, Q, testNA, unit, epsilon)
  dist_value
}

#' @title Kumar-Johnson distance (lowlevel function)
#' @description Lowlevel interface that computes the kumar_johnson distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @param epsilon a small value substituted for terms in which a division by
#' zero would occur (\code{x / 0} or \code{0 / 0}). A common starting point is
#' \code{epsilon = 0.00001}; note that this lowlevel function declares no
#' default, so the value has to be passed explicitly.
#' The best choice depends on the length of the input vectors, on how similar
#' the compared probability density functions are expected to be, and on how
#' many \code{0} entries they contain. As a rough rule of thumb, very large,
#' very similar vectors with many \code{0} values call for a smaller value
#' (e.g. \code{epsilon = 0.000000001}), whereas short or strongly divergent
#' vectors may tolerate a larger one (e.g. \code{epsilon = 0.01}).
#' Choosing \code{epsilon} carefully avoids undefined negative distance values
#' that arise purely from the technical handling of \code{x / 0} or
#' \code{0 / 0} cases.
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' kumar_johnson(P = 1:10/sum(1:10), Q = 20:29/sum(20:29),
#'  testNA = FALSE, epsilon = 0.00001)
#' @export
kumar_johnson <- function(P, Q, testNA, epsilon) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_kumar_johnson`, P, Q, testNA, epsilon)
  dist_value
}

#' @title AVG distance (lowlevel function)
#' @description Lowlevel interface that computes the avg distance
#' between two distributions by calling the compiled routine directly.
#' @param P numeric vector holding the first distribution.
#' @param Q numeric vector holding the second distribution.
#' @param testNA logical flag: should the distributions be checked for \code{NA} values?
#' @return the result handed back by the compiled routine; presumably a single
#' numeric distance value (to be confirmed against the C++ implementation).
#' @author Hajk-Georg Drost
#' @examples
#' avg(P = 1:10/sum(1:10), Q = 20:29/sum(20:29), testNA = FALSE)
#' @export
avg <- function(P, Q, testNA) {
  # Arguments are forwarded unchanged and in order to the native routine.
  dist_value <- .Call(`_philentropy_avg`, P, Q, testNA)
  dist_value
}

# Undocumented wrapper: forwards `x` to the native routine
# `_philentropy_as_matrix` and returns its result. Judging by the name it
# presumably converts `x` to a matrix -- confirm against the C++ source.
as_matrix <- function(x) {
  native_result <- .Call(`_philentropy_as_matrix`, x)
  native_result
}

# Undocumented wrapper: forwards `mat` to the native routine
# `_philentropy_as_data_frame` and returns its result. Judging by the name it
# presumably converts a matrix to a data frame -- confirm against the C++ source.
as_data_frame <- function(mat) {
  native_result <- .Call(`_philentropy_as_data_frame`, mat)
  native_result
}

# Undocumented wrapper: forwards `vec` to the native routine
# `_philentropy_sum_rcpp` and returns its result. Judging by the name it
# presumably sums the elements of `vec` -- confirm against the C++ source.
sum_rcpp <- function(vec) {
  native_result <- .Call(`_philentropy_sum_rcpp`, vec)
  native_result
}

# Undocumented wrapper: forwards `CountVec` to the native routine
# `_philentropy_est_prob_empirical` and returns its result. Judging by the
# name it presumably turns a count vector into empirical probabilities --
# confirm against the C++ source.
est_prob_empirical <- function(CountVec) {
  native_result <- .Call(`_philentropy_est_prob_empirical`, CountVec)
  native_result
}

# Register entry points for exported C++ functions.
# `methods::setLoadAction()` is handed the anonymous function below as a load
# action; its body invokes the native routine
# `_philentropy_RcppExport_registerCCallable` with no further arguments.
# The `ns` argument is accepted but not used inside the body.
methods::setLoadAction(function(ns) {
    .Call(`_philentropy_RcppExport_registerCCallable`)
})
HajkD/philentropy documentation built on Feb. 20, 2024, 8:18 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
HajkD/philentropy
Similarity and Distance Quantification Between Probability Functions

R/RcppExports.R
In HajkD/philentropy: Similarity and Distance Quantification Between Probability Functions

R Package Documentation

Browse R Packages

We want your feedback!

HajkD/philentropy Similarity and Distance Quantification Between Probability Functions

R/RcppExports.R In HajkD/philentropy: Similarity and Distance Quantification Between Probability Functions

R Package Documentation

Browse R Packages

We want your feedback!

HajkD/philentropy
Similarity and Distance Quantification Between Probability Functions

R/RcppExports.R
In HajkD/philentropy: Similarity and Distance Quantification Between Probability Functions