R/D_index.R

Defines functions D_index

Documented in D_index

#' @title Dissimilarity index
#' @description Visual and statistical computation of the area of non-overlap in
#' the trait distribution between two sex groups.
#' @inheritParams univariate
#' @inheritParams multivariate
#' @param plot logical; if TRUE a plot of densities for both sexes is returned,
#' Default: FALSE
#' @param fill Specify which sex's density to be filled with color in the plot;
#' either "male" in blue color, "female" in pink color or "both", Default: 'female'
#' @param B number of bootstrap samples for generating confidence intervals. Higher
#' number means greater accuracy but slower execution. If NULL bootstrap confidence
#' intervals are not produced, Default:NULL
#' @param verbose logical; if TRUE  number of bootstraps is displayed,
#' Default: FALSE
#' @param rand logical; if TRUE, uses random seed.  If FALSE, then set.seed(42)
#' for repeatability, Default: TRUE
#' @details Chakraborty and Majumder's (1982) D index.  The calculations are done
#' using Inman and Bradley's (1989) equations, and the relationship that
#' D =  1 - OVL where OVL is the overlap coefficient described in Inman and Bradley.
#' A parametric bootstrap was used assuming normal distributions. The method is
#' known as the "bias-corrected percentile method" (Efron, 1981) or
#' the "bias-corrected percentile interval" (Tibshirani, 1984)
#' @return a table and a graphical representation of the selected traits and
#' their corresponding dissimilarity indices, confidence intervals and
#' significance tests.
#' @import dplyr
#' @import ggplot2
#' @importFrom stats dnorm integrate na.omit qnorm contr.sum pnorm quantile
#' @importFrom tidyr drop_na
#' @importFrom utils flush.console
#' @export
#' @references Chakraborty, Ranajit, and Partha P. Majumder.(1982) "On Bennett's
#' measure of sex dimorphism." American Journal of Physical Anthropology
#' 59.3 : 295-298.
#'
#'
#' Inman, Henry F., and Edwin L. Bradley Jr.(1989) "The overlapping coefficient as a
#' measure of agreement between probability distributions and point estimation
#' of the overlap of two normal densities." Communications in Statistics-Theory
#' and Methods 18.10:3851-3874.
#'
#' Efron, B. (1981). Nonparametric standard errors and confidence intervals.
#' Canadian Journal of Statistics, 9(2), 139-158.
#'
#' Tibshirani, R. J. (1984). Bootstrap confidence intervals. Technical Report
#' No. 3, Laboratory for Computational Statistics, Department of Statistics,
#' Stanford University.
#'
#' @examples
#' # plot and calculation of D
#' run.D <- function() {
#'   print(D_index(Cremains_measurements[1, ], plot = TRUE))
#'   cat("Published D value: ", Cremains_measurements[1, 8], "\n")
#' }
#' run.D()
#'
#' \dontrun{
#' # confidence interval with bootstrapping
#' D_index(Cremains_measurements[1, ], rand = FALSE, B = 1000)
#' }
#'
D_index <-
  function(x,
           plot = FALSE,
           fill = "female",
           Trait = 1,
           B = NULL,
           verbose=FALSE,
           CI = 0.95,
           rand = TRUE,
           digits = 4) {
    fill <- match.arg(fill, choices = c("female", "male", "both"))
    if (!(is.data.frame(x))) {
      stop("x should be a dataframe")
    }
    if (!all(c("M.mu", "F.mu", "M.sdev", "F.sdev", "m", "f") %in% names(x))) {
      stop(
        "colnames should contain:
            M.mu= Male mean
            F.mu=Female mean
            M.sdev=Male sd
            F.sdev=Female sd
            m= Male sample size
            f=Female sample size
            N.B: colnames are case sensitive"
      )
    }
    if (!(Trait %in% seq_along(x))) {
      stop("Trait should be number from 1 to ncol(x)")
    }
    if (length(unique(x$Trait)) != length(which(!is.na(x$Trait)))) {
      stop("Each trait should be represented by a single raw with a unique name")
    }
    if (!is.logical(plot)) {
      stop("plot should be either TRUE or FALSE")
    }
    if (!is.logical(rand)) {
      stop("rand should be either TRUE or FALSE")
    }
    if (!is.numeric(B) && !is.null(B)) {
      stop("number of bootstraps should be numeric")
    }
    if (!is.numeric(CI) && !is.null(CI)) {
      stop("confidence level should be a number from 0 to 1")
    }
    if (is.numeric(CI) && any(CI < 0, CI > 1)) {
      stop("confidence level should be a number from 0 to 1")
    }
    if (!is.numeric(B) && !is.null(B)) {
      stop("B should be a number from 1 to Inf")
    }
    if (is.numeric(B) && B < 1) {
      stop("B should be a number from 1 to Inf")
    }
    if (is.null(CI) && !is.null(B)) {
      warning("confidence level is not spicified")
    }
    if (isFALSE(rand)) {
      set.seed(42)
    }
    x <- x %>%
      drop_na() %>%
      as.data.frame() %>% rename("Trait"=all_of(Trait))
    x$Trait <- factor(x$Trait, levels = unique(x$Trait))
    m <- x$m
    M.mu <- x$M.mu
    M.sdev <- x$M.sdev
    f <- x$f
    F.mu <- x$F.mu
    F.sdev <- x$F.sdev
    InBr.D <- function(Trait, m, M.mu, M.sdev, f, F.mu, F.sdev, i) {
      m <- x$m[i]
      M.mu <- x$M.mu[i]
      M.sdev <- x$M.sdev[i]
      f <- x$f[i]
      F.mu <- x$F.mu[i]
      F.sdev <- x$F.sdev[i]
      Trait <- x$Trait[i]

      from <- min(qnorm(
        1e-04,
        mean = c(M.mu, F.mu),
        sd = c(M.sdev, F.sdev),
        lower.tail = TRUE
      ))
      to <-
        max(qnorm(
          1e-04,
          mean = c(M.mu, F.mu),
          sd = c(M.sdev, F.sdev),
          lower.tail =
            FALSE
        ))
      abs.dens <- function(x) {
        abs(dnorm(x, c(M.mu, F.mu)[1], c(M.sdev, F.sdev)[1]) - dnorm(x, c(M.mu, F.mu)[2], c(M.sdev, F.sdev)[2]))
      }
      D <-
        integrate(Vectorize(abs.dens), lower = from, upper = to)$val / 2
      z <- seq(from, to, (to - from) / 1000)
      trait <- rep(Trait, length(z))
      dn_male <-
        dnorm(z, as.numeric(na.omit(M.mu)), as.numeric(na.omit(M.sdev))) %>% as.data.frame()
      dn_male <-
        cbind.data.frame(
          z = z,
          dn = dn_male,
          sex = rep("M", nrow(dn_male))
        )
      dn_female <-
        dnorm(z, as.numeric(na.omit(F.mu)), as.numeric(na.omit(F.sdev))) %>% as.data.frame()
      dn_female <-
        cbind.data.frame(
          z = z,
          dn = dn_female,
          sex = rep("F", nrow(dn_female))
        )
      df <- rbind.data.frame(dn_male, dn_female)
      names(df) <- c("z", "dn", "sex")
      df <- cbind.data.frame(trait = trait, df)
      if (!is.null(CI) && !is.null(B)) {
        sto_boot <- rep(NA, B)
        for (i in seq_len(B)) {
          males <- rnorm(m, M.mu, M.sdev)
          females <- rnorm(f, F.mu, F.sdev)
          M.mu_boot <- mean(males)
          M.sdev_boot <- sd(males)
          F.mu_boot <- mean(females)
          F.sdev_boot <- sd(females)
          from_boot <-
            min(qnorm(
              1e-04,
              mean = c(M.mu_boot, F.mu_boot),
              sd = c(
                M.sdev_boot,
                F.sdev_boot
              ),
              lower.tail = TRUE
            ))
          to_boot <-
            max(qnorm(
              1e-04,
              mean = c(M.mu_boot, F.mu_boot),
              sd = c(
                M.sdev_boot,
                F.sdev_boot
              ),
              lower.tail = FALSE
            ))
          abs.dens_boot <- function(x) {
            abs(dnorm(
              x,
              c(M.mu_boot, F.mu_boot)[1],
              c(M.sdev_boot, F.sdev_boot)[1]
            ) -
              dnorm(
                x,
                c(M.mu_boot, F.mu_boot)[2],
                c(M.sdev_boot, F.sdev_boot)[2]
              ))
          }
          D_boot <-
            integrate(Vectorize(abs.dens_boot),
              lower = from_boot,
              upper =
                to_boot
            )$val / 2
          sto_boot[i] <- D_boot
          if(isTRUE(verbose)){
          cat(paste("\r", i, " of ", B, "bootstraps"))
          flush.console()
          }
        }
        if(isTRUE(verbose)){
        cat("\nI'm all done\n")
        }
        half_CI <- CI / 2
        bot <- 0.5 - half_CI
        top <- 0.5 + half_CI
        b <- -qnorm(sum(sto_boot < D) / B)
        alpha.1 <- pnorm(qnorm(bot) - 2 * b)
        alpha.2 <- pnorm(qnorm(top) - 2 * b)
        bounds <- quantile(sto_boot, c(alpha.1, alpha.2))
        bounds <- as.numeric(bounds)
        upper <- bounds[2]
        lower <- bounds[1]

        D_list <-
          list(
            D = cbind.data.frame(
              lower = lower,
              D = D,
              upper = upper
            ),
            df = df
          )
      } else {
        D_list <-
          list(D = data.frame(D = D), df = df)
      }
      D_list
    }
    vec_inBr <- Vectorize(InBr.D, vectorize.args = "i", SIMPLIFY = F)
    vec_inBr(i = seq_len(nrow(x))) -> all_list
    D_list <- lapply(all_list, function(x) {
      x[[1]]
    })
    names(D_list) <- levels(x$Trait)
    df <- lapply(all_list, function(x) {
      x[[2]]
    })
    df <- do.call(rbind.data.frame, df)
    do.call(rbind, D_list) -> D_df
    fill_list <- switch(fill,
      female = list(
        col = c("pink1", "white"),
        col2 = c("pink1", "light blue"),
        sex_levels = c("F", "M")
      ),
      male = list(
        col = c("light blue", "white"),
        col2 = c("light blue", "pink1"),
        sex_levels = c("M", "F")
      ),
      both = list(
        col = c("pink1", "light blue"),
        col2 = c("pink1", "light blue"),
        sex_levels = c("F", "M")
      )
    )
    col <- fill_list$col
    col2 <- fill_list$col2
    df$sex <- factor(df$sex, levels = fill_list$sex_levels)
    names(col) <- levels(df$sex)
    names(col2) <- levels(df$sex)
    scale <- scale_fill_manual(name = "sex", values = col)
    scale2 <- scale_color_manual(name = "sex", values = col2)
    p <- ggplot(data = df, aes(
      x = z,
      y = dn,
      color = sex
    )) +
      geom_polygon(aes(
        fill =
          sex
      )) +
      scale +
      scale2 +
      geom_density(stat = "identity") +
      facet_wrap(~trait, scales = "free") +
      ylab("Density") +
      xlab("x") +
      theme(legend.title = element_blank()) +
      scale_x_continuous(expand = c(0, 0)) +
      scale_y_continuous(expand = c(0, 0)) +
      theme(aspect.ratio = 1)
    D_df <- rown_col(as.data.frame(D_df), var = "Trait") %>%
      mutate(across(-1, function (x) round(x,digits))) %>%
      as.data.frame()
    if (isTRUE(plot)) {
      plot(p)
      D_df
    } else {
      return(D_df)
    }
  }

Try the TestDimorph package in your browser

Any scripts or data that you put into this service are public.

TestDimorph documentation built on Nov. 18, 2023, 1:11 a.m.