R/normal.R
In ISCAM: Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"

Documented in iscamnormprob

#' Normal Tail Probability
#'
#' `normprob` finds a p-value and plots it onto a normal distribution with mean
#'  and standard deviation as specified. The function can find the probability
#'  above, below, between, or outside of the observed value, as specified by
#'  `directions`.
#'
#' @param xval observed value.
#' @param mean mean of normal distribution.
#' @param sd standard deviation of normal distribution.
#' @param direction direction for probability calculation, "above" or "below"; if
#'  "outside" or "between" are used, a second larger observation, `xval2` must be
#'  specified
#' @param label horizontal axis label.
#' @param xval2 second observation value.
#' @param digits number of digits to display.
#' @param verbose Logical, defaults to `TRUE`. Set to `FALSE` to suppress messages
#'
#' @return a p-value and a plot of the normal distribution with shaded area
#'  representing probability of the observed value or more extreme occurring.
#' @export
#'
#' @examples
#' iscamnormprob(1.96, direction = "above")
#' iscamnormprob(-1.5, mean = 1, sd = 2, direction = "below")
#' iscamnormprob(0, xval2 = 1.5, direction = "between")
#' iscamnormprob(-1, xval2 = 1, direction = "outside")
iscamnormprob <- function(
  xval,
  mean = 0,
  sd = 1,
  direction,
  label = NULL,
  xval2 = NULL,
  digits = 4,
  verbose = TRUE
) {
  if (.iscam_maybe_help(xval, "iscamnormprob")) {
    return(invisible())
  }

  old_par <- par(mar = c(4, 3, 2, 1))
  on.exit(par(old_par), add = TRUE)

  ordered_vals <- .iscam_resolve_interval_bounds(
    xval = xval,
    xval2 = xval2,
    direction = direction,
    default_xval2 = abs
  )
  xval <- ordered_vals[1]
  xval2 <- ordered_vals[2]
  xvallabel <- format(xval, digits = digits)
  xval2label <- format(xval2, digits = digits)
  old_opts <- options("scipen" = 100, "digits" = digits)
  on.exit(options(old_opts), add = TRUE)

  minx <- min(mean - 4 * sd, xval - 0.15 * abs(xval))
  maxx <- max(mean + 4 * sd, xval2 + 0.15 * xval2)

  thisx <- seq(minx, maxx, by = max(0.001, abs(maxx - minx) / 2000))
  xlabel <- "x-variable"
  if (!is.null(label)) {
    xlabel <- label
  }

  .iscam_plot_continuous_distribution(
    x = thisx,
    density_y = dnorm(thisx, mean, sd),
    xlim = c(minx, maxx),
    x_label = xlabel,
    y_label = "Density"
  )

  if (direction == "below") {
    normprob <- pnorm(xval, mean, sd)
    showprob <- format(normprob, digits = digits)
    .iscam_shade_left_tail(
      min_x = minx,
      x_end = max(minx, xval),
      density_fn = function(v) dnorm(v, mean, sd)
    )
    text(
      minx,
      max(dnorm(mean, mean, sd)) * 0.9,
      labels = bquote(atop(P(X <= .(xvallabel)), "=" ~ .(showprob))),
      col = "red",
      pos = 4
    )
  } else if (direction == "above") {
    normprob <- pnorm(xval, mean, sd, lower.tail = FALSE)
    showprob <- format(normprob, digits = digits)
    .iscam_shade_right_tail(
      x_start = min(maxx, xval),
      max_x = maxx,
      density_fn = function(v) dnorm(v, mean, sd)
    )
    text(
      maxx,
      max(dnorm(mean, mean, sd)) * 0.9,
      labels = bquote(atop(P(X >= .(xvallabel)), "=" ~ .(showprob))),
      col = "red",
      pos = 2
    )
  } else if (direction == "between") {
    normprob <- pnorm(xval2, mean, sd) - pnorm(xval, mean, sd)
    showprob <- format(normprob, digits = digits)
    .iscam_shade_between(
      x_start = xval,
      x_end = xval2,
      density_fn = function(v) dnorm(v, mean, sd)
    )
    text(
      minx,
      max(dnorm(mean, mean, sd)) * 0.9,
      labels = bquote(
        atop(
          P(.(xvallabel) <= X ~ "," ~ X <= .(xval2label)),
          "=" ~ .(showprob)
        )
      ),
      col = "red",
      pos = 4
    )
  } else if (direction == "outside") {
    normprob <- 1 - (pnorm(xval2, mean, sd) - pnorm(xval, mean, sd))
    showprob <- format(normprob, digits = digits)
    .iscam_shade_left_tail(
      min_x = minx,
      x_end = xval,
      density_fn = function(v) dnorm(v, mean, sd),
      baseline_start = TRUE
    )
    .iscam_shade_right_tail(
      x_start = xval2,
      max_x = maxx,
      density_fn = function(v) dnorm(v, mean, sd)
    )
    text(
      minx,
      max(dnorm(mean, mean, sd)) * 0.9,
      labels = bquote(atop(
        P(X <= .(xvallabel)) ~ "and" ~ P(X >= .(xval2label)),
        "=" ~ .(showprob)
      )),
      col = "red",
      pos = 4
    )
  } else {
    .iscam_stop_invalid_direction()
  }

  title(substitute(
    paste("Normal(", mean == x3, ",  ", SD == x4, ")"),
    list(x3 = mean, x4 = signif(sd, 4))
  ))
  .iscam_print_probability(verbose, showprob)

  invisible(showprob)
}

#' Inverse Normal Calculation
#'
#' @param prob1 probability to find normal quantile of.
#' @param mean mean of normal distribution.
#' @param sd standard deviation of normal distribution.
#' @param Sd deprecated--available for backwards compatibility.
#' @param direction direction for probability calculation: "above", "below",
#'  "outside", "between".
#' @param verbose Logical, defaults to `TRUE`. Set to `FALSE` to suppress messages
#'
#' @return a plot of the normal distribution with the quantile of the specified
#' probability highlighted.
#'
#' @export
#'
#' @examples
#' iscaminvnorm(0.05, direction = "below")
#' iscaminvnorm(0.90, mean = 100, sd = 15, direction = "above")
#' iscaminvnorm(0.10, direction = "outside")
#' iscaminvnorm(0.95, direction = "between")
iscaminvnorm <- function(
  prob1,
  mean = 0,
  sd = 1,
  Sd = sd,
  direction,
  verbose = TRUE
) {
  if (.iscam_maybe_help(prob1, "iscaminvnorm")) {
    return(invisible())
  }

  if ((!missing(sd) && !missing(Sd)) && Sd != sd) {
    stop("`Sd` is deprecated; use `sd`. Don't set `Sd` and `sd`.")
  }
  if (missing(sd) && !missing(Sd)) {
    sd <- Sd
  }
  old <- par(mar = c(4, 3, 2, 2))
  on.exit(par(old), add = TRUE)
  min <- mean - 4 * sd
  max <- mean + 4 * sd
  thisx <- seq(min, max, by = max(0.001, abs(max - min) / 2000))
  varx <- "X"
  if (mean == 0 && sd == 1) {
    varx <- "Z"
  }
  .iscam_plot_continuous_distribution(
    x = thisx,
    density_y = dnorm(thisx, mean, sd),
    x_label = paste(varx, " = variable"),
    y_label = "density",
    baseline_col = "black"
  )
  newtitle <- paste("Normal (mean =", mean, ", SD = ", sd, ")")
  title(newtitle)
  ymax <- max(dnorm(mean, mean, sd))
  if (direction == "below") {
    answer <- signif(qnorm(prob1, mean, sd, TRUE), 4)
    thisrange <- seq(min, answer, by = max(0.001, abs(answer - min) / 2000))
    polygon(
      c(thisrange, answer, answer),
      c(0, dnorm(thisrange, mean, sd), 0),
      col = "pink"
    )
    text(
      (min + answer) / 2,
      dnorm(answer, mean, sd) / 2,
      labels = prob1,
      col = "blue"
    )
    text(
      answer,
      min(dnorm(answer, mean, sd), ymax * 0.85),
      labels = bquote(.(as.name(varx)) <= .(answer)),
      col = "red",
      pos = 3
    )
    if (verbose) {
      cat("The observation with", prob1, "probability below is", answer, "\n")
    }
  } else if (direction == "above") {
    answer <- signif(qnorm(prob1, mean, sd, FALSE), 4)
    thisrange <- seq(answer, max, by = max(0.001, abs(max - answer) / 2000))
    polygon(
      c(answer, thisrange, max),
      c(0, dnorm(thisrange, mean, sd), 0),
      col = "pink"
    )
    text(
      (answer + max) / 2,
      (dnorm(answer, mean, sd) / 2),
      labels = prob1,
      col = "blue"
    )
    text(
      answer,
      min(dnorm(answer, mean, sd), ymax * 0.85),
      labels = bquote(.(as.name(varx)) >= .(answer)),
      col = "red",
      pos = 3
    )
    if (verbose) {
      cat("The observation with", prob1, "probability above is", answer, "\n")
    }
  } else if (direction == "between") {
    answer1 <- signif(qnorm((1 - prob1) / 2, mean, sd, TRUE), 4)
    answer2 <- .iscam_reflect_about(mean, answer1)
    thisrange <- seq(
      answer1,
      answer2,
      by = max(0.001, abs(answer2 - answer1) / 2000)
    )
    polygon(
      c(answer1, thisrange, answer2),
      c(0, dnorm(thisrange, mean, sd), 0),
      col = "pink"
    )
    text(mean, dnorm(mean, mean, sd) / 2, labels = prob1, col = "blue")
    text(
      answer1,
      min(dnorm(answer1, mean, sd), ymax * 0.85),
      labels = bquote(.(as.name(varx)) >= .(answer1)),
      col = "red",
      pos = 3
    )
    text(
      answer2,
      min(dnorm(answer1, mean, sd), ymax * 0.85),
      labels = bquote(.(as.name(varx)) <= .(answer2)),
      col = "red",
      pos = 3
    )
    if (verbose) {
      cat(
        "There is",
        prob1,
        "probability between",
        answer1,
        "and",
        answer2,
        "\n"
      )
    }
  } else if (direction == "outside") {
    answer1 <- signif(qnorm(prob1 / 2, mean, sd, TRUE), 4)
    answer2 <- .iscam_reflect_about(mean, answer1)
    thisrange1 <- seq(min, answer1, by = max(0.001, abs(answer1 - min) / 2000))
    thisrange2 <- seq(answer2, max, by = max(0.001, abs(max - answer2) / 2000))
    polygon(
      c(min, thisrange1, answer1),
      c(0, dnorm(thisrange1, mean, sd), 0),
      col = "pink"
    )
    polygon(
      c(answer2, thisrange2, max),
      c(0, dnorm(thisrange2, mean, sd), 0),
      col = "pink"
    )
    text(
      answer1,
      dnorm(answer2, mean, sd) / 2,
      labels = prob1 / 2,
      col = "blue",
      pos = 2
    )
    text(
      answer2,
      dnorm(answer2, mean, sd) / 2,
      labels = prob1 / 2,
      col = "blue",
      pos = 4
    )
    text(
      answer1,
      min(dnorm(answer2, mean, sd), ymax * 0.85),
      labels = bquote(.(as.name(varx)) <= .(answer1)),
      col = "red",
      pos = 3
    )
    text(
      answer2,
      min(dnorm(answer2, mean, sd), ymax * 0.85),
      labels = bquote(.(as.name(varx)) >= .(answer2)),
      col = "red",
      pos = 3
    )
    if (verbose) {
      cat(
        "There is",
        prob1,
        "probability outside",
        answer1,
        "and",
        answer2,
        "\n"
      )
    }
  }
}

#' Rejection Region for Normal
#'
#' `normpower` determines the rejection region corresponding to the level of
#'  significance and the first probability and shows the normal distribution
#'  shading its corresponding region.
#'
#' @param LOS A numeric value representing the level of significance; 0 < `LOS`< 1
#' @param n A numeric value representing the sample size
#' @param prob1 A numeric value representing the first probability
#' @param alternative "less", "greater", or "two.sided"
#' @param prob2 A numeric value representing the second probability
#' @param verbose Logical, defaults to `TRUE`. Set to `FALSE` to suppress messages
#'
#' @return A plot of the normal distribution with the rejection region highlighted.
#'
#' @export
#'
#' @examples
#' iscamnormpower(0.05, n = 100, prob1 = 0.5, alternative = "greater", prob2 = 0.6)
#' iscamnormpower(0.10, n = 50, prob1 = 0.25, alternative = "less", prob2 = 0.15)
#' iscamnormpower(0.05, n = 200, prob1 = 0.8, alternative = "two.sided", prob2 = 0.7)
iscamnormpower <- function(LOS, n, prob1, alternative, prob2, verbose = TRUE) {
  if (.iscam_maybe_help(LOS, "iscamnormpower")) {
    return(invisible())
  }

  old <- par(mar = c(5, 4, 1, 1), mfrow = c(2, 1))
  on.exit(par(old), add = TRUE)
  minx <- max(
    0,
    min(
      prob1 - 4 * sqrt(prob1 * (1 - prob1) / n),
      prob2 - 4 * sqrt(prob2 * (1 - prob2) / n)
    )
  )
  maxx <- min(
    n,
    max(
      prob1 + 4 * sqrt(prob1 * (1 - prob1) / n),
      prob2 + 4 * sqrt(prob2 * (1 - prob2) / n)
    )
  )
  mean <- prob1
  std <- sqrt(prob1 * (1 - prob1) / n)
  myy1 <- dnorm(mean, mean, std) / 2
  drawseq <- seq(minx, maxx, 0.001)
  plot(
    drawseq,
    dnorm(drawseq, mean, std),
    type = "l",
    xlab = "Probability of Success",
    ylab = "Density",
    panel.first = grid()
  )

  if (alternative == "less") {
    rr <- qnorm(LOS, mean, std)
    this.prob1 <- pnorm(rr, mean, std)
    showprob1 <- format(this.prob1, digits = 4)
    drawseq <- seq(minx, rr, 0.001)
    polygon(
      c(drawseq, rr, minx),
      c(dnorm(drawseq, mean, std), 0, 0),
      col = "red"
    )
    rr <- format(rr, digits = 4)
    text(
      minx,
      myy1,
      labels = bquote(atop(P(hat(p) <= .(rr)), "=" ~ .(showprob1))),
      pos = 4,
      col = "red"
    )
    if (verbose) {
      cat("Null: Probability", rr, "and below =", this.prob1, "\n")
    }
  } else if (alternative == "greater") {
    rr <- qnorm(LOS, mean, std, FALSE)
    this.prob1 <- 1 - pnorm(rr, mean, std)

    showprob1 <- format(this.prob1, digits = 4)
    drawseq <- seq(rr, maxx, 0.001)
    polygon(
      c(drawseq, maxx, rr),
      c(dnorm(drawseq, mean, std), 0, 0),
      col = "red"
    )
    rr <- format(rr, digits = 4)
    text(
      maxx,
      myy1,
      labels = bquote(atop(P(hat(p) >= .(rr)), "=" ~ .(showprob1))),
      pos = 2,
      col = "red"
    )
    if (verbose) {
      cat("Null: Probability", rr, "and above =", this.prob1, "\n")
    }
  } else if (alternative == "two.sided") {
    lowerrr <- qnorm(LOS / 2, mean, std)
    upperrr <- qnorm(LOS / 2, mean, std, FALSE)
    lowerprob1 <- pnorm(lowerrr, mean, std)
    upperprob1 <- pnorm(upperrr, mean, std, FALSE)
    showlowerprob1 <- format(lowerprob1, digits = 4)
    showupperprob1 <- format(upperprob1, digits = 4)
    showprob1 <- format(lowerprob1 + upperprob1, digits = 4)
    drawseq <- seq(minx, lowerrr, 0.001)
    polygon(
      c(drawseq, lowerrr, minx),
      c(dnorm(drawseq, mean, std), 0, 0),
      col = "red"
    )
    drawseq <- seq(upperrr, maxx, 0.001)
    polygon(
      c(drawseq, maxx, upperrr),
      c(dnorm(drawseq, mean, std), 0, 0),
      col = "red"
    )
    showupperrr <- format(upperrr, digits = 4)
    showlowerrr <- format(lowerrr, digits = 4)
    text(
      (maxx + prob1) / 2,
      myy1,
      labels = bquote(
        atop(
          P(hat(p) <= .(showlowerrr)) + P(hat(p) >= .(showupperrr)),
          atop("=" ~ .(showlowerprob1) + .(showupperprob1), "=" ~ .(showprob1))
        )
      ),
      pos = 3,
      col = "red"
    )
    if (verbose) {
      cat("Null: Probability in rejection region", showprob1, "\n")
    }
  } else {
    stop("Check input for alternative")
  }

  title(
    substitute(
      paste("Null: Normal (", mu == x1, ", ", sigma == x2, ")", ),
      list(x1 = round(mean, 3), x2 = round(std, 3))
    )
  )

  if (!is.null(prob2)) {
    mean2 <- prob2
    std2 <- sqrt(prob2 * (1 - prob2) / n)
    drawseq <- seq(minx, maxx, 0.001)
    plot(
      drawseq,
      dnorm(drawseq, mean2, std2),
      type = "l",
      xlab = "Probability of Success",
      ylab = "Density",
      panel.first = grid()
    )

    if (alternative == "less") {
      rr <- qnorm(LOS, mean, std)
      this.prob2 <- pnorm(rr, mean2, std2)
      showprob2 <- format(this.prob2, digits = 4)
      drawseq <- seq(minx, rr, 0.001)
      polygon(
        c(drawseq, rr, minx),
        c(dnorm(drawseq, mean2, std2), 0, 0),
        col = "blue"
      )
      rr <- format(rr, digits = 4)
      text(
        minx,
        myy1,
        labels = bquote(atop(P(hat(p) <= .(rr)), "=" ~ .(showprob2))),
        pos = 4,
        col = "blue"
      )
      if (verbose) {
        cat("Alt: Probability", rr, "and below =", this.prob2, "\n")
      }
    } else if (alternative == "greater") {
      rr <- qnorm(LOS, mean, std, FALSE)
      this.prob2 <- 1 - pnorm(rr, mean2, std2)
      showprob2 <- format(this.prob2, digits = 4)
      drawseq <- seq(rr, maxx, 0.001)
      polygon(
        c(drawseq, maxx, rr),
        c(dnorm(drawseq, mean2, std2), 0, 0),
        col = "blue"
      )
      rr <- format(rr, digits = 4)
      text(
        maxx,
        myy1,
        labels = bquote(atop(P(hat(p) >= .(rr)), "=" ~ .(showprob2))),
        pos = 2,
        col = "blue"
      )
      if (verbose) {
        cat("Alt: Probability", rr, "and above =", this.prob2, "\n")
      }
    } else if (alternative == "two.sided") {
      lowerrr <- qnorm(LOS / 2, mean, std)
      upperrr <- qnorm(LOS / 2, mean, std, FALSE)
      lowerprob2 <- pnorm(lowerrr, mean2, std2)
      upperprob2 <- pnorm(upperrr, mean2, std2, FALSE)
      showlowerprob2 <- format(lowerprob2, digits = 4)
      showupperprob2 <- format(upperprob2, digits = 4)
      showprob2 <- format(lowerprob2 + upperprob2, digits = 4)
      drawseq <- seq(minx, lowerrr, 0.001)
      polygon(
        c(drawseq, lowerrr, minx),
        c(dnorm(drawseq, mean2, std2), 0, 0),
        col = "blue"
      )
      drawseq <- seq(upperrr, maxx, 0.001)
      polygon(
        c(drawseq, maxx, upperrr),
        c(dnorm(drawseq, mean2, std2), 0, 0),
        col = "blue"
      )
      showupperrr <- format(upperrr, digits = 4)
      showlowerrr <- format(lowerrr, digits = 4)
      text2 <- (maxx + prob2) / 2
      if (prob1 < prob2) {
        text2 <- (minx + prob1) / 2
      }
      text(
        text2,
        myy1,
        labels = bquote(
          atop(
            P(hat(p) <= .(showlowerrr)) + P(hat(p) >= .(showupperrr)),
            atop(
              "=" ~ .(showlowerprob2) + .(showupperprob2),
              "=" ~ .(showprob2)
            )
          )
        ),
        pos = 3,
        col = "blue"
      )
      if (verbose) {
        cat("Alt: Probability in rejection region", showprob2, "\n")
      }
    }

    title(
      substitute(
        paste("Alt: Normal (", mu == x1, ", ", sigma == x2, ")", ),
        list(x1 = round(mean2, 3), x2 = round(std2, 3))
      )
    )
  }

  old <- par(mfrow = c(1, 1))
  on.exit(par(old), add = TRUE)
}

#' One Proportion Z-Test and Interval
#'
#' iscamonepropztest calculates a one-proportion z-test and/or a corresponding
#' confidence interval.
#'
#' @param observed The observed number of successes. If a value less than 1 is
#' provided, it is assumed to be the sample proportion.
#' @param n The sample size.
#' @param hypothesized (optional) hypothesized probability of success under the null
#' hypothesis.
#' @param alternative (optional) character string specifying the form of the alternative
#' hypothesis. Must be one of "less", "greater", or "two.sided".
#' @param conf.level (optional) confidence level(s) for a two-sided confidence
#' interval.
#' @param verbose Logical, defaults to `TRUE`. Set to `FALSE` to suppress messages
#'
#' @return This function prints the results of the one-proportion z-test and/or
#' the confidence interval. It also generates plots to visualize the test and
#' interval.
#' @export
#'
#' @examples
#' iscamonepropztest(observed = 35, n = 50, hypothesized = 0.5)
#'
#' iscamonepropztest(
#'   observed = 0.8,
#'   n = 100,
#'   hypothesized = 0.75,
#'   alternative = "greater",
#'   conf.level = 0.95
#' )
#'
#' iscamonepropztest(observed = 60, n = 100, conf.level = 0.90)
iscamonepropztest <- function(
  observed,
  n,
  hypothesized = NULL,
  alternative = "two.sided",
  conf.level = NULL,
  verbose = TRUE
) {
  if (.iscam_maybe_help(observed, "iscamonepropztest")) {
    return(invisible())
  }

  old <- par(mar = c(5, 3, 1, 1))
  on.exit(par(old), add = TRUE)
  if (observed < 1) {
    observed <- round(n * observed)
  }
  if (verbose) {
    cat("\nOne Proportion z test\n\n")
  }
  statistic <- signif(observed / n, 4)
  if (verbose) {
    cat(paste(
      "Data: observed successes = ",
      observed,
      ", sample size = ",
      n,
      ", sample proportion = ",
      statistic,
      "\n\n",
      sep = ""
    ))
  }
  zvalue <- NULL
  pvalue <- NULL
  if (!is.null(hypothesized)) {
    if (verbose) {
      cat(paste("Null hypothesis       : pi =", hypothesized, sep = " "), "\n")
    }
    altname <- switch(
      alternative,
      less = "<",
      greater = ">",
      two.sided = "<>",
      not.equal = "<>"
    )
    if (verbose) {
      cat(
        paste("Alternative hypothesis: pi", altname, hypothesized, sep = " "),
        "\n"
      )
    }
    zvalue <- (statistic - hypothesized) /
      sqrt(hypothesized * (1 - hypothesized) / n)
    if (verbose) {
      cat("z-statistic:", signif(zvalue, 4), "\n")
    }
    pvalue <- switch(
      alternative,
      less = pnorm(zvalue),
      greater = pnorm(zvalue, lower.tail = FALSE),
      two.sided = 2 * pnorm(-abs(zvalue)),
      not.equal = 2 * pnorm(-abs(zvalue))
    )
    pvalue <- signif(pvalue, 4)
    if (verbose) {
      cat("p-value:", pvalue, "\n")
    }
    SD <- sqrt(hypothesized * (1 - hypothesized) / n)
    min <- min(hypothesized - 4 * SD, hypothesized - abs(zvalue) * SD - 0.001)
    max <- max(hypothesized + 4 * SD, hypothesized + abs(zvalue) * SD + 0.001)
    x <- seq(min, max, 0.001)
    plot(
      x,
      dnorm(x, hypothesized, SD),
      xlab = "",
      ylab = "",
      type = "l",
      ylim = c(0, dnorm(hypothesized, hypothesized, SD)),
      panel.first = grid()
    )

    zseq <- c(
      hypothesized - 3 * SD,
      hypothesized - 2 * SD,
      hypothesized - SD,
      hypothesized,
      hypothesized + SD,
      hypothesized + 2 * SD,
      hypothesized + 3 * SD
    )
    axis(
      side = 1,
      at = zseq,
      labels = c("z=-3", "z=-2", "z=-1", "z=0", "z=1", "z=2", "z=3"),
      padj = 1.2,
      tick = FALSE,
      col.axis = "blue"
    )
    abline(h = 0, col = "black")
    mtext(side = 1, line = 3, bquote("<-" ~ "Sample Proportions" ~ "->"))
    mtext(side = 2, line = 2, "Density")
    if (alternative == "less") {
      drawseq <- seq(min, statistic, 0.001)
      polygon(
        c(drawseq, statistic, min),
        c(dnorm(drawseq, hypothesized, SD), 0, 0),
        col = "red"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.9,
        labels = paste("z-statistic:", signif(zvalue, 4)),
        pos = 4,
        col = "blue"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.8,
        labels = paste("p-value:", pvalue),
        pos = 4,
        col = "red"
      )
    } else if (alternative == "greater") {
      drawseq <- seq(statistic, max, 0.001)
      polygon(
        c(statistic, drawseq, max),
        c(0, dnorm(drawseq, hypothesized, SD), 0),
        col = "red"
      )
      text(
        max,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.9,
        labels = paste("z-statistic:", signif(zvalue, 4)),
        pos = 2,
        col = "blue"
      )
      text(
        max,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.8,
        labels = paste("p-value:", pvalue),
        pos = 2,
        col = "red"
      )
    } else if (alternative == "two.sided" || alternative == "not.equal") {
      if (statistic < hypothesized) {
        drawseq1 <- seq(min, statistic, 0.001)
        drawseq2 <- seq(hypothesized + (hypothesized - statistic), max, 0.001)
      } else {
        drawseq1 <- seq(min, hypothesized - (statistic - hypothesized), 0.001)
        drawseq2 <- seq(statistic, max, 0.001)
      }
      polygon(
        c(min, drawseq1, drawseq1[length(drawseq1)]),
        c(0, dnorm(drawseq1, hypothesized, SD), 0),
        col = "red"
      )
      polygon(
        c(drawseq2[1], drawseq2, max),
        c(0, dnorm(drawseq2, hypothesized, SD), 0),
        col = "red"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.9,
        labels = paste("z-statistic:", signif(zvalue, 4)),
        pos = 4,
        col = "blue"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.8,
        labels = paste("two-sided p-value:", pvalue),
        pos = 4,
        col = "red"
      )
    }
  }
  old <- par(mfrow = c(3, 1))
  on.exit(par(old), add = TRUE)
  if (length(conf.level) > 1) {
    old <- par(mar = c(4, 2, 1.5, 4), mfrow = c(length(conf.level), 1))
    on.exit(par(old), add = TRUE)
  }
  lower <- 0
  upper <- 0
  if (!is.null(conf.level)) {
    conf.level <- .iscam_normalize_conf_levels(conf.level)
    for (k in seq_along(conf.level)) {
      criticalvalue <- qnorm((1 - conf.level[k]) / 2)
      lower[k] <- statistic +
        criticalvalue * sqrt(statistic * (1 - statistic) / n)
      upper[k] <- statistic -
        criticalvalue * sqrt(statistic * (1 - statistic) / n)
      multconflevel <- 100 * conf.level[k]
      if (verbose) {
        cat(
          multconflevel,
          "% Confidence interval for pi: (",
          lower[k],
          ", ",
          upper[k],
          ") \n"
        )
      }
    }
    if (is.null(hypothesized)) {
      SDphat <- sqrt(statistic * (1 - statistic) / n)
      min <- statistic - 4 * SDphat
      max <- statistic + 4 * SDphat
      CIseq <- seq(min, max, 0.001)

      if (length(conf.level) == 1) {
        myxlab <- substitute(
          paste("Normal (", mean == x1, ", ", SD == x2, ")", ),
          list(x1 = signif(lower[1], 4), x2 = signif(SDphat, 4))
        )
        plot(CIseq, dnorm(CIseq, lower[1], SDphat), type = "l", xlab = " ")
        mtext(
          "sample proportions",
          side = 1,
          line = 1.75,
          adj = 0.5,
          cex = 0.75
        )
        topseq <- seq(statistic, max, 0.001)
        polygon(
          c(statistic, topseq, max),
          c(0, dnorm(topseq, lower[1], SDphat), 0),
          col = "red"
        )
        title(myxlab)
        myxlab <- substitute(
          paste("Normal (", mean == x1, ", ", SD == x2, ")", ),
          list(x1 = signif(upper[1], 4), x2 = signif(SDphat, 4))
        )
        upper_curve <- seq(min, max, 0.001)
        plot(
          upper_curve,
          dnorm(upper_curve, upper[1], SDphat),
          type = "l",
          xlab = " "
        )
        mtext(
          "sample proportions",
          side = 1,
          line = 1.75,
          adj = 0.5,
          cex = 0.75
        )
        bottomseq <- seq(min, statistic, 0.001)
        polygon(
          c(min, bottomseq, statistic, statistic),
          c(
            0,
            dnorm(bottomseq, upper[1], SDphat),
            dnorm(statistic, upper[1], SDphat),
            0
          ),
          col = "red"
        )
        newtitle <- substitute(
          paste("Normal (", mean == x1, ", ", SD == x2, ")", ),
          list(x1 = signif(upper[1], 4), x2 = signif(SDphat, 4))
        )
        title(newtitle)
      }
      for (k in seq_along(conf.level)) {
        .iscam_plot_ci_strip(
          min_x = min,
          statistic = statistic,
          max_x = max,
          lower = lower[k],
          upper = upper[k],
          x_label = "process probability",
          ci_label = paste(conf.level[k] * 100, "% CI:"),
          ci_label_x = min * 1.1
        )
      }
    }
  }
  old <- par(mfrow = c(1, 1))
  on.exit(par(old), add = TRUE)
  invisible(list(
    "zvalue" = zvalue,
    "pvalue" = pvalue,
    "lower" = lower,
    "upper" = upper
  ))
}

#' Two Proportion Z-Test and Interval
#'
#' iscamtwopropztest calculates a two-proportion z-test and/or a corresponding
#' confidence interval.
#'
#' @param observed1 The observed number of successes in group 1. If a value less than 1 is
#' provided, it is assumed to be the sample proportion.
#' @param n1 The sample size for group 1.
#' @param observed2 The observed number of successes in group 2. If a value less than 1 is
#' provided, it is assumed to be the sample proportion.
#' @param n2 The sample size for group 2.
#' @param hypothesized (optional) hypothesized difference in probability of success
#' under the null hypothesis.
#' @param alternative (optional) character string specifying the form of the alternative
#' hypothesis. Must be one of "less", "greater", or "two.sided".
#' @param conf.level (optional) confidence level(s) for a two-sided confidence
#' interval.
#' @param datatable (optional) two-way table of counts as an alternative input
#' method.
#' @param verbose Logical, defaults to `TRUE`. Set to `FALSE` to suppress messages
#'
#' @return This function prints the results of the two-proportion z-test and/or
#' the confidence interval. It also generates plots to visualize the test and
#' interval.
#' @export
#'
#' @examples
#' iscamtwopropztest(observed1 = 35, n1 = 50, observed2 = 28, n2 = 45)
#'
#' iscamtwopropztest(
#'   observed1 = 0.8,
#'   n1 = 100,
#'   observed2 = 0.6,
#'   n2 = 80,
#'   hypothesized = 0,
#'   alternative = "greater",
#'   conf.level = 0.95
#' )
#'
#' iscamtwopropztest(observed1 = 60, n1 = 100, observed2 = 45, n2 = 90, conf.level = 0.90)
iscamtwopropztest <- function(
  observed1,
  n1,
  observed2,
  n2,
  hypothesized = 0,
  alternative = NULL,
  conf.level = NULL,
  datatable = NULL,
  verbose = TRUE
) {
  if (
    !missing(observed1) && .iscam_maybe_help(observed1, "iscamtwopropztest")
  ) {
    return(invisible())
  }

  old <- par(mar = c(5, 3, 1, 1))
  on.exit(par(old), add = TRUE)

  if (!is.null(datatable)) {
    observed1 <- datatable[1]
    n1 <- datatable[1] + datatable[2]
    observed2 <- datatable[3]
    n2 <- datatable[3] + datatable[4]
  }

  pvalue <- NULL
  zvalue <- NULL
  if (observed1 < 1) {
    observed1 <- round(n1 * observed1)
  }
  if (observed2 < 1) {
    observed2 <- round(n2 * observed2)
  }
  if (verbose) {
    cat("\nTwo Proportion z test\n\n")
  }
  statistic1 <- observed1 / n1
  statistic2 <- observed2 / n2
  statistic <- statistic1 - statistic2
  if (verbose) {
    cat(paste(
      "Group1: observed successes = ",
      observed1,
      ", sample size = ",
      n1,
      ", sample proportion = ",
      signif(statistic1, 4),
      "\n\n",
      sep = ""
    ))
    cat(paste(
      "Group2: observed successes = ",
      observed2,
      ", sample size = ",
      n2,
      ", sample proportion = ",
      signif(statistic2, 4),
      "\n\n",
      sep = ""
    ))
  }
  if (!is.null(alternative)) {
    if (verbose) {
      cat(
        paste("Null hypothesis       : pi1-pi2 =", hypothesized, sep = " "),
        "\n"
      )
    }
    altname <- switch(
      alternative,
      less = "<",
      greater = ">",
      two.sided = "<>",
      not.equal = "<>"
    )
    if (verbose) {
      cat(
        paste(
          "Alternative hypothesis: pi1-pi2",
          altname,
          hypothesized,
          sep = " "
        ),
        "\n"
      )
    }
    pooledphat <- (observed1 + observed2) / (n1 + n2)
    zvalue <- (statistic1 - statistic2 - hypothesized) /
      sqrt(pooledphat * (1 - pooledphat) * (1 / n1 + 1 / n2))
    if (verbose) {
      cat("z-statistic:", signif(zvalue, 4), "\n")
    }
    pvalue <- 2
    SD <- sqrt(pooledphat * (1 - pooledphat) * (1 / n1 + 1 / n2))
    min <- min(hypothesized - 4 * SD, hypothesized - abs(zvalue) * SD - 0.001)
    max <- max(hypothesized + 4 * SD, hypothesized + abs(zvalue) * SD + 0.001)
    x <- seq(min, max, 0.001)
    plot(
      x,
      dnorm(x, hypothesized, SD),
      xlab = bquote("<-" ~ "Difference in Sample Proportions" ~ "->"),
      type = "l",
      ylim = c(0, dnorm(hypothesized, hypothesized, SD)),
      panel.first = grid()
    )
    zseq <- c(
      hypothesized - 3 * SD,
      hypothesized - 2 * SD,
      hypothesized - SD,
      hypothesized,
      hypothesized + SD,
      hypothesized + 2 * SD,
      hypothesized + 3 * SD
    )
    axis(
      side = 1,
      at = zseq,
      labels = c("z=-3", "z=-2", "z=-1", "z=0", "z=1", "z=2", "z=3"),
      padj = 1.2,
      tick = FALSE,
      col.axis = "blue"
    )
    abline(h = 0, col = "black")
    newtitle <- paste(
      "Normal (mean=",
      hypothesized,
      ", SD=",
      format(SD, digits = 2),
      ")"
    )
    title(newtitle)
    mtext(side = 2, line = 2, "density")

    if (alternative == "less") {
      pvalue <- signif(pnorm(zvalue), 4)
      drawseq <- seq(min, statistic, 0.001)
      polygon(
        c(drawseq, statistic, min),
        c(dnorm(drawseq, hypothesized, SD), 0, 0),
        col = "red"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.9,
        labels = paste("z-statistic:", signif(zvalue, 4)),
        pos = 4,
        col = "blue"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.8,
        labels = paste("p-value:", pvalue),
        pos = 4,
        col = "red"
      )
    } else if (alternative == "greater") {
      pvalue <- signif(1 - pnorm(zvalue), 4)
      drawseq <- seq(statistic, max, 0.001)
      polygon(
        c(statistic, drawseq, max),
        c(0, dnorm(drawseq, hypothesized, SD), 0),
        col = "red"
      )
      text(
        max,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.9,
        labels = paste("z-statistic:", signif(zvalue, 4)),
        pos = 2,
        col = "blue"
      )
      text(
        max,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.8,
        labels = paste("p-value:", pvalue),
        pos = 2,
        col = "red"
      )
    } else if (alternative == "two.sided" || alternative == "not.equal") {
      pvalue <- signif(2 * pnorm(-1 * abs(zvalue)), 4)
      if (statistic < hypothesized) {
        drawseq1 <- seq(min, statistic, 0.001)
        drawseq2 <- seq(hypothesized + (hypothesized - statistic), max, 0.001)
      } else {
        drawseq1 <- seq(min, hypothesized - (statistic - hypothesized), 0.001)
        drawseq2 <- seq(statistic, max, 0.001)
      }
      polygon(
        c(min, drawseq1, drawseq1[length(drawseq1)]),
        c(0, dnorm(drawseq1, hypothesized, SD), 0),
        col = "red"
      )
      polygon(
        c(drawseq2[1], drawseq2, max),
        c(0, dnorm(drawseq2, hypothesized, SD), 0),
        col = "red"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.9,
        labels = paste("z-statistic:", signif(zvalue, 4)),
        pos = 4,
        col = "blue"
      )
      text(
        min,
        max(dnorm(hypothesized, hypothesized, SD)) * 0.8,
        labels = paste("two-sided p-value:", pvalue),
        pos = 4,
        col = "red"
      )
    }
  } #end have alternative
  lower <- NULL
  upper <- NULL
  if (!is.null(conf.level)) {
    conf.level <- .iscam_normalize_conf_levels(conf.level)
    if (length(conf.level) > 1) {
      old <- par(mar = c(4, 2, 1.5, 4), mfrow = c(length(conf.level), 1))
      on.exit(par(old), add = TRUE)
    }
    for (k in seq_along(conf.level)) {
      #myout=prop.test(observed, n, p=statistic, alternative="two.sided", conf.level, correct=FALSE)
      criticalvalue <- qnorm((1 - conf.level[k]) / 2)
      sephat <- sqrt(
        statistic1 *
          (1 - statistic1) /
          n1 +
          statistic2 * (1 - statistic2) / n2
      )
      lower[k] <- statistic + criticalvalue * sephat
      upper[k] <- statistic - criticalvalue * sephat
      multconflevel <- 100 * conf.level[k]
      if (verbose) {
        cat(
          multconflevel,
          "% Confidence interval for pi1-pi2: (",
          lower[k],
          ", ",
          upper[k],
          ") \n"
        )
      }
    }
    if (is.null(alternative)) {
      min <- statistic - 4 * sephat
      max <- statistic + 4 * sephat
      CIseq <- seq(min, max, 0.001)
      if (length(conf.level) == 1) {
        old <- par(mar = c(4, 0.5, 1.5, 0.5), mfrow = c(3, 1))
        on.exit(par(old), add = TRUE)
        myxlab <- substitute(
          paste("Normal (", mean == x1, ", ", SD == x2, ")", ),
          list(x1 = signif(lower[1], 4), x2 = signif(sephat, 4))
        )
        plot(CIseq, dnorm(CIseq, lower[1], sephat), type = "l", xlab = " ")
        mtext(
          "difference in sample proportions",
          side = 1,
          line = 1.75,
          adj = 0.5,
          cex = 0.75
        )
        topseq <- seq(statistic, max, 0.001)
        polygon(
          c(statistic, topseq, max),
          c(0, dnorm(topseq, lower[1], sephat), 0),
          col = "red"
        )
        title(myxlab)
        myxlab <- substitute(
          paste("Normal (", mean == x1, ", ", SD == x2, ")", ),
          list(x1 = signif(upper[1], 4), x2 = signif(sephat, 4))
        )
        upper_curve <- seq(min, max, 0.001)
        plot(
          upper_curve,
          dnorm(upper_curve, upper[1], sephat),
          type = "l",
          xlab = " "
        )
        mtext(
          "difference in sample proportions",
          side = 1,
          line = 1.75,
          adj = 0.5,
          cex = 0.75
        )
        bottomseq <- seq(min, statistic, 0.001)
        polygon(
          c(min, bottomseq, statistic, statistic),
          c(
            0,
            dnorm(bottomseq, upper[1], sephat),
            dnorm(statistic, upper[1], sephat),
            0
          ),
          col = "red"
        )
        newtitle <- substitute(
          paste("Normal (", mean == x1, ", ", SD == x2, ")", ),
          list(x1 = signif(upper, 4), x2 = signif(sephat, 4))
        )
        title(newtitle)
      } # end one interval
      for (k in seq_along(conf.level)) {
        .iscam_plot_ci_strip(
          min_x = min,
          statistic = statistic,
          max_x = max,
          lower = lower[k],
          upper = upper[k],
          x_label = "difference in process probabilities",
          ci_label = paste(100 * conf.level[k], "% CI:"),
          ci_label_x = min * 1.01
        )
      }
    } # end have alternative
  } # end have confidence level
  if (!is.null(alternative)) {
    if (verbose) {
      cat("p-value:", pvalue, "\n")
    }
  }
  old <- par(mfrow = c(1, 1))
  on.exit(par(old), add = TRUE)
  invisible(list(
    "zvalue" = zvalue,
    "pvalue" = pvalue,
    "lower" = lower,
    "upper" = upper
  ))
}
Any scripts or data that you put into this service are public.
ISCAM documentation built on Feb. 12, 2026, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ISCAM
Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"

R/normal.R
In ISCAM: Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"

Defines functions iscamnormprob

Documented in iscamnormprob

Try the ISCAM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ISCAM Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"

R/normal.R In ISCAM: Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"

Defines functions iscamnormprob

Documented in iscamnormprob

Try the ISCAM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ISCAM
Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"

R/normal.R
In ISCAM: Companion to the Book "Investigating Statistical Concepts, Applications, and Methods"