R/norm_density_plot.R

Defines functions gg_norm_conf_int build_ribbons build_tail_dat plot_t_tails build_dt_dat plot_norm_tails build_pnorm_dat build_qnorm_dat build_dnorm_dat norm_quantile_plot norm_cdf_plot norm_density_plot

Documented in build_dnorm_dat build_dt_dat build_pnorm_dat build_qnorm_dat build_ribbons build_tail_dat gg_norm_conf_int norm_cdf_plot norm_density_plot norm_quantile_plot plot_norm_tails plot_t_tails

#' Make a nice plot of a normal PDF
#'
#' @param x_1 x-values to plot
#' @param pop_mean mean of the normal distribution to plot
#' @param pop_sd standard deviation of the normal distribution to plot
#' @param xmin minimum x value for the plot
#' @param xmax maximum x value for the plot
#' @param len number of points to calculate for the curve
#' @param fill_cdf Color to fill the density area
#' @param x_lab label for the x-axis
#' @param y_lab label for the y-axis#'
#' @param digits the number of siginifcant digits to display in the title
#' @param lty_density the line type for the density curve
#' @param title_fmt The format string, to be passed to sprintf() for the title
#'
#' @import ggplot2
#' @import stats
#'
#' @return A ggplot object
#'
#'
#' @export
#'

norm_density_plot = function(
    x_1,
    pop_mean = 0,
    pop_sd = 1,
    xmin = NULL,
    xmax = NULL,
    len = 1000,
    fill_cdf = rgb(0, 0.3, 0.8, 0.25),
    # fill_bkg = rgb(0, 0, 0, 0),
    digits = 2,
    lty_density = 2,
    x_lab = "x",
    y_lab = "f(x)",
    title_fmt =
      paste0(
        "x = %1$s\n",
        "probability density (height of the curve at x) = %2$s\n",
        "cumulative density (area of shaded region) = %3$s"))
{

  if (FALSE)
  {
    x_1 = 0.1
    pop_mean = 0
    pop_sd = 1
    xmin = NULL
    xmax = NULL
    len = 1000
    fill_cdf = rgb(0, 0.3, 0.8, 0.25)
    digits = 2
    lty_density = 2
    x_lab = "x"
    y_lab = "f(x)"
    title_fmt =
      paste0(
        "x = %1$s\n",
        "probability density (height of the curve at x) = %2$s\n",
        "cumulative density (area of shaded region) = %3$s")
  }

  if (is.null(xmin)) xmin = pop_mean - 3 * pop_sd
  if (is.null(xmax)) xmax = pop_mean + 3 * pop_sd

  norm_dat = build_dnorm_dat(
    xmin = xmin,
    xmax = xmax,
    len = len,
    pop_mean = pop_mean,
    pop_sd = pop_sd)

  y_intercept = dnorm(x_1, mean = pop_mean, sd = pop_sd)

  out =  ggplot(norm_dat) +
    geom_line(aes(x, y1)) +
    geom_ribbon(
      data = subset(norm_dat, x < x_1),
      mapping = aes(x = x, ymin = y0, ymax = y1),
      fill = fill_cdf) +
    geom_hline(yintercept = y_intercept, lty = lty_density) +
    ylab(y_lab) + xlab(x_lab) +
    ggtitle(paste0(
      sprintf(
        title_fmt,
        round(x_1, digits),
        round(y_intercept, digits),
        round(pnorm(x_1, mean = pop_mean, sd = pop_sd), digits)
      )))

  return(out)
}


#' Make a nice example plot for a normal cumulative distribution function
#'
#' @param x_1 x-values to plot
#' @param pop_mean mean of the normal distribution to plot
#' @param pop_sd standard deviation of the normal distribution to plot
#' @param xmin minimum x value for the plot
#' @param xmax maximum x value for the plot
#' @param len number of points to calculate for the curve
#' @param fill_cdf Color to fill the density area
#' @param x_lab label for the x-axis
#' @param y_lab label for the y-axis
#' @param digits the number of siginifcant digits to display in the title
#' @param lty_density the line type for the density curve
#' @param title_fmt The format string, to be passed to sprintf() for the title
#'
#' @import ggplot2
#' @import stats
#'
#' @return A ggplot object
#'
#' @export


norm_cdf_plot = function(
    x_1,
    pop_mean = 0,
    pop_sd = 1,
    xmin = NULL,
    xmax = NULL,
    len = 1000,
    fill_cdf = rgb(0, 0.3, 0.8, 0.25),
    # fill_bkg = rgb(0, 0, 0, 0),
    digits = 2,
    lty_density = 2,
    x_lab = "x",
    y_lab = "f(x)",
    title_fmt = "x: %1$s\nquantile: %2$s")
{
  requireNamespace("ggplot2")

  if (is.null(xmin)) xmin = pop_mean - 3 * pop_sd
  if (is.null(xmax)) xmax = pop_mean + 3 * pop_sd

  norm_dat = build_pnorm_dat(
    xmin = xmin,
    xmax = xmax,
    len = len,
    pop_mean = pop_mean,
    pop_sd = pop_sd)

  y_intercept = pnorm(x_1, mean = pop_mean, sd = pop_sd)

  return(
    ggplot(norm_dat) +
      geom_line(aes(x, y1)) +
      geom_segment(
        mapping = aes(
          x = x_1, xend = x[1],
          y = y_intercept, yend = y_intercept),
        arrow = arrow(length = unit(0.03, "npc"))) +
      geom_segment(
        mapping = aes(
          x = x_1, xend = x_1,
          y = y_intercept, yend = 0),
        arrow = arrow(length = unit(0.03, "npc"))) +
      ylab(y_lab) + xlab(x_lab) +
      ggtitle(paste0(
        sprintf(
          title_fmt,
          round(x_1, digits),
          round(pnorm(x_1, mean = pop_mean, sd = pop_sd), digits)
        )))
  )
}
if(FALSE)
{
  source(here::here("function_scripts", "norm_density_plot.R"))
  source(here::here("function_scripts", "norm_quantile_plot.R"))
  source(here::here("function_scripts", "norm_cdf_plot.R"))
  dev.off()
  norm_density_plot(0.3)
  norm_cdf_plot(0.3)
  norm_quantile_plot(0.3)

  cdf_fmt = "x: %1$s\nquantile: %2$s"
  norm_cdf_plot(10.2, mean = 11.2, sd = 3, title_fmt = cdf_fmt)

  x1 = 10.0
  x2 = 0.3

  plot_grid(
    norm_density_plot(x1),
    norm_cdf_plot(x1),
    norm_quantile_plot(pnorm(x1)),
    norm_density_plot(x2),
    norm_cdf_plot(x2),
    norm_quantile_plot(pnorm(x2))

  )
}




#' A nice demonstration plot of a normal quantile function
#'
#' @param p_1 p
#' @param pop_mean mean of the normal distribution to plot
#' @param pop_sd standard deviation of the normal distribution to plot
#' @param xmin minimum x value for the plot
#' @param xmax maximum x value for the plot
#' @param len number of points to calculate for the curve
#' @param fill_cdf Color to fill the density area
#' @param fill_cdf Color to fill background
#' @param x_lab label for the x-axis
#' @param y_lab label for the y-axis
#' @param digits the number of siginifcant digits to display in the title
#' @param lty_density the line type for the density curve
#' @param title_fmt The format string, to be passed to sprintf() for the title
#'
#' @import ggplot2
#' @import stats
#'
#' @return A ggplot object
#'
#' @export

norm_quantile_plot = function(
    p_1,
    pop_mean = 0,
    pop_sd = 1,
    xmin = 0.001,
    xmax = 0.999,
    len = 1000,
    fill_cdf = rgb(0, 0.3, 0.8, 0.25),
    # fill_bkg = rgb(0, 0, 0, 0),
    digits = 2,
    lty_density = 2,
    y_lab = "x",
    x_lab = "quantile",
    title_fmt = "quantile: %1$s\nx: %2$s\n")
{

  requireNamespace("ggplot2")

  norm_dat = build_qnorm_dat(
    xmin = xmin,
    xmax = xmax,
    len = len,
    pop_mean = pop_mean,
    pop_sd = pop_sd)

  y_intercept = qnorm(p_1, mean = pop_mean, sd = pop_sd)

  return(
    ggplot(norm_dat) +
      geom_line(aes(x, y1)) +
      geom_segment(
        mapping = aes(
          x = p_1, xend = x[1],
          y = y_intercept, yend = y_intercept),
        arrow = arrow(length = unit(0.03, "npc"))) +
      geom_segment(
        mapping = aes(
          x = p_1, xend = p_1,
          y = y_intercept, yend = y1[1]),
        arrow = arrow(length = unit(0.03, "npc"))) +
      ylab(y_lab) + xlab(x_lab) +
      ggtitle(paste0(
        sprintf(
          title_fmt,
          round(p_1, digits),
          round(y_intercept, digits))))
  )
}



#' Build data for a pdf curve for a normal dist
#'
#' @param xmin minimum x-value
#' @param xmax maximum x-value
#' @param len how many points to create for the curve
#' @param pop_mean mean for the population
#' @param pop_sd standard deviation for the population
#'
#' @import stats
#'
#' @export
#'

build_dnorm_dat = function(
    xmin = -2,
    xmax = 2,
    len = 100,
    pop_mean = 0,
    pop_sd = 1
)
{
  if (FALSE)
  {
    xmin = -2
    xmax = 2
    len = 100
    pop_mean = 0
    pop_sd = 1
  }
  x = seq(xmin, xmax, length.out = len)
  norm_dat = data.frame(
    x = x,
    x1 = rev(x),
    y0 = 0 * x,
    y1 = dnorm(x, mean = pop_mean, sd = pop_sd)
  )
  return(norm_dat)
}


#' Build data for a quantile curve for a normal dist
#'
#' @param xmin minimum x-value
#' @param xmin minimum x-value
#' @param xmax maximum x-value
#' @param len how many points to create for the curve
#' @param pop_mean mean for the population
#' @param pop_sd standard deviation for the population
#'
#' @import stats
#'
#' @export
#'
build_qnorm_dat = function(
    xmin,
    xmax,
    len = 100,
    pop_mean = 0,
    pop_sd = 1
)
{
  x = seq(xmin, xmax, length.out = len)
  norm_dat = data.frame(
    x = x,
    y0 = 0 * x,
    y1 = qnorm(x, mean = pop_mean, sd = pop_sd),
    x1 = rev(x)
  )
  return(norm_dat)
}

#' Build data for a cdf curve for a normal dist
#'
#' @param xmin minimum x-value
#' @param xmin minimum x-value
#' @param xmax maximum x-value
#' @param len how many points to create for the curve
#' @param pop_mean mean for the population
#' @param pop_sd standard deviation for the population
#'
#' @import stats
#'
#' @export
#'
build_pnorm_dat = function(
    xmin = -2,
    xmax = 2,
    len = 100,
    pop_mean = 0,
    pop_sd = 1
)
{
  x = seq(xmin, xmax, length.out = len)
  norm_dat = data.frame(
    x = x,
    y0 = 0 * x,
    y1 = pnorm(x, mean = pop_mean, sd = pop_sd),
    x1 = rev(x)
  )
  return(norm_dat)
}


#' Plot a normal distribution with one or both tails shaded
#'
#' @param lower_tail lower tail to plot
#' @param upper_tail upper tail to plot
#' @param pop_mean mean for the population
#' @param pop_sd standard deviation for the plot
#' @param xmin lower limit of x values to include
#' @param xmax upper limit of x values to include
#' @param len how many points to generate?
#' @param fill_lower fill color for the lower tail
#' @param fill_middle fill color for the non-tail part of the pdf
#' @param fill_upper fill color for the upper tail
#' @param y_lab label for x axis
#' @param x_lab label for y axis
#'
#' @export

plot_norm_tails = function(
    lower_tail = 0.025,
    upper_tail = 0.975,
    pop_mean = 0,
    pop_sd = 1,
    xmin = NULL,
    xmax = NULL,
    len = 1000,
    fill_lower = rgb(0, 0.3, 0.8, 0.25),
    fill_middle = rgb(1, 0, 0, 0),
    fill_upper = rgb(0, 0.3, 0.8, 0.25),
    y_lab = "f(x)",
    x_lab = "x"

)
{

  if (FALSE)
  {
    upper_tail = NULL
    lower_tail = 0.025
    upper_tail = 0.925
    x_1 = 0.1
    pop_mean = 0
    pop_sd = 1
    xmin = NULL
    xmax = NULL
    len = 1000
    fill_upper = rgb(0, 0.3, 0.8, 0.25)
    fill_lower = rgb(0, 0.3, 0.8, 0.25)
    fill_middle = rgb(0, 0, 1, 0.5)
    digits = 2
    lty_density = 2
    x_lab = "x"
    y_lab = "f(x)"
    title_fmt =
      paste0(
        "x = %1$s\n",
        "probability density (height of the curve at x) = %2$s\n",
        "cumulative density (area of shaded region) = %3$s")
  }

  if (is.null(xmin)) xmin = pop_mean - 3 * pop_sd
  if (is.null(xmax)) xmax = pop_mean + 3 * pop_sd

  norm_dat = build_dnorm_dat(
    xmin = xmin,
    xmax = xmax,
    len = len,
    pop_mean = pop_mean,
    pop_sd = pop_sd)

  lower_crit = upper_crit = NULL

  if (!is.null(upper_tail))
    upper_crit = qnorm(upper_tail, mean = pop_mean, sd = pop_sd)

  if (!is.null(lower_tail))
    lower_crit = qnorm(lower_tail, mean = pop_mean, sd = pop_sd)

  ribbon_dat = build_tail_dat(
    norm_dat,
    lower_x = lower_crit,
    upper_x = upper_crit)

  gg_ribbons = build_ribbons(
    ribbon_dat,
    fill_lower = fill_lower,
    fill_middle = fill_middle,
    fill_upper = fill_upper)

  out =   ggplot(norm_dat) +
    geom_line(aes(x, y1)) +
    gg_ribbons$middle +
    gg_ribbons$tails$lower +
    gg_ribbons$tails$upper +
    ylab(y_lab) + xlab(x_lab)


  return(out)
}


#' Build data for a pdf curve for a t-distribution
#'
#' @param df_sample sample degrees of freedom
#' @param ncp_sample non-centrality parameter
#' @param xmin lower limit of x values to include
#' @param xmax upper limit of x values to include
#' @param len how many points to generate?
#'
#' @import stats
#'
#' @export
#'
build_dt_dat = function(
    df_sample,
    ncp_sample,
    xmin,
    xmax,
    len
)
{
  x = seq(xmin, xmax, length.out = len)
  t_dat = data.frame(
    x = x,
    y0 = 0 * x,
    y1 = dt(x, df = df_sample, ncp = ncp_sample)
  )
  return(t_dat)
}




#' Plot a t distribution with one or both tails shaded
#'
#' @param lower_tail lower tail to plot
#' @param upper_tail upper tail to plot
#' @param df_sample sample degrees of freedom
#' @param ncp_sample non-centrality parameter
#' @param xmin lower limit of x values to include
#' @param xmax upper limit of x values to include
#' @param len how many points to generate?
#' @param fill_lower fill color for the lower tail
#' @param fill_middle fill color for the non-tail part of the pdf
#' @param fill_upper fill color for the upper tail
#' @param y_lab label for x axis
#' @param x_lab label for y axis
#'
#'
#' @export

plot_t_tails = function(
    lower_tail = 0.025,
    upper_tail = 0.925,
    df_sample = 30,
    ncp_sample = 0,
    xmin = NULL,
    xmax = NULL,
    len = 1000,
    fill_lower = rgb(0, 0.3, 0.8, 0.25),
    fill_middle = rgb(0, 0, 0, 0),
    fill_upper = rgb(0, 0.3, 0.8, 0.25),
    y_lab = "f(x)",
    x_lab = "x"#,
    # t_crit = 0.05
)
{

  if (FALSE)
  {
    lower_tail = 0.05
    upper_tail = 0.975
    upper_tail = NULL

    df_sample = 30
    ncp_sample = 0

    xmin = NULL
    xmax = NULL
    len = 1000

    fill_lower = rgb(0, 0.3, 0.8, 0.25)
    fill_middle = rgb(0, 0, 0, 0)
    fill_upper = rgb(0, 0.3, 0.8, 0.25)

    x_lab = "t"
    y_lab = "f(x)"
    t_crit = 0.05
    title_fmt =
      paste0(
        "x = %1$s\n",
        "probability density (height of the curve at x) = %2$s\n",
        "cumulative density (area of shaded region) = %3$s")
  }

  if (is.null(xmin)) xmin = -3
  if (is.null(xmax)) xmax =  3

  t_dat = build_dt_dat(
    df_sample = df_sample,
    ncp_sample = ncp_sample,
    xmin = xmin,
    xmax = xmax,
    len = len)

  lower_crit = upper_crit = NULL

  if (!is.null(upper_tail))
    upper_crit = qt(upper_tail, df = df_sample, ncp = ncp_sample)

  if (!is.null(lower_tail))
    lower_crit = qt(lower_tail, df = df_sample, ncp = ncp_sample)

  ribbon_dat = build_tail_dat(
    t_dat,
    lower_x = lower_crit,
    upper_x = upper_crit)

  gg_ribbons = build_ribbons(
    ribbon_dat,
    fill_lower = fill_lower,
    fill_middle = fill_middle,
    fill_upper = fill_upper)

  return(

    ggplot(t_dat) +
      geom_line(aes(x, y1)) +
      gg_ribbons$middle +
      gg_ribbons$tails$lower +
      gg_ribbons$tails$upper +
      ylab(y_lab) + xlab(x_lab)
  )
}



#' Separate the lower and upper tails for a pdf dataset
#'
#' @param dat data from which to build the tails
#' @param lower_x lower x-limit for the tail
#' @param upper_x upper x-limit for the tail
#'
#'
#' @export

build_tail_dat = function(
    dat,
    lower_x = -1.96,
    upper_x = NULL)
{
  if (FALSE)
  {
    dat = build_dnorm_dat(-3, 3, 1000, 0, 1)
    lower_x = -1.96
    upper_x = 1
    upper_x = NULL
  }

  middle_dat = dat
  lower_dat = upper_dat = NULL

  if (!is.null(lower_x))
  {
    lower_dat = subset(dat, x < lower_x)
    middle_dat = subset(middle_dat, x > lower_x)
  }

  if (!is.null(upper_x))
  {
    upper_dat = subset(dat, x > upper_x)
    middle_dat = subset(middle_dat, x < upper_x)
  }

  return(
    list(
      tails = list(
        lower = lower_dat,
        upper = upper_dat),
      middle = middle_dat)
  )
}


#' Build geom_ribbon objects for plotting a continuous distribution function
#'
#' @param ribbon_dat data from which to beuld the ribbons
#' @param fill_lower fill color for the lower tail
#' @param fill_middle fill color for the non-tail part of the pdf
#' @param fill_upper fill color for the upper tail
#' @param col_lower border color for the lower tail
#' @param col_middle border color for the non-tail part
#' @param col_upper border color for the upper tail
#'
#' @import ggplot2
#'
#' @export


build_ribbons = function(
    ribbon_dat,
    fill_lower = rgb(0, 0.3, 0.8, 0.25),
    fill_middle = rgb(0, 1, 0, 0.5),
    fill_upper = rgb(0, 0.3, 0.8, 0.25),
    col_lower = "black",
    col_middle = "black",
    col_upper = "black")
{
  if(is.null(ribbon_dat$tails$lower))
  {
    ribbon_lower = NULL
  } else
  {
    ribbon_lower =
      geom_ribbon(
        data = ribbon_dat$tails$lower,
        mapping = aes(x = x, ymin = y0, ymax = y1),
        fill = fill_lower)
  }
  if(is.null(ribbon_dat$tails$upper))
  {
    ribbon_upper = NULL
  } else
  {
    ribbon_upper =
      geom_ribbon(
        data = ribbon_dat$tails$upper,
        mapping = aes(x = x, ymin = y0, ymax = y1),
        fill = fill_upper)
  }

  ribbon_middle =
    geom_ribbon(
      data = ribbon_dat$middle,
      mapping = aes(x = x, ymin = y0, ymax = y1),
      fill = fill_middle)

  return(
    list(
      tails = list(
        lower = ribbon_lower,
        upper = ribbon_upper),
      middle = ribbon_middle))
}

#' Plot a confidence interval on a normal curve
#'
#' @param alpha alpha for the fill
#' @param pop_mean mean for the pop
#' @param pop_sd sd for the pop
#' @param xmin min x
#' @param xmax max s
#' @param len how many points to make for the curve.  Higher number results in smoother curve
#' @param fill_lower fill color for the lower tail
#' @param fill_middle fill color for the non-tail part of the pdf
#' @param fill_upper fill color for the upper tail
#' @param y_lab labe for y axis
#' @param x_lab label for x asxis
#' @param lty_v line type for the upper and lower quantile limits
#' @param title_fmt template for title
#' @param arrow_label_fmt template for arrow lables
#' @param arrow_size size for the horizontal arrow for the CI width label
#' @param digits_label how many digits to round for the label
#' @param digits_axis how many digits to round for the axes
#' @param x_auto_breaks
#'
#' @import ggplot2
#' @import latex2exp
#'
#' @export
#'
gg_norm_conf_int = function(
    alpha = 0.05,
    pop_mean = 0,
    pop_sd = 1,
    xmin = NULL,
    xmax = NULL,
    len = 1000,
    fill_upper = rgb(0, 0.3, 0.8, 0.25),
    fill_lower = rgb(0, 0.3, 0.8, 0.25),
    fill_middle = rgb(0, 0.8, 0.1, 0.25),
    y_lab = "f(x)",
    x_lab = "x",
    lty_v = 2,
    title_fmt = "Interval contains %.1f%s of probabiltiy density.",
    arrow_label_fmt = "$%1$0.1f \\pm %2$0.2f \\times \\sigma$",
    arrow_size = 0.3,
    digits_label = 0,
    digits_axis = 3,
    x_auto_breaks = -1:1
)
{
  if (FALSE)
  {
    alpha = 0.05
    pop_mean = 0
    pop_sd = 1
    xmin = NULL
    xmax = NULL
    len = 1000
    fill_upper = rgb(1, 0, 0, 0.25)
    fill_lower = rgb(1, 0, 0, 0.25)
    fill_middle = rgb(0, 0.3, 0.8, 0.25)
    y_lab = "f(x)"
    x_lab = "x"
    lty_v = 2
    title_fmt = "Interval contains %.1f%s of probabiltiy density."
    arrow_label_fmt = "$%1$0.1f \\pm %2$0.2f \\times \\sigma$"
    arrow_size = 0.3
    digits_label = 0
    digits_axis = 3
    x_auto_breaks = -1:1
  }


  # Convenience variables
  {
    pct_interval = round(100 * (1 - alpha), digits = digits_label)
    alpha_low = alpha / 2
    alpha_hi = 1 - alpha / 2
    q_low = qnorm(alpha_low)
    q_hi = qnorm(alpha_hi)
    y_intercept = dnorm(q_low)
  }

  g_title = sprintf(title_fmt, pct_interval, "%")
  g_arrow_label = TeX(sprintf(arrow_label_fmt, pop_mean, abs(q_low)))

  x_breaks = c(round(q_low, digits_axis), round(q_hi, digits_axis), x_auto_breaks)

  arrow_dat = data.frame(
    x = q_low,
    xend = q_hi,
    y = y_intercept,
    yend = y_intercept)

  gg_t = plot_norm_tails(
    lower_tail = alpha_low,
    upper_tail = alpha_hi,
    pop_mean = pop_mean,
    pop_sd = pop_sd,
    xmin = xmin,
    xmax = xmax,
    len = len,
    fill_middle = fill_middle,
    fill_lower = fill_lower,
    fill_upper = fill_upper,
    y_lab = y_lab,
    x_lab = x_lab
  )

  out =
    gg_t +
    geom_vline(xintercept = q_low, lty = lty_v) +
    geom_vline(xintercept = q_hi, lty = lty_v) +
    geom_segment(
      data = arrow_dat, mapping = aes(x = x, xend = xend, y = y, yend = yend),
      arrow = arrow(length=unit(arrow_size ,"cm"), ends="both", type = "closed")) +
    ylab(y_lab) + xlab(x_lab) +
    scale_x_continuous(breaks = x_breaks) +
    ggtitle(g_title) +
    annotate("label", x = 0, y = arrow_dat$y, label = g_arrow_label)

  return(out)
}
michaelfrancenelson/mfn.teaching.utils documentation built on Oct. 7, 2022, 1:13 a.m.