# Chunk options applied to every code chunk when this README is knitted
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%", 
  dev = "ragg_png"
)
# Packages used by the examples in this README
library(psych)
library(GPArotation)
library(tidyverse)
library(WJSmisc)

WJSmisc

The WJSmisc package is a set of functions I find convenient to have readily available to me.

Installation

You can install the development version from GitHub with:

# install_github() comes from the remotes package; install it first if needed:
# install.packages("remotes")
remotes::install_github("wjschne/WJSmisc")

Plot area under normal curve

I often need to create a normal distribution with a shaded region below a point.

library(WJSmisc)
library(tidyverse)
# Normal curve with mu = 100 and sigma = 15, shaded below the point 95
plotnorm(95, mu = 100, sigma = 15)

Correlation heat maps

library(simstandard)

# Two latent variables (A and B), each measured by three indicators,
# with a covariance of -0.2 between A and B
model <- "
A =~ 0.71 * A_1 + 0.91 * A_2 + 0.85 * A_3
B =~ 0.65 * B_1 + 0.90 * B_2 + 0.75 * B_3
A ~~ -0.2 * B
"

# Simulate data from the model. The argument is spelled `errors` in full
# rather than relying on partial matching of `error`.
d <- sim_standardized(
  model,
  latent = FALSE,
  errors = FALSE
)

# Heat map of the correlations among the simulated variables
cor_heat(d, margins = 0.1)

Parallel Analysis Plot

# d is the simulated data set created in the correlation heat map example
parallel_analysis(d)

Factor Analysis Loading Plot

# Two-factor solution with fm = "pa", plotted with the factors labelled A and B
psych::fa(d, nfactors = 2, fm = "pa") %>% 
  plot_loading(factor_names = c("A", "B"))

Composite covariance

# Covariance matrix: 1 on the diagonal, 0.6 everywhere else
Sigma <- diag(5)
Sigma[row(Sigma) != col(Sigma)] <- 0.6

# Weight matrix: column 1 weights rows 1-2, column 2 weights rows 3-5
w <- cbind(
  rep(c(1, 0), times = c(2, 3)),
  rep(c(0, 1), times = c(2, 3))
)
w

# covariance of weighted sums
# (presumably each column of w defines one weighted sum of the variables in Sigma)
composite_covariance(Sigma, w)

Correlation Ellipse

# Draw the ellipse for a correlation of 0.75 as a semi-transparent polygon
cor_ellipse(0.75) %>%
  ggplot(mapping = aes(x = x, y = y)) +
  geom_polygon(alpha = 0.5) +
  coord_fixed()

Split at x = 1

# Same ellipse, with the pieces on either side of x = 1 filled by group
cor_ellipse(0.75, split_x = 1) %>%
  ggplot(mapping = aes(x = x, y = y)) +
  geom_polygon(mapping = aes(fill = group), alpha = 0.5) +
  coord_fixed()

Split at x = 1 and y = 0

# Same ellipse, split at both x = 1 and y = 0, filled by group
cor_ellipse(0.75, split_x = 1, split_y = 0) %>%
  ggplot(mapping = aes(x = x, y = y)) +
  geom_polygon(mapping = aes(fill = group), alpha = 0.5) +
  coord_fixed()

Every combination of 2 or more vectors

# Combine three vectors, joining the pieces with "_"
cross_vectors(c("a", "b"), c("x", "y"), c(1, 2), sep = "_")

z-score

Like the scale function, except that it returns a plain vector instead of a matrix with attributes. It can also return z-scores based on user-specified means and standard deviations.

x <- rnorm(100, mean = 100, sd = 15)

# The histograms are built with ggplot() + geom_histogram() because qplot()
# is deprecated as of ggplot2 3.4.0.

# z-score with sample mean and sample sd
tibble(z = x2z(x)) %>%
  ggplot(aes(z)) +
  geom_histogram(bins = 10) +
  geom_rug()

# z-score with user-specified population mean and sd
tibble(z = x2z(x, mu = 100, sigma = 15)) %>%
  ggplot(aes(z)) +
  geom_histogram(bins = 10) +
  geom_rug()

# Will center score at sample mean if sigma = 1
tibble(z = x2z(x, sigma = 1)) %>%
  ggplot(aes(z)) +
  geom_histogram(bins = 10) +
  geom_rug()

Attach function argument defaults to global environment

When debugging a function with many default arguments, it is useful to assign the default values to the variables in the global environment.

# Toy function in which every argument has a default value
my_function <- function(x = 1, y = 2) {
  x + y
}
# Assign my_function's defaults (x = 1, y = 2) to variables in the global
# environment. NOTE: this replaces any existing global x or y.
attach_function(my_function)
x
y

Convert an angle to ggplot2 hjust and vjust parameters

Control placement of labels with the angular position by converting an angle to hjust and vjust parameters.

# One label every 15 degrees around the unit circle, each justified
# according to its angle
tibble(degrees = seq(0, 345, 15),
       radians = degrees * pi / 180,
       x = cos(radians),
       y = sin(radians),
       hjust = angle2hjust(radians),
       vjust = angle2vjust(radians)) %>%
  ggplot(aes(x, y)) +
  # linewidth replaces size for line geoms (size is deprecated for lines
  # since ggplot2 3.4.0)
  geom_segment(aes(x = 0, y = 0, xend = x, yend = y), linewidth = .1) +
  geom_label(aes(label = degrees,
                 hjust = hjust,
                 vjust = vjust),
             label.padding = unit(1, "mm"),
             label.size = 0) +
  geom_point() +
  # clip = "off" lets labels extend beyond the panel area
  coord_fixed(1, clip = "off") +
  theme_void()

I use these functions to make sure that labels on a curve are perpendicular to the curve:

# Step size for the finite-difference slope
dx <- 0.000001
# Aspect ratio of the plot; also used to rescale the slope
plot_ratio <- 16
tibble(
  x = -4:4,
  y = dnorm(x),
  l = WJSmisc::prob_label(pnorm(x), digits = 2),
  # Approximate slope of the normal density at x, scaled by the aspect ratio
  slope = plot_ratio * (dnorm(x + dx) - y) / dx,
  # Rotate a quarter turn from the slope's direction
  angle = atan(slope) + pi / 2,
  hjust = angle2hjust(angle),
  vjust = angle2vjust(angle)
) %>%
  ggplot(mapping = aes(x = x, y = y, label = l)) +
  geom_point() +
  stat_function(fun = dnorm) +
  geom_label(mapping = aes(hjust = hjust, vjust = vjust), label.size = 0) +
  coord_fixed(ratio = plot_ratio, clip = "off") +
  theme_minimal()

Lower triangle to correlation matrix

# Three lower-triangle values (presumably a 3 x 3 correlation matrix)
tri2cor(c(.2,.3,.4))
# A single lower-triangle value (presumably a 2 x 2 correlation matrix)
tri2cor(.5)

Formatting probability values

Probabilities near 0 and 1 are rounded differently.

# Probabilities spanning 0 to 1, including values very close to each end
p <- c(0,.0012, .025, .5, .99, .994, .99952, 1)
# Same probabilities formatted via the digits argument, then via accuracy
prob_label(p, digits = 2)
prob_label(p, accuracy = .01)

# Same probabilities passed through the rounding and percentile helpers
proportion_round(p)
proportion2percentile(p, add_percent_character = TRUE)

Sizing text in ggplot2

Text size in geom_text and geom_label does not use the same units as the rest of ggplot2.

I use the ggtext_size function so that text from geom_text will be the same size as the axis labels.

# One size value shared by geom_text (via ggtext_size) and the theme
mytextsize <- 24
tibble(x = 1:5, y = x) %>%
  ggplot(mapping = aes(x = x, y = y)) +
  geom_text(
    mapping = aes(label = x),
    size = ggtext_size(mytextsize)
  ) +
  theme_gray(base_size = mytextsize) +
  coord_equal()

Random beta distributions with specific means and standard deviations.

Sometimes I need random variables with values between 0 and 1. To get a beta distribution that I want, there is less trial-and-error if I specify the mean and standard deviation rather than 2 shape parameters. Note that not all combinations of means and standard deviations are possible.

# Histogram of 10,000 draws from rbeta_ms(10000, .7, .1), shown on the
# full 0-1 range.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0.
tibble(x = rbeta_ms(10000, .7, .1)) %>%
  ggplot(aes(x)) +
  geom_histogram(bins = 30) +
  coord_cartesian(xlim = c(0, 1))

Formatting numeric values

R has great formatting functions like format and formatC. I find scales::number to be particularly useful. However, I often have particular preferences that I do not want to keep specifying every time I need to format a number.

Remove leading zeroes

For numbers between -1 and 1, leading zeroes are removed.

# -0.051 and 0.05 lie between -1 and 1; -2 and 2 do not
remove_leading_zero(c(-2, -0.051, 0.05, 2))

Formatting probabilities

The prob_label function formats probabilities according to my preferences:

  1. 0 is 0 unless round_zero_one is FALSE.
  2. 1 is 1 unless round_zero_one is FALSE.
  3. Other probabilities are rounded to the nearest .01 with the leading zero removed.
# Default formatting of the probabilities 0, 0.2, ..., 1
prob_label(seq(0,1,0.2))

Setting the digits argument to 2 will round to 2 significant digits with the exception that probabilities near 1 are rounded to the first number that is not 9.

# Values near 0 and near 1, formatted with digits = 2
prob_label(c(.00122, .0122, .122, .99112, .999112), digits = 2)

The proportion_round function rounds proportions, treating values near 0 and 1 differently from the rest.

# Includes the endpoints 0 and 1 as well as values very close to them
proportion_round(c(0,.0011,.5,.991, .99991, 1))

Formatting percentiles

I like to round percentiles to the nearest integer unless they are close to 0 or 100.

# z-scores from -4 to 4, their normal cumulative proportions, and the
# percentile labels for those proportions
tibble(
  z_scores = -4:4,
  proportions = pnorm(z_scores),
  percentiles = proportion2percentile(
    proportions,
    add_percent_character = TRUE
  )
)

Formatting correlations

I like to round correlations to the nearest .01 with leading zeroes removed. The diagonals are just 1s.

# All correlations positive
tri2cor(c(.4,.5,.66544)) %>% 
  cor_text()

If any correlation in the matrix is negative, the positive correlations get a leading space (to make the correlations easier to align in a plot or table).

# One negative correlation (-.5) among the lower-triangle values
tri2cor(c(.4,-.5,.66544)) %>% 
  cor_text()


wjschne/WJSmisc documentation built on June 29, 2023, 8:04 a.m.