#| include: false

# Set the knitr chunk options shared by every chunk in this vignette
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
#| echo: false
#| message: false

# Development-version install from GitHub; kept commented out for reference
# devtools::install_github("njlyon0/supportR", force = TRUE)
The supportR
package is an amalgam of distinct functions I've written to accomplish small data wrangling, quality control, or visualization tasks. These functions tend to be short and narrowly-defined. An additional consequence of the motivation for creating them is that they tend to not be inter-related or united by a common theme. If this vignette feels somewhat scattered because of that, I hope it doesn't negatively affect how informative it is or your willingness to adopt supportR
into your scripts!
This vignette describes the main functions of supportR
using the examples included in each function.
# One-time install (kept commented out so the vignette never installs anything)
# install.packages("supportR")

# Attach the package for the examples below
library(supportR)
In order to demonstrate some of the data wrangling functions of supportR
, we'll use some example data from Dr. Allison Horst's palmerpenguins
R package.
#| include: false penguins <- structure(list(species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Adelie", "Chinstrap", "Gentoo"), class = "factor"), island = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Biscoe", "Dream", "Torgersen" ), class = "factor"), bill_length_mm = c(39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, 42, 37.8, 37.8, 41.1, 38.6, 34.6, 36.6, 38.7, 42.5, 34.4, 46, 37.8, 37.7, 35.9, 38.2, 38.8, 35.3, 40.6, 40.5, 37.9, 40.5, 39.5, 37.2, 39.5, 40.9, 36.4, 39.2, 38.8, 42.2, 37.6, 39.8, 36.5, 40.8, 36, 44.1, 37, 39.6, 41.1, 37.5, 36, 42.3, 39.6, 40.1, 35, 42, 34.5, 41.4, 39, 40.6, 36.5, 37.6, 35.7, 41.3, 37.6, 41.1, 36.4, 41.6, 35.5, 41.1, 35.9, 41.8, 33.5, 39.7, 39.6, 45.8, 35.5, 42.8, 40.9, 37.2, 36.2, 42.1, 34.6, 42.9, 36.7, 35.1, 37.3, 41.3, 36.3, 36.9, 38.3, 38.9, 35.7, 41.1, 34, 39.6, 36.2, 40.8, 38.1, 40.3, 33.1, 43.2, 35, 41, 37.7, 37.8, 37.9, 39.7, 38.6, 38.2, 38.1, 43.2, 38.1, 45.6, 39.7, 42.2, 39.6, 42.7, 38.6, 37.3, 35.7, 41.1, 36.2, 37.7, 40.2, 41.4, 35.2, 40.6, 38.8, 41.5, 39, 44.1, 38.5, 43.1, 36.8, 37.5, 38.1, 41.1, 35.6, 40.2, 37, 39.7, 40.2, 40.6, 32.1, 40.7, 37.3, 39, 39.2, 36.6, 36, 37.8, 36, 41.5, 46.1, 50, 48.7, 50, 47.6, 46.5, 45.4, 46.7, 43.3, 46.8, 40.9, 49, 45.5, 48.4, 45.8, 49.3, 42, 49.2, 46.2, 48.7, 50.2, 45.1, 46.5, 
46.3, 42.9, 46.1, 44.5, 47.8, 48.2, 50, 47.3, 42.8, 45.1, 59.6, 49.1, 48.4, 42.6, 44.4, 44, 48.7, 42.7, 49.6, 45.3, 49.6, 50.5, 43.6, 45.5, 50.5, 44.9, 45.2, 46.6, 48.5, 45.1, 50.1, 46.5, 45, 43.8, 45.5, 43.2, 50.4, 45.3, 46.2, 45.7, 54.3, 45.8, 49.8, 46.2, 49.5, 43.5, 50.7, 47.7, 46.4, 48.2, 46.5, 46.4, 48.6, 47.5, 51.1, 45.2, 45.2, 49.1, 52.5, 47.4, 50, 44.9, 50.8, 43.4, 51.3, 47.5, 52.1, 47.5, 52.2, 45.5, 49.5, 44.5, 50.8, 49.4, 46.9, 48.4, 51.1, 48.5, 55.9, 47.2, 49.1, 47.3, 46.8, 41.7, 53.4, 43.3, 48.1, 50.5, 49.8, 43.5, 51.5, 46.2, 55.1, 44.5, 48.8, 47.2, NA, 46.8, 50.4, 45.2, 49.9, 46.5, 50, 51.3, 45.4, 52.7, 45.2, 46.1, 51.3, 46, 51.3, 46.6, 51.7, 47, 52, 45.9, 50.5, 50.3, 58, 46.4, 49.2, 42.4, 48.5, 43.2, 50.6, 46.7, 52, 50.5, 49.5, 46.4, 52.8, 40.9, 54.2, 42.5, 51, 49.7, 47.5, 47.6, 52, 46.9, 53.5, 49, 46.2, 50.9, 45.5, 50.9, 50.8, 50.1, 49, 51.5, 49.8, 48.1, 51.4, 45.7, 50.7, 42.5, 52.2, 45.2, 49.3, 50.2, 45.6, 51.9, 46.8, 45.7, 55.8, 43.5, 49.6, 50.8, 50.2), bill_depth_mm = c(18.7, 17.4, 18, NA, 19.3, 20.6, 17.8, 19.6, 18.1, 20.2, 17.1, 17.3, 17.6, 21.2, 21.1, 17.8, 19, 20.7, 18.4, 21.5, 18.3, 18.7, 19.2, 18.1, 17.2, 18.9, 18.6, 17.9, 18.6, 18.9, 16.7, 18.1, 17.8, 18.9, 17, 21.1, 20, 18.5, 19.3, 19.1, 18, 18.4, 18.5, 19.7, 16.9, 18.8, 19, 18.9, 17.9, 21.2, 17.7, 18.9, 17.9, 19.5, 18.1, 18.6, 17.5, 18.8, 16.6, 19.1, 16.9, 21.1, 17, 18.2, 17.1, 18, 16.2, 19.1, 16.6, 19.4, 19, 18.4, 17.2, 18.9, 17.5, 18.5, 16.8, 19.4, 16.1, 19.1, 17.2, 17.6, 18.8, 19.4, 17.8, 20.3, 19.5, 18.6, 19.2, 18.8, 18, 18.1, 17.1, 18.1, 17.3, 18.9, 18.6, 18.5, 16.1, 18.5, 17.9, 20, 16, 20, 18.6, 18.9, 17.2, 20, 17, 19, 16.5, 20.3, 17.7, 19.5, 20.7, 18.3, 17, 20.5, 17, 18.6, 17.2, 19.8, 17, 18.5, 15.9, 19, 17.6, 18.3, 17.1, 18, 17.9, 19.2, 18.5, 18.5, 17.6, 17.5, 17.5, 20.1, 16.5, 17.9, 17.1, 17.2, 15.5, 17, 16.8, 18.7, 18.6, 18.4, 17.8, 18.1, 17.1, 18.5, 13.2, 16.3, 14.1, 15.2, 14.5, 13.5, 14.6, 15.3, 13.4, 15.4, 13.7, 16.1, 13.7, 14.6, 14.6, 15.7, 13.5, 15.2, 14.5, 15.1, 14.3, 
14.5, 14.5, 15.8, 13.1, 15.1, 14.3, 15, 14.3, 15.3, 15.3, 14.2, 14.5, 17, 14.8, 16.3, 13.7, 17.3, 13.6, 15.7, 13.7, 16, 13.7, 15, 15.9, 13.9, 13.9, 15.9, 13.3, 15.8, 14.2, 14.1, 14.4, 15, 14.4, 15.4, 13.9, 15, 14.5, 15.3, 13.8, 14.9, 13.9, 15.7, 14.2, 16.8, 14.4, 16.2, 14.2, 15, 15, 15.6, 15.6, 14.8, 15, 16, 14.2, 16.3, 13.8, 16.4, 14.5, 15.6, 14.6, 15.9, 13.8, 17.3, 14.4, 14.2, 14, 17, 15, 17.1, 14.5, 16.1, 14.7, 15.7, 15.8, 14.6, 14.4, 16.5, 15, 17, 15.5, 15, 13.8, 16.1, 14.7, 15.8, 14, 15.1, 15.2, 15.9, 15.2, 16.3, 14.1, 16, 15.7, 16.2, 13.7, NA, 14.3, 15.7, 14.8, 16.1, 17.9, 19.5, 19.2, 18.7, 19.8, 17.8, 18.2, 18.2, 18.9, 19.9, 17.8, 20.3, 17.3, 18.1, 17.1, 19.6, 20, 17.8, 18.6, 18.2, 17.3, 17.5, 16.6, 19.4, 17.9, 19, 18.4, 19, 17.8, 20, 16.6, 20.8, 16.7, 18.8, 18.6, 16.8, 18.3, 20.7, 16.6, 19.9, 19.5, 17.5, 19.1, 17, 17.9, 18.5, 17.9, 19.6, 18.7, 17.3, 16.4, 19, 17.3, 19.7, 17.3, 18.8, 16.6, 19.9, 18.8, 19.4, 19.5, 16.5, 17, 19.8, 18.1, 18.2, 19, 18.7), flipper_length_mm = c(181L, 186L, 195L, NA, 193L, 190L, 181L, 195L, 193L, 190L, 186L, 180L, 182L, 191L, 198L, 185L, 195L, 197L, 184L, 194L, 174L, 180L, 189L, 185L, 180L, 187L, 183L, 187L, 172L, 180L, 178L, 178L, 188L, 184L, 195L, 196L, 190L, 180L, 181L, 184L, 182L, 195L, 186L, 196L, 185L, 190L, 182L, 179L, 190L, 191L, 186L, 188L, 190L, 200L, 187L, 191L, 186L, 193L, 181L, 194L, 185L, 195L, 185L, 192L, 184L, 192L, 195L, 188L, 190L, 198L, 190L, 190L, 196L, 197L, 190L, 195L, 191L, 184L, 187L, 195L, 189L, 196L, 187L, 193L, 191L, 194L, 190L, 189L, 189L, 190L, 202L, 205L, 185L, 186L, 187L, 208L, 190L, 196L, 178L, 192L, 192L, 203L, 183L, 190L, 193L, 184L, 199L, 190L, 181L, 197L, 198L, 191L, 193L, 197L, 191L, 196L, 188L, 199L, 189L, 189L, 187L, 198L, 176L, 202L, 186L, 199L, 191L, 195L, 191L, 210L, 190L, 197L, 193L, 199L, 187L, 190L, 191L, 200L, 185L, 193L, 193L, 187L, 188L, 190L, 192L, 185L, 190L, 184L, 195L, 193L, 187L, 201L, 211L, 230L, 210L, 218L, 215L, 210L, 211L, 219L, 209L, 215L, 214L, 216L, 214L, 213L, 210L, 
217L, 210L, 221L, 209L, 222L, 218L, 215L, 213L, 215L, 215L, 215L, 216L, 215L, 210L, 220L, 222L, 209L, 207L, 230L, 220L, 220L, 213L, 219L, 208L, 208L, 208L, 225L, 210L, 216L, 222L, 217L, 210L, 225L, 213L, 215L, 210L, 220L, 210L, 225L, 217L, 220L, 208L, 220L, 208L, 224L, 208L, 221L, 214L, 231L, 219L, 230L, 214L, 229L, 220L, 223L, 216L, 221L, 221L, 217L, 216L, 230L, 209L, 220L, 215L, 223L, 212L, 221L, 212L, 224L, 212L, 228L, 218L, 218L, 212L, 230L, 218L, 228L, 212L, 224L, 214L, 226L, 216L, 222L, 203L, 225L, 219L, 228L, 215L, 228L, 216L, 215L, 210L, 219L, 208L, 209L, 216L, 229L, 213L, 230L, 217L, 230L, 217L, 222L, 214L, NA, 215L, 222L, 212L, 213L, 192L, 196L, 193L, 188L, 197L, 198L, 178L, 197L, 195L, 198L, 193L, 194L, 185L, 201L, 190L, 201L, 197L, 181L, 190L, 195L, 181L, 191L, 187L, 193L, 195L, 197L, 200L, 200L, 191L, 205L, 187L, 201L, 187L, 203L, 195L, 199L, 195L, 210L, 192L, 205L, 210L, 187L, 196L, 196L, 196L, 201L, 190L, 212L, 187L, 198L, 199L, 201L, 193L, 203L, 187L, 197L, 191L, 203L, 202L, 194L, 206L, 189L, 195L, 207L, 202L, 193L, 210L, 198L), body_mass_g = c(3750L, 3800L, 3250L, NA, 3450L, 3650L, 3625L, 4675L, 3475L, 4250L, 3300L, 3700L, 3200L, 3800L, 4400L, 3700L, 3450L, 4500L, 3325L, 4200L, 3400L, 3600L, 3800L, 3950L, 3800L, 3800L, 3550L, 3200L, 3150L, 3950L, 3250L, 3900L, 3300L, 3900L, 3325L, 4150L, 3950L, 3550L, 3300L, 4650L, 3150L, 3900L, 3100L, 4400L, 3000L, 4600L, 3425L, 2975L, 3450L, 4150L, 3500L, 4300L, 3450L, 4050L, 2900L, 3700L, 3550L, 3800L, 2850L, 3750L, 3150L, 4400L, 3600L, 4050L, 2850L, 3950L, 3350L, 4100L, 3050L, 4450L, 3600L, 3900L, 3550L, 4150L, 3700L, 4250L, 3700L, 3900L, 3550L, 4000L, 3200L, 4700L, 3800L, 4200L, 3350L, 3550L, 3800L, 3500L, 3950L, 3600L, 3550L, 4300L, 3400L, 4450L, 3300L, 4300L, 3700L, 4350L, 2900L, 4100L, 3725L, 4725L, 3075L, 4250L, 2925L, 3550L, 3750L, 3900L, 3175L, 4775L, 3825L, 4600L, 3200L, 4275L, 3900L, 4075L, 2900L, 3775L, 3350L, 3325L, 3150L, 3500L, 3450L, 3875L, 3050L, 4000L, 3275L, 4300L, 3050L, 4000L, 3325L, 3500L, 
3500L, 4475L, 3425L, 3900L, 3175L, 3975L, 3400L, 4250L, 3400L, 3475L, 3050L, 3725L, 3000L, 3650L, 4250L, 3475L, 3450L, 3750L, 3700L, 4000L, 4500L, 5700L, 4450L, 5700L, 5400L, 4550L, 4800L, 5200L, 4400L, 5150L, 4650L, 5550L, 4650L, 5850L, 4200L, 5850L, 4150L, 6300L, 4800L, 5350L, 5700L, 5000L, 4400L, 5050L, 5000L, 5100L, 4100L, 5650L, 4600L, 5550L, 5250L, 4700L, 5050L, 6050L, 5150L, 5400L, 4950L, 5250L, 4350L, 5350L, 3950L, 5700L, 4300L, 4750L, 5550L, 4900L, 4200L, 5400L, 5100L, 5300L, 4850L, 5300L, 4400L, 5000L, 4900L, 5050L, 4300L, 5000L, 4450L, 5550L, 4200L, 5300L, 4400L, 5650L, 4700L, 5700L, 4650L, 5800L, 4700L, 5550L, 4750L, 5000L, 5100L, 5200L, 4700L, 5800L, 4600L, 6000L, 4750L, 5950L, 4625L, 5450L, 4725L, 5350L, 4750L, 5600L, 4600L, 5300L, 4875L, 5550L, 4950L, 5400L, 4750L, 5650L, 4850L, 5200L, 4925L, 4875L, 4625L, 5250L, 4850L, 5600L, 4975L, 5500L, 4725L, 5500L, 4700L, 5500L, 4575L, 5500L, 5000L, 5950L, 4650L, 5500L, 4375L, 5850L, 4875L, 6000L, 4925L, NA, 4850L, 5750L, 5200L, 5400L, 3500L, 3900L, 3650L, 3525L, 3725L, 3950L, 3250L, 3750L, 4150L, 3700L, 3800L, 3775L, 3700L, 4050L, 3575L, 4050L, 3300L, 3700L, 3450L, 4400L, 3600L, 3400L, 2900L, 3800L, 3300L, 4150L, 3400L, 3800L, 3700L, 4550L, 3200L, 4300L, 3350L, 4100L, 3600L, 3900L, 3850L, 4800L, 2700L, 4500L, 3950L, 3650L, 3550L, 3500L, 3675L, 4450L, 3400L, 4300L, 3250L, 3675L, 3325L, 3950L, 3600L, 4050L, 3350L, 3450L, 3250L, 4050L, 3800L, 3525L, 3950L, 3650L, 3650L, 4000L, 3400L, 3775L, 4100L, 3775L), sex = structure(c(2L, 1L, 1L, NA, 1L, 2L, 1L, 2L, NA, NA, NA, NA, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, NA, 2L, 1L, NA, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L), levels = c("female", "male"), class = "factor"), year = crow.names = c(NA, -344L), class = "data.frame")
# Inspect the columns and column types of the penguins data
str(object = penguins)
With that data loaded, we can use the summary_table
function to quickly get group-wise summaries and retrieve generally useful summary statistics.
The groups
argument supports a vector of all of the column names to group by while response
must be a single numeric column. The drop_na
argument allows group combinations that result in an NA to be automatically dropped (i.e., if a penguin didn't have an island listed that would be dropped). The mean, standard deviation (SD), sample size, and standard error (SE) are all returned to facilitate easy figure creation. There is also a round_digits
argument that lets you specify how many digits you'd like to retain for the mean, SD, and SE.
# Summarize bill length for every species-by-island combination,
# dropping any group combination that contains an NA
supportR::summary_table(
  data = penguins,
  groups = c("species", "island"),
  response = "bill_length_mm",
  drop_na = TRUE
)
The safe_rename
function allows--perhaps predictably--"safe" renaming of column names in a given data object. The 'bad' column names and corresponding 'good' names must be specified. The order of entries in the two vectors must match (i.e., the first bad name will be replaced with the first good name), but that order need not match the order in which they occur in the data!
# Build a small data frame whose column names need fixing
df <- data.frame(first = 1:3, middle = 4:6, second = 7:9)

# Rename columns by pairing the two vectors position by position:
# "second" becomes "third" and "middle" becomes "second".
# The function is namespaced explicitly, as in the other examples.
supportR::safe_rename(
  data = df,
  bad_names = c("second", "middle"),
  good_names = c("third", "second")
)
crop_tri
allows dropping one "triangle" of a symmetric dataframe / matrix. It also includes a drop_diag
argument that accepts a logical for whether to drop the diagonal of the data object. This is primarily useful (I find) in allowing piping through this function as opposed to using the base R notation for removing a triangle of a symmetric data object.
# Build a small symmetric 2 x 2 matrix
mat <- matrix(data = c(1L, 2L, 2L, 1L), nrow = 2, ncol = 2)

# Drop its lower triangle but keep the diagonal
supportR::crop_tri(data = mat, drop_tri = "lower", drop_diag = FALSE)

# Drop the lower triangle and the diagonal together
supportR::crop_tri(data = mat, drop_tri = "lower", drop_diag = TRUE)
array_melt
allows users to 'melt' an array of dimensions X, Y, and Z into a dataframe containing columns "x", "y", "z", and "value" where "value" is whatever was stored at those coordinates in the array.
# Values used to fill the array
vec1 <- c(5, 9, 3)
vec2 <- 10:15

# Names for the first, second, and third array dimensions (in that order)
x_vals <- paste0("Col_", 1:3)
y_vals <- paste0("Row_", 1:3)
z_vals <- paste0("Mat_", 1:2)

# Assemble the 3 x 3 x 2 array from those pieces
g <- array(
  data = c(vec1, vec2),
  dim = c(3, 3, 2),
  dimnames = list(x_vals, y_vals, z_vals)
)

# "Melt" the array into a data frame
melted <- supportR::array_melt(array = g)

# Look at the first rows of the result
head(melted)
In terms of quality control functions, diff_check
compares two vectors and reports back what is in the first but not the second (i.e., what is "lost") and what is in the second but not the first (i.e., what is "gained"). I find this most useful (A) when comparing the index columns of two data objects I intend to join together and (B) to ensure no columns are unintentionally removed during lengthy tidyverse
-style pipes (%>%
).
diff_check
also includes optional logical arguments sort
and return
that will, respectively, sort the differences in both vectors and return them as a two-element list if set to TRUE
.
# Two character vectors that share only the value "a"
vec1 <- c("x", "a", "b")
vec2 <- c("y", "z", "a")

# Report what is only in `old` (lost) and what is only in `new` (gained)
supportR::diff_check(
  old = vec1,
  new = vec2,
  sort = TRUE,
  return = TRUE
)
This package also includes the function num_check
that identifies all values of a column that would be coerced to NA
if as.numeric
was run on the column. Once these non-numbers are identified you can handle that in whatever way you feel is most appropriate. num_check
is intended only to flag these for your attention, not to attempt a fix using a method you may or may not support.
# Make a data frame with non-numbers in a column that should be numeric.
# Mixing numbers and strings inside `c()` stores the whole column as character.
fish <- data.frame(
  species = c("salmon", "bass", "halibut", "eel"),
  count = c(1, "14x", "_23", 12)
)

# Flag the entries of `count` that `as.numeric` would coerce to NA.
# The function is namespaced explicitly, as in the other examples.
supportR::num_check(data = fish, col = "count")
date_check
does a similar operation but is checking a column for entries that would be coerced to NA
by as.Date
instead. Note that if a date is sufficiently badly formatted as.Date
will throw an error instead of coercing to NA
so date_check
will do the same thing.
# Site visits where some of the dates are malformed
sites <- data.frame(
  site = c("LTR", "GIL", "PYN", "RIN"),
  visit = c("2021-01-01", "2021-01-0w", "1990", "2020-10-xx")
)

# Flag the entries of `visit` that would not survive `as.Date`
supportR::date_check(data = sites, col = "visit")
Both num_check
and date_check
can accept multiple column names to the col
argument (as of version 1.1.1) and all columns are checked separately.
Another date column quality control function is date_format_guess
. This function checks a column of dates (stored as characters!) and tries to guess the format of the date (i.e., month/day/year, day/month/year, etc.).
It can make a more informed guess if there is a grouping column because it can use the frequency of the "date" entries within those groups to guess whether a given number is the day or the month. This is based on the assumption that sampling occurs more often within months than across them so the number that occurs in more rows within the grouping values is most likely the month.
Recognizing that assumption may be uncomfortable for some users, the groups
argument can be set to FALSE
and it will do the clearer judgment calls (i.e., if a number is >12 it is day, etc.). Note that dates that cannot be guessed by my function will return "FORMAT UNCERTAIN" so that you can handle them using your knowledge of the system (or by returning to your raw data if need be).
# Dates entered in assorted formats, plus a column identifying who entered them
my_df <- data.frame(
  data_enterer = c(
    "person A", "person B", "person B", "person B", "person C",
    "person D", "person E", "person F", "person G"
  ),
  bad_dates = c(
    "2022.13.08", "2021/2/02", "2021/2/03", "2021/2/04", "1899/1/15",
    "10-31-1901", "26/11/1901", "08.11.2004", "6/10/02"
  )
)

# Guess each date's format using the grouping column; return a data frame
supportR::date_format_guess(
  data = my_df,
  date_col = "bad_dates",
  group_col = "data_enterer",
  return = "dataframe"
)

# Guess again without using groups; return a vector instead
supportR::date_format_guess(
  data = my_df,
  date_col = "bad_dates",
  groups = FALSE,
  return = "vector"
)
I've created a set of custom ggplot2
theme
elements to guarantee that all of my figures share similar aesthetics. Feel free to use theme_lyon
if you have similar preferences!
theme_lyon
makes the following changes to a ggplot2
plot:
#| message: false
#| warning: false
#| fig.width: 5
#| fig.align: "center"

# Load ggplot2
library(ggplot2)

# Boxplot of body mass per species; both figures below start from this
mass_boxplot <- ggplot(
  penguins,
  aes(x = species, y = body_mass_g, fill = species)
) +
  geom_boxplot(outlier.shape = 24)

# First figure: the plot with the default ggplot2 theme
mass_boxplot

# Second figure: the same plot with `theme_lyon()` added
mass_boxplot +
  supportR::theme_lyon()
I've also created ordination
for Nonmetric Multidimensional Scaling (NMS) or Principal Coordinates Analysis (PCoA) ordinations. Note that this function requires your multidimensional scaling object be created by either ape::pcoa
or vegan::metaMDS
.
#| message: false
#| warning: false
#| results: "hide"
#| fig.height: 5
#| fig.width: 5
#| fig.align: "center"

# Load data from the `vegan` package
utils::data("varespec", package = "vegan")

# Make a column that splits the rows into 4 consecutive, equal-sized groups
# NOTE(review): assumes nrow(varespec) is divisible by 4 -- confirm if the
# example data ever change
treatment <- rep(paste0("Trt_", 1:4), each = nrow(varespec) / 4)

# Combine the groups and the community data into a single data object.
# It is deliberately not called `data`, which would mask `utils::data()`.
veg_df <- cbind(treatment, varespec)

# Perform multidimensional scaling on everything except the treatment column
mds <- vegan::metaMDS(
  veg_df[-1],
  autotransform = FALSE,
  expand = FALSE,
  k = 2,
  try = 10
)

# Plot the ordination from the scaled object and the grouping vector
supportR::ordination(
  mod = mds,
  grps = veg_df$treatment,
  x = "bottomright",
  legend = paste0("Treat-", 1:4)
)
Finally, I've written several functions that allow you to interact with APIs outside of R via R functions with hopefully more comfortable syntax. Because these functions rely on user credentials, they cannot be run non-interactively (as in a CRAN submission) so the following code chunks are not evaluated and are included as examples of the proper syntax for your reference.
For GitHub users, I've developed two related functions: github_ls
and github_tree
. github_ls
accepts the URL to a GitHub repository to which you have access (public or private). It creates a dataframe of that repository's contents including their names, types, and full paths within the repository. Listing of a particular folder and recursive listing of all nested subfolders within a repository are both supported via additional arguments.
If the folder
argument is set to NULL
(the default) the top level of the repository is listed.
#| eval: false

# List every file in a GitHub repository, including nested subfolders
supportR::github_ls(
  repo = "https://github.com/njlyon0/supportR",
  recursive = TRUE,
  quiet = FALSE
)

# List only the contents of one folder, without recursing
supportR::github_ls(
  repo = "https://github.com/njlyon0/supportR",
  folder = "R",
  recursive = FALSE,
  quiet = TRUE
)
github_tree
is an extension of github_ls
that identifies all files in a repository and creates a file tree diagram of that folder structure that is simple and human-readable. Unlike github_ls
, github_tree
only supports recursive identification of all files beginning at the top level of the repository. It does however allow users to exclude the listings of particular folders by specifying their names in the exclude
argument.
I think this could be particularly useful to embed in a repository's README.Rmd
to create a quick-and-easy file map for visitors to use as a guide in navigating the repository's contents.
#| eval: false

# Create a file tree diagram of a GitHub repository, leaving out the folders
# named in `exclude`
supportR::github_tree(
  repo = "https://github.com/njlyon0/supportR",
  exclude = c("docs", "man", ".github"),
  quiet = FALSE
)
Valuable information is sometimes stored as markdown files which--while consistently formatted internally--are not always easily parsed through R. I've written tabularize_md
to ingest a markdown file and collapse it into a table while still preserving the nested structure of any headings that may be in the source file. This function accepts either a local markdown file name/path or a connection (via URL) to an online markdown file. I'll demonstrate the URL-based variant here but to use it on a local file you need only provide the file name/path as you would to any other reading function (e.g., read.csv
, etc.)
#| eval: false

# Open a connection to the NEWS.md file in the `supportR` GitHub repo
md_cxn <- url("https://raw.githubusercontent.com/njlyon0/supportR/main/NEWS.md")

# Transform the markdown into a table.
# The function is namespaced explicitly, as in the other examples.
md_df <- supportR::tabularize_md(file = md_cxn)

# Close the connection (just good housekeeping to do so)
close(md_cxn)

# Check out the table format
str(md_df)
For users who create RMarkdown reports and want to store them in a Google Drive folder, rmd_export
knits and exports a given R Markdown file both locally and to a user-designated Google Drive folder. Note that you MUST authenticate your R session with the googledrive
package so that it has permission to access the Drive folder you supply. I recommend running googledrive::drive_auth()
and doing the authentication "dance" in a browser before using rmd_export
to reduce the chances of any errors.
#| eval: false

# Authenticate this R session with Google Drive first
googledrive::drive_auth()

# Knit an .Rmd file and export the result locally and to a Drive folder
supportR::rmd_export(
  rmd = "my_markdown.Rmd",
  in_path = file.path("Folder in my WD with the .Rmd named in `rmd`"),
  out_path = file.path("Folder in my WD to save the knit file to"),
  out_name = "desired name for output",
  out_type = "html",
  drive_link = "<Full Google Drive link>"
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.