knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", out.width = "100%" ) set.seed(42)
Miscellaneous, Analytic R Kernels
An R package with a set of general-use functions for data analytics. This is developed mostly for personal use and has no real goal other than to limit the time I spend searching for where I did that thing that I think I could use again because it worked well but this problem might be slightly different and I know I had to change it before.
Some parts happily ripped from and (hopefully) credited to others.
You can download the current CRAN version with:
install.packages("mark")
You can install the development version from GitHub with:
remotes::install_github("jmbarbone/mark")
This package contains a wide variety of functions, some useful, some not so much. Below is a selection of a few functions that could potentially be useful for others:
library(mark)
Get dates from sloppy entries:
bad_dates <- c("2020 Dec 8th", "1970 May", "??", "1984 UNK UN") date_from_partial(bad_dates) date_from_partial(bad_dates, method = "max") date_from_partial(c("May 2000", "08Dec2020"), format = "dmy")
Slice strings:
x <- stringi::stri_rand_lipsum(1) str_slice(x, n = 50L) str_slice_by_word(x)
Read in bibliographies:
file <- system.file("extdata", "example-bib.txt", package = "mark") bib <- read_bib(file) tibble::as_tibble(bib)
More matching:
1:10 %out% c(1, 3, 5, 9) # opposite of %in% letters[1:5] %wo% letters[3:7] letters[1:5] %wi% letters[3:7]
Small functions for working with data.frames:
x <- list(a = 1:5, b = letters[1:5]) quick_df(x) vector2df(x[["b"]], name = NULL) quick_dfl(a = 1:3, b = list(1:5, 6:10, 11:15))
Counts and proportions:
set.seed(42) x <- sample(1:5, 20, TRUE, 5:1/2) counts(x) props(x) df <- as.data.frame(matrix(sample(1:2, 60, TRUE), byrow = TRUE, ncol = 3)) counts(df, c("V1", "V2")) props(df, 1:3)
Date time differences:
x <- as.POSIXlt("2021-02-13 05:02:30", tz = "America/New_York") + c(0, -1, 2) * 3600 * 24 y <- as.POSIXlt("2020-02-13 05:02:30", tz = "America/New_York") + c(0, -2, 4) * 3600 * 24 # comparison with base::difftime() (note the order of x and y) difftime(y, x, units = "days") diff_time_days(x, y) difftime(y, x, units = "secs") diff_time_secs(x, y) # Year (by days, months, etc) diff_time_years(x, y) diff_time_myears(x, y) # Set time zones diff_time_hours(x, y, "GMT", "America/New_York") diff_time_hours(x, x, "GMT", c("America/Los_Angeles", "America/New_York", "Europe/London")) # note x, x diff_time_days(x, y, NULL, 31536000)
Simple factors:
fact(c("a", "c", NA, "a", "b", NA, "a", "c")) # no sorting fact(c(-1, 5, 2, NA, 3)) # sorting fact(c(NA, FALSE, TRUE, FALSE, TRUE, NA)) # fixed
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.