inst/doc/janitor.R

## ---- echo = FALSE, message = FALSE-------------------------------------------
knitr::opts_chunk$set(collapse = T, comment = "#>")
library(janitor)

## ---- message = FALSE, warning = FALSE----------------------------------------
# Create a data.frame with dirty names
test_df <- as.data.frame(matrix(ncol = 6))
names(test_df) <- c("firstName", "ábc@!*", "% successful (2009)",
                    "REPEAT VALUE", "REPEAT VALUE", "")

## -----------------------------------------------------------------------------
test_df %>%
  clean_names()

## -----------------------------------------------------------------------------
make.names(names(test_df))

## -----------------------------------------------------------------------------
df1 <- data.frame(a = 1:2, b = c("big", "small"))
df2 <- data.frame(a = 10:12, b = c("medium", "small", "big"), c = 0, stringsAsFactors = TRUE) # here, column b is a factor
df3 <- df1 %>%
  dplyr::mutate(b = as.character(b))

compare_df_cols(df1, df2, df3)

compare_df_cols(df1, df2, df3, return = "mismatch")
compare_df_cols(df1, df2, df3, return = "mismatch", bind_method = "rbind") # default is dplyr::bind_rows

## -----------------------------------------------------------------------------
compare_df_cols_same(df1, df3)
compare_df_cols_same(df2, df3)

## -----------------------------------------------------------------------------
mtcars %>%
  tabyl(gear, cyl) %>%
  adorn_totals("col") %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  adorn_title()

## -----------------------------------------------------------------------------
get_dupes(mtcars, wt, cyl) # or mtcars %>% get_dupes(wt, cyl) if you prefer to pipe

## ----message=FALSE------------------------------------------------------------
library(dplyr)
starwars[1:4,] %>%
  get_one_to_one()

## -----------------------------------------------------------------------------
tibble::as_tibble(iris, .name_repair = janitor::make_clean_names)

## -----------------------------------------------------------------------------
not_one_to_one <- data.frame(
  X = rep(1:3, each = 2),
  Y = c(rep(1:2, each = 2), 1:2))

not_one_to_one

# throws informative error:
try(not_one_to_one %>%
      dplyr::group_by(X) %>%
      dplyr::mutate(
        Z = single_value(Y, info = paste("Calculating Z for group X =", X)))
      )

## -----------------------------------------------------------------------------
q <- data.frame(v1 = c(1, NA, 3),
                v2 = c(NA, NA, NA),
                v3 = c("a", NA, "b"))
q %>%
  remove_empty(c("rows", "cols"))

## -----------------------------------------------------------------------------
a <- data.frame(good = 1:3, boring = "the same")
a %>% remove_constant()

## -----------------------------------------------------------------------------
nums <- c(2.5, 3.5)
round(nums)
round_half_up(nums)

## -----------------------------------------------------------------------------
excel_numeric_to_date(41103)
excel_numeric_to_date(41103.01) # ignores decimal places, returns Date object
excel_numeric_to_date(41103.01, include_time = TRUE) # returns POSIXlt object
excel_numeric_to_date(41103.01, date_system = "mac pre-2011")

## -----------------------------------------------------------------------------
convert_to_date(c("2020-02-29", "40000.1"))

## -----------------------------------------------------------------------------
dirt <- data.frame(X_1 = c(NA, "ID", 1:3),
           X_2 = c(NA, "Value", 4:6))

row_to_names(dirt, 2)

## -----------------------------------------------------------------------------
f <- factor(c("strongly agree", "agree", "neutral", "neutral", "disagree", "strongly agree"),
            levels = c("strongly agree", "agree", "neutral", "disagree", "strongly disagree"))
top_levels(f)
top_levels(f, n = 1)

Try the janitor package in your browser

Any scripts or data that you put into this service are public.

janitor documentation built on Feb. 16, 2023, 10:16 p.m.