# Vignette setup: knitr chunk options, package loading, and a fixed RNG seed.
# `collapse = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
# options(tibble.print_min = 4L, tibble.print_max = 4L)
library(dplyr)
# devtools::load_all("C:/Users/stern/Dropbox/sternclean")
library(sternclean)
library(pander)
# Seed the RNG so any random output is the same on every render.
set.seed(1014)
`sternclean` seeks to simplify cleaning data frames as much as possible.
Multiple cleaning steps can be done in just one function.
For example, in a few lines of clean code you can change column types, impute the NAs in one set of columns with a set value, impute the NAs in another set of columns with a group mean, and impute the infinite values in yet another set of columns with a different set value.
Here is the order of operations under the hood:
This allows multiple cleaning processes to happen in this one function.
We will start out with simple one-step cleaning examples. Later we will take on more complex situations.
# Example data used throughout the vignette: four rows, with `Inf` in two
# `intelligence` entries and one `NA` in `evil_rank`.
people <- c("Rickle", "Mortan", "Jerry", "Pickle Rickle")
original_person <- c("Rickle", "Mortan", "Jerry", "Rickle")
intelligence <- c(Inf, 9, 0.1, Inf)
evil_rank <- c(5, 2.75, 2, NA)

rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# Store `people` as a factor so the class-conversion example has a
# non-character column to work on.
rickle_and_mortan$people <- as.factor(rickle_and_mortan$people)

# Print the table before `intelligence` is turned into character below.
pander::pandoc.table(rickle_and_mortan)

# Store `intelligence` as character so the numeric-conversion example has a
# non-numeric column to work on.
rickle_and_mortan$intelligence <- as.character(rickle_and_mortan$intelligence)
# Class-conversion examples. `sternclean()` is given the data frame's name as
# a string and its return value is not assigned; `class()` is printed before
# and after each call so the reader can compare the column's class.
# NOTE(review): this usage implies the function updates `rickle_and_mortan`
# in the calling environment -- confirm against the package documentation.

# `class_to_strng` applied to the factor column `people`.
class(rickle_and_mortan$people)
sternclean("rickle_and_mortan", class_to_strng = "people")
class(rickle_and_mortan$people)

# `class_to_numer` applied to the character column `intelligence`.
class(rickle_and_mortan$intelligence)
sternclean("rickle_and_mortan", class_to_numer = "intelligence")
class(rickle_and_mortan$intelligence)
# Removal examples. Each example makes one `sternclean()` call, prints the
# resulting table, and then rebuilds `rickle_and_mortan` from the original
# vectors so the next example starts from the same data.

# `remove_columns`: a column named explicitly.
sternclean("rickle_and_mortan", remove_columns = "intelligence")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `remove_na_rows`: rows selected via NAs in the named column.
sternclean("rickle_and_mortan", remove_na_rows = "evil_rank")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `removeby_regex`: selection by regular expression.
# NOTE(review): "pe" occurs in both `people` and `original_person` -- confirm
# that matching both columns is intended.
sternclean("rickle_and_mortan", removeby_regex = "pe")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `remove_all_nas`: switch-style option, enabled with TRUE.
sternclean("rickle_and_mortan", remove_all_nas = TRUE)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `remove_non_num`: switch-style option, enabled with TRUE.
sternclean("rickle_and_mortan", remove_non_num = TRUE)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `remove_all_exc`: takes the columns `people` and `evil_rank`.
sternclean("rickle_and_mortan", remove_all_exc = c("people", "evil_rank"))
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)
# Imputation examples. Each example makes one `sternclean()` call, prints the
# resulting table, and then rebuilds `rickle_and_mortan` from the original
# vectors so the next example starts from the same data.

# `impute_na2mean` on `evil_rank`, the column holding the NA.
sternclean("rickle_and_mortan", impute_na2mean = "evil_rank")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `impute_na_cols` names the column; `impute_na_with` supplies the value.
sternclean(
  "rickle_and_mortan",
  impute_na_cols = "evil_rank",
  impute_na_with = 1738
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `impute_grpmean` names the column; `impute_grpwith` names the grouping
# column (`original_person`).
sternclean(
  "rickle_and_mortan",
  impute_grpmean = "evil_rank",
  impute_grpwith = "original_person"
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `impute_inf_col` names the column holding `Inf`; `impute_inf_wit` supplies
# the value.
sternclean(
  "rickle_and_mortan",
  impute_inf_col = "intelligence",
  impute_inf_wit = 1738
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)

# `impute_cust_cl` names the column and `impute_cust_fn` supplies a function.
# NOTE(review): `probs` and `na.rm` look like extra arguments forwarded to
# `quantile()` -- confirm against the package documentation.
sternclean(
  "rickle_and_mortan",
  impute_cust_cl = "evil_rank",
  impute_cust_fn = quantile,
  probs = 0.25,
  na.rm = TRUE
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)
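Here we:

- convert `people` to a string column (`class_to_strng`),
- convert `intelligence` to a numeric column (`class_to_numer`),
- remove the `original_person` column (`remove_columns`),
- impute the NAs in `evil_rank` with the column mean (`impute_na2mean`), and
- impute the infinite values in `intelligence` with 1738 (`impute_inf_col`, `impute_inf_wit`).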
# Combined example: several cleaning options passed in a single
# `sternclean()` call, one argument per line for readability.
sternclean(
  "rickle_and_mortan",
  class_to_strng = "people",
  class_to_numer = "intelligence",
  remove_columns = "original_person",
  impute_na2mean = "evil_rank",
  impute_inf_col = "intelligence",
  impute_inf_wit = 1738
)

# Print the cleaned table.
pander::pandoc.table(rickle_and_mortan)

# Rebuild the data frame from the original vectors and print it again.
rickle_and_mortan <- data.frame(
  people, original_person, intelligence, evil_rank
)
pander::pandoc.table(rickle_and_mortan)
"For more, checkout my " (Github)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.