# Vignette setup: knitr chunk options, package loading, and a fixed RNG seed.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
# options(tibble.print_min = 4L, tibble.print_max = 4L)
library(dplyr)
# devtools::load_all("C:/Users/stern/Dropbox/sternclean")
library(sternclean)
library(pander)
# Seed the random number generator so any random results are reproducible.
set.seed(1014)

sternclean seeks to simplify cleaning data frames as much as possible.

Multiple cleaning steps can be done in just one function.

For example, in a few lines of clean code you can change column types, impute one set of columns' NAs with a set value, impute another set of columns' NAs with a group mean, and impute another set of columns' infinite values with another set value.

Here is the order of operations under the hood:

This allows multiple cleaning processes to happen in this one function.

Simple Examples

We will start out with simple one-step cleaning examples. Later we will take on more complex situations.

Rickle and Mortan Dataset

# Raw columns for the demo data set. They are kept as separate vectors
# because the later examples rebuild the data frame from them.
people <- c("Rickle", "Mortan", "Jerry", "Pickle Rickle")
original_person <- c("Rickle", "Mortan", "Jerry", "Rickle")
intelligence <- c(Inf, 9, 0.1, Inf)  # includes infinite values
evil_rank <- c(5, 2.75, 2, NA)       # includes a missing value

# Assemble the four vectors into one data frame, one column per vector.
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# Store `people` as a factor in the data frame (the `people` vector itself
# is left unchanged).
rickle_and_mortan[["people"]] <- as.factor(rickle_and_mortan[["people"]])

# Print the starting data frame as a pandoc table.
pander::pandoc.table(rickle_and_mortan)

# Store `intelligence` as character; the class_to_numer example further down
# operates on this column.
rickle_and_mortan[["intelligence"]] <-
  as.character(rickle_and_mortan[["intelligence"]])

Class Change Parameters

# NOTE(review): sternclean() is given the data frame's name as a string and
# its return value is not assigned. The class() checks around each call
# suggest it updates `rickle_and_mortan` in place -- confirm against the
# package documentation.

# Class of `people` before and after a class_to_strng call.
class(rickle_and_mortan[["people"]])

sternclean("rickle_and_mortan", class_to_strng = "people")

class(rickle_and_mortan[["people"]])

# Class of `intelligence` before and after a class_to_numer call.
class(rickle_and_mortan[["intelligence"]])

sternclean("rickle_and_mortan", class_to_numer = "intelligence")

class(rickle_and_mortan[["intelligence"]])

Column/Row Removal Parameters

# Each example below follows the same pattern: call sternclean() with one
# removal parameter, print the data frame, then rebuild the data frame from
# the raw vectors so the next example starts from the same data.
# NOTE(review): the per-parameter descriptions are inferred from the
# parameter names -- confirm against the sternclean documentation.

# remove_columns: presumably drops the named column(s).
sternclean("rickle_and_mortan", remove_columns = "intelligence")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# remove_na_rows: presumably drops rows where the named column is NA.
sternclean("rickle_and_mortan", remove_na_rows = "evil_rank")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# removeby_regex: presumably removes columns matched by the pattern "pe".
sternclean("rickle_and_mortan", removeby_regex = "pe")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# remove_all_nas: exact NA-removal semantics are not visible here; see the
# package documentation.
sternclean("rickle_and_mortan", remove_all_nas = TRUE)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# remove_non_num: presumably drops non-numeric columns.
sternclean("rickle_and_mortan", remove_non_num = TRUE)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# remove_all_exc: presumably keeps only the listed columns.
sternclean("rickle_and_mortan", remove_all_exc = c("people", "evil_rank"))
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

Impute Parameters

# Each example below follows the same pattern: call sternclean() with one
# set of imputation parameters, print the data frame, then rebuild the data
# frame from the raw vectors so the next example starts from the same data.
# NOTE(review): the per-parameter descriptions are inferred from the
# parameter names -- confirm against the sternclean documentation.

# impute_na2mean: presumably replaces NAs in the column with its mean.
sternclean("rickle_and_mortan", impute_na2mean = "evil_rank")
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# impute_na_cols / impute_na_with: presumably replaces NAs in the column
# with the given fixed value.
sternclean(
  "rickle_and_mortan",
  impute_na_cols = "evil_rank",
  impute_na_with = 1738
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# impute_grpmean / impute_grpwith: presumably replaces NAs in `evil_rank`
# with the mean within each `original_person` group.
sternclean(
  "rickle_and_mortan",
  impute_grpmean = "evil_rank",
  impute_grpwith = "original_person"
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# impute_inf_col / impute_inf_wit: presumably replaces infinite values in
# the column with the given fixed value.
sternclean(
  "rickle_and_mortan",
  impute_inf_col = "intelligence",
  impute_inf_wit = 1738
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

# impute_cust_cl / impute_cust_fn: imputation with a user-supplied function.
# `probs` and `na.rm` are arguments of quantile(); presumably sternclean()
# forwards them to it.
sternclean(
  "rickle_and_mortan",
  impute_cust_cl = "evil_rank",
  impute_cust_fn = quantile,
  probs = 0.25,
  na.rm = TRUE
)
pander::pandoc.table(rickle_and_mortan)
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

More Complex Example

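Here we combine several steps in a single call: convert `people` to a string and `intelligence` to numeric, remove the `original_person` column, impute the missing `evil_rank` value with the column mean, and replace the infinite `intelligence` values with 1738.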

# Several cleaning parameters passed in a single sternclean() call:
# two class changes, one column removal, and two imputations.
sternclean(
  "rickle_and_mortan",
  class_to_strng = "people",
  class_to_numer = "intelligence",
  remove_columns = "original_person",
  impute_na2mean = "evil_rank",
  impute_inf_col = "intelligence",
  impute_inf_wit = 1738
)
pander::pandoc.table(rickle_and_mortan)

# Rebuild the data frame from the raw vectors so the original data can be
# printed for comparison.
rickle_and_mortan <- data.frame(
  people = people,
  original_person = original_person,
  intelligence = intelligence,
  evil_rank = evil_rank
)

Compared to Original Data Frame

# Print the freshly rebuilt data frame as a pandoc table for comparison.
pander::pandoc.table(rickle_and_mortan)

For more, check out my GitHub.



basketballbeane/sternclean documentation built on Sept. 10, 2021, 7:50 a.m.