profilr
`profilr` quickly and easily profiles data using common descriptive statistics.
You can install the latest released version from CRAN:
install.packages("profilr")
Or install the latest development version from GitHub with:
if (packageVersion("devtools") < 1.6) {
install.packages("devtools")
}
devtools::install_github("paulhendricks/profilr")
If you encounter a clear bug, please file a minimal reproducible example on GitHub.
library(dplyr, warn.conflicts = FALSE)
library(profilr)
#>
#> Attaching package: 'profilr'
#> The following object is masked from 'package:stats':
#>
#> profile
mtcars %>%
profile
#> .column_name .column_class .column_type .count_elements .count_uniques
#> 1 mpg numeric double 32 25
#> 2 cyl numeric double 32 3
#> 3 disp numeric double 32 27
#> 4 hp numeric double 32 22
#> 5 drat numeric double 32 22
#> 6 wt numeric double 32 29
#> 7 qsec numeric double 32 30
#> 8 vs numeric double 32 2
#> 9 am numeric double 32 2
#> 10 gear numeric double 32 3
#> 11 carb numeric double 32 6
#> .percent_uniques .count_NULLs .percent_NULLs .count_NAs .percent_NAs
#> 1 0.78125 0 0 0 0
#> 2 0.09375 0 0 0 0
#> 3 0.84375 0 0 0 0
#> 4 0.68750 0 0 0 0
#> 5 0.68750 0 0 0 0
#> 6 0.90625 0 0 0 0
#> 7 0.93750 0 0 0 0
#> 8 0.06250 0 0 0 0
#> 9 0.06250 0 0 0 0
#> 10 0.09375 0 0 0 0
#> 11 0.18750 0 0 0 0
#> .count_zeroes .percent_zeros .mean_value .sd_value .q0_value
#> 1 0 0.00000 20.090625 6.0269481 10.400
#> 2 0 0.00000 6.187500 1.7859216 4.000
#> 3 0 0.00000 230.721875 123.9386938 71.100
#> 4 0 0.00000 146.687500 68.5628685 52.000
#> 5 0 0.00000 3.596563 0.5346787 2.760
#> 6 0 0.00000 3.217250 0.9784574 1.513
#> 7 0 0.00000 17.848750 1.7869432 14.500
#> 8 18 0.56250 0.437500 0.5040161 0.000
#> 9 19 0.59375 0.406250 0.4989909 0.000
#> 10 0 0.00000 3.687500 0.7378041 3.000
#> 11 0 0.00000 2.812500 1.6152000 1.000
#> .q25_value .q50_value .q75_value .q100_value
#> 1 15.42500 19.200 22.80 33.900
#> 2 4.00000 6.000 8.00 8.000
#> 3 120.82500 196.300 326.00 472.000
#> 4 96.50000 123.000 180.00 335.000
#> 5 3.08000 3.695 3.92 4.930
#> 6 2.58125 3.325 3.61 5.424
#> 7 16.89250 17.710 18.90 22.900
#> 8 0.00000 0.000 1.00 1.000
#> 9 0.00000 0.000 1.00 1.000
#> 10 3.00000 4.000 4.00 5.000
#> 11 2.00000 2.000 4.00 8.000
#> .top_5_values
#> 1 (1) 10.4 [2]\n (1) 15.2 [2]\n (1) 19.2 [2]\n (1) 21 [2]\n (1) 21.4 [2]\n
#> 2 (1) 8 [14]\n (2) 4 [11]\n (3) 6 [7]\n
#> 3 (1) 275.8 [3]\n (2) 160 [2]\n (2) 167.6 [2]\n (2) 360 [2]\n (5) 71.1 [1]\n
#> 4 (1) 110 [3]\n (1) 175 [3]\n (1) 180 [3]\n (4) 66 [2]\n (4) 123 [2]\n
#> 5 (1) 3.07 [3]\n (1) 3.92 [3]\n (3) 2.76 [2]\n (3) 3.08 [2]\n (3) 3.15 [2]\n
#> 6 (1) 3.44 [3]\n (2) 3.57 [2]\n (3) 1.513 [1]\n (3) 1.615 [1]\n (3) 1.835 [1]\n
#> 7 (1) 17.02 [2]\n (1) 18.9 [2]\n (3) 14.5 [1]\n (3) 14.6 [1]\n (3) 15.41 [1]\n
#> 8 (1) 0 [18]\n (2) 1 [14]\n
#> 9 (1) 0 [19]\n (2) 1 [13]\n
#> 10 (1) 3 [15]\n (2) 4 [12]\n (3) 5 [5]\n
#> 11 (1) 2 [10]\n (1) 4 [10]\n (3) 1 [7]\n (4) 3 [3]\n (5) 6 [1]\n
#> .bottom_5_values
#> 1 (8) 33.9 [1]\n (8) 32.4 [1]\n (8) 27.3 [1]\n (8) 26 [1]\n (8) 24.4 [1]\n
#> 2 (3) 6 [7]\n (2) 4 [11]\n (1) 8 [14]\n
#> 3 (5) 472 [1]\n (5) 460 [1]\n (5) 440 [1]\n (5) 400 [1]\n (5) 351 [1]\n
#> 4 (8) 335 [1]\n (8) 264 [1]\n (8) 230 [1]\n (8) 215 [1]\n (8) 205 [1]\n
#> 5 (9) 4.93 [1]\n (9) 4.43 [1]\n (9) 4.11 [1]\n (9) 3.85 [1]\n (9) 3.77 [1]\n
#> 6 (3) 5.424 [1]\n (3) 5.345 [1]\n (3) 5.25 [1]\n (3) 4.07 [1]\n (3) 3.845 [1]\n
#> 7 (3) 22.9 [1]\n (3) 20.22 [1]\n (3) 20.01 [1]\n (3) 20 [1]\n (3) 19.9 [1]\n
#> 8 (2) 1 [14]\n (1) 0 [18]\n
#> 9 (2) 1 [13]\n (1) 0 [19]\n
#> 10 (3) 5 [5]\n (2) 4 [12]\n (1) 3 [15]\n
#> 11 (5) 8 [1]\n (5) 6 [1]\n (4) 3 [3]\n (3) 1 [7]\n (1) 4 [10]\n
mtcars %>%
group_by(cyl) %>%
do(profile(.))
#> # A tibble: 33 x 22
#> # Groups: cyl [3]
#> cyl .column_name .column_class .column_type .count_elements
#> <dbl> <chr> <chr> <chr> <int>
#> 1 4 mpg numeric double 11
#> 2 4 cyl numeric double 11
#> 3 4 disp numeric double 11
#> 4 4 hp numeric double 11
#> 5 4 drat numeric double 11
#> 6 4 wt numeric double 11
#> 7 4 qsec numeric double 11
#> 8 4 vs numeric double 11
#> 9 4 am numeric double 11
#> 10 4 gear numeric double 11
#> # ... with 23 more rows, and 17 more variables: .count_uniques <int>,
#> # .percent_uniques <dbl>, .count_NULLs <int>, .percent_NULLs <dbl>,
#> # .count_NAs <int>, .percent_NAs <dbl>, .count_zeroes <int>,
#> # .percent_zeros <dbl>, .mean_value <dbl>, .sd_value <dbl>,
#> # .q0_value <dbl>, .q25_value <dbl>, .q50_value <dbl>, .q75_value <dbl>,
#> # .q100_value <dbl>, .top_5_values <chr>, .bottom_5_values <chr>
To cite package ‘profilr’ in publications use:
Paul Hendricks (2015). profilr: Quickly Profile Data in R. R package version 0.1.0. https://CRAN.R-project.org/package=profilr
A BibTeX entry for LaTeX users is
@Manual{,
title = {profilr: Quickly Profile Data in R},
author = {Paul Hendricks},
year = {2015},
note = {R package version 0.1.0},
url = {https://CRAN.R-project.org/package=profilr},
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.