# Attach the packages the report chunks rely on.
library(ggplot2)
library(pander)

# Look up dataMaid's aggregated-plot helpers by name in the package
# namespace (presumably because they are not exported -- confirm against
# the installed dataMaid version).
ggAggHist <- getFromNamespace("ggAggHist", "dataMaid")
ggAggBarplot <- getFromNamespace("ggAggBarplot", "dataMaid")
The dataset examined has the following dimensions:
Feature Result
Number of observations 45
The following variable checks were performed, depending on the data type of each variable:
character factor labelled numeric integer logical Date
Identify miscoded missing values $\times$ $\times$ $\times$ $\times$ $\times$ $\times$
Identify prefixed and suffixed whitespace $\times$ $\times$ $\times$
Identify case issues $\times$ $\times$ $\times$
Identify misclassified numeric or integer variables $\times$ $\times$ $\times$
Identify levels with < 6 obs. $\times$ $\times$
Non-supported variable types were set to be handled in the following way:
Non-supported type Handled as
Name character
complex numeric
Please note that all numerical values in the following have been rounded to 2 decimals.
Variable class # unique values Missing observations Any problems?
[lastName] character 39 0.00 %
[firstName] character 30 0.00 %
[orderOfPresidency] factor 45 0.00 % $\times$
[birthday] Date 44 0.00 %
[stateOfBirth] character 21 0.00 %
[assassinationAttempt] factor 2 0.00 %
[sex] factor 1 0.00 % $\times$
[ethnicity] factor 2 0.00 % $\times$
[presidencyYears] numeric 10 2.22 % $\times$
[ageAtInauguration] numeric 22 0.00 % $\times$
\bminione
Feature Result
Variable type character
Number of missing obs. 0 (0 %)
Number of unique values 39
\emini \bminitwo
# Plot for `lastName`, drawn by dataMaid's ggAggBarplot helper. The data frame
# is passed in already aggregated: `x` holds the 39 distinct values as factor
# levels and `y` holds one integer per level (presumably its frequency; the
# values sum to the 45 observations).
ggAggBarplot(data = structure(list(x = structure(1:39, .Label = c("Adams", "Arthur", "Buchanan", "Bush", "Carter", "Cleveland", "Clinton", "Coolidge", "Eisenhower", "Fillmore", "Ford", "Garfield", "Grant", "Harding", "Harrison", "Hayes", "Hoover", "Jackson", "Jefferson", "Johnson", "Kennedy", "Lincoln", "Madison", "McKinley", "Monroe", "Nixon", "Obama", "Pierce", "Polk", "Reagan", "Roosevelt", "Taft", "Taylor", "Truman", "Trump", "Tyler", "Van Buren", "Washington", "Wilson"), class = "factor"), y = c(2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L )), .Names = c("x", "y"), row.names = c(NA, -39L), class = "data.frame"), vnam = "lastName")
\emini
\fullline
\bminione
Feature Result
Variable type character
Number of missing obs. 0 (0 %)
Number of unique values 30
\emini \bminitwo
# Plot for `firstName`, drawn by dataMaid's ggAggBarplot helper. The data frame
# is passed in already aggregated: `x` holds the 30 distinct values as factor
# levels and `y` holds one integer per level (presumably its frequency; the
# values sum to the 45 observations).
ggAggBarplot(data = structure(list(x = structure(1:30, .Label = c("Abraham", "Andrew", "Barack", "Benjamin", "Calvin", "Chester", "Donald", "Dwight", "Franklin", "George", "Gerald", "Grover", "Harry", "Herbert", "James", "Jimmy", "John", "Lyndon", "Martin", "Millard", "Richard", "Ronald", "Rutherford", "Theodore", "Thomas", "Ulysses", "Warren", "William", "Woodrow", "Zachary"), class = "factor"), y = c(1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L)), .Names = c("x", "y"), row.names = c(NA, -30L), class = "data.frame"), vnam = "firstName")
\emini
\fullline
\fullline
\bminione
Feature Result
Variable type Date
Number of missing obs. 0 (0 %)
Number of unique values 44
Mode "1837-03-18"
Min. and max. 1732-02-22; 1961-08-04
\emini \bminitwo
# Plot for `birthday`, drawn by dataMaid's ggAggHist helper from 20 pre-binned
# rows. `factorV` labels each bin by its start date and `Freq` is the tally for
# that bin. `xmin`/`xmax` are the bin edges stored as Date-classed day offsets
# (negative values are days before 1970-01-01). `ymin` is 0 and `ymax` repeats
# `Freq`, presumably so each bin can be drawn as a rectangle.
ggAggHist(data = structure(list(factorV = structure(1:20, .Label = c("1732-02-22", "1743-08-13", "1755-02-01", "1766-07-24", "1778-01-12", "1789-07-03", "1800-12-23", "1812-06-13", "1823-12-04", "1835-05-25", "1846-11-13", "1858-05-04", "1869-10-23", "1881-04-14", "1892-10-03", "1904-03-25", "1915-09-14", "1927-03-05", "1938-08-25", "1950-02-13"), class = "factor"), Freq = c(3L, 1L, 1L, 3L, 2L, 4L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 0L, 4L, 3L, 0L, 3L, 1L), xmin = structure(c(-86876, -82686, -78496, -74305, -70115, -65925, -61735, -57545, -53354, -49164, -44974, -40784, -36594, -32403, -28213, -24023, -19833, -15643, -11452, -7262), class = "Date"), xmax = structure(c(-82686, -78496, -74305, -70115, -65925, -61735, -57545, -53354, -49164, -44974, -40784, -36594, -32403, -28213, -24023, -19833, -15643, -11452, -7262, -3072), class = "Date"), ymin = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ymax = c(3L, 1L, 1L, 3L, 2L, 4L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 0L, 4L, 3L, 0L, 3L, 1L)), .Names = c("factorV", "Freq", "xmin", "xmax", "ymin", "ymax"), row.names = c(NA, -20L), class = "data.frame"), vnam = "birthday")
\emini
\fullline
\bminione
Feature Result
Variable type character
Number of missing obs. 0 (0 %)
Number of unique values 21
\emini \bminitwo
# Plot for `stateOfBirth`, drawn by dataMaid's ggAggBarplot helper. The data
# frame is passed in already aggregated: `x` holds the 21 distinct values as
# factor levels and `y` holds one integer per level (presumably its frequency;
# the values sum to the 45 observations).
ggAggBarplot(data = structure(list(x = structure(1:21, .Label = c("Arkansas", "California", "Connecticut", "Georgia", "Hawaii", "Illinois", "Iowa", "Kentucky", "Massachusetts", "Missouri", "Nebraska", "New Hampshire", "New Jersey", "New York", "North Carolina", "Ohio", "Pennsylvania", "Tennessee", "Texas", "Vermont", "Virginia" ), class = "factor"), y = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 5L, 2L, 8L, 1L, 1L, 2L, 2L, 7L)), .Names = c("x", "y"), row.names = c(NA, -21L), class = "data.frame"), vnam = "stateOfBirth")
\emini
\fullline
\bminione
Feature Result
Variable type factor
Number of missing obs. 0 (0 %)
Number of unique values 2
\emini \bminitwo
# Plot for `assassinationAttempt`, drawn by dataMaid's ggAggBarplot helper. The
# data frame is passed in already aggregated: `x` holds the two factor levels
# "0" and "1", and `y` holds one integer per level (35 and 10, presumably the
# level frequencies; they sum to the 45 observations).
ggAggBarplot(data = structure(list(x = structure(1:2, .Label = c("0", "1"), class = "factor"), y = c(35L, 10L)), .Names = c("x", "y" ), row.names = c(NA, -2L), class = "data.frame"), vnam = "assassinationAttempt")
\emini
\fullline
\fullline
\bminione
Feature Result
Variable type factor
Number of missing obs. 0 (0 %)
Number of unique values 2
\emini \bminitwo
# Plot for `ethnicity`, drawn by dataMaid's ggAggBarplot helper. The data frame
# is passed in already aggregated: `x` holds the two factor levels and `y`
# holds one integer per level (1 and 44, presumably the level frequencies; they
# sum to the 45 observations).
ggAggBarplot(data = structure(list(x = structure(1:2, .Label = c("African American", "Caucasian"), class = "factor"), y = c(1L, 44L)), .Names = c("x", "y"), row.names = c(NA, -2L), class = "data.frame"), vnam = "ethnicity")
\emini
\fullline
\bminione
Feature Result
Variable type numeric
Number of missing obs. 1 (2.22 %)
Number of unique values 9
Median 4
1st and 3rd quartiles 3.75; 8
\emini \bminitwo
# Plot for `presidencyYears`, drawn by dataMaid's ggAggHist helper from 20
# pre-binned rows of width 0.6 covering 0 to 12. `factorV` is the interval
# label, `Freq` the tally for that bin, and `xmin`/`xmax` the numeric bin
# edges. `ymin` is 0 and `ymax` repeats `Freq`, presumably so each bin can be
# drawn as a rectangle. The tallies sum to 44, not 45.
ggAggHist(data = structure(list(factorV = structure(1:20, .Label = c("[0,0.6]", "(0.6,1.2]", "(1.2,1.8]", "(1.8,2.4]", "(2.4,3]", "(3,3.6]", "(3.6,4.2]", "(4.2,4.8]", "(4.8,5.4]", "(5.4,6]", "(6,6.6]", "(6.6,7.2]", "(7.2,7.8]", "(7.8,8.4]", "(8.4,9]", "(9,9.6]", "(9.6,10.2]", "(10.2,10.8]", "(10.8,11.4]", "(11.4,12]"), class = "factor"), Freq = c(2L, 1L, 0L, 4L, 4L, 0L, 15L, 0L, 3L, 0L, 0L, 3L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 1L), xmin = c(0, 0.6, 1.2, 1.8, 2.4, 3, 3.6, 4.2, 4.8, 5.4, 6, 6.6, 7.2, 7.8, 8.4, 9, 9.6, 10.2, 10.8, 11.4), xmax = c(0.6, 1.2, 1.8, 2.4, 3, 3.6, 4.2, 4.8, 5.4, 6, 6.6, 7.2, 7.8, 8.4, 9, 9.6, 10.2, 10.8, 11.4, 12), ymin = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ymax = c(2L, 1L, 0L, 4L, 4L, 0L, 15L, 0L, 3L, 0L, 0L, 3L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 1L)), .Names = c("factorV", "Freq", "xmin", "xmax", "ymin", "ymax"), row.names = c(NA, -20L ), class = "data.frame"), vnam = "presidencyYears")
\emini
\fullline
\bminione
Feature Result
Variable type numeric
Number of missing obs. 0 (0 %)
Number of unique values 22
Median 55
1st and 3rd quartiles 51; 58
\emini \bminitwo
# Plot for `ageAtInauguration`, drawn by dataMaid's ggAggHist helper from 20
# pre-binned rows of width 1.4 covering 42 to 70. `factorV` is the interval
# label, `Freq` the tally for that bin, and `xmin`/`xmax` the numeric bin
# edges. `ymin` is 0 and `ymax` repeats `Freq`, presumably so each bin can be
# drawn as a rectangle.
ggAggHist(data = structure(list(factorV = structure(1:20, .Label = c("[42,43.4]", "(43.4,44.8]", "(44.8,46.2]", "(46.2,47.6]", "(47.6,49]", "(49,50.4]", "(50.4,51.8]", "(51.8,53.2]", "(53.2,54.6]", "(54.6,56]", "(56,57.4]", "(57.4,58.8]", "(58.8,60.2]", "(60.2,61.6]", "(61.6,63]", "(63,64.4]", "(64.4,65.8]", "(65.8,67.2]", "(67.2,68.6]", "(68.6,70]"), class = "factor"), Freq = c(2L, 0L, 2L, 2L, 3L, 2L, 4L, 2L, 5L, 7L, 4L, 1L, 1L, 3L, 1L, 2L, 1L, 0L, 1L, 2L), xmin = c(42, 43.4, 44.8, 46.2, 47.6, 49, 50.4, 51.8, 53.2, 54.6, 56, 57.4, 58.8, 60.2, 61.6, 63, 64.4, 65.8, 67.2, 68.6), xmax = c(43.4, 44.8, 46.2, 47.6, 49, 50.4, 51.8, 53.2, 54.6, 56, 57.4, 58.8, 60.2, 61.6, 63, 64.4, 65.8, 67.2, 68.6, 70), ymin = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ymax = c(2L, 0L, 2L, 2L, 3L, 2L, 4L, 2L, 5L, 7L, 4L, 1L, 1L, 3L, 1L, 2L, 1L, 0L, 1L, 2L)), .Names = c("factorV", "Freq", "xmin", "xmax", "ymin", "ymax"), row.names = c(NA, -20L), class = "data.frame"), vnam = "ageAtInauguration")
\emini
\fullline
\bminione
Feature Result
Variable type complex
Number of missing obs. 0 (0 %)
Number of unique values 11
Median 6
1st and 3rd quartiles 3; 8
\emini \bminitwo
# Plot for `favoriteNumber`, drawn by dataMaid's ggAggHist helper from 20
# pre-binned rows of width 0.5 covering 0 to 10. `factorV` is the interval
# label, `Freq` the tally for that bin, and `xmin`/`xmax` the numeric bin
# edges. `ymin` is 0 and `ymax` repeats `Freq`, presumably so each bin can be
# drawn as a rectangle.
ggAggHist(data = structure(list(factorV = structure(1:20, .Label = c("[0,0.5]", "(0.5,1]", "(1,1.5]", "(1.5,2]", "(2,2.5]", "(2.5,3]", "(3,3.5]", "(3.5,4]", "(4,4.5]", "(4.5,5]", "(5,5.5]", "(5.5,6]", "(6,6.5]", "(6.5,7]", "(7,7.5]", "(7.5,8]", "(8,8.5]", "(8.5,9]", "(9,9.5]", "(9.5,10]"), class = "factor"), Freq = c(1L, 2L, 0L, 4L, 0L, 5L, 0L, 6L, 0L, 4L, 0L, 3L, 0L, 6L, 0L, 6L, 0L, 4L, 0L, 4L), xmin = c(0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5), xmax = c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10), ymin = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), ymax = c(1L, 2L, 0L, 4L, 0L, 5L, 0L, 6L, 0L, 4L, 0L, 3L, 0L, 6L, 0L, 6L, 0L, 4L, 0L, 4L)), .Names = c("factorV", "Freq", "xmin", "xmax", "ymin", "ymax"), row.names = c(NA, -20L ), class = "data.frame"), vnam = "favoriteNumber")
\emini
\fullline
Report generation information:
Created by Claus Thorn Ekstrøm (username: cld189).
Report creation time: Wed Apr 11 2018 01:02:32
Report was run from directory: /Users/cld189/ku/R/cleanR/latex
dataMaid v1.1.0 [Pkg: 2018-03-02 from local (ekstroem/dataMaid@NA)]
R version 3.4.4 (2018-03-15).
Platform: x86_64-apple-darwin15.6.0 (64-bit) (macOS High Sierra 10.13.3).
Function call: makeDataReport(data = presidentData, replace = TRUE, vol = "_cleaned",
checks = setChecks(character = defaultCharacterChecks(remove = "identifyLoners")),
reportTitle = "Dirty president data - cleaned", treatXasY = list(Name = "character",
complex = "numeric"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.