library(ggplot2)
library(pander)
# Bind dataMaid's two aggregated-plot helpers to local names so the plot
# chunks below can call them directly. They are looked up in the package
# namespace (presumably because dataMaid does not export them -- confirm).
ggAggHist <- utils::getFromNamespace(x = "ggAggHist", ns = "dataMaid")
ggAggBarplot <- utils::getFromNamespace(x = "ggAggBarplot", ns = "dataMaid")

Data report overview

The dataset examined has the following dimensions:


| Feature                | Result |
|:-----------------------|:-------|
| Number of observations | 46     |
| Number of variables    | 11     |

Checks performed

The following variable checks were performed, depending on the data type of each variable:


  character factor labelled numeric integer logical Date


Identify miscoded missing values $\times$ $\times$ $\times$ $\times$ $\times$ $\times$

Identify prefixed and suffixed whitespace $\times$ $\times$ $\times$

Identify case issues $\times$ $\times$ $\times$

Identify misclassified numeric or integer variables $\times$ $\times$ $\times$

Identify levels with < 6 obs. $\times$ $\times$

Identify outliers $\times$ $\times$ $\times$

Non-supported variable types were set to be handled in the following way:

Please note that all numerical values in the following have been rounded to 2 decimals.

Summary table


|                        | Variable class | # unique values | Missing observations | Any problems? |
|:-----------------------|:---------------|:----------------|:---------------------|:--------------|
| [lastName]             | Name           | 40              | 0.00 %               | $\times$      |
| [firstName]            | Name           | 31              | 0.00 %               | $\times$      |
| [orderOfPresidency]    | factor         | 46              | 0.00 %               | $\times$      |
| [birthday]             | Date           | 45              | 0.00 %               | $\times$      |
| [stateOfBirth]         | character      | 23              | 0.00 %               | $\times$      |
| [assassinationAttempt] | numeric        | 2               | 0.00 %               |               |
| [sex]                  | factor         | 1               | 0.00 %               | $\times$      |
| [ethnicity]            | factor         | 2               | 0.00 %               | $\times$      |
| [presidencyYears]      | numeric        | 11              | 4.35 %               | $\times$      |
| [ageAtInauguration]    | character      | 23              | 0.00 %               | $\times$      |
| [favoriteNumber]       | complex        | 11              | 0.00 %               | $\times$      |

Variable list

lastName

\bminione


| Feature                 | Result  |
|:------------------------|:--------|
| Variable type           | Name    |
| Number of missing obs.  | 0 (0 %) |
| Number of unique values | 40      |
| Mode                    | "Adams" |

\emini \bminitwo

# Bar chart of the lastName counts: one row per distinct value (40 in all).
# `x` is a factor whose levels appear in plotting order; the first level,
# " Truman", is reproduced exactly, including its leading space.
# `y` holds the integer count for the level in the same position.
ggAggBarplot(
  data = data.frame(
    x = factor(
      seq_len(40L),
      labels = c(
        " Truman", "Adams", "Arathornson", "Arthur", "Buchanan",
        "Bush", "Carter", "Cleveland", "Clinton", "Coolidge",
        "Eisenhower", "Fillmore", "Ford", "Garfield", "Grant",
        "Harding", "Harrison", "Hayes", "Hoover", "Jackson",
        "Jefferson", "Johnson", "Kennedy", "Lincoln", "Madison",
        "McKinley", "Monroe", "Nixon", "Obama", "Pierce",
        "Polk", "Reagan", "Roosevelt", "Taft", "Taylor",
        "Trump", "Tyler", "Van Buren", "Washington", "Wilson"
      )
    ),
    y = c(
      1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
      1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
      1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    )
  ),
  vnam = "lastName"
)

\emini

\fullline

firstName

\bminione


| Feature                 | Result  |
|:------------------------|:--------|
| Variable type           | Name    |
| Number of missing obs.  | 0 (0 %) |
| Number of unique values | 31      |
| Mode                    | "James" |

\emini \bminitwo

# Bar chart of the firstName counts: one row per distinct value (31 in all).
# `x` is a factor with the levels in plotting order (the first level is the
# literal string "."); `y` is the integer count for the matching level.
ggAggBarplot(
  data = data.frame(
    x = factor(
      seq_len(31L),
      labels = c(
        ".", "Abraham", "Andrew", "Aragorn", "Barack", "Benjamin",
        "Calvin", "Chester", "Dwight", "Franklin", "George", "Gerald",
        "Grover", "Harry", "Herbert", "James", "Jimmy", "John",
        "Lyndon", "Martin", "Millard", "Richard", "Ronald",
        "Rutherford", "Theodore", "Thomas", "Ulysses", "Warren",
        "William", "Woodrow", "Zachary"
      )
    ),
    y = c(
      1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
      3L, 1L, 2L, 1L, 1L, 5L, 1L, 4L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L,
      1L
    )
  ),
  vnam = "firstName"
)

\emini

\fullline

orderOfPresidency

\fullline

birthday

\bminione


| Feature                 | Result                 |
|:------------------------|:-----------------------|
| Variable type           | Date                   |
| Number of missing obs.  | 0 (0 %)                |
| Number of unique values | 45                     |
| Mode                    | "1837-03-18"           |
| Min. and max.           | 1300-03-01; 1961-08-04 |
| 1st and 3rd quartiles   | 1790-03-29; 1890-10-14 |

\emini \bminitwo

# Pre-binned histogram of birthday: 20 consecutive bins, one row each.
#   factorV    - bin label (the bin's start date as text)
#   Freq       - integer count of observations in the bin
#   xmin, xmax - bin edges as Date values, given as day offsets from
#                1970-01-01
#   ymin, ymax - vertical extent of each bar (0 up to the bin count)
# Only the first bin and the last seven bins are non-empty.
ggAggHist(
  data = data.frame(
    factorV = factor(
      seq_len(20L),
      labels = c(
        "1300-03-01", "1333-03-27", "1366-04-22", "1399-05-18",
        "1432-06-13", "1465-07-09", "1498-08-04", "1531-08-31",
        "1564-09-25", "1597-10-21", "1630-11-16", "1663-12-13",
        "1697-01-07", "1730-02-03", "1763-03-01", "1796-03-26",
        "1829-04-22", "1862-05-18", "1895-06-13", "1928-07-09"
      )
    ),
    Freq = c(1L, rep(0L, 12L), 5L, 8L, 6L, 9L, 6L, 7L, 4L),
    xmin = as.Date(
      c(
        -244653, -232574, -220495, -208416, -196337,
        -184258, -172179, -160100, -148021, -135942,
        -123863, -111783, -99704, -87625, -75546,
        -63467, -51388, -39309, -27230, -15151
      ),
      origin = "1970-01-01"
    ),
    xmax = as.Date(
      c(
        -232574, -220495, -208416, -196337, -184258,
        -172179, -160100, -148021, -135942, -123863,
        -111783, -99704, -87625, -75546, -63467,
        -51388, -39309, -27230, -15151, -3072
      ),
      origin = "1970-01-01"
    ),
    ymin = rep(0, 20L),
    ymax = c(1L, rep(0L, 12L), 5L, 8L, 6L, 9L, 6L, 7L, 4L)
  ),
  vnam = "birthday"
)

\emini

\fullline

stateOfBirth

\bminione


| Feature                 | Result    |
|:------------------------|:----------|
| Variable type           | character |
| Number of missing obs.  | 0 (0 %)   |
| Number of unique values | 23        |
| Mode                    | "Ohio"    |

\emini \bminitwo

# Bar chart of the stateOfBirth counts: one row per distinct value (23).
# "New York" and "New york" are separate levels in the data and are kept
# as such here. `y` is the integer count for the matching level of `x`.
ggAggBarplot(
  data = data.frame(
    x = factor(
      seq_len(23L),
      labels = c(
        "Arkansas", "California", "Connecticut", "Georgia", "Gondor",
        "Hawaii", "Illinois", "Iowa", "Kentucky", "Massachusetts",
        "Missouri", "Nebraska", "New Hampshire", "New Jersey",
        "New York", "New york", "North Carolina", "Ohio",
        "Pennsylvania", "Tennessee", "Texas", "Vermont", "Virginia"
      )
    ),
    y = c(
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L,
      1L, 1L, 1L, 2L, 4L, 1L, 2L, 8L, 1L, 1L,
      2L, 2L, 7L
    )
  ),
  vnam = "stateOfBirth"
)

\emini

\fullline

assassinationAttempt

\bminione


| Feature                 | Result  |
|:------------------------|:--------|
| Variable type           | numeric |
| Number of missing obs.  | 0 (0 %) |
| Number of unique values | 2       |
| Mode                    | "0"     |

\emini \bminitwo

# Bar chart of assassinationAttempt: two levels, "0" (35 observations)
# and "1" (11 observations).
ggAggBarplot(
  data = data.frame(
    x = factor(seq_len(2L), labels = c("0", "1")),
    y = c(35L, 11L)
  ),
  vnam = "assassinationAttempt"
)

\emini

\fullline

sex

\fullline

ethnicity

\bminione


| Feature                 | Result      |
|:------------------------|:------------|
| Variable type           | factor      |
| Number of missing obs.  | 0 (0 %)     |
| Number of unique values | 2           |
| Mode                    | "Caucasian" |

\emini \bminitwo

# Bar chart of ethnicity: two levels, "African American" (1 observation)
# and "Caucasian" (45 observations).
ggAggBarplot(
  data = data.frame(
    x = factor(
      seq_len(2L),
      labels = c("African American", "Caucasian")
    ),
    y = c(1L, 45L)
  ),
  vnam = "ethnicity"
)

\emini

\fullline

presidencyYears

\bminione


| Feature                 | Result     |
|:------------------------|:-----------|
| Variable type           | numeric    |
| Number of missing obs.  | 2 (4.35 %) |
| Number of unique values | 10         |
| Median                  | 4          |
| 1st and 3rd quartiles   | 3.75; 8    |
| Min. and max.           | 0; Inf     |

\emini \bminitwo

# Pre-binned histogram of presidencyYears: 20 bins covering 0 to 12.
#   factorV    - interval label for the bin
#   Freq       - integer count of observations in the bin
#   xmin, xmax - numeric bin edges
#   ymin, ymax - vertical extent of each bar (0 up to the bin count)
# The bin edges are written out as literals rather than generated with
# seq(), so the stored doubles are exactly the ones originally supplied.
ggAggHist(
  data = data.frame(
    factorV = factor(
      seq_len(20L),
      labels = c(
        "[0,0.6]", "(0.6,1.2]", "(1.2,1.8]", "(1.8,2.4]", "(2.4,3]",
        "(3,3.6]", "(3.6,4.2]", "(4.2,4.8]", "(4.8,5.4]", "(5.4,6]",
        "(6,6.6]", "(6.6,7.2]", "(7.2,7.8]", "(7.8,8.4]", "(8.4,9]",
        "(9,9.6]", "(9.6,10.2]", "(10.2,10.8]", "(10.8,11.4]",
        "(11.4,12]"
      )
    ),
    Freq = c(
      2L, 1L, 0L, 4L, 4L, 0L, 15L, 0L, 3L, 0L,
      0L, 3L, 0L, 10L, 0L, 0L, 0L, 0L, 0L, 1L
    ),
    xmin = c(
      0, 0.6, 1.2, 1.8, 2.4, 3, 3.6, 4.2, 4.8, 5.4,
      6, 6.6, 7.2, 7.8, 8.4, 9, 9.6, 10.2, 10.8, 11.4
    ),
    xmax = c(
      0.6, 1.2, 1.8, 2.4, 3, 3.6, 4.2, 4.8, 5.4, 6,
      6.6, 7.2, 7.8, 8.4, 9, 9.6, 10.2, 10.8, 11.4, 12
    ),
    ymin = rep(0, 20L),
    ymax = c(
      2L, 1L, 0L, 4L, 4L, 0L, 15L, 0L, 3L, 0L,
      0L, 3L, 0L, 10L, 0L, 0L, 0L, 0L, 0L, 1L
    )
  ),
  vnam = "presidencyYears"
)

\emini

\fullline

ageAtInauguration

\bminione


| Feature                 | Result    |
|:------------------------|:----------|
| Variable type           | character |
| Number of missing obs.  | 0 (0 %)   |
| Number of unique values | 23        |
| Mode                    | "54"      |

\emini \bminitwo

# Bar chart of ageAtInauguration: the variable is stored as character, so
# each distinct age string is its own factor level (23 in all).
# `y` is the integer count for the matching level of `x`.
ggAggBarplot(
  data = data.frame(
    x = factor(
      seq_len(23L),
      labels = c(
        "42", "43", "46", "47", "48", "49", "50", "51",
        "52", "54", "55", "56", "57", "58", "60", "61",
        "62", "64", "65", "68", "69", "70", "87"
      )
    ),
    y = c(
      1L, 1L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 5L,
      4L, 3L, 4L, 1L, 1L, 3L, 1L, 2L, 1L, 1L,
      1L, 1L, 1L
    )
  ),
  vnam = "ageAtInauguration"
)

\emini

\fullline

favoriteNumber

\fullline

Report generation information:



ekstroem/dataMaid documentation built on Jan. 31, 2022, 9:10 a.m.