library(ggplot2)
library(pander)
# Look up the two plotting helpers by name inside the dataMaid namespace and
# bind them locally, so the plot calls further down can use them unqualified.
# NOTE(review): presumably these are not exported by dataMaid, hence the
# namespace lookup instead of a plain `dataMaid::` reference -- confirm.
ggAggHist <- utils::getFromNamespace(x = "ggAggHist", ns = "dataMaid")
ggAggBarplot <- utils::getFromNamespace(x = "ggAggBarplot", ns = "dataMaid")

Data report overview

The dataset examined has the following dimensions:


| Feature | Result |
|:--|--:|
| Number of observations | 45 |
| Number of variables | 11 |

Checks performed

The following variable checks were performed, depending on the data type of each variable:


  character factor labelled numeric integer logical Date


Identify miscoded missing values $\times$ $\times$ $\times$ $\times$ $\times$ $\times$

Identify prefixed and suffixed whitespace $\times$ $\times$ $\times$

Identify case issues $\times$ $\times$ $\times$

Identify misclassified numeric or integer variables $\times$ $\times$ $\times$

Identify levels with < 6 obs. $\times$ $\times$

Identify outliers $\times$ $\times$ $\times$

Non-supported variable types were set to be handled in the following way:

Please note that all numerical values in the following have been rounded to 2 decimals.

Summary table


| Variable | class | # unique values | Missing observations | Any problems? |
|:--|:--|--:|--:|:-:|
| [lastName] | character | 39 | 0.00 % | |
| [firstName] | character | 30 | 0.00 % | |
| [orderOfPresidency] | factor | 45 | 0.00 % | $\times$ |
| [birthday] | Date | 44 | 0.00 % | |
| [stateOfBirth] | character | 21 | 0.00 % | |
| [assassinationAttempt] | factor | 2 | 0.00 % | |
| [sex] | factor | 1 | 0.00 % | $\times$ |
| [ethnicity] | factor | 2 | 0.00 % | $\times$ |
| [presidencyYears] | numeric | 10 | 2.22 % | $\times$ |
| [ageAtInauguration] | numeric | 22 | 0.00 % | $\times$ |
| [favoriteNumber] | complex | 11 | 0.00 % | |

Variable list

lastName

\bminione


| Feature | Result |
|:--|--:|
| Variable type | character |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 39 |
| Mode | "Adams" |

\emini \bminitwo

# lastName: pass a pre-aggregated two-column data frame to ggAggBarplot.
#   x - factor with one level per distinct last name (39 levels, codes 1..39)
#   y - one integer per level (presumably the number of observations carrying
#       that name; the values sum to 45)
ggAggBarplot(
  data = data.frame(
    x = factor(
      1:39,
      labels = c(
        "Adams", "Arthur", "Buchanan", "Bush", "Carter", "Cleveland",
        "Clinton", "Coolidge", "Eisenhower", "Fillmore", "Ford", "Garfield",
        "Grant", "Harding", "Harrison", "Hayes", "Hoover", "Jackson",
        "Jefferson", "Johnson", "Kennedy", "Lincoln", "Madison", "McKinley",
        "Monroe", "Nixon", "Obama", "Pierce", "Polk", "Reagan", "Roosevelt",
        "Taft", "Taylor", "Truman", "Trump", "Tyler", "Van Buren",
        "Washington", "Wilson"
      )
    ),
    y = c(
      2L, 1L, 1L, 2L, 1L, 2L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    )
  ),
  vnam = "lastName"
)

\emini

\fullline

firstName

\bminione


| Feature | Result |
|:--|--:|
| Variable type | character |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 30 |
| Mode | "James" |

\emini \bminitwo

# firstName: pass a pre-aggregated two-column data frame to ggAggBarplot.
#   x - factor with one level per distinct first name (30 levels, codes 1..30)
#   y - one integer per level (presumably the number of observations carrying
#       that name)
ggAggBarplot(
  data = data.frame(
    x = factor(
      1:30,
      labels = c(
        "Abraham", "Andrew", "Barack", "Benjamin", "Calvin", "Chester",
        "Donald", "Dwight", "Franklin", "George", "Gerald", "Grover",
        "Harry", "Herbert", "James", "Jimmy", "John", "Lyndon", "Martin",
        "Millard", "Richard", "Ronald", "Rutherford", "Theodore", "Thomas",
        "Ulysses", "Warren", "William", "Woodrow", "Zachary"
      )
    ),
    y = c(
      1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L,
      1L, 5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L,
      1L, 1L
    )
  ),
  vnam = "firstName"
)

\emini

\fullline

orderOfPresidency

\fullline

birthday

\bminione


| Feature | Result |
|:--|--:|
| Variable type | Date |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 44 |
| Mode | "1837-03-18" |
| Min. and max. | 1732-02-22; 1961-08-04 |
| 1st and 3rd quartiles | 1791-04-23; 1890-10-14 |

\emini \bminitwo

# birthday: pass a pre-binned table (20 rows, one per bin) to ggAggHist.
#   factorV     - factor whose labels are the bins' starting dates
#   Freq, ymax  - integer per bin (identical vectors; presumably the bin count)
#   xmin, xmax  - Date bin edges, stored as day offsets with class "Date";
#                 each xmax equals the next row's xmin
#   ymin        - zero for every bin
ggAggHist(
  data = data.frame(
    factorV = factor(
      1:20,
      labels = c(
        "1732-02-22", "1743-08-13", "1755-02-01", "1766-07-24",
        "1778-01-12", "1789-07-03", "1800-12-23", "1812-06-13",
        "1823-12-04", "1835-05-25", "1846-11-13", "1858-05-04",
        "1869-10-23", "1881-04-14", "1892-10-03", "1904-03-25",
        "1915-09-14", "1927-03-05", "1938-08-25", "1950-02-13"
      )
    ),
    Freq = c(
      3L, 1L, 1L, 3L, 2L, 4L, 3L, 2L, 3L, 3L,
      2L, 2L, 2L, 3L, 0L, 4L, 3L, 0L, 3L, 1L
    ),
    xmin = structure(
      c(
        -86876, -82686, -78496, -74305, -70115,
        -65925, -61735, -57545, -53354, -49164,
        -44974, -40784, -36594, -32403, -28213,
        -24023, -19833, -15643, -11452, -7262
      ),
      class = "Date"
    ),
    xmax = structure(
      c(
        -82686, -78496, -74305, -70115, -65925,
        -61735, -57545, -53354, -49164, -44974,
        -40784, -36594, -32403, -28213, -24023,
        -19833, -15643, -11452, -7262, -3072
      ),
      class = "Date"
    ),
    ymin = rep(0, 20),
    ymax = c(
      3L, 1L, 1L, 3L, 2L, 4L, 3L, 2L, 3L, 3L,
      2L, 2L, 2L, 3L, 0L, 4L, 3L, 0L, 3L, 1L
    )
  ),
  vnam = "birthday"
)

\emini

\fullline

stateOfBirth

\bminione


| Feature | Result |
|:--|--:|
| Variable type | character |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 21 |
| Mode | "Ohio" |

\emini \bminitwo

# stateOfBirth: pass a pre-aggregated two-column data frame to ggAggBarplot.
#   x - factor with one level per distinct state (21 levels, codes 1..21)
#   y - one integer per level (presumably the number of observations born in
#       that state; the values sum to 45)
ggAggBarplot(
  data = data.frame(
    x = factor(
      1:21,
      labels = c(
        "Arkansas", "California", "Connecticut", "Georgia", "Hawaii",
        "Illinois", "Iowa", "Kentucky", "Massachusetts", "Missouri",
        "Nebraska", "New Hampshire", "New Jersey", "New York",
        "North Carolina", "Ohio", "Pennsylvania", "Tennessee", "Texas",
        "Vermont", "Virginia"
      )
    ),
    y = c(
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L,
      1L, 1L, 1L, 2L, 5L, 2L, 8L, 1L, 1L, 2L, 2L, 7L
    )
  ),
  vnam = "stateOfBirth"
)

\emini

\fullline

assassinationAttempt

\bminione


| Feature | Result |
|:--|--:|
| Variable type | factor |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 2 |
| Mode | "0" |

\emini \bminitwo

# assassinationAttempt: pass a pre-aggregated two-column data frame to
# ggAggBarplot.
#   x - factor with the two levels "0" and "1"
#   y - one integer per level (presumably the observation count; 35 + 10 = 45)
ggAggBarplot(
  data = data.frame(
    x = factor(1:2, labels = c("0", "1")),
    y = c(35L, 10L)
  ),
  vnam = "assassinationAttempt"
)

\emini

\fullline

sex

\fullline

ethnicity

\bminione


| Feature | Result |
|:--|--:|
| Variable type | factor |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 2 |
| Mode | "Caucasian" |

\emini \bminitwo

# ethnicity: pass a pre-aggregated two-column data frame to ggAggBarplot.
#   x - factor with the two levels "African American" and "Caucasian"
#   y - one integer per level (presumably the observation count; 1 + 44 = 45)
ggAggBarplot(
  data = data.frame(
    x = factor(1:2, labels = c("African American", "Caucasian")),
    y = c(1L, 44L)
  ),
  vnam = "ethnicity"
)

\emini

\fullline

presidencyYears

\bminione


| Feature | Result |
|:--|--:|
| Variable type | numeric |
| Number of missing obs. | 1 (2.22 %) |
| Number of unique values | 9 |
| Median | 4 |
| 1st and 3rd quartiles | 3.75; 8 |
| Min. and max. | 0; 12 |

\emini \bminitwo

# presidencyYears: pass a pre-binned table (20 rows, one per bin) to ggAggHist.
#   factorV     - factor whose labels spell out each bin's interval
#   Freq, ymax  - integer per bin (identical vectors; presumably the bin count)
#   xmin, xmax  - numeric bin edges matching the interval labels
#   ymin        - zero for every bin
ggAggHist(
  data = data.frame(
    factorV = factor(
      1:20,
      labels = c(
        "[0,0.6]", "(0.6,1.2]", "(1.2,1.8]", "(1.8,2.4]", "(2.4,3]",
        "(3,3.6]", "(3.6,4.2]", "(4.2,4.8]", "(4.8,5.4]", "(5.4,6]",
        "(6,6.6]", "(6.6,7.2]", "(7.2,7.8]", "(7.8,8.4]", "(8.4,9]",
        "(9,9.6]", "(9.6,10.2]", "(10.2,10.8]", "(10.8,11.4]", "(11.4,12]"
      )
    ),
    Freq = c(
      2L, 1L, 0L, 4L, 4L, 0L, 15L, 0L, 3L, 0L,
      0L, 3L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 1L
    ),
    xmin = c(
      0, 0.6, 1.2, 1.8, 2.4, 3, 3.6, 4.2, 4.8, 5.4,
      6, 6.6, 7.2, 7.8, 8.4, 9, 9.6, 10.2, 10.8, 11.4
    ),
    xmax = c(
      0.6, 1.2, 1.8, 2.4, 3, 3.6, 4.2, 4.8, 5.4, 6,
      6.6, 7.2, 7.8, 8.4, 9, 9.6, 10.2, 10.8, 11.4, 12
    ),
    ymin = rep(0, 20),
    ymax = c(
      2L, 1L, 0L, 4L, 4L, 0L, 15L, 0L, 3L, 0L,
      0L, 3L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 1L
    )
  ),
  vnam = "presidencyYears"
)

\emini

\fullline

ageAtInauguration

\bminione


| Feature | Result |
|:--|--:|
| Variable type | numeric |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 22 |
| Median | 55 |
| 1st and 3rd quartiles | 51; 58 |
| Min. and max. | 42; 70 |

\emini \bminitwo

# ageAtInauguration: pass a pre-binned table (20 rows, one per bin) to
# ggAggHist.
#   factorV     - factor whose labels spell out each bin's interval
#   Freq, ymax  - integer per bin (identical vectors; presumably the bin count)
#   xmin, xmax  - numeric bin edges matching the interval labels
#   ymin        - zero for every bin
ggAggHist(
  data = data.frame(
    factorV = factor(
      1:20,
      labels = c(
        "[42,43.4]", "(43.4,44.8]", "(44.8,46.2]", "(46.2,47.6]",
        "(47.6,49]", "(49,50.4]", "(50.4,51.8]", "(51.8,53.2]",
        "(53.2,54.6]", "(54.6,56]", "(56,57.4]", "(57.4,58.8]",
        "(58.8,60.2]", "(60.2,61.6]", "(61.6,63]", "(63,64.4]",
        "(64.4,65.8]", "(65.8,67.2]", "(67.2,68.6]", "(68.6,70]"
      )
    ),
    Freq = c(
      2L, 0L, 2L, 2L, 3L, 2L, 4L, 2L, 5L, 7L,
      4L, 1L, 1L, 3L, 1L, 2L, 1L, 0L, 1L, 2L
    ),
    xmin = c(
      42, 43.4, 44.8, 46.2, 47.6, 49, 50.4, 51.8, 53.2, 54.6,
      56, 57.4, 58.8, 60.2, 61.6, 63, 64.4, 65.8, 67.2, 68.6
    ),
    xmax = c(
      43.4, 44.8, 46.2, 47.6, 49, 50.4, 51.8, 53.2, 54.6, 56,
      57.4, 58.8, 60.2, 61.6, 63, 64.4, 65.8, 67.2, 68.6, 70
    ),
    ymin = rep(0, 20),
    ymax = c(
      2L, 0L, 2L, 2L, 3L, 2L, 4L, 2L, 5L, 7L,
      4L, 1L, 1L, 3L, 1L, 2L, 1L, 0L, 1L, 2L
    )
  ),
  vnam = "ageAtInauguration"
)

\emini

\fullline

favoriteNumber

\bminione


| Feature | Result |
|:--|--:|
| Variable type | complex |
| Number of missing obs. | 0 (0 %) |
| Number of unique values | 11 |
| Median | 6 |
| 1st and 3rd quartiles | 3; 8 |
| Min. and max. | 0; 10 |

\emini \bminitwo

# favoriteNumber: pass a pre-binned table (20 rows, one per bin) to ggAggHist.
#   factorV     - factor whose labels spell out each bin's interval
#   Freq, ymax  - integer per bin (identical vectors; presumably the bin count)
#   xmin, xmax  - numeric bin edges matching the interval labels
#   ymin        - zero for every bin
ggAggHist(
  data = data.frame(
    factorV = factor(
      1:20,
      labels = c(
        "[0,0.5]", "(0.5,1]", "(1,1.5]", "(1.5,2]", "(2,2.5]",
        "(2.5,3]", "(3,3.5]", "(3.5,4]", "(4,4.5]", "(4.5,5]",
        "(5,5.5]", "(5.5,6]", "(6,6.5]", "(6.5,7]", "(7,7.5]",
        "(7.5,8]", "(8,8.5]", "(8.5,9]", "(9,9.5]", "(9.5,10]"
      )
    ),
    Freq = c(
      1L, 2L, 0L, 4L, 0L, 5L, 0L, 6L, 0L, 4L,
      0L, 3L, 0L, 6L, 0L, 6L, 0L, 4L, 0L, 4L
    ),
    xmin = c(
      0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5,
      5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5
    ),
    xmax = c(
      0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5,
      5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10
    ),
    ymin = rep(0, 20),
    ymax = c(
      1L, 2L, 0L, 4L, 0L, 5L, 0L, 6L, 0L, 4L,
      0L, 3L, 0L, 6L, 0L, 6L, 0L, 4L, 0L, 4L
    )
  ),
  vnam = "favoriteNumber"
)

\emini

\fullline

Report generation information:



ekstroem/dataMaid documentation built on Jan. 31, 2022, 9:10 a.m.