#### Global chunk options -----------------------------
# Defaults applied to every chunk in this document.
knitr::opts_chunk$set(
  echo = TRUE, # show each chunk's code in the final output
  results = "asis", # write chunk results to the output as-is
  NULL # NOTE(review): presumably a placeholder so every option can end in a comma
)

#### Other options --------------------------------
# To inspect the current scientific-notation setting: getOption("scipen")
options(
  ## Width for code chunks ----------------
  width = 120,
  ## Scientific notation ---------------
  # Turn off scientific notation
  scipen = 999,
  ## Define format ----------------
  # Without this default, every kable() call needs `format = "html"`.
  # Use "html" when knitting html and "latex" when knitting pdf.
  knitr.table.format = "html"
)

#### Packages --------------------------------------
# All packages used by this document are loaded here.

## Define the repository for packages ----------------
options(repos = c(CRAN = "https://cran.rstudio.com"))

## Universally useful packages ----------------
# if (!require("pacman")) {install.packages("pacman")}
# if (!require("devtools")) {install.packages("devtools")}

## Package version ----------------
# As of 2019-10-18, I've found the GitHub version of summarytools to be more
# reliable and recommend installing from there.
# https://github.com/dcomtois/summarytools
# install_github("dcomtois/summarytools")

## Load the list of packages ----------------
pacman::p_load(
  tidyverse, # ggplot2, dplyr, tidyr, readr, purrr, tibble, stringr, forcats
  broom, # tidy(), glance(), augment()
  flextable, # tables for reporting and publications
  fs, # cross-platform file system operations
  glue, # glue strings to data
  here, # construct paths to the project's files
  janitor, # helpers for working with dirty data
  mice, # multiple imputation using Fully Conditional Specification
  naniar, # structures, summaries, and visualisations for missing data
  officer, # used by flextable
  readxl, # read Excel files
  summarytools, # neat, quick data summaries
  install = FALSE
)

#### Other packages -----------------------------

#### Some resources ---------------------------

#### Themes and colors -------------------------------

#### Load fonts --------------------------------
# extrafont::fonts()       # vector of font family names
# extrafont::fonttable()   # show the entire font table
# extrafont::font_import() # import fonts installed on the system
extrafont::loadfonts(device = "pdf", quiet = TRUE)

#### Summarytools options --------------------------------
summarytools::st_options(
  bootstrap.css = FALSE, # already part of the theme, so not needed
  plain.ascii = FALSE, # one of the essential settings
  style = "rmarkdown", # idem
  dfSummary.silent = TRUE, # suppress messages about temporary files
  dfSummary.varnumbers = FALSE, # keeps the results narrow enough
  footnote = NA # keep the results minimalistic
)
# summarytools::st_css()
# Example dataset used throughout this report: `riskfactors`, shipped with
# naniar. It is referenced through the package namespace so the assignment
# does not depend on naniar being attached or on `riskfactors` being unmasked.
# Equivalent explicit load: data("riskfactors", package = "naniar")
data <- naniar::riskfactors
# Data-frame summary of `data`, printed with summarytools' "render" method.
print(
  summarytools::dfSummary(
    data,
    style = "grid",
    graph.magnif = 0.75,
    valid.col = FALSE
  ),
  method = "render"
)
# One-row table: total rows, rows with at least one missing value, fully
# complete rows, and the corresponding proportions.
data |>
  dplyr::summarise(
    n_rows = nrow(data),
    n_missing = naniar::n_case_miss(data),
    prop_missing = n_missing / n_rows,
    n_complete = naniar::n_case_complete(data),
    prop_complete = n_complete / n_rows
  ) |>
  flextable::flextable() |>
  # Header and body sizes are set independently of each other.
  flextable::fontsize(size = 14, part = "header") |>
  flextable::fontsize(size = 12, part = "body")
Missingness plot restricted to the variables that have at least one missing value.
# Keep only the columns that contain at least one missing value, then plot the
# missingness pattern with rows clustered and columns left in their original
# order.
# https://cran.r-project.org/web/packages/naniar/vignettes/getting-started-w-naniar.html
#
# `select(where(...))` replaces the superseded `dplyr::select_if()`. `where()`
# is left unqualified because it is resolved inside the tidyselect context.
data |>
  dplyr::select(where(naniar::any_na)) |>
  naniar::vis_miss(cluster = TRUE, sort_miss = FALSE)
Summary of the number and proportion of columns with and without missing values.
# One-row table: number of variables, how many of them have at least one
# missing value, how many are fully observed, and the matching proportions.
#
# `purrr::map_int()` replaces the superseded `purrr::map_df()` and guarantees
# an integer count per column. `tibble::enframe()` builds the long
# (vars, n_miss) table directly, so no wide-to-long pivot is needed.
data |>
  purrr::map_int(\(column) sum(is.na(column))) |>
  tibble::enframe(name = "vars", value = "n_miss") |>
  dplyr::summarise(
    n_var = length(n_miss),
    n_missing = sum(n_miss > 0),
    prop_missing = n_missing / n_var,
    n_complete = sum(n_miss == 0),
    prop_complete = n_complete / n_var
  ) |>
  flextable::flextable() |>
  flextable::fontsize(part = "body", size = 12) |>
  flextable::fontsize(part = "header", size = 14)
For each variable, the number and the percentage of missing values. By default, the variables are ordered from most to fewest missing values.
# Per-variable missingness summary, rendered as a flextable.
naniar::miss_var_summary(data) |>
  # dplyr::slice(1:20) |>  # optional: keep only the first 20 rows
  flextable::flextable() |>
  # Header and body sizes are set independently of each other.
  flextable::fontsize(size = 14, part = "header") |>
  flextable::fontsize(size = 12, part = "body")
The number of cases with 0, 1, 2, …, n missing values, and the percentage of all cases that each of these groups makes up.
# Tabulation of cases by their number of missing values, rendered as a
# flextable.
naniar::miss_case_table(data) |>
  flextable::flextable() |>
  # Header and body sizes are set independently of each other.
  flextable::fontsize(size = 14, part = "header") |>
  flextable::fontsize(size = 12, part = "body")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.