#' @title Generate a Table Profile
#'
#' @param .data (`data.frame`) The table to profile.
#'
#' @return (`list`) A nested list with the table attributes.
#'
#' @export
#'
#' @family TableCleaner high-level functions
#'
#' @examples
#' \dontrun{
#' generate_table_profile(mtcars)
#' }
#'
generate_table_profile <- function(.data){
.assert_is_non_empty_data.frame(.data)
## Helper Functions
.add_attribute <- function(profile, attribute, attribute_name){
.assert_is_list(profile)
for(var_name in names(attribute))
profile[[var_name]][attribute_name] <- as.list(attribute[[var_name]])
return(profile)
}
## Programming Logic
profile <-
list() %>%
.add_attribute(find_col_types(.data), "type") %>%
.add_attribute(mark_cols_with_na(.data), "na") %>%
.add_attribute(mark_cols_with_duplicates(.data), "duplicates")
}
# find_col_types ---------------------------------------------------------------
#
#' @title Find the Class of each Variable in a data.frame
#'
#' @inheritParams generate_table_profile
#'
#' @return (`character`) A character vector with the variable classes as vector
#' values and variable names as vector names.
#'
#' @export
#' @keywords internal
#'
#' @note Date and Time variables return tibble-style notations rather than their
#' class:
#' * A date, e.g. "2019-06-16" returns "date" rather than "Date"
#' * A time within a day, e.g. "12:57:50" returns "time" rather than "POSIXct"
#' * A date-time, e.g. "2019-06-16 12:57:50" returns "dttm" rather than POSIXct"
#'
#' @seealso \link[base]{class}
#' @family generate_table_profile low-level functions methods
#'
#' @examples
#' \dontrun{
#' find_col_types(mtcars)
#' }
#'
find_col_types <- function(.data){
## Helper Functios
.relabel_Date_as_date <- function(x) ifelse(x %in% "Date", "date", x)
.relabel_POSIXct_as_dttm <- function(x) ifelse(x %in% c("POSIXct", "POSIXt"), "dttm", x)
.relabel_Period_as_time <- function(x) ifelse(x %in% "Period", "time", x)
.class <- function(x) class(x)[1]
## Programming Logic
col_types <-
sapply(.data, .class) %>%
.relabel_Date_as_date() %>%
.relabel_POSIXct_as_dttm() %>%
.relabel_Period_as_time() %>%
stats::setNames(names(.data))
}
#
#' @title Mark Variables that Contain NA values
#'
#' @note There are two marking options:
#' * If \code{na} is \code{FLASE}, the variable doen't have NA values
#' * If \code{na} is \code{TRUE}, the variable has at least one NA value
#'
#' @inheritParams generate_table_profile
#'
#' @return (`character`) A character vector with the presence of NAs as vector
#' values and variable names as vector names.
#'
#' @export
#' @keywords internal
#'
#'
#' @seealso \link[base]{class}
#' @family generate_table_profile low-level functions methods
#'
#' @examples
#' \dontrun{
#' mark_cols_with_na(mtcars)
#' }
#'
mark_cols_with_na <- function(.data){
.has_na <- function(x) x %>% is.na() %>% any()
apply(.data, 2, .has_na)
}
#
#' @title Mark Variables that Have Duplicates
#'
#' @note There are two marking options:
#' * If \code{duplicates} is \code{FLASE}, the variable doen't have duplicates
#' * If \code{duplicates} is \code{TRUE}, the variable has one or more
#' duplicates
#'
#' @inheritParams generate_table_profile
#'
#' @return (`character`) A character vector with the presence of duplicates as
#' vector values and variable names as vector names.
#'
#' @export
#' @keywords internal
#'
#'
#' @seealso \link[base]{class}
#' @family generate_table_profile low-level functions methods
#'
#' @examples
#' \dontrun{
#' mark_cols_with_duplicates(mtcars)
#' }
#'
mark_cols_with_duplicates <- function(.data){
.has_duplicates <- function(x) x %>% duplicated() %>% any()
apply(.data, 2, .has_duplicates)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.