R/TableProfiler-methods.R

################################################################################
##                               Find Functions                               ##
################################################################################
# Profiler ---------------------------------------------------------------------
#'
#' @title Generate a Table Profile
#'
#' @param .data (`data.frame`) The table to profile.
#'
#' @return (`list`) A nested list with the table attributes.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' generate_table_profile(mtcars)
#' }
#'
generate_table_profile <- function(.data){
    .assert_is_non_empty_data.frame(.data)

    ## Helper Functions
    .add_attribute <- function(profile, attribute, attribute_name){
        .assert_is_list(profile)
        for(var_name in names(attribute))
            profile[[var_name]][attribute_name] <- as.list(attribute[[var_name]])

        return(profile)
    }

    ## Programming Logic
    profile <-
        list() %>%
        .add_attribute(find_col_types(.data), "type") %>%
        .add_attribute(mark_cols_with_na(.data), "na")
}

# find_col_types ---------------------------------------------------------------
#
#' @title Find the Class of each Variable in a data.frame
#'
#' @inheritParams generate_table_profile
#'
#' @return (`character`) A character vector with the variable classes as vector
#'   values and variable names as vector names.
#'
#' @export
#' @keywords internal
#'
#' @note Date and Time variables return tibble-style notations rather than their
#'   class:
#' * A date, e.g. "2019-06-16" returns "date" rather than "Date"
#' * A time within a day, e.g. "12:57:50" returns "time" rather than "POSIXct"
#' * A date-time, e.g. "2019-06-16 12:57:50" returns "dttm" rather than POSIXct"
#'
#' @seealso \link[base]{class}
#' @family TableProfiler methods
#'
#' @examples
#' \dontrun{
#' find_col_types(mtcars)
#' }
#'
find_col_types <- function(.data){
    ## Helper Functios
    .relabel_Date_as_date <- function(x) ifelse(x %in% "Date", "date", x)
    .relabel_POSIXct_as_dttm <- function(x) ifelse(x %in% c("POSIXct", "POSIXt"), "dttm", x)
    .relabel_Period_as_time <- function(x) ifelse(x %in% "Period", "time", x)
    .class <- function(x) class(x)[1]

    ## Programming Logic
    col_types <-
        sapply(.data, .class) %>%
        .relabel_Date_as_date() %>%
        .relabel_POSIXct_as_dttm() %>%
        .relabel_Period_as_time() %>%
        stats::setNames(names(.data))
}
#
#' @title Mark Variable that Contain NA values
#'
#' @note There are two marking options:
#'  * If \code{na} is \code{FLASE}, the variable doen't have NA values
#'  * If \code{na} is \code{TRUE}, the variable has at least one NA value
#'
#' @inheritParams generate_table_profile
#'
#' @return (`character`) A character vector with the presence of NAs as vector
#'   values and variable names as vector names.
#'
#' @export
#' @keywords internal
#'
#'
#' @seealso \link[base]{class}
#' @family TableProfiler methods
#'
#' @examples
#' \dontrun{
#' mark_cols_with_na(mtcars)
#' }
#'
mark_cols_with_na <- function(.data){
    .has_na <- function(x) x %>% is.na() %>% any()
    apply(.data, 2, .has_na)
}
tidylab/tidylab.dqa documentation built on June 21, 2019, 7 p.m.