# R/misc_helpers.R
#
# Defines functions: empty_as_na, make_time_stamp, make_date_underscored, get_df_miss_perc, get_miss_perc
#
# Documented in: empty_as_na, get_df_miss_perc, get_miss_perc, make_date_underscored, make_time_stamp

#' @title Percentage of missing values in a single column
#' @description Reports how much of one column (vector) consists of values
#'   that should be treated as missing. The column is converted with
#'   `as.character()` before it is compared against `missing_value`, so
#'   string markers such as `"NA"` or `""` can be counted alongside real `NA`.
#'   Can be mapped or future_mapped over the columns of a dataframe.
#' @usage get_miss_perc(data, missing_value, ...)
#' @param data the column (vector) to inspect, e.g. `my_tibble$col2`
#' @param missing_value the value or values to count as "missing",
#'   e.g. `NA`, `"NA"`, `""`, or `c("NA", NA, "")`
#' @param ... currently unused
#' @return a character string: the share of entries in `data` that match
#'   `missing_value`, formatted by `scales::percent()`
#' @importFrom scales percent
#' @examples
#' \dontrun{
#' library(tibble)
#'
#' col1 <- seq(1, 10, 1)
#' col2 <- c("NA", 5, 7, NA, 41, "NA", 6, 8, "NA", NA)
#' my_tibble <- tibble(col1 = col1, col2 = col2)
#' get_miss_perc(my_tibble$col1, missing_value = "NA")
#' get_miss_perc(my_tibble$col2, missing_value = c("NA", NA))
#' }
#' @export
get_miss_perc <- function(data, missing_value = NA, ...) {

  values <- as.character(data)

  # TRUE wherever the (character) value is one of the "missing" markers.
  is_missing <- values %in% c(missing_value)

  percent(sum(is_missing) / length(values))
}


#' @title Proportion of missing values in every column
#' @description Computes, for each column of a dataframe or tibble, the share
#'   of entries that are `NA` and returns one row per column. Each column is
#'   converted with `as.character()` first; only real `NA` values are counted,
#'   string markers such as `"NA"` or `""` are not.
#' @usage get_df_miss_perc(x)
#' @param x is the dataframe or tibble
#' @return a tibble with one row per column of `x` and two columns:
#'   `col_name` (the column name) and `miss_perc` (the proportion of `NA`
#'   entries as a number between 0 and 1, or `NA` for a zero-length column)
#' @importFrom dplyr mutate
#' @importFrom tidyr unnest
#' @importFrom tibble enframe
#' @importFrom purrr map
#' @importFrom stringr str_remove
#' @importFrom scales percent
#' @examples
#' \dontrun{
#' library(tibble)
#' col1 <- seq(1, 10, 1)
#' col2 <- c("NA", 5, 7, NA, 41, "NA", 6, 8, "NA", NA)
#' my_tibble <- tibble(col1 = col1, col2 = col2)
#' get_df_miss_perc(my_tibble)
#' }
#' @export
get_df_miss_perc <- function(x) {

  # Compute the proportion numerically. Formatting it as a percent string and
  # parsing it back rounds the value to the display precision, so small
  # proportions can collapse to 0.
  miss_prop <- vapply(
    x,
    function(column) {
      values <- as.character(column)
      if (length(values) == 0L) {
        # Nothing to measure; avoid reporting 0 / 0.
        return(NA_real_)
      }
      mean(is.na(values))
    },
    numeric(1)
  )

  enframe(miss_prop, name = "col_name", value = "miss_perc")
}


#' @title print the date
#' @description Return today's date with underscores between the year, month
#'   and day, like 2019_05_21.
#' @usage make_date_underscored()
#' @return a character string of the date in the form `YYYY_MM_DD`
#' @importFrom stringr str_replace_all
#' @examples
#' \dontrun{
#' make_date_underscored()
#' }
#' @export
make_date_underscored <- function() {
  # Format the Date directly instead of coercing it to text and then
  # swapping the separators.
  format(Sys.Date(), "%Y_%m_%d")
}


#' @title print a timestamp
#' @description Return the current local date and time, to the minute, as a
#'   compact timestamp like "20190521_0800".
#' @usage make_time_stamp()
#' @return a character string of the timestamp in the form `YYYYMMDD_HHMM`
#' @importFrom stringr str_replace_all str_sub str_remove_all
#' @examples
#' \dontrun{
#' make_time_stamp()
#' }
#' @export
make_time_stamp <- function() {

  # Use an explicit format rather than trimming as.character(Sys.time()):
  # the default text representation is not fixed-width (it can carry
  # fractional seconds), so cutting a fixed number of trailing characters
  # does not reliably remove the seconds.
  format(Sys.time(), "%Y%m%d_%H%M")
}


#' @title replace empty ("") values with "NA"
#' @description Converts every column to character and then swaps each empty
#'   string ("") for the string "NA", which is often easier to see and deal
#'   with than an empty value. Further testing is needed for this function
#' @param df the dataframe or tibble to be modified
#' @usage df %>% empty_as_na()
#'
#'   # or
#'
#'   empty_as_na(df)
#' @return df with all columns converted to character and empty values
#'   replaced with "NA"
#' @importFrom dplyr mutate_all if_else
#' @examples
#' \dontrun{
#' library(tibble)
#'
#' col1 <- seq(1, 10, 1)
#' col2 <- c("", 3, 4, "", 6, 7, 8, "", 10, "")
#' my_df <- tibble(col1 = col1, col2 = col2)
#' empty_as_na(df = my_df)
#' }
#' @export
empty_as_na <- function(df) {
  # First pass: make every column character so "" can be compared safely.
  as_text <- mutate_all(df, function(column) as.character(column))

  # Second pass: replace empty strings with the literal string "NA".
  mutate_all(as_text, function(column) if_else(column == "", "NA", column))
}
# tknoch8/helpRs documentation built on May 11, 2022, 9:34 p.m.