#devtools::document()
#' @export
#' @importFrom purrr is_empty
#' @importFrom na.tools any_na
#' @importFrom na.tools which_na
#' @importFrom na.tools n_na
#' @importFrom rlog log_info
na_diagnostic <- function(data) {
  df_name <- deparse(substitute(data))
  # Coerce non-data.frame inputs (e.g. matrices) so the column-wise checks below work
  df <- if (!is.data.frame(data)) as.data.frame(data) else data
  # Any NA values in the columns?
  col_has_na <- vapply(df, na.tools::any_na, logical(1))
  na_col_check <- names(col_has_na)[col_has_na]
  # If so, where are they located in each column?
  na_col_pos <- lapply(df[na_col_check], na.tools::which_na)
  # Count the number of NA values per column
  na_count_vals <- lapply(df[na_col_check], na.tools::n_na)
  if (purrr::is_empty(na_col_check)) {
    rlog::log_info(paste("There are no NA values in any columns of", df_name))
  } else {
    rlog::log_info(paste(
      "There are", paste(unlist(na_count_vals), collapse = ", "),
      "NA values in columns", paste(na_col_check, collapse = ", ")
    ))
    return(list(
      na_col_names = na_col_check,
      na_row_index = na_col_pos,
      na_sum = na_count_vals
    ))
  }
}
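# Illustrative usage (a rough sketch with a made-up data frame):
# na_diagnostic(data.frame(id = 1:4, score = c(10, NA, 12, NA), site = c("A", "B", NA, "B")))
# Logs the NA counts and returns the affected column names, NA row indices, and per-column totals.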
#' @export
#' @importFrom rlog log_info
dup_diagnostic <- function(data) {
  df <- if (!is.data.frame(data)) as.data.frame(data) else data
  # Keep only the columns that actually contain duplicated values
  dup_col_logical <- lapply(df, duplicated)
  dup_col_names <- names(dup_col_logical)[vapply(dup_col_logical, any, logical(1))]
  # Tabulate the duplicated values in those columns, most frequent first
  df_fct <- lapply(df[dup_col_names], as.factor)
  df_fct_tbl <- lapply(df_fct, table) %>%
    lapply(as.data.frame) %>%
    lapply(function(x) dplyr::filter(x, x[, 2] > 1)) %>%
    lapply(function(x) x[order(x[, 2], decreasing = TRUE), ])
  check <- lapply(df_fct_tbl, nrow) %>% unlist() %>% as.data.frame()
  if (sum(check$.) == 0) {
    rlog::log_info("There are no duplicate values in any column")
  } else {
    rlog::log_info(paste(
      "There are duplicate values in columns",
      paste(dup_col_names, collapse = ", ")
    ))
    return(list(
      duplicated_col_names = dup_col_names,
      duplicated_col_count = df_fct_tbl,
      duplicated_col_summary = check
    ))
  }
}
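# Illustrative usage (a rough sketch with a made-up data frame):
# dup_diagnostic(data.frame(id = c(1, 1, 2, 3), lab = c("a", "b", "b", "c")))
# Logs which columns contain duplicates and returns their names, frequency tables, and a summary.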
#' @export
round_ts_midnight <- function(ts_col, origin) {
  # Snap each timestamp to the last second of its day (23:59:59 UTC)
  collection_col_ts_norm <- format(ts_col, "%Y-%m-%d")
  collection_col_ts_norm <- paste(collection_col_ts_norm, "23:59:59")
  collection_col_ts_norm <- as.POSIXct(collection_col_ts_norm, tz = "UTC",
                                       format = "%Y-%m-%d %H:%M:%OS",
                                       origin = origin)
  return(collection_col_ts_norm)
}
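# Illustrative usage (a rough sketch with a made-up timestamp):
# round_ts_midnight(as.POSIXct("2021-03-01 08:15:00", tz = "UTC"), origin = "1970-01-01")
# #> "2021-03-01 23:59:59 UTC"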
#' @export
parse_fct <- function(data, col_names, choose_class) {
  # Coerce the named columns with the supplied function (e.g. as.factor, as.numeric)
  my_data <- data
  my_data[, col_names] <- lapply(my_data[, col_names, drop = FALSE], choose_class)
  return(my_data)
}
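# Illustrative usage (a rough sketch; column names are made up):
# parse_fct(data.frame(dose = c("1", "2"), group = c("trt", "ctl")), "group", as.factor)
# Returns the data with the named column(s) coerced by the supplied function.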
#' @export
env_warn <- function(env_nm) {
  # Create a new environment if no object named `env_nm` exists yet;
  # otherwise return the existing one rather than overwriting it
  if (!exists(env_nm, envir = .GlobalEnv)) {
    rlog::log_info("new env created")
    return(new.env())
  } else {
    rlog::log_info("env already exists. no env overwrite made.")
    get(env_nm, envir = .GlobalEnv)
  }
}
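# Illustrative usage (a rough sketch; "cache_env" is a made-up name):
# cache_env <- env_warn("cache_env")
# The first call returns (and logs) a fresh environment; later calls return the existing object.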
#' @export
#This function compares two date columns to determine whether they are sequential.
#Rows where the prior date falls after the latter date are flagged as "Time Unorder";
#rows where the dates are equal count as "Time Order".
#key_column is carried through so the resulting data frame is easy to join back.
date_logic_seq <- function(key_column, prior_date_col, latter_date_col) {
  col_nm_testkit_key <- substitute(key_column) %>% deparse()
  col_nm_prior <- substitute(prior_date_col) %>% deparse()
  col_nm_latter <- substitute(latter_date_col) %>% deparse()
  date_df <- dplyr::tibble(key_column, prior_date_col, latter_date_col)
  #Equality needs a separate statement so that identical dates count as in order
  date_df$date_ck <- ifelse(prior_date_col < latter_date_col, "Time Order", "Time Unorder")
  date_df$date_ck <- ifelse(prior_date_col == latter_date_col, "Time Order", date_df$date_ck)
  date_df_time_unorder <- dplyr::filter(date_df, date_ck == "Time Unorder")
  date_df_time_unorder <- date_df_time_unorder[, c(1:3)]
  colnames(date_df_time_unorder) <- c(col_nm_testkit_key, col_nm_prior, col_nm_latter)
  readout <- ifelse(nrow(date_df_time_unorder) == 0,
                    paste("No datetime comparison errors.",
                          col_nm_prior, "occurs before",
                          col_nm_latter, "in all rows"),
                    paste(nrow(date_df_time_unorder), "instances where",
                          col_nm_prior, "occurs after",
                          col_nm_latter))
  return(list(datetime_error_exists = readout,
              datetime_error_rows = date_df_time_unorder))
}
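# Illustrative usage (a rough sketch; "orders" and its columns are made-up data):
# orders <- data.frame(order_id = c("K1", "K2"),
#                      shipped_at  = as.Date(c("2021-01-01", "2021-02-10")),
#                      received_at = as.Date(c("2021-01-05", "2021-02-01")))
# date_logic_seq(orders$order_id, orders$shipped_at, orders$received_at)
# Returns a readout string plus the rows where the prior date falls after the latter date.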
#' @export
#' @importFrom magrittr %>%
rs_connect <- function(rs_host,
                       rs_port,
                       rs_user,
                       rs_password,
                       rs_dbname) {
  rlog::log_info("Connecting to Redshift...")
  connect_rs <- DBI::dbConnect(RPostgres::Postgres(),
                               host = rs_host,
                               port = rs_port,
                               user = rs_user,
                               password = rs_password,
                               dbname = rs_dbname)
  if (DBI::dbIsValid(connect_rs)) {
    rlog::log_info("Successful Redshift Connection")
  } else {
    rlog::log_info("DB Failed To Connect")
  }
  return(connect_rs)
}
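# Illustrative usage (a rough sketch; every connection value below is a placeholder):
# con <- rs_connect(rs_host = "example-cluster.redshift.amazonaws.com", rs_port = 5439,
#                   rs_user = "analyst", rs_password = Sys.getenv("RS_PASSWORD"),
#                   rs_dbname = "analytics")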
#' @export
nrow_loaded_info <- function(data, df_name) {
  df_nrow <- nrow(data)
  statement <- paste(df_nrow, "rows loaded from", df_name)
  rlog::log_info(statement)
  return(statement)
}
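# Illustrative usage (a rough sketch):
# nrow_loaded_info(mtcars, "mtcars")
# #> "32 rows loaded from mtcars"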
#' @export
#' @importFrom magrittr %>%
#This converts an .Rmd file to an .R script. The .R format is necessary to use source() in Shiny apps.
#Having issues? Check your working directory with getwd().
#For consistent behaviour, call setwd(here::here()) in the code chunk where this is run
#and have an R project open.
purl_mdown <- function(file_name,
                       new_file_name,
                       file_path_origin,
                       file_path_destination) {
  if (length(file_name) <= 1) {
    # Write the purled .R next to the source .Rmd so the rename below does not
    # depend on the current working directory
    knitr::purl(input = paste0(file_path_origin, "/", file_name, ".Rmd"),
                output = paste0(file_path_origin, "/", file_name, ".R"))
    file.rename(from = paste0(file_path_origin, "/", file_name, ".R"),
                to = paste0(file_path_destination, "/", new_file_name))
  }
}
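# Illustrative usage (a rough sketch; file and folder names are placeholders):
# purl_mdown(file_name = "01_load_data",
#            new_file_name = "01_load_data.R",
#            file_path_origin = here::here("rmd"),
#            file_path_destination = here::here("R"))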
#' @export
#' @importFrom purrr map2
purl_ordered <- function(fnames) {
  # Build zero-prefixed, ordered .R file names (e.g. "01_load.R", "02_clean.R")
  file_nm_order <- function(fnames) {
    fname_ordered <- paste0("0", seq_along(fnames), "_", fnames, ".R")
    return(fname_ordered)
  }
  # Match each original name to its ordered counterpart
  file_order_names <- function(fnames, fnames_ordered) {
    fnames_ordered[grep(fnames, fnames_ordered, fixed = TRUE)]
  }
  fnames_ordered <- file_nm_order(fnames)
  fname_vars <- purrr::map2(fnames, fnames_ordered, file_order_names)
  return(fname_vars)
}
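# Illustrative usage (a rough sketch with made-up script names):
# purl_ordered(c("load_data", "clean_data", "model"))
# #> list("01_load_data.R", "02_clean_data.R", "03_model.R")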
# -------------------------------------------------------------------------