R/misc.R

Defines functions na_row_find na_identify dups_reporter dups_table rm_duplicates

rm_duplicates <- function(data) {
  dup_df <- data[duplicated(data),]
  dup_ct <- nrow(dup_df)
  dup_exists <- ifelse(nrow(dup_df) == 0,
                       paste("There are no duplicates values in",
                             substitute(data) %>% deparse()),
                       paste("There are",
                             dup_ct, "values with duplicate entries"))
  
  dup_rm <- data[!duplicated(data),]
  
  return(list(do_duplicates_exist = dup_exists, all_duplicated_rows = dup_df,
              duplicate_free_data = dup_rm))
}


dups_table <- function(data, column_names) {
  col_nm_len <- c(1:length(column_names))
  
  unique_check <- lapply(data[,c(column_names)], duplicated) %>%
    as.data.frame()
  colnames(unique_check) <- paste0(column_names, "_dup_ct")
  
  iterate_me <- function(index_num) {
    result1 <- cbind(data[, c(column_names[index_num])],
                     unique_check[index_num])
    result1 <- result1[result1[, 2] == TRUE,] %>% table()
    return(result1)
  }
  lapply(col_nm_len, iterate_me)
}

#' @importFrom purrr is_empty
dups_reporter <- function(table_name, dups_table_output, unique_check_vec) {
  unique_ck_logical <- lapply(dups_table_output, purrr::is_empty) %>% unlist
  unique_ck_cols <- unique_check_vec[!unique_ck_logical]
  
  if (!is.null(unique_ck_cols)) {
    rlog::log_warn(paste("Duplicate values exist in columns", unique_ck_cols, 
                         "from", table_name))
  } else {
    
    rlog::log_info(paste("No duplicates exist in columns", unique_ck_cols, 
                         "from", table_name))
  }
}


na_identify <- function(data, column) {
  na_df <- dplyr::filter(data, is.na(column))
  na_msg <- paste("There are", nrow(na_df), "omissions in column", deparse(substitute(column)))
  
  return(list(are_there_na = na_msg, na_data = na_df))
}

na_row_find <- function(data, column_names = names(data)) {
  
  index_me <- function(index_num) {
    result <-  data[is.na(data[index_num]),]
    if(nrow(result) == 0) {
      
    } else {
      return(result) 
    }
    
  }
  out <-  lapply(1:length(column_names), index_me)
  out <- setNames(out, names(data))
  
  out_not_na <- lapply(out, is.null)
  out_not_na_nms <- out_not_na[out_not_na == FALSE] %>% names()
  final_out <- out[out_not_na_nms]
  return(final_out)
}
virusgeeks/vgtools documentation built on April 25, 2022, 12:38 p.m.