R/join_check_keys.R

Defines functions check_keys

Documented in check_keys

library(tidyverse)

# Like other join checks, we don't return any type here, just messages
#' Checking join keys
#'
#' This function checks to see if keys being used to join are unique, as needed
#' for a join,and returns potential problem keys to the user.
#'
#' @param left dataframe that we want to merge on the left side of the join
#' @param right dataframe that we want to merge on the right side of the join
#' @param by headers of columns to use to join the left and right data frames
#' @param verbose if true, returns more comprehensive output
#' @param join_type type of join ('left', right', 'inner', etc.) user is checking
#'
#' @export

check_keys <- function(left, right, by = by, verbose = F, join_type = "left") {
  left_nondistinct <- left %>% count_(by) %>% filter(n > 1)
  right_nondistinct <- right %>% count_(by) %>% filter(n > 1)
  left_primary_key_valid <- nrow(left_nondistinct) == 0
  right_primary_key_valid <- nrow(right_nondistinct) == 0

  warning_string <-
    paste0(
      "\ncheck_keys: ", by, ":\n",
      "    Key given is not a primary key")
  if(join_type == "left") {
    warning_string_verbose <-
      paste0(
        warning_string,
        "\n    Right: ", nrow(right_nondistinct), " nondistinct row(s)"
      )
  }
  if(join_type == "right") {
    warning_string_verbose <-
      paste0(
        warning_string,
        "\n    Left: ", nrow(left_nondistinct), " nondistinct row(s)"
      )
  }
  if(join_type == "full") {
    warning_string_verbose <-
      paste0(
        warning_string,
        "\n    Left: ", nrow(left_nondistinct), " nondistinct row(s)\n",
        "    Right: ", nrow(right_nondistinct), " nondistinct row(s)"
      )
  }
  if(left_primary_key_valid & join_type == "right" |
     right_primary_key_valid & join_type == "left") {
    message("check_keys: pass")
  } else {
    if(verbose) {
      warning(warning_string_verbose)
    } else {
      warning(warning_string)
    }
  }
}

# Tests:
# Check the keys are actually in the dang df
# Check the dfs aren't null
# check_keys(mtcars, mtcars, c("mpg", "hp"))
associatedpress/apdatacheck documentation built on Feb. 17, 2020, 9:27 p.m.