R/check.link.r

Defines functions check.link

Documented in check.link

#' Check foreign key constraints
#'
#' Check for violation of a foreign key constraint
#'
#' @param child.tbl Data frame representing the table on the 'many' side of an
#'   'one-to-many' relation. See notes.
#' @param child.col Name of a column from \code{child.tbl} providing the link
#'   to the other table (character string).
#' @param parent.tbl Data frame representing the table on the 'one' side of an
#'   'one-to-many' relation. See notes.
#' @param parent.col Name of a column from \code{parent.tbl} providing the link
#'   to the other table (character string).
#' @param na.ok Logical. Allow \code{NA} values in the child column without
#'   the respective complement in the parent column?
#' @param silent Logical. If \code{FALSE}, details on constraint
#'  violations are shown using \code{\link[base]{print}}.
#' 
#' @return \code{TRUE} if the check was passed successfully and
#'   \code{FALSE} otherwise.
#'
#' @note The function is made to verify that two tables having a one-to-many
#'   relation (\url{https://en.wikipedia.org/wiki/One-to-many_(data_model)})
#'   can successfully be joined on the specified columns.
#'
#' @seealso There are more functions to check constraints, namely
#'   \code{\link{check.notnull}}, \code{\link{check.unique}},
#'   and \code{\link{check.key}}.
#'   See also the example for \code{\link{db.read}}.
#'
#' @author David Kneis \email{david.kneis@@tu-dresden.de}
#'
#' @export
#'
#' @examples
#' 
#' data(people, countries)
#' print(people)
#' print(countries)
#'
#' # Should succeed
#' check.link(people, "id_country", countries, "id")
#' 
#' # Example of an orphaned child record
#' check.link(people, "id_country", countries[1:2,], "id")
#' 
#' # Example of ambiguity
#' countries2 <- rbind(countries, data.frame(id=3, country="India"))
#' check.link(people, "id_country", countries2, "id")

check.link <- function(child.tbl, child.col, parent.tbl, parent.col, na.ok=FALSE, silent=FALSE) {

  # check child table  
  if (!is.data.frame(child.tbl))
    stop("'child.tbl' must be a data frame")
  if (!is.character(child.col) || (length(child.col) != 1))
    stop("'child.col' must be a character string")
  if (!child.col %in% colnames(child.tbl))
    stop("column specified as 'child.col' not present in 'child.tbl'")
  # check parent table
  if (!is.data.frame(parent.tbl))
    stop("'parent.tbl' must be a data frame")
  if (!is.character(parent.col) || (length(parent.col) != 1))
    stop("'parent.col' must be a character string")
  if (!parent.col %in% colnames(parent.tbl))
    stop("column specified as 'parent.col' not present in 'parent.tbl'")

  # get field values
  childValues <- child.tbl[,child.col]
  if (any(is.na(childValues))) {
    if (!na.ok) {
      stop("found NAs in 'child.col'")
    } else {
      childValues <- childValues[!is.na(childValues)]
    }
  }
  if (length(childValues) < 1) {
    stop("'child.col' does not contain usable values")
  }

  parentValues <- parent.tbl[,parent.col]
  if (length(parentValues) < 1) {
    stop("'parent.col' does not contain usable values")
  }

  # check identity of types
  if (!identical(typeof(childValues), typeof(parentValues))) {
    if (!silent)
      print(paste0("invalid relation between 'child.tbl' and 'parent.tbl'",
        " since 'child.col' has type ",typeof(childValues),
        " whereas 'parent.col' has type ",typeof(parentValues)))
    return(FALSE)
  }

  # check for duplicates in the parent field
  if (anyDuplicated(parentValues)) {
    if (!silent)
      print(paste0("invalid relation between 'child.tbl' and 'parent.tbl'",
        " since entries in 'parent.col' are not unique"))
    return(FALSE)
  }
    
  # check for orphaned records in child table
  bad <- childValues[!childValues %in% parentValues]
  if (length(bad) > 0) {
    if (!silent)
      print(paste0("invalid relation between 'child.tbl' and 'parent.tbl'",
        " since 'child.tbl' contains ",length(bad),
        " orphaned record(s); the first orphaned record has value '",
        bad[1],"' in 'child.col'"))
    return(FALSE)
  }

  return(TRUE)
}
dkneis/tabular documentation built on Feb. 9, 2023, 12:34 a.m.