R/internals.R

Defines functions create_match_fun assert_match_fun assert_col_class assert_max_dist assert_by

## check by
assert_by <- function(x, y, by) {

  if(is.vector(by)) {
    if(!inherits(by, "character")) {
      stop("by must be a character vector")
    }
    col_match <- matrix(c(match(by, names(x)),
                          match(by, names(y))),
                        ncol = 2)
    if(any(is.na(col_match[,1]))) {
      stop(paste(by[is.na(col_match[,1])], collapse = ", "), " not found in x")
    }
    if(any(is.na(col_match[,2]))) {
      stop(paste(by[is.na(col_match[,2])], collapse = ", "), " not found in y")
    }
  } else if(is.matrix(by)) {
    if(ncol(by) != 2) {
      stop("by must be a matrix with two columns")
    }
    if(is.character(by)) {
      col_match <- matrix(c(match(by[,1], names(x)),
                            match(by[,2], names(y))),
                          ncol = 2)
      if(any(is.na(col_match[,1]))) {
        stop(paste(by[is.na(col_match[,1]), 1], collapse = ", "), " not found in x")
      }
      if(any(is.na(col_match[,2]))) {
        stop(paste(by[is.na(col_match[,2]), 2], collapse = ", "), " not found in y")
      }
    } else if(is.numeric(by)) {
      if(any(by[,1] > ncol(x), by[,2] > ncol(y))) {
        stop("a value in by is greater than the number of columns in x or y")
      }
      col_match <- by
    } else {
      stop("by must be a character or numeric matrix")
    }
  } else {
    stop("by must be a matrix or character vector")
  }
  return(col_match)
}

## check max_dist
assert_max_dist <- function(max_dist, col_match) {

  if(is.null(max_dist)) {
    max_dist <- rep(0, nrow(col_match))
  }
  if(length(max_dist) == 1) {
    max_dist <- rep(max_dist, nrow(col_match))
  }
  if(length(max_dist) != nrow(col_match)) {
    stop("max_dist must be the same length as the number of column matches")
  }
  if(!is.numeric(max_dist)) {
    stop("max_dist must be a numeric vector")
  }
  
  return(max_dist)

}

## check than columns classes match
assert_col_class <- function(x, y, col_match) {

  class_1 <- vapply(x[,col_match[,1], drop = FALSE], class, "")
  class_2 <- vapply(y[,col_match[,2], drop = FALSE], class, "")

  class_1[class_1 == 'integer'] <- 'numeric'
  class_2[class_2 == 'integer'] <- 'numeric'

  if(!all(class_1 == class_2)) {
    stop("Matched columns must be of the same class")
  }

  rownames(class_1) <- NULL

  return(class_1)

}

## check match_fun
assert_match_fun <- function(match_fun, col_match) {
  if(length(match_fun) != nrow(col_match)) {
    stop("match_fun must be of the same length as the number of column matches")
  }
  arg_len <- vapply(match_fun, function(fun) length(formals(fun)), 1L)
  if(!all(arg_len == 2)) {
    stop("functions in match_fun must accept two arguments")
  }
  names(match_fun) <- NULL
  return(match_fun)
}

## generate matching functions with dist value embedded
create_match_fun <- function(class, dist) {
  if(class == "character") {
    return(function(a, b) abs(stringdist::stringdist(a, b)) <= dist)
  } else if(class == "numeric") {
    return(function(a, b) abs(a - b) <= dist)
  } else if(class == "Date") {
    return(function(a, b) abs(as.numeric(a - b)) <= dist)
  } else {
    stop("column class must be numeric, Date or character")
  }
}
finlaycampbell/casematcher documentation built on May 8, 2020, 8:29 p.m.