# tests/testthat/test_generate_matches.R

context("Test generate matches")

# TODO:
#   - load the data needed by these tests
#   - test that errors are raised when required inputs are missing
#   - test the output
#   - test input/output variable names?
#
# Packages are referenced with `pkg::` below instead of being attached:
# library(tibble)
# library(dplyr)
# library(postalcodes)
# library(magrittr)

# Select the three record columns and standardize `name` and `address`.
#
# `records` must contain `name`, `address` and `postal_code` columns.
# Returns `records` restricted to those columns, with `name` passed through
# standardize() using `company_dictionary`, and `address` passed through
# standardize() using `address_dictionary` and then fix_unit_names().
prepare_records <- function(records) {
  records <- dplyr::select(records, name, address, postal_code)
  dplyr::mutate(
    records,
    name = standardize(name, dictionary = company_dictionary),
    address = fix_unit_names(
      standardize(address, dictionary = address_dictionary)
    )
  )
}

# Two hand-written business records used as the left-hand table.
br <- tibble::tibble(
  name = c("A.-B. SECURITY",
           "Armada Security Canada"),
  address = c("Unit 212, 833 103 Ave",
              "9605 14 St"),
  postal_code = c("V1G2G2", "V1G3Y1")
)

# The right-hand table holds the same rows as `br`, in random order
# (sample_frac() with size = 1 keeps every row).
other <- dplyr::sample_frac(br, size = 1)

# NOTE(review): `bl` is not used below and `postal_input` is reassigned later;
# presumably kept as a smoke check that fuzzy_block() runs on the first five
# postal codes of the reference table -- confirm or remove.
postal_input <- postalcodes::postal_coords[1:5, "postalcode"][[1]]
bl <- fuzzy_block(
  postal_input = postal_input,
  postal_coords = postalcodes::postal_coords
)

br <- prepare_records(br)
other <- prepare_records(other)

# Distinct postal codes across BOTH tables. The vectors must be combined with
# c() first: the second positional argument of unique() is `incomparables`,
# not another vector to merge.
postal_input <- unique(c(br$postal_code, other$postal_code))
block <- fuzzy_block(
  postal_input = postal_input,
  postal_coords = postalcodes::postal_coords
)
matches <- generate_matches(br, other, block = block)
# TODO (original note): "this can't be a one col tbl, or anything else" --
# fix that later.

# generate_matches() should return a tibble that carries the postal code, name
# and address of both input tables, suffixed `.x` and `.y`.
test_that("Test that generate_matches() returns a tbl_df with two postalcode, name and address columns", {
  # Check the class directly instead of matching text printed by str().
  expect_s3_class(matches, "tbl_df")

  expected_cols <- c(
    "postalcode.x", "postalcode.y",
    "name.x", "name.y",
    "address.x", "address.y"
  )
  # Exact column-name check: substring matching on str() output would also
  # accept e.g. "fullname.x". On failure this reports the missing names.
  expect_equal(setdiff(expected_cols, names(matches)), character(0))
})
# tweed1e/matchtools documentation built on May 29, 2019, 10:51 a.m.