## File: tests/testthat/test_wrapper.R

## Label the group of tests defined in this file.
context("Tests fastLink() wrapper.")

## Load the sample data set. The tests below reference `dfA` and `dfB`,
## which are presumably provided by this data set -- TODO confirm.
data(samplematch)
## Fix the RNG seed before any test runs, presumably so that the numeric
## expectations below are reproducible -- verify against fastLink().
set.seed(738969) ## From random.org, 2018/04/16

## ---------
## Run tests
## ---------
test_that("fastLink() gives correct results on sample data.", {

    ## Fields compared between the two data frames
    match_fields <- c(
        "firstname", "middlename", "lastname", "housenum",
        "streetname", "city", "birthyear"
    )

    ## Fit the linkage on a single core with return.all = TRUE
    linkage <- fastLink(
        dfA = dfA,
        dfB = dfB,
        varnames = match_fields,
        return.all = TRUE,
        n.cores = 1
    )

    ## The returned object is expected to carry both classes
    expect_is(linkage, "fastLink", label = "Test class is fastLink.")
    expect_is(linkage, "confusionTable", label = "Test class is confusionTable.")

    ## Flatten the rounded confusion table and compare it, cell by cell,
    ## with the reference values (attributes are ignored by the comparison)
    observed_cells <- as.vector(round(confusion(linkage)$confusion.table, 2))
    expect_equivalent(
        observed_cells,
        round(c(50.0, 0.3, 0.0, 299.7), 2),
        label = "We get the right baseline results from fastLink()."
    )

})

test_that("fastLink() throws errors when we expect it to.", {

    ## Local helper: call fastLink() on the sample data frames with the
    ## extra arguments in `...` and expect an error. `label` is passed
    ## straight through to expect_error().
    ## NOTE(review): no regexp is supplied, so any error raised by the
    ## call satisfies the expectation, not only the one the label names.
    expect_fastlink_error <- function(..., label) {
        expect_error(
            fastLink(dfA = dfA, dfB = dfB, ...),
            label = label
        )
    }

    ## --- varnames / match-type arguments ---
    expect_fastlink_error(
        varnames = c("firstname", "lastname", "not_in_df"),
        label = "Variable provided not in data frame."
    )
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        stringdist.match = "middlename",
        label = "Variable for stringdist.match not in varnames."
    )
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        numeric.match = "birthyear",
        label = "Variable for numeric.match not in varnames."
    )
    expect_fastlink_error(
        varnames = c("firstname", "lastname", "birthyear"),
        stringdist.match = "birthyear",
        numeric.match = "birthyear",
        label = "Variable provided for both stringdist.match and numeric.match."
    )
    expect_fastlink_error(
        varnames = c("firstname", "middlename", "lastname"),
        partial.match = "middlename",
        label = "Variable in partial.match but not in either stringdist.match or numeric.match"
    )

    ## --- address.field ---
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        address.field = "street_name",
        label = "Variable in address.field not present in data frame or varnames."
    )
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        address.field = c("streetname", "city"),
        label = "Multiple variables provided for address.field."
    )

    ## --- gender.field ---
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        gender.field = "male",
        label = "Variable in gender.field not in data frame."
    )
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        gender.field = c("female", "male"),
        label = "Multiple variables provided for gender.field."
    )

    ## --- reweight.names / firstname.field ---
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        reweight.names = TRUE,
        label = "No argument provided for firstname.field when reweight.names = TRUE."
    )
    expect_fastlink_error(
        varnames = c("middlename", "lastname"),
        firstname.field = "firstname", reweight.names = TRUE,
        label = "Argument for firstname.field not present in varnames."
    )

    ## --- stringdist.method ---
    expect_fastlink_error(
        varnames = c("firstname", "lastname"),
        stringdist.match = "firstname",
        stringdist.method = "jaro-winkler",
        label = "Invalid string distance method provided."
    )

})
## kosukeimai/fastLink documentation built on Nov. 17, 2023, 8:11 p.m.