# Nothing
# SPDX-Copyright: Copyright (c) Capital One Services, LLC
# SPDX-License-Identifier: Apache-2.0
# Copyright 2017 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied.
#
# UNIT TEST*: matchRows
#
# * matchRows mostly just does a switch on input to call
# the appropriate subfunctions, so it might be more
# appropriate to call it an integration test
#
# matchRows generates subsets of two input dataframes that match
# on rows with zero, one or more shared keys, as well as information
# about which rows were dropped from the subsets
#
context("matchRows")
test_that("matchRows correctly finds matching rows", {
  # Fixture builder: a two-column frame (ky, dta) in which every row carries
  # the label "data <key>" next to its key.
  keyed_frame <- function(keys) {
    data.frame(
      ky = keys,
      dta = paste("data", keys),
      stringsAsFactors = FALSE
    )
  }

  # Fixture builder for the two-key scenario: each key is split into its
  # remainder modulo 10 (ky1) and what is left after removing it (ky2).
  split_key_frame <- function(keys) {
    low_part <- keys %% 10
    data.frame(
      ky1 = low_part,
      ky2 = keys - low_part,
      ky = keys,
      dta = paste("data", keys),
      stringsAsFactors = FALSE
    )
  }

  # Shared checks for the scenarios with zero or one key column.
  # Elements 1 and 2 of the result are compared, column by column, with the
  # expected matched rows; the first column of each entry in element 3 is
  # compared with the keys expected to be missing on the A and B side.
  check_simple_result <- function(result, expected, missing_a, missing_b) {
    for (side in 1:2) {
      expect_equal(result[[side]][, 1], expected[, 1]) # Keys
      expect_equal(result[[side]][, 2], expected[, 2]) # Data
    }
    expect_equal(result[[3]][[1]][[1]], missing_a) # Missing indices from A
    expect_equal(result[[3]][[2]][[1]], missing_b) # Missing indices from B
  }

  #
  # No index example - A (20 rows) is larger than B (15 rows)
  #
  frame_a <- keyed_frame(1:20)
  frame_b <- keyed_frame(1:15)
  shared_rows <- keyed_frame(1:15)
  result <- matchRows(frame_a, frame_b, NA)
  check_simple_result(
    result,
    shared_rows,
    missing_a = 16:20,
    missing_b = integer()
  )

  #
  # No index example - A (15 rows) is smaller than B (20 rows)
  #
  frame_a <- keyed_frame(1:15)
  frame_b <- keyed_frame(1:20)
  shared_rows <- keyed_frame(1:15)
  result <- matchRows(frame_a, frame_b, NA)
  check_simple_result(
    result,
    shared_rows,
    missing_a = integer(),
    missing_b = 16:20
  )

  #
  # Single index example - the frames overlap on keys 7 to 10
  #
  frame_a <- keyed_frame(1:10)
  frame_b <- keyed_frame(7:15)
  shared_rows <- keyed_frame(7:10)
  result <- matchRows(frame_a, frame_b, "ky")
  check_simple_result(
    result,
    shared_rows,
    missing_a = 1:6,
    missing_b = 11:15
  )

  #
  # Multiple index example - match on the pair (ky1, ky2)
  #
  frame_a <- split_key_frame(1:20)
  frame_b <- split_key_frame(7:25)
  shared_rows <- split_key_frame(7:20)
  result <- matchRows(frame_a, frame_b, c("ky1", "ky2"))

  # Keys expected to be reported as missing, split the same way as the
  # fixture keys.
  absent_a <- 1:6
  absent_a_low <- absent_a %% 10
  absent_a_rest <- absent_a - absent_a_low
  absent_b <- 21:25
  absent_b_low <- absent_b %% 10
  absent_b_rest <- absent_b - absent_b_low

  # Both sides are sorted by the key pair before comparing, so the checks
  # do not depend on the row order of the result.
  # Columns 1, 2 and 4 are: first key, second key, data.
  checked_columns <- c(1, 2, 4)
  sorted_a <- arrange(result[[1]], ky1, ky2)
  sorted_expected <- arrange(shared_rows, ky1, ky2)
  for (column in checked_columns) {
    expect_equal(sorted_a[, column], sorted_expected[, column]) # Subset of A
  }
  sorted_b <- arrange(result[[2]], ky1, ky2)
  for (column in checked_columns) {
    expect_equal(sorted_b[, column], sorted_expected[, column]) # Subset of B
  }

  sorted_missing_a <- arrange(result[[3]][[1]], ky1, ky2)
  sorted_missing_b <- arrange(result[[3]][[2]], ky1, ky2)
  expect_equal(sorted_missing_a[[1]], absent_a_low) # Missing indices from A
  expect_equal(sorted_missing_a[[2]], absent_a_rest) # Missing indices from A
  expect_equal(sorted_missing_b[[1]], absent_b_low) # Missing indices from B
  expect_equal(sorted_missing_b[[2]], absent_b_rest) # Missing indices from B
})
test_that("Merged indices remain unique in multi-index cases", {
  # The key pairs ("a1", "b") and ("a", "1b") both read "a1b" when pasted
  # together without a separator, so they would collide if the merged index
  # were built that way.
  frame_a <- data.frame(
    ky1 = c("a1", "a"),
    ky2 = c("b", "1b"),
    ky = c(1, 2),
    stringsAsFactors = FALSE
  )
  # The second data frame and the expected matched rows are both identical
  # to the first data frame.
  frame_b <- frame_a
  expected_rows <- frame_a

  # Do the actual matching. If the indices weren't unique, an error would be
  # raised; the pairs are distinct, so the call is expected to stay silent.
  expect_silent(result <- matchRows(frame_a, frame_b, c("ky1", "ky2")))

  # Check that the output is still as expected: after sorting by the key
  # pair, both key columns of each side equal those of the expected rows.
  key_columns <- c(1, 2)
  sorted_a <- arrange(result[[1]], ky1, ky2)
  sorted_expected <- arrange(expected_rows, ky1, ky2)
  for (column in key_columns) {
    expect_equal(sorted_a[, column], sorted_expected[, column])
  }
  sorted_b <- arrange(result[[2]], ky1, ky2)
  for (column in key_columns) {
    expect_equal(sorted_b[, column], sorted_expected[, column])
  }
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.