# how to run this test:
# devtools::test(filter="join")
# Shared fixtures: two data frames whose key columns hold the same labels in
# different letter case. upper_df has two extra keys ("G", "H") that have no
# counterpart in lower_df.
lower_col <- c("a", "b", "c", "d1a", "1eb", "f1c")
upper_col <- c("A", "B", "C", "D1a", "1EB", "F1C", "G", "H")
value_lower_col <- c(1, 2, 3, 4, 5, 6)
value_upper_col <- c(8, 7, 6, 5, 4, 3, 2, 1)
other_lower_col <- c("aa", "bb", "cc", "dd", "ee", "ff")
other_upper_col <- c("AA", "BB", "CC", "DD", "EE", "FF", "GG", "HH")

# stringsAsFactors = FALSE pins the text columns to character vectors on every
# R version; before R 4.0.0 the default was TRUE, which would have turned the
# join keys into factors.
lower_df <- data.frame(
  lower_col, value_lower_col, other_lower_col,
  stringsAsFactors = FALSE
)
upper_df <- data.frame(
  upper_col, value_upper_col, other_upper_col,
  stringsAsFactors = FALSE
)
# testthat context label for the tests that follow in this file.
context("test case insensitive join functions")
test_that("left_join with case insensitive", {
  # ignorecase = TRUE is expected to let "d1a" match "D1a"; target_columns
  # lists the only columns of upper_df that should be brought into the result.
  df1 <- lower_df %>%
    left_join(
      upper_df,
      by = c("lower_col" = "upper_col"),
      ignorecase = TRUE,
      target_columns = c("upper_col", "value_upper_col")
    )
  # Expected result:
  #   lower_col value_lower_col other_lower_col value_upper_col
  # 1         a               1              aa               8
  # 2         b               2              bb               7
  # 3         c               3              cc               6
  # 4       d1a               4              dd               5
  # 5       1eb               5              ee               4
  # 6       f1c               6              ff               3
  d1a_value <- df1 %>%
    filter(lower_col == "d1a") %>%
    select(value_upper_col)
  expect_equal(unlist(d1a_value)[[1]], 5)
  # other_upper_col is not listed in target_columns, so it must not appear in
  # the joined result (see the expected columns above).
  expect_false("other_upper_col" %in% colnames(df1))
})
test_that("left_join with case sensitive", {
  # ignorecase is not passed here; the assertion below expects the lower-case
  # key "d1a" to find no partner in upper_df.
  df2 <- left_join(lower_df, upper_df, by = c("lower_col" = "upper_col"))
  # Expected result:
  #   lower_col value_lower_col other_lower_col value_upper_col other_upper_col
  # 1         a               1              aa              NA            <NA>
  # 2         b               2              bb              NA            <NA>
  # 3         c               3              cc              NA            <NA>
  # 4       d1a               4              dd              NA            <NA>
  # 5       1eb               5              ee              NA            <NA>
  # 6       f1c               6              ff              NA            <NA>
  d1a_rows <- filter(df2, lower_col == "d1a")
  result <- select(d1a_rows, value_upper_col)
  expect_equal(is.na(result[["value_upper_col"]]), TRUE)
})
test_that("right_join with case insensitive", {
  df3 <- right_join(
    lower_df,
    upper_df,
    by = c("lower_col" = "upper_col"),
    ignorecase = TRUE
  )
  # Output recorded for df3 (the table lists only the key and value columns):
  #   lower_col value_lower_col value_upper_col
  # 1         a               1               8
  # 2         b               2               7
  # 3         c               3               6
  # 4       d1a               4               5
  # 5       1eb               5               4
  # 6       f1c               6               3
  # 7      <NA>              NA               2
  # 8      <NA>              NA               1
  d1a_rows <- df3 %>% filter(lower_col == "d1a")
  # The "d1a" row should carry values from both sides of the join.
  expect_equal(unlist(d1a_rows %>% select(value_upper_col))[[1]], 5)
  expect_equal(unlist(d1a_rows %>% select(value_lower_col))[[1]], 4)
})
test_that("right_join with case sensitive", {
  df4 <- right_join(lower_df, upper_df, by = c("lower_col" = "upper_col"))
  # Output recorded for df4 (the table lists only the key and value columns):
  #   lower_col value_lower_col value_upper_col
  # 1         A              NA               8
  # 2         B              NA               7
  # 3         C              NA               6
  # 4       D1a              NA               5
  # 5       1EB              NA               4
  # 6       F1C              NA               3
  # 7         G              NA               2
  # 8         H              NA               1
  upper_d1a_rows <- df4 %>% filter(lower_col == "D1a")
  # The key keeps the upper-case spelling and only the upper_df value is set.
  expect_equal(unlist(upper_d1a_rows %>% select(value_upper_col))[[1]], 5)
  result <- upper_d1a_rows %>% select(value_lower_col)
  expect_equal(is.na(result[["value_lower_col"]]), TRUE)
})
test_that("full_join with case insensitive", {
  df5 <- full_join(
    lower_df,
    upper_df,
    by = c("lower_col" = "upper_col"),
    ignorecase = TRUE
  )
  # Output recorded for df5 (the table lists only the key and value columns):
  #   lower_col value_lower_col value_upper_col
  # 1         a               1               8
  # 2         b               2               7
  # 3         c               3               6
  # 4       d1a               4               5
  # 5       1eb               5               4
  # 6       f1c               6               3
  # 7      <NA>              NA               2
  # 8      <NA>              NA               1
  d1a_rows <- df5 %>% filter(lower_col == "d1a")
  # The "d1a" row should carry values from both sides of the join.
  expect_equal(unlist(d1a_rows %>% select(value_upper_col))[[1]], 5)
  expect_equal(unlist(d1a_rows %>% select(value_lower_col))[[1]], 4)
})
test_that("full_join with case sensitive", {
  df6 <- full_join(lower_df, upper_df, by = c("lower_col" = "upper_col"))
  # Output recorded for df6 (the table lists only the key and value columns):
  #    lower_col value_lower_col value_upper_col
  # 1          a               1              NA
  # 2          b               2              NA
  # 3          c               3              NA
  # 4        d1a               4              NA
  # 5        1eb               5              NA
  # 6        f1c               6              NA
  # 7          A              NA               8
  # 8          B              NA               7
  # 9          C              NA               6
  # 10       D1a              NA               5
  # 11       1EB              NA               4
  # 12       F1C              NA               3
  # 13         G              NA               2
  # 14         H              NA               1
  upper_d1a_rows <- df6 %>% filter(lower_col == "D1a")
  # The upper-case key is its own row, with no value from lower_df.
  expect_equal(unlist(upper_d1a_rows %>% select(value_upper_col))[[1]], 5)
  result <- upper_d1a_rows %>% select(value_lower_col)
  expect_equal(is.na(result[["value_lower_col"]]), TRUE)
})
test_that("inner_join with case insensitive", {
  df7 <- inner_join(
    lower_df,
    upper_df,
    by = c("lower_col" = "upper_col"),
    ignorecase = TRUE
  )
  # Output recorded for df7 (the table lists only the key and value columns):
  #   lower_col value_lower_col value_upper_col
  # 1         a               1               8
  # 2         b               2               7
  # 3         c               3               6
  # 4       d1a               4               5
  # 5       1eb               5               4
  # 6       f1c               6               3
  d1a_rows <- df7 %>% filter(lower_col == "d1a")
  # The "d1a" row should carry values from both sides of the join.
  expect_equal(unlist(d1a_rows %>% select(value_upper_col))[[1]], 5)
  expect_equal(unlist(d1a_rows %>% select(value_lower_col))[[1]], 4)
})
test_that("inner_join with case sensitive", {
  df8 <- inner_join(lower_df, upper_df, by = c("lower_col" = "upper_col"))
  # Output recorded for df8:
  # [1] lower_col value_lower_col value_upper_col
  # <0 rows> (or 0-length row.names)
  matched_rows <- nrow(df8)
  expect_equal(matched_rows, 0)
})
test_that("semi_join with case insensitive", {
  df9 <- semi_join(
    lower_df,
    upper_df,
    by = c("lower_col" = "upper_col"),
    ignorecase = TRUE
  )
  # Output recorded for df9 (the table lists only the key and value columns):
  #   lower_col value_lower_col
  # 1         a               1
  # 2         b               2
  # 3         c               3
  # 4       d1a               4
  # 5       1eb               5
  # 6       f1c               6
  d1a_value <- df9 %>%
    filter(lower_col == "d1a") %>%
    select(value_lower_col)
  expect_equal(unlist(d1a_value)[[1]], 4)
})
test_that("semi_join with case sensitive", {
  df10 <- semi_join(lower_df, upper_df, by = c("lower_col" = "upper_col"))
  # Output recorded for df10:
  # [1] lower_col value_lower_col
  # <0 rows> (or 0-length row.names)
  kept_rows <- nrow(df10)
  expect_equal(kept_rows, 0)
})
test_that("anti_join with case insensitive", {
  df11 <- anti_join(
    lower_df,
    upper_df,
    by = c("lower_col" = "upper_col"),
    ignorecase = TRUE
  )
  # Output recorded for df11:
  # [1] lower_col value_lower_col
  # <0 rows> (or 0-length row.names)
  remaining_rows <- nrow(df11)
  expect_equal(remaining_rows, 0)
})
test_that("anti_join with case sensitive", {
  df12 <- anti_join(lower_df, upper_df, by = c("lower_col" = "upper_col"))
  # Output recorded for df12 (the table lists only the key and value columns):
  #   lower_col value_lower_col
  # 1         a               1
  # 2         b               2
  # 3         c               3
  # 4       d1a               4
  # 5       1eb               5
  # 6       f1c               6
  d1a_value <- df12 %>%
    filter(lower_col == "d1a") %>%
    select(value_lower_col)
  expect_equal(unlist(d1a_value)[[1]], 4)
})
test_that("left_join with NA on target", {
  # Both inputs contain NA in the key column "a". The assertions below expect
  # that NA keys are never matched to each other, for each of the left, right,
  # inner and full joins.
  source_df <- readr::read_csv("a, b, c
1, 2, 3
2, 3, 4
3, 4, 5
4, 5, 6
5, 6, 7
NA,7, 8
NA,8, 9
NA,9,10")
  target_df <- readr::read_csv("a, d
1, a
2, b
3, c
4, e
5, f
NA,g
NA,h
NA,i
NA,j")

  # Left join: every source row is kept; the NA-key rows get no value for d.
  #  a, b,  c, d
  #  1, 2,  3, a
  #  2, 3,  4, b
  #  3, 4,  5, c
  #  4, 5,  6, e
  #  5, 6,  7, f
  # NA, 7,  8, NA
  # NA, 8,  9, NA
  # NA, 9, 10, NA
  left_result <- source_df %>%
    exploratory::left_join(target_df, dplyr::join_by(a == a))
  expect_equal(nrow(left_result), 8)
  expect_equal(ncol(left_result), 4)
  expect_equal(left_result %>% dplyr::filter(is.na(a)) %>% nrow(), 3)

  # Right join: every target row is kept; the NA-key rows get no b or c.
  #  a  b  c d
  #  1  2  3 a
  #  2  3  4 b
  #  3  4  5 c
  #  4  5  6 e
  #  5  6  7 f
  # NA NA NA g
  # NA NA NA h
  # NA NA NA i
  # NA NA NA j
  right_result <- source_df %>%
    exploratory::right_join(target_df, dplyr::join_by(a == a))
  expect_equal(nrow(right_result), 9)
  expect_equal(ncol(right_result), 4)
  expect_equal(right_result %>% dplyr::filter(is.na(a)) %>% nrow(), 4)

  # Inner join: only the five non-NA keys remain.
  # a b c d
  # 1 2 3 a
  # 2 3 4 b
  # 3 4 5 c
  # 4 5 6 e
  # 5 6 7 f
  inner_result <- source_df %>%
    exploratory::inner_join(target_df, dplyr::join_by(a == a))
  expect_equal(nrow(inner_result), 5)
  expect_equal(ncol(inner_result), 4)
  expect_equal(inner_result %>% dplyr::filter(is.na(a)) %>% nrow(), 0)

  # Full join: the NA-key rows of both inputs are kept as separate rows.
  #  a  b  c  d
  #  1  2  3  a
  #  2  3  4  b
  #  3  4  5  c
  #  4  5  6  e
  #  5  6  7  f
  # NA  7  8 NA
  # NA  8  9 NA
  # NA  9 10 NA
  # NA NA NA  g
  # NA NA NA  h
  # NA NA NA  i
  # NA NA NA  j
  full_result <- source_df %>%
    exploratory::full_join(target_df, dplyr::join_by(a == a))
  expect_equal(nrow(full_result), 12)
  expect_equal(ncol(full_result), 4)
  expect_equal(full_result %>% dplyr::filter(is.na(a)) %>% nrow(), 7)
})
test_that("cross_join with selected column", {
  source_tbl <- readr::read_csv('"ID","製品製品名"
1,"机"
2,"ライト"
3,"椅子"')
  target_tbl <- readr::read_csv('"ID","色","適用"
1,"赤","明るめ"
2,"青","暗め"
3,"白","普通"')
  # target_columns together with exclude_target_columns = TRUE asks for the
  # "適用" column of the target to be left out of the result.
  crossed <- cross_join(
    source_tbl,
    target_tbl,
    target_columns = "適用",
    exclude_target_columns = TRUE
  )
  # Expected result: 9 rows x 4 columns (3 source rows x 3 target rows)
  #    ID.x 製品製品名  ID.y 色
  #   <dbl> <chr>      <dbl> <chr>
  # 1     1 机             1 赤
  # 2     1 机             2 青
  # 3     1 机             3 白
  # 4     2 ライト         1 赤
  # 5     2 ライト         2 青
  # 6     2 ライト         3 白
  # 7     3 椅子           1 赤
  # 8     3 椅子           2 青
  # 9     3 椅子           3 白
  expect_equal(nrow(crossed), 9)
  expect_equal(ncol(crossed), 4)
  # None of the four resulting column names may contain the excluded column.
  has_excluded_name <- stringr::str_detect(colnames(crossed), "適用")
  expect_equal(has_excluded_name, rep(FALSE, 4))
})
test_that("column suffix argument with case insensitive", {
  # Self-join of mtcars: columns outside the join keys exist on both sides, so
  # the assertions expect them to come back with the requested suffixes.
  df13 <- left_join(
    mtcars,
    mtcars,
    by = c("gear" = "gear", "cyl" = "cyl"),
    suffix = c("1", "2"),
    ignorecase = TRUE
  )
  joined_names <- colnames(df13)
  expect_equal("mpg1" %in% joined_names, TRUE)
  expect_equal("mpg2" %in% joined_names, TRUE)
})
test_that("column suffix argument with case insensitive and empty source suffix", {
  # An empty suffix for the source side: the assertions expect the source
  # column to keep its plain name while the target copy gets "_1".
  df14 <- left_join(
    mtcars,
    mtcars,
    by = c("gear" = "gear", "cyl" = "cyl"),
    suffix = c("", "_1"),
    ignorecase = TRUE
  )
  joined_names <- colnames(df14)
  expect_equal("mpg" %in% joined_names, TRUE)
  expect_equal("mpg_1" %in% joined_names, TRUE)
})
test_that("column suffix argument with case insensitive and empty source suffix and exclude_selected_columns", {
  # mpg, am and vs are excluded from the target side; carb is not, so the
  # assertions expect it on both sides ("carb" and "carb_1").
  df15 <- left_join(
    mtcars,
    mtcars,
    by = c("gear" = "gear", "cyl" = "cyl"),
    suffix = c("", "_1"),
    ignorecase = TRUE,
    target_columns = c("mpg", "am", "vs"),
    exclude_target_columns = TRUE
  )
  joined_names <- colnames(df15)
  expect_equal("carb" %in% joined_names, TRUE)
  expect_equal("carb_1" %in% joined_names, TRUE)
})
test_that("group by target data frame case", {
  # The target is grouped by mpg, which is also one of the columns excluded
  # from the target; the source has mpg removed. The assertions expect mpg to
  # be absent from the result under either name.
  target <- group_by(mtcars, mpg)
  source_without_mpg <- mtcars %>% select(-mpg)
  df16 <- left_join(
    source_without_mpg,
    target,
    by = c("gear" = "gear", "cyl" = "cyl"),
    suffix = c("", "_1"),
    ignorecase = TRUE,
    target_columns = c("mpg", "am", "vs"),
    exclude_target_columns = TRUE
  )
  joined_names <- colnames(df16)
  expect_equal("mpg" %nin% joined_names, TRUE)
  expect_equal("mpg_1" %nin% joined_names, TRUE)
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.