#' @title Joining two data frames
#' @description Simple routine designed to find out (and report) whether two
#' data frames have unique combinations of values of common variables and
#' whether all rows in \code{x} has pair(s) in \code{y} and \emph{vice versa}
#' before performing full join.
#' @param x data frame
#' @param y data frame
#' @param xDescription optionally a description of what is \code{x} (for
#' purpose of providing informative warninig message)
#' @param yDescription optionally a description of what is \code{y} (for
#' purpose of providing informative warninig message)
#' @param xCheckAllMatchesY logical value (\code{TRUE} or \code{FALSE}) - should
#' function check whether all rows in \code{x} match some row in \code{y}
#' @param yCheckAllMatchesX logical value (\code{TRUE} or \code{FALSE}) - should
#' function check whether all rows in \code{y} match some row in \code{x}
#' @param rowsOrObservations "rows" or "observations" (abbreviations "r" and "o"
#' are accepted) - indicates how numbers of rows that do not match should be
#' reported: "rows" is suitable for data coming directly from input files
#' (adds 1 to reported numbers of rows to account that there is also a header
#' row) while "observations" for data that has already been processed
#' @return Data frame returned by \code{full_join(x, y)}.
#' @importFrom dplyr anti_join distinct full_join left_join mutate select
join_with_check <- function(x, y, xDescription = "x", yDescription = "y",
xCheckAllMatchesY = TRUE, yCheckAllMatchesX = TRUE,
rowsOrObservations = "rows") {
stopifnot(is.data.frame(x), is.data.frame(y))
stopifnot(is.character(xDescription), length(xDescription) == 1,
is.character(yDescription), length(yDescription) == 1,
is.logical(xCheckAllMatchesY), length(xCheckAllMatchesY) == 1,
xCheckAllMatchesY %in% c(TRUE, FALSE),
is.logical(yCheckAllMatchesX), length(yCheckAllMatchesX) == 1,
yCheckAllMatchesX %in% c(TRUE, FALSE),
is.character(rowsOrObservations), length(rowsOrObservations) == 1,
rowsOrObservations %in% c("r", "o", "rows", "observations"))
if (grepl("^r", rowsOrObservations)) {
doNotMatchMessage <- "Numery wierszy (w pliku), których dotyczy ten problem"
add <- 1
} else {
doNotMatchMessage <- "Numery obserwacji (wystąpień w zbiorze), których dotyczy ten problem"
add <- 0
}
joiningBy = intersect(names(x), names(y))
if (length(joiningBy) == 0) {
stop("Nie można przyłączyć ", yDescription, " do ", xDescription,
" bo w obu zbiorach nie ma wspólnych kolumn.", call. = FALSE)
}
cat("Łączenie zostanie dokonane na podstawie wartości zmiennej/zmiennych: '",
paste(joiningBy, collapse = "', '"), "'.\n", sep = "")
joiningByNotDistinctX = x %>%
select(joiningBy) %>%
distinct %>%
nrow() != nrow(x)
joiningByNotDistinctY = y %>%
select(joiningBy) %>%
distinct %>%
nrow() != nrow(y)
if (joiningByNotDistinctX & joiningByNotDistinctY) {
stop(paste0("Łączenie nie może zostać przeprowadzone, bo ",
ifelse(length(joiningBy) > 1, "kombinacje wartości ww. kolumn",
"wartości w ww. kolumnie"),
" nie są unikalne zarówno w ", xDescription, ", jak i w ",
yDescription, "."))
}
checkY <- suppressMessages(anti_join(y, x))
rnCheckY <- suppressMessages(left_join(y,
mutate(checkY, .notInX = TRUE)) %>%
{which(.$.notInX)})
nRowCheckY <- nrow(checkY)
rm(checkY)
if (nRowCheckY > 0 & yCheckAllMatchesX) {
warning(paste0("W ", yDescription, " występuje/ą ",
format(nRowCheckY, big.mark = "'"),
" wiersz(e/y), który/e nie ma(ją) odpowiednika w ",
xDescription, ".\n\n",
doNotMatchMessage, ":\n",
paste(strwrap(paste(rnCheckY + add, collapse = ", "),
prefix = " "),
collapse = "\n")),
call. = FALSE, immediate. = TRUE)
cat("\n")
}
checkX <- suppressMessages(anti_join(x, y))
rnCheckX <- suppressMessages(left_join(x,
mutate(checkX, .notInY = TRUE)) %>%
{which(.$.notInY)})
nRowCheckX <- nrow(checkX)
rm(checkX)
if (nRowCheckX > 0 & xCheckAllMatchesY) {
warning(paste0("W ", xDescription, " występuje/ą ",
format(nRowCheckX, big.mark = "'"),
" wiersz(e/y), który/e nie ma(ją) odpowiednika w ",
yDescription, ".\n\n",
doNotMatchMessage, ":\n",
paste(strwrap(paste(rnCheckX + add, collapse = ", "),
prefix = " "),
collapse = "\n")),
call. = FALSE, immediate. = TRUE)
cat("\n")
}
cat("Dane zostaną przyłączone do ",
format(nrow(x) - nRowCheckX, big.mark = "'"), " (spośród ",
format(nrow(x), big.mark = "'"), ") wierszy w ", xDescription, " i ",
format(nrow(y) - nRowCheckY, big.mark = "'"), " (spośród ",
format(nrow(y), big.mark = "'"), ") wierszy w ", yDescription, ".\n", sep = "")
suppressMessages(full_join(x, y)) %>%
return()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.