Nothing
#' Merge Multiple Data Frames
#'
#' This function merges data frames by a common column (i.e., matching variable).
#'
#' There are following requirements for merging multiple data frames: First, each
#' data frame has the same matching variable specified in the \code{by} argument.
#' Second, matching variable in the data frames have all the same class. Third,
#' there are no duplicated values in the matching variable in each data frame.
#' Fourth, there are no missing values in the matching variables. Last, there
#' are no duplicated variable names across the data frames except for the matching
#' variable.
#'
#' Note that it is possible to specify data frames matrices and/or in the argument
#' \code{...}. However, the function always returns a data frame.
#'
#' @param ... a sequence of matrices or data frames and/or matrices to be
#' merged to one.
#' @param by a character string indicating the column used for merging
#' (i.e., matching variable), see 'Details'.
#' @param all logical: if \code{TRUE} (default), then extra rows with \code{NA}s
#' will be added to the output for each row in a data frame that
#' has no matching row in another data frame.
#' @param check logical: if \code{TRUE} (default), argument specification is checked.
#' @param output logical: if \code{TRUE} (default), output is shown on the console.
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{df.duplicated}}, \code{\link{df.move}}, \code{\link{df.rbind}},
#' \code{\link{df.rename}}, \code{\link{df.sort}}, \code{\link{df.subset}}
#'
#' @return
#' Returns a merged data frame.
#'
#' @export
#'
#' @examples
#' adat <- data.frame(id = c(1, 2, 3),
#' x1 = c(7, 3, 8))
#'
#' bdat <- data.frame(id = c(1, 2),
#' x2 = c(5, 1))
#'
#' cdat <- data.frame(id = c(2, 3),
#' y3 = c(7, 9))
#'
#' ddat <- data.frame(id = 4,
#' y4 = 6)
#'
#' # Example 1: Merge 'adat', 'bdat', 'cdat', and 'ddat' by the variable 'id'
#' df.merge(adat, bdat, cdat, ddat, by = "id")
#'
#' # Example 2: Do not show output on the console
#' df.merge(adat, bdat, cdat, ddat, by = "id", output = FALSE)
#'
#' \dontrun{
#' #----------------------------------------------------------------------------
#' # Error messages
#'
#' adat <- data.frame(id = c(1, 2, 3),
#' x1 = c(7, 3, 8))
#'
#' bdat <- data.frame(code = c(1, 2, 3),
#' x2 = c(5, 1, 3))
#'
#' cdat <- data.frame(id = factor(c(1, 2, 3)),
#' x3 = c(5, 1, 3))
#'
#' ddat <- data.frame(id = c(1, 2, 2),
#' x2 = c(5, 1, 3))
#'
#' edat <- data.frame(id = c(1, NA, 3),
#' x2 = c(5, 1, 3))
#'
#' fdat <- data.frame(id = c(1, 2, 3),
#' x1 = c(5, 1, 3))
#'
#' # Error 1: Data frames do not have the same matching variable specified in 'by'.
#' df.merge(adat, bdat, by = "id")
#'
#' # Error 2: Matching variable in the data frames do not all have the same class.
#' df.merge(adat, cdat, by = "id")
#'
#' # Error 3: There are duplicated values in the matching variable specified in 'by'.
#' df.merge(adat, ddat, by = "id")
#'
#' # Error 4: There are missing values in the matching variable specified in 'by'.
#' df.merge(adat, edat, by = "id")
#'
#' # Error 5: There are duplicated variable names across data frames.
#' df.merge(adat, fdat, by = "id")
#' }
df.merge <- function(..., by, all = TRUE, check = TRUE, output = TRUE) {
#_____________________________________________________________________________
#
# Initial Check --------------------------------------------------------------
# List of data frames
df <- lapply(list(...), as.data.frame, stringsAsFactors = FALSE)
#_____________________________________________________________________________
#
# Input Check ----------------------------------------------------------------
# Check inputs
.check.input(logical = c("all", "output"), envir = environment(), input.check = check)
# Additional checks
if (isTRUE(check)) {
# Check input 'by'
if (isTRUE(missing(by))) { stop("Please specify a character string for the argument 'by'.", call. = FALSE) }
# Same matching variable in each data frame
if (isTRUE(any(vapply(df, function(y) !by %in% names(y), FUN.VALUE = logical(1L))))) { stop("Data frames do not have the same matching variable specified in 'by'.", call. = FALSE) }
# Same class
if (isTRUE(unique(vapply(df, function(y) class(y[, by]), FUN.VALUE = character(1L))) |> (\(y) length(y) != 1L & !all(y %in% c("integer", "numeric")))())) { stop("Matching variable in the data frames do not all have the same class.", call. = FALSE) }
# Missing values in the matching variable
if (isTRUE(any(vapply(df, function(y) any(is.na(y[, by])), FUN.VALUE = logical(1L))))) { stop("There are missing values in the matching variable specified in 'by'.", call. = FALSE) }
# Duplicated variable names across the data frames
if (isTRUE(anyDuplicated(unlist(lapply(df, names))[unlist(lapply(df, names)) != by]) != 0L)) { stop("There are duplicated variable names across data frames.", call. = FALSE) }
}
#_____________________________________________________________________________
#
# Main Function --------------------------------------------------------------
# Number of data frames
no.dfs <- length(df)
# Name of data frames
df.names <- as.character(match.call())[2L:(no.dfs + 1L)]
# Number of variables in each data frame
no.var <- vapply(df, ncol, FUN.VALUE = 1L)
# Number of cases in each data frame
no.cases <- vapply(df, nrow, FUN.VALUE = 1L)
# Matching variable
var.match <- lapply(df, function(y) y[, by])
# Match data frames
match.cases <- Reduce(function(xx, yy) misty::df.rbind(xx, yy), x = lapply(var.match, function(xx) data.frame(matrix(xx, ncol = length(xx), dimnames = list(NULL, xx)), stringsAsFactors = FALSE)))
# Number of pattern
match.cases.table <- table(apply(ifelse(is.na(match.cases), 0L, 1L), 2L, paste, collapse = " ")) |>
(\(y) y[rev(order(as.numeric(gsub(" ", "", names(y)))))])()
match.info <- data.frame(n = unname(unclass(match.cases.table)),
matrix(unlist(strsplit(names(match.cases.table), " ")), byrow = TRUE, ncol = no.dfs,
dimnames = list(NULL, df.names)),
stringsAsFactors = FALSE)
# Match data frames and sort by matching variable
object <- Reduce(function(xx, yy) merge(xx, yy, by = by, all = all), x = df) |>
(\(y) y[order(y[, by]), ])()
#_____________________________________________________________________________
#
# Print Output ---------------------------------------------------------------
if (isTRUE(output)) {
print.object <- rbind(c("No. of data frames", no.dfs, rep("", times = no.dfs - 1L)),
rep("", times = no.dfs + 1L),
cbind(c("", "No. of variables", "No. of cases"), rbind(df.names, no.var, no.cases)))
print.object[, 1L] <- paste(" ", print.object[, 1L])
# Format
print.object[, 1L] <- format(print.object[, 1L], justify = "left")
print.object[, -1L] <- format(print.object[, -1], justify = "right")
match.info <- format(data.frame(rbind(c("n", df.names), match.info), stringsAsFactors = FALSE), justify = "right")
match.info[, 1L] <- paste(" ", match.info[, 1L])
# Print output
cat(" Merge Multiple Data Frames\n\n")
write.table(print.object, quote = FALSE, row.names = FALSE, col.names = FALSE)
cat("\n Pattern of matching cases across data frames\n")
write.table(match.info, quote = FALSE, row.names = FALSE, col.names = FALSE)
cat("\n")
}
#_____________________________________________________________________________
#
# Output ---------------------------------------------------------------------
return(object)
}
#_______________________________________________________________________________
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.