Nothing
# original function mockup done by Andy Hanson; re-envisioned by EPH starting 3/20/17
#' Compare two data.frames and report differences
#'
#' Compare two data.frames and report any differences between them,
#' much like SAS's \code{PROC COMPARE} procedure.
#'
#' @param x,y A data.frame to compare
#' @param by,by.x,by.y Which variables are IDs to merge the two data.frames?
#' If set to \code{"row.names"}, merging will occur over the row.names.
#' If set to \code{NULL} (default), merging will occur row-by-row.
#' @param control A list of control parameters from \code{\link{comparedf.control}}.
#' @param ... Other arguments, passed to \code{\link{comparedf.control}} when appropriate.
#' @examples
#'
#' df1 <- data.frame(id = paste0("person", 1:3), a = c("a", "b", "c"),
#' b = c(1, 3, 4), c = c("f", "e", "d"),
#' row.names = paste0("rn", 1:3), stringsAsFactors = FALSE)
#' df2 <- data.frame(id = paste0("person", 3:1), a = c("c", "b", "a"),
#' b = c(1, 3, 4), d = paste0("rn", 1:3),
#' row.names = paste0("rn", c(1,3,2)), stringsAsFactors = FALSE)
#' summary(comparedf(df1, df2))
#' summary(comparedf(df1, df2, by = "id"))
#' summary(comparedf(df1, df2, by = "row.names"))
#' @author Ethan Heinzen, adapted from code from Andrew Hanson
#' @seealso \code{\link{summary.comparedf}}, \code{\link{comparedf.control}},
#' \code{\link{diffs}}, \code{\link{n.diffs}}, \code{\link{n.diff.obs}}
#' @name comparedf
NULL
#> NULL
#' @rdname comparedf
#' @export
comparedf <- function(x, y, by = NULL, by.x = by, by.y = by, control = NULL, ...) {
control <- c(list(...), control)
control <- do.call("comparedf.control", control[!duplicated(names(control))])
xname <- paste0(deparse(substitute(x)), collapse = "")
yname <- paste0(deparse(substitute(y)), collapse = "")
if(!is.data.frame(x) || !is.data.frame(y))
{
stop("Both 'x' and 'y' must be data.frames")
}
if(any(c("..row.x..", "..row.y..", "..row.names..") %in% c(colnames(x), colnames(y))))
{
stop("'..row.x..', '..row.y..', and '..row.names..' are reserved colnames in this function.")
}
if(is.null(by) && is.null(by.x) && is.null(by.y))
{
# user didn't supply any by-variables, so we'll merge by row
by.x <- by.y <- "row.names"
byrow <- TRUE
} else if(is.null(by.x) || is.null(by.y))
{
stop("Either 'by' or both of 'by.x' and 'by.y' must be specified")
} else byrow <- FALSE
if(any(by.x %nin% c("row.names", colnames(x))) || any(by.y %nin% c("row.names", colnames(y))))
{
stop("One or more of 'by.x' doesn't match colnames(x) or 'by.y' doesn't match colnames(y).")
}
#### data frame summary ####
frame.summary <- data.frame(
version = c("x", "y"),
arg = c(xname, yname),
ncol = c(ncol(x), ncol(y)),
nrow = c(nrow(x), nrow(y)), stringsAsFactors = FALSE
)
tmp.attrs <- list(attributes(x), attributes(y))
if("row.names" %in% by.x)
{
x[["..row.names.."]] <- if(byrow) seq_len(nrow(x)) else row.names(x)
by.x[by.x == "row.names"] <- "..row.names.."
}
if("row.names" %in% by.y)
{
y[["..row.names.."]] <- if(byrow) seq_len(nrow(y)) else row.names(y)
by.y[by.y == "row.names"] <- "..row.names.."
}
frame.summary$by <- set_attr(list(by.x, by.y), "byrow", byrow)
frame.summary$attrs <- tmp.attrs
#### tweak the column names ####
tcn <- tweakcolnames(by.x, by.y, colnames(x), colnames(y), control)
rm(by.x, by.y) # just to make sure we don't try to use those anymore
by <- tcn$by
#### now merge the things together ####
together <- merge(cbind(stats::setNames(x, tcn$cn.x), ..row.x.. = seq_len(nrow(x))),
cbind(stats::setNames(y, tcn$cn.y), ..row.y.. = seq_len(nrow(y))), by = by, all = TRUE)
both <- together[!is.na(together[["..row.x.."]]) & !is.na(together[["..row.y.."]]), , drop = FALSE]
#### get the unshared observations ####
getunique <- function(whch, whch2, by. = by)
{
tmp <- together[is.na(together[[whch2]]), c(by., whch), drop = FALSE]
colnames(tmp)[colnames(tmp) == whch] <- "observation"
tmp
}
frame.summary$unique <- list(getunique("..row.x..", "..row.y.."), getunique("..row.y..", "..row.x.."))
frame.summary$n.shared <- rep(nrow(both), times = 2)
#### Make the main object in this function ####
mkdf <- function(df, cn.new)
{
df2 <- data.frame(tmp = cn.new, var = colnames(df), pos = seq_along(df), stringsAsFactors = FALSE)
df2$class <- unname(lapply(df, class))
df2
}
vars.summary <- merge(mkdf(x, tcn$cn.x), mkdf(y, tcn$cn.y), by = "tmp", all = TRUE)
ord <- order(is.na(vars.summary$var.x), is.na(vars.summary$var.y), vars.summary$pos.x, vars.summary$pos.y, na.last = TRUE)
vars.summary <- vars.summary[ord, , drop = FALSE]
row.names(vars.summary) <- NULL
vars.summary$class.x <- lapply(vars.summary$class.x, cleanup.null.na)
vars.summary$class.y <- lapply(vars.summary$class.y, cleanup.null.na)
vars.summary$values <- lapply(seq_len(nrow(vars.summary)), compare_values, v = vars.summary, df = both, byvars = by, contr = control)
vars.summary$attrs <- lapply(seq_len(nrow(vars.summary)), compare_attrs, v = vars.summary, x_ = x, y_ = y)
vars.summary$tmp <- NULL
structure(list(frame.summary = structure(frame.summary, class = c("comparedf.frame.summary", "data.frame")),
vars.summary = structure(vars.summary, class = c("comparedf.vars.summary", "data.frame")),
control = control, Call = match.call()), class = "comparedf")
}
#' @rdname comparedf
#' @export
print.comparedf <- function(x, ...)
{
cat("Compare Object\n\n")
cat("Function Call: \n")
print(x$Call)
cat("\n")
cat("Shared: ", sum(idx_var_sum(x, "nonby.vars.shared")), " non-by variables and ", x$frame.summary$n.shared[1], " observations.\n", sep = "")
cat("Not shared: ", sum(idx_var_sum(x, "vars.not.shared")), " variables and ",
n.diff.obs(x), " observations.\n", sep = "")
cat("\n")
cat("Differences found in ", sum(idx_var_sum(x, "differences.found")), "/", sum(idx_var_sum(x, "vars.compared")), " variables compared.\n", sep = "")
cat(sum(idx_var_sum(x, "non.identical.attributes")), " variables compared have non-identical attributes.\n", sep = "")
invisible(x)
}
#' @export
print.comparedf.vars.summary <- function(x, ...)
{
orig <- x
f <- function(elt, txt1, txt2)
{
if(is.data.frame(elt)) paste0(nrow(elt), txt1) else if(is.null(elt)) txt2 else elt
}
x$values <- vapply(x$values, f, character(1), txt1 = " differences", txt2 = "Not compared")
x$attrs <- vapply(x$attrs, f, character(1), txt1 = " attributes", txt2 = "0 attributes")
NextMethod()
invisible(orig)
}
#' @export
print.comparedf.frame.summary <- function(x, ...)
{
orig <- x
f <- function(elt, txt1, txt2)
{
if(is.data.frame(elt)) paste0(nrow(elt), txt1) else if(is.list(elt)) paste0(length(elt), txt1) else if(is.null(elt)) txt2 else elt
}
x$attrs <- vapply(x$attrs, f, character(1), txt1 = " attributes", txt2 = "0 attributes")
x$unique <- vapply(x$unique, f, character(1), txt1 = " unique obs", txt2 = "")
NextMethod()
invisible(orig)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.