Description Usage Arguments Author(s) See Also Examples
Extended merge with diagnostics.
This is a modification of merge
that combines consistent variables
even if not specified in 'by' to keep a common name.
1 |
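x, y | data frames, or objects to be coerced to one
by | names of the columns, present in both x and y, on which to merge; must be supplied (there is no default)
all | logical, default TRUE, in which case rows without a match in the other data frame are kept (as with 'all' in merge)
dropdots | logical; if TRUE, the '.x' and '.y' versions of every variable common to x and y are dropped from the result, not only those of variables found to be consistent
verbose | logical; if TRUE, diagnostics on the merge are printed (dimensions, variable names, row counts, consistency of common variables)
debug | logical; not used by the function definition shown under Examples
from | logical; if TRUE, a column '.F' holding "x" or "y" is added to x and y respectively before merging
... | further arguments passed on to merge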
Georges Monette
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
## Extended merge with diagnostics.
##
## Merges `x` and `y` on the `by` columns. Every other variable that occurs in
## both data frames is also combined into a single column under its common
## name: the value from `y` is used, falling back to the value from `x` where
## the `y` value is NA. The suffixed `.x`/`.y` copies are kept only for
## variables whose non-missing values disagree in at least one merged row
## (or are dropped for every common variable when `dropdots` is TRUE).
##
## Arguments:
##   x, y      data frames to merge.
##   by        names of the columns (present in both) to merge on; required.
##   all       passed to merge() as `all`; TRUE keeps unmatched rows.
##   dropdots  if TRUE, drop the `.x`/`.y` copies of all common variables,
##             consistent or not.
##   verbose   if TRUE, print diagnostics. The verbose path calls `atotal`
##             and `tab`, which are not defined in this block (presumably
##             supplied by the enclosing package -- confirm before use).
##   debug     accepted for compatibility; not used.
##   from      if TRUE, add a column `.F` ("x" or "y") to each input before
##             merging.
##   ...       further arguments passed to merge(). The code assumes merge's
##             default ".x"/".y" suffixes.
##
## Value: a data frame holding the `by` columns, the combined columns and any
## retained `.x`/`.y` columns, ordered by first appearance of each variable
## in `x` then `y`, each variable followed by its `.x` and `.y` versions. The
## row-index helper columns added internally are not returned.
function(x, y, by, all = TRUE, dropdots = FALSE, verbose = FALSE,
         debug = TRUE, from = FALSE, ...) {
  ## This is a modification of merge that combines consistent variables
  ## even if not specified in 'by' to keep a common name.
  ## -- Some errors fixed Apr 24, 2007

  ## Coalesce two parallel vectors: take `b`, filling its NAs from `a`.
  ## If either input is a factor the result is a factor whose levels cover
  ## the levels of both inputs plus any value present in the result.
  xm <- function(a, b, tofac = is.factor(a) || is.factor(b)) {
    if (tofac) {
      levs <- union(levels(b), levels(a))
      a <- as.character(a)
      b <- as.character(b)
    }
    b[is.na(b)] <- a[is.na(b)]
    if (tofac) {
      levs <- union(levs, unique(b))
      b <- factor(b, levels = levs)
    }
    b
  }

  ## Elementwise agreement of two vectors; a pair involving an NA counts as
  ## consistent because a missing value cannot contradict anything.
  consistent <- function(a, b) {
    if (is.factor(a)) {
      a <- as.character(a)
    }
    if (is.factor(b)) {
      b <- as.character(b)
    }
    differs <- a != b
    differs[is.na(differs)] <- FALSE
    !differs
  }

  if (from) {
    x[[".F"]] <- rep("x", nrow(x))
    y[[".F"]] <- rep("y", nrow(y))
  }

  ## Variable order of the result is fixed here, before helper columns are
  ## added, so the helpers never reach the returned data frame.
  nams <- union(names(x), names(y))

  if (verbose) {
    ## Cross-tabulate the key columns by source; only needed for diagnostics.
    xby <- x[, by, drop = FALSE]
    yby <- y[, by, drop = FALSE]
    xby$.file <- rep("x", nrow(xby))
    yby$.file <- rep("y", nrow(yby))
    by2 <- rbind(xby, yby)
    cat("\nby in x and y:\n")
    print(atotal(do.call("tab", by2), sum, "Total"))
    print(c(DimX = dim(x), DimY = dim(y)))
    cat("\nVariables in both:\n")
    print(intersect(names(x), names(y)))
    cat("\nVariables in X only:\n")
    print(setdiff(names(x), names(y)))
    cat("\nVariables in Y only:\n")
    print(setdiff(names(y), names(x)))
  }

  ## Row indices let us tell, after the merge, which source(s) a row came
  ## from. seq_len() keeps this valid for zero-row inputs.
  x$FromX <- seq_len(nrow(x))
  y$FromY <- seq_len(nrow(y))
  mm <- merge(x, y, by, all = all, ...)

  ## Non-key variables present in both inputs; merge() has split each of
  ## these into a `.x` and a `.y` column.
  newroots <- setdiff(intersect(names(x), names(y)), by)

  if (verbose) {
    cat("\nDimension of merged data frames:\n")
    print(c(DimMerge = dim(mm)))
    cat("\nNames of variables in merged data frame:\n")
    print(names(mm))
    FromBoth <- !is.na(mm$FromX) & !is.na(mm$FromY)
    Xonly <- !is.na(mm$FromX) & is.na(mm$FromY)
    Yonly <- is.na(mm$FromX) & !is.na(mm$FromY)
    cat("\nRows in:\n")
    print(c(Both = sum(FromBoth), Xonly = sum(Xonly), Yonly = sum(Yonly)))
    cat("\nThe following variables occur in both data frames:\n")
    print(newroots)
  }

  drop.list <- character(0)
  for (nn in newroots) {
    nn.x <- paste0(nn, ".x")
    nn.y <- paste0(nn, ".y")
    mm[[nn]] <- xm(mm[[nn.x]], mm[[nn.y]])
    same <- consistent(mm[[nn.x]], mm[[nn.y]])
    ## `all` is also an argument of this function; R still resolves the call
    ## below to base::all because the argument is not a function.
    if (all(same)) {
      if (verbose) {
        cat("Variable ", nn, " is consistent\n")
      }
      drop.list <- c(drop.list, nn)
    } else if (verbose) {
      cat("Variable ", nn, " is inconsistent in the following rows:\n")
      ## Show the rows that disagree, i.e. those where `same` is FALSE.
      print(mm[!same, c(by, nn.x, nn.y, nn)])
    }
  }

  if (dropdots) {
    drop.list <- newroots
  }

  ## Guard on length: paste0(character(0), ".x") would yield ".x", not
  ## character(0).
  drop <- if (length(drop.list) > 0) {
    c(paste0(drop.list, ".x"), paste0(drop.list, ".y"))
  } else {
    character(0)
  }

  if (verbose) {
    cat("\nDrop list:\n")
    print(drop)
  }
  if (length(drop) > 0) {
    if (verbose) {
      print(c(drop = drop))
    }
    mm <- mm[, !(names(mm) %in% drop), drop = FALSE]
  }

  ## Order columns as: each original variable, then its .x, then its .y.
  onams <- seq_along(nams)
  onams <- c(onams, onams + 0.1, onams + 0.2)
  names(onams) <- c(nams, paste0(nams, ".x"), paste0(nams, ".y"))
  keep <- intersect(names(sort(onams)), names(mm))

  ## drop = FALSE so a single surviving column is still a data frame.
  mm[, keep, drop = FALSE]
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.