xmerge: Extended merge with diagnostics

Description Usage Arguments Author(s) See Also Examples

View source: R/fun.R

Description

Extended merge with diagnostics. This is a modification of merge that combines consistent variables even if not specified in 'by' to keep a common name.

Usage

xmerge(x, y, by, all = TRUE, dropdots = FALSE, verbose = FALSE, debug = TRUE, from = FALSE, ...)

Arguments

x, y

data frames, or objects to be coerced to one

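by

names of the columns to merge on; passed to merge

all

logical, as in merge; the default TRUE keeps rows that occur in only one of the two data frames

dropdots

logical; if TRUE the '.x' and '.y' versions of every variable common to both data frames are dropped, otherwise only those of variables found to be consistent

verbose

logical; if TRUE diagnostics are printed while merging

debug

currently not used by the function

from

logical; if TRUE a column '.F' containing "x" or "y" is added to x and y before merging

...

further arguments passed to merge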

Author(s)

Georges Monette

See Also

merge

Examples

##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
## xmerge: extended merge with diagnostics.
##
## A modification of merge() that combines consistent variables even if they
## are not specified in 'by', so that they keep a common name.
##
## Arguments:
##   x, y      data frames to merge.
##   by        names of the columns to merge on (passed to merge()).
##   all       passed to merge(); the default TRUE keeps unmatched rows from
##             both inputs.
##   dropdots  if TRUE, drop the '.x'/'.y' versions of every shared variable,
##             not only of the consistent ones.
##   verbose   if TRUE, print diagnostics. The first diagnostic calls atotal()
##             and tab(), which are not defined in this function and must be
##             available in the calling environment.
##   debug     accepted for compatibility; not used.
##   from      if TRUE, add a column '.F' holding "x" or "y" to each input
##             before merging; it is then treated like any other shared
##             variable.
##   ...       further arguments passed to merge().
##
## Value: a data frame. For each variable shared by x and y (other than 'by')
## a combined column is created that takes the value from y and falls back to
## x where y is missing. Columns are ordered by first appearance in x, then y,
## with any remaining '.x'/'.y' versions placed right after their root.
xmerge <- function(x, y, by, all = TRUE, dropdots = FALSE, verbose = FALSE,
                   debug = TRUE, from = FALSE, ...) {
    ## Coalesce two parallel columns: start from 'b', fill its NAs from 'a'.
    ## If either is a factor the result is a factor over the union of levels.
    xm <- function(a, b, tofac = is.factor(a) || is.factor(b)) {
        if (tofac) {
            levs <- union(levels(b), levels(a))
            a <- as.character(a)
            b <- as.character(b)
        }
        fill <- is.na(b)
        b[fill] <- a[fill]
        if (tofac) {
            levs <- union(levs, unique(b))
            b <- factor(b, levels = levs)
        }
        b
    }
    ## Elementwise agreement of two columns; a comparison involving NA counts
    ## as agreement (a missing value cannot contradict anything).
    consistent <- function(a, b) {
        if (is.factor(a)) a <- as.character(a)
        if (is.factor(b)) b <- as.character(b)
        differs <- a != b
        differs[is.na(differs)] <- FALSE
        !differs
    }

    if (from) {
        x[[".F"]] <- rep("x", nrow(x))
        y[[".F"]] <- rep("y", nrow(y))
    }

    ## Column order of the result is based on the names before the row
    ## markers FromX / FromY are added, so the markers never reach the output.
    nams <- union(names(x), names(y))

    if (verbose) {
        xby <- x[, by, drop = FALSE]
        yby <- y[, by, drop = FALSE]
        xby$.file <- rep("x", nrow(xby))
        yby$.file <- rep("y", nrow(yby))
        cat("\nby in x and y:\n")
        print(atotal(do.call("tab", rbind(xby, yby)), sum, "Total"))
        print(c(DimX = dim(x), DimY = dim(y)))
        cat("\nVariables in both:\n")
        print(intersect(names(x), names(y)))
        cat("\nVariables in X only:\n")
        print(setdiff(names(x), names(y)))
        cat("\nVariables in Y only:\n")
        print(setdiff(names(y), names(x)))
    }

    ## Row markers used to tell where each merged row came from.
    ## seq_len() keeps this valid for zero-row inputs.
    x$FromX <- seq_len(nrow(x))
    y$FromY <- seq_len(nrow(y))

    ## Honour the caller's 'all' instead of forcing a full outer merge.
    mm <- merge(x, y, by, all = all, ...)

    ## Variables present in both inputs but not merged on: merge() has
    ## renamed these to <name>.x and <name>.y.
    newroots <- setdiff(intersect(names(x), names(y)), by)

    if (verbose) {
        cat("\nDimension of merged data frames:\n")
        print(c(DimMerge = dim(mm)))
        cat("\nNames of variables in merged data frame:\n")
        print(names(mm))
        in_x <- !is.na(mm$FromX)
        in_y <- !is.na(mm$FromY)
        cat("\nRows in:\n")
        print(c(Both = sum(in_x & in_y), Xonly = sum(in_x & !in_y),
            Yonly = sum(!in_x & in_y)))
        cat("\nThe following variables occur in both data frames:\n")
        print(newroots)
    }

    drop.list <- character(0)
    for (nn in newroots) {
        nn.x <- paste0(nn, ".x")
        nn.y <- paste0(nn, ".y")
        mm[[nn]] <- xm(mm[[nn.x]], mm[[nn.y]])
        same <- consistent(mm[[nn.x]], mm[[nn.y]])
        if (all(same)) {
            if (verbose)
                cat("Variable ", nn, " is consistent\n")
            drop.list <- c(drop.list, nn)
        }
        else if (verbose) {
            cat("Variable ", nn, " is inconsistent in the following rows:\n")
            ## Show the rows that disagree (the original showed the
            ## agreeing rows by mistake).
            print(mm[!same, c(by, nn.x, nn.y, nn)])
        }
    }
    if (dropdots)
        drop.list <- newroots
    drop <- if (length(drop.list) > 0) {
        c(paste0(drop.list, ".x"), paste0(drop.list, ".y"))
    }
    else character(0)
    if (verbose) {
        cat("\nDrop list:\n")
        print(drop)
    }
    if (length(drop) > 0) {
        ## Select by name so a missing column cannot produce an NA index,
        ## and keep a data frame even if a single column remains.
        mm <- mm[, setdiff(names(mm), drop), drop = FALSE]
    }

    ## Order: each original name, immediately followed by its .x and .y
    ## versions when those were kept.
    pos <- seq_along(nams)
    onams <- c(pos, pos + 0.1, pos + 0.2)
    names(onams) <- c(nams, paste0(nams, ".x"), paste0(nams, ".y"))
    keep <- intersect(names(sort(onams)), names(mm))
    mm[, keep, drop = FALSE]
}

gmonette/spida documentation built on May 17, 2019, 7:25 a.m.