inst/doc/comparedf.R

## ----include = FALSE--------------------------------------------------------------------------------------------------
knitr::opts_chunk$set(eval = TRUE, message = FALSE, results = 'asis', comment='')
options(width = 120)

## ----results = 'asis'-------------------------------------------------------------------------------------------------
library(arsenal)

## ---------------------------------------------------------------------------------------------------------------------
df1 <- data.frame(id = paste0("person", 1:3),
                  a = c("a", "b", "c"),
                  b = c(1, 3, 4),
                  c = c("f", "e", "d"),
                  row.names = paste0("rn", 1:3),
                  stringsAsFactors = FALSE)
df2 <- data.frame(id = paste0("person", 3:1),
                  a = c("c", "b", "a"),
                  b = c(1, 3, 4),
                  d = paste0("rn", 1:3),
                  row.names = paste0("rn", c(1,3,2)),
                  stringsAsFactors = FALSE)

## ----results='markup'-------------------------------------------------------------------------------------------------
comparedf(df1, df2)

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(df1, df2))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(df1, df2, by = "id"))

## ---------------------------------------------------------------------------------------------------------------------
data(mockstudy)
mockstudy2 <- muck_up_mockstudy()

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case"))

## ----eval = FALSE-----------------------------------------------------------------------------------------------------
#  summary(comparedf(mockstudy, mockstudy2, by = "case", control = comparedf.control(tol.vars = "case")))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case", tol.vars = "case"))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case") # dots=underscores=spaces, ignore case
))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c(arm = "Arm", fu.stat = "fu stat", fu.time = "fu_time")
))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case"), # dots=underscores=spaces, ignore case
                int.as.num = TRUE            # compare integers and numerics
))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case"), # dots=underscores=spaces, ignore case
                int.as.num = TRUE,           # compare integers and numerics
                tol.num.val = 10             # allow absolute differences <= 10
))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case"), # dots=underscores=spaces, ignore case
                int.as.num = TRUE,           # compare integers and numerics
                tol.num.val = 10,            # allow absolute differences <= 10
                tol.factor = "labels"        # match only factor labels
))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case"), # dots=underscores=spaces, ignore case
                int.as.num = TRUE,           # compare integers and numerics
                tol.num.val = 10,            # allow absolute differences <= 10
                tol.factor = "labels",       # match only factor labels
                factor.as.char = TRUE        # compare factors and characters
))

## ---------------------------------------------------------------------------------------------------------------------
summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case"), # dots=underscores=spaces, ignore case
                int.as.num = TRUE,           # compare integers and numerics
                tol.num.val = 10,            # allow absolute differences <= 10
                tol.factor = "labels",       # match only factor labels
                factor.as.char = TRUE,       # compare factors and characters
                tol.char = "case"            # ignore case in character vectors
))

## ----eval=FALSE-------------------------------------------------------------------------------------------------------
#  comparedf.control(tol.char = list(
#    "none",      # the default
#    x1 = "case", # be case-insensitive for the variable "x1"
#    x2 = function(x, y) tol.NA(x, y, x != y | y == "NA") # a custom-defined tolerance
#  ))

## ----results = 'markup'-----------------------------------------------------------------------------------------------
my.tol <- function(x, y, tol)
{
  tol.NA(x, y, x > y)
}

date.df1 <- data.frame(dt = as.Date(c("2017-09-07", "2017-08-08", "2017-07-09", NA)))
date.df2 <- data.frame(dt = as.Date(c("2017-10-01", "2017-08-08", "2017-07-10", "2017-01-01")))
n.diffs(comparedf(date.df1, date.df2)) # default finds any differences
n.diffs(comparedf(date.df1, date.df2, tol.date = my.tol)) # our function identifies only the NA as different...
n.diffs(comparedf(date.df2, date.df1, tol.date = my.tol)) # ... until we change the argument order


## ---------------------------------------------------------------------------------------------------------------------
tol.minus9 <- function(x, y, tol)
{
  idx1 <- is.na(x) & !is.na(y) & y == -9
  idx2 <- tol.num.absolute(x, y, tol) # find other absolute differences
  return(!idx1 & idx2)
}

summary(comparedf(mockstudy, mockstudy2, by = "case",
                tol.vars = c("._ ", "case"), # dots=underscores=spaces, ignore case
                int.as.num = TRUE,           # compare integers and numerics
                tol.num.val = 10,            # allow absolute differences <= 10
                tol.factor = "labels",       # match only factor labels
                factor.as.char = TRUE,       # compare factors and characters
                tol.char = "case",           # ignore case in character vectors
                tol.num = tol.minus9         # ignore NA -> -9 changes
))

## ----results = 'markup'-----------------------------------------------------------------------------------------------
cmp <- comparedf(mockstudy, mockstudy2, by = "case", tol.vars = c("._ ", "case"), int.as.num = TRUE)
n.diffs(cmp)
head(diffs(cmp))

## ----results = 'markup'-----------------------------------------------------------------------------------------------
diffs(cmp, by.var = TRUE)

## ----results = 'markup'-----------------------------------------------------------------------------------------------
diffs(cmp, vars = c("ps", "ast"), by.var = TRUE)
diffs(cmp, vars = c("ps", "ast"))

## ---------------------------------------------------------------------------------------------------------------------
obj <- comparedf(mockstudy, mockstudy2, by = "case")

## ----results='markup'-------------------------------------------------------------------------------------------------
print(obj$frame.summary)

## ----results='markup'-------------------------------------------------------------------------------------------------
print(obj$vars.summary)

Try the arsenal package in your browser

Any scripts or data that you put into this service are public.

arsenal documentation built on June 5, 2021, 1:06 a.m.