R/coding.R

Defines functions assert case case.double case.integer case.character must L LL collect collect.list collect.matrix match_parts set sql sql.data.frame sql.formula

Documented in assert case case.character case.double case.integer collect collect.list collect.matrix L LL match_parts must set sql sql.data.frame sql.formula

assert <- function(cond, orig, msg, quiet = FALSE, ...) {
  if (is.character(cond)) {
    if (missing(msg) || !length(msg))
      msg <- sprintf("assertion '%s' failed for '%%s'", cond)
    cond <- match.fun(cond)(orig, ...)
  } else if (is.function(cond)) {
    if (missing(msg) || !length(msg))
      msg <- sprintf("assertion '%s' failed for '%%s'",
        deparse(match.call()$cond))
    cond <- cond(orig, ...)
  }
  if (!anyNA(cond) && all(cond))
    return(if (is.na(quiet))
        cond
      else if (quiet)
        character()
      else
        TRUE)
  cond[is.na(cond)] <- FALSE
  if (missing(msg) || !length(msg)) {
    msg <- paste0("assertion '", deparse(match.call()$cond), "' failed")
    msg <- if (missing(orig) || !length(orig))
        sprintf(paste0(msg, " in %i of %i cases"), sum(!cond), length(cond))
      else
        paste0(msg, " for ", orig[!cond])
  } else if (missing(orig) || !length(orig)) {
    msg <- sprintf(paste0(msg, " in %i of %i cases"), sum(!cond), length(cond))
  } else {
    msg <- sprintf(msg, orig[!cond])
  }
  if (is.na(quiet)) {
    warning(paste0(msg, collapse = "\n"), call. = FALSE)
    cond
  } else if (quiet) {
    msg
  } else {
    stop(paste0(msg, collapse = "\n"), call. = FALSE)
  }
}

case <- function(EXPR, ...) UseMethod("case")

case.double <- function(EXPR, ...) {
  case.integer(EXPR = as.integer(EXPR), ...)
}

case.integer <- function(EXPR, ...) {
  switch(EXPR = min(EXPR, nargs() - 2L) + 1L, ...)
}

case.character <- function(EXPR, ...) {
  switch(EXPR = EXPR, ..., stop("unmatched 'EXPR' value"))
}

must <- function(expr, msg = NULL, ..., domain = NULL) {
  # For some reason, using stop() directly results in errors that cannot be
  # caught with tryCatch() any more.
  tryCatch(expr = expr, warning = function(w) stop(if (length(msg))
    msg
  else
    conditionMessage(w), call. = FALSE, domain = domain), ...)
}

L <- function(x, .wanted = 1L, .msg = "need object '%s' of length %i",
    .domain = NULL) {
  if (identical(length(x), .wanted))
    return(x)
  stop(sprintf(.msg, deparse(match.call()$x), .wanted), call. = FALSE,
    domain = .domain)
}

LL <- function(..., .wanted = 1L, .msg = "need object '%s' of length %i",
    .domain = NULL) {
  arg.names <- as.character(match.call())[-1L][seq_along(items <- list(...))]
  invisible(mapply(FUN = function(item, name) {
    if (!identical(length(item), .wanted))
      stop(sprintf(.msg, name, .wanted), call. = FALSE, domain = .domain)
    name
  }, item = items, name = arg.names, USE.NAMES = FALSE))
}

setGeneric("listing", function(x, ...) standardGeneric("listing"))

setMethod("listing", "numeric", function(x, ...) {
  x <- signif(x, getOption("digits"))
  storage.mode(x) <- "character"
  listing(x, ...)
}, sealed = SEALED)

setMethod("listing", "factor", function(x, ...) {
  y <- as.character(x)
  names(y) <- names(x)
  listing(y, ...)
}, sealed = SEALED)

setMethod("listing", "ANY", function(x, ...) {
  listing(c(unclass(x)), ...)
}, sealed = SEALED)

setMethod("listing", "list", function(x, ...) {
  listing(unlist(x), ...)
}, sealed = SEALED)

setMethod("listing", "character", function(x, header = NULL, footer = NULL,
    prepend = FALSE, style = "list", collapse = if (style == "sentence")
      ""
    else
      "\n", force.numbers = FALSE,
    last.sep = c("and", "both", "comma", "or", "two"), hf.collapse = collapse,
    ...) {

  spaces <- function(x) {
    if (is.character(x))
      x
    else if (is.numeric(x))
      sprintf(sprintf("%%%is", x), "")
    else if (x)
      "\t"
    else
      ""
  }

  do_prepend <- function(x, prepend) paste0(spaces(L(prepend)), x)

  sentence <- function(x, last, prepend) {
    get_last_sep <- function(last) case(last, and = " and ", comma = ", ",
      both = ", and ", or = " or ", two = ", or ")
    if (is.character(prepend))
      x <- sprintf(prepend, names(x), x)
    else if (prepend)
      x <- paste(names(x), x, sep = ": ")
    case(n <- length(x),
      stop("empty 'x' argument in 'sentence' mode"),
      x,
      paste0(x, collapse = get_last_sep(last)),
      paste(paste0(x[-n], collapse = ", "), x[n], sep = get_last_sep(last))
    )
  }

  to_m4 <- function(x, single) {
    is_macro <- function(x) grepl("^[A-Za-z_]\\w*$", x, FALSE, TRUE)
    do_quote <- function(x, single) {
      x <- chartr("`", "'", x)
      if (single)
        sprintf("`%s'", gsub("'", "''`", x, FALSE, FALSE, TRUE))
      else
        sprintf("``%s''", gsub("'", "'''``", x, FALSE, FALSE, TRUE))
    }
    if (any(bad <- !is_macro(y <- names(x))))
      warning(sprintf("not a valid m4 macro string: '%s'", y[bad][1L]))
    sprintf("define(%s, %s)dnl", do_quote(y, TRUE), do_quote(x, single))
  }

  LL(style, collapse, force.numbers, hf.collapse)
  if ((is.null(names(x)) && style != "insert") || force.numbers)
    names(x) <- seq_along(x)
  if (inherits(style, "AsIs"))
    x <- structure(.Data = names(x), names = x)
  switch(style,
    table =,
    list = x <- do_prepend(formatDL(x = x, style = style, ...), prepend),
    sentence = x <- sentence(x, match.arg(last.sep), prepend),
    m4 = x <- to_m4(x, FALSE),
    M4 = x <- to_m4(x, TRUE),
    x <- do_prepend(sprintf(style, names(x), x), prepend)
  )
  if (identical(collapse, hf.collapse))
    paste0(c(header, x, footer), collapse = collapse)
  else
    paste0(c(header, paste0(x, collapse = collapse), footer),
      collapse = hf.collapse)
}, sealed = SEALED)

setGeneric("flatten", function(object, ...) standardGeneric("flatten"))

setMethod("flatten", "ANY", function(object, ...) {
  if (is.atomic(object))
    return(object)
  stop("need atomic 'object' (or specific 'flatten' method)")
}, sealed = SEALED)

setMethod("flatten", "list", function(object, use.names = TRUE, ...) {
  while (any(is.a.list <- vapply(object, is.list, NA))) {
    object[!is.a.list] <- lapply(object[!is.a.list], list)
    object <- unlist(object, FALSE, use.names)
  }
  object
}, sealed = SEALED)

setGeneric("unnest", function(object, ...) standardGeneric("unnest"))

setMethod("unnest", "data.frame", function(object, sep, col = names(object),
    fixed = TRUE, ..., stringsAsFactors = FALSE) {
  x <- lapply(object[, col, drop = FALSE], strsplit, sep, fixed, !fixed)
  x <- as.data.frame(do.call(cbind, x)) # yields columns of type 'list'
  x <- cbind(object[, setdiff(names(object), col), drop = FALSE], x)
  col <- names(x)
  args <- list(check.names = FALSE, stringsAsFactors = stringsAsFactors, ...)
  x <- lapply(seq.int(nrow(x)),
    function(i) do.call(data.frame, c(x[i, , drop = FALSE], args)))
  for (i in seq_along(x))
    colnames(x[[i]]) <- col
  do.call(rbind, x)
}, sealed = SEALED)

setMethod("unnest", "list", function(object, ..., stringsAsFactors = FALSE) {
  id <- mapply(FUN = rep.int, x = seq_along(object), times = lengths(object),
    SIMPLIFY = FALSE, USE.NAMES = FALSE)
  x <- unlist(object, FALSE, FALSE)
  x <- data.frame(ID = unlist(id, FALSE, FALSE),
    X = if (is.list(x)) I(x) else x, ..., stringsAsFactors = stringsAsFactors)
  attr(x, "total") <- length(object)
  x
}, sealed = SEALED)

setMethod("unnest", "character", function(object, sep, fixed = TRUE, ...,
    stringsAsFactors = FALSE) {
  unnest(object = strsplit(object, sep, fixed, !fixed), ...,
    stringsAsFactors = stringsAsFactors)
}, sealed = SEALED)

collect <- function(x, what, ...) UseMethod("collect")

collect.list <- function(x,
    what = c("counts", "occurrences", "values", "elements", "datasets", "rows"),
    min.cov = 1L, keep.unnamed = FALSE, dataframe = FALSE, optional = TRUE,
    stringsAsFactors = default.stringsAsFactors(), ...) {

  # collecting 'counts' or 'occurrences'
  note_in_matrix <- function(x, count, min.cov, dataframe, optional,
      stringsAsFactors, ...) {
    x <- lapply(lapply(x, unlist), as.character)
    n <- sort.int(unique.default(unlist(x, FALSE, FALSE)))
    result <- matrix(0L, length(x), length(n), FALSE, list(names(x), n))
    if (count)
      for (i in seq_along(x)) {
        value <- table(x[[i]], useNA = "ifany")
        result[i, names(value)] <- unclass(value)
      }
    else
      for (i in seq_along(x))
        result[i, x[[i]]] <- 1L
    if (min.cov > 0L)
      result <- result[, colSums(result) >= min.cov, drop = FALSE]
    if (dataframe)
      as.data.frame(x = result, optional = optional,
        stringsAsFactors = stringsAsFactors, ...)
    else
      result
  }

  # collecting 'values' or 'elements'
  insert_into_matrix <- function(x, flat, keep.unnamed, dataframe, optional,
      stringsAsFactors, verbose, ...) {
    oneify <- function(x) {
      if (is.atomic(x))
        return(x)
      size <- lengths(x, FALSE)
      x[!size] <- NA
      x[size > 1L] <- lapply(x[size > 1L], list)
      x
    }
    keep_validly_named_only <- function(x) {
      if (is.null(names(x)))
        NULL
      else
        x[nzchar(names(x))]
    }
    keep_validly_named_only_but_complain <- function(x) {
      if (is.null(names(x))) {
        warning("removing unnamed vector")
        NULL
      } else if (!all(ok <- nzchar(names(x)))) {
        warning("removing elements with empty names")
        x[ok]
      } else
        x
    }
    enforce_names <- function(x) {
      names(x) <- if (is.null(n <- names(x)))
          seq_along(x)
        else
          ifelse(nzchar(n), n, seq_along(x))
      x
    }
    if (flat)
      x <- lapply(x, unlist)
    else
      x <- lapply(x, oneify)
    if (keep.unnamed)
      x <- lapply(x, enforce_names)
    else if (verbose)
      x <- lapply(x, keep_validly_named_only_but_complain)
    else
      x <- lapply(x, keep_validly_named_only)
    n <- unique.default(unlist(lapply(x, names), FALSE))
    result <- matrix(NA, length(x), length(n), FALSE, list(names(x), n))
    result <- as.data.frame(x = result, stringsAsFactors = FALSE,
      optional = TRUE, ...)
    for (i in seq_along(x))
      result[i, names(x[[i]])] <- x[[i]]
    if (dataframe && stringsAsFactors)
      for (i in which(vapply(result, is.character, NA)))
        result[, i] <- as.factor(result[, i])
    if (!optional)
      names(result) <- make.names(names(result))
    if (dataframe)
      result
    else
      as.matrix(result)
  }

  # collecting 'datasets'
  collect_matrices <- function(x, keep.unnamed, dataframe, optional,
      stringsAsFactors, verbose, all.rows, ...) {
    unfactor <- function(x) {
      for (i in which(vapply(x, is.factor, NA)))
        x[, i] <- as.character(x[, i])
      x
    }
    keep_validly_named_only <- function(x) {
      if (is.null(colnames(x)) || is.null(rownames(x)))
        NULL
      else
        x[nzchar(rownames(x)), nzchar(colnames(x)), drop = FALSE]
    }
    keep_validly_named_only_but_complain <- function(x) {
      if (is.null(colnames(x)) || is.null(rownames(x))) {
        warning("removing unnamed matrix or data frame")
        NULL
      } else {
        row.ok <- nzchar(rownames(x))
        col.ok <- nzchar(colnames(x))
        if (!all(row.ok) || !all(col.ok)) {
          warning("removing rows and/or columns with empty names")
          x[row.ok, col.ok, drop = FALSE]
        } else
          x
      }
    }
    enforce_names <- function(x) {
      enforce <- function(x, n) if (is.null(x))
          seq_len(n)
        else
          ifelse(nzchar(x), x, seq_len(n))
      rownames(x) <- enforce(rownames(x), nrow(x))
      colnames(x) <- enforce(colnames(x), ncol(x))
      x
    }
    add_columns <- function(x, wanted) {
      if (length(n <- setdiff(wanted, colnames(x))))
        x <- cbind(x, matrix(NA, nrow(x), length(n), FALSE, list(NULL, n)))
      x[, wanted, drop = FALSE]
    }
    if (all.atomic <- all(vapply(x, is.atomic, NA))) {
      x <- lapply(x, as.matrix)
      if (keep.unnamed)
        x <- lapply(x, enforce_names)
      else if (verbose)
        x <- lapply(x, keep_validly_named_only_but_complain)
      else
        x <- lapply(x, keep_validly_named_only)
    } else {
      x <- lapply(lapply(X = x, FUN = data.frame, stringsAsFactors = FALSE,
        check.names = !optional), unfactor)
    }
    if (all.rows) {
      cn <- unique.default(unlist(lapply(x, colnames), FALSE, FALSE))
      result <- do.call(rbind, lapply(x, add_columns, cn))
      if (is.null(result))
        result <- matrix(NA, 0L, length(cn), FALSE, list(NULL, cn))
    } else {
      rn <- sort.int(unique.default(unlist(lapply(x, rownames), FALSE, FALSE)))
      cn <- sort.int(unique.default(unlist(lapply(x, colnames), FALSE, FALSE)))
      result <- matrix(NA, length(rn), length(cn), FALSE, list(rn, cn))
      if (!all.atomic)
        result <- as.data.frame(result, stringsAsFactors = FALSE,
          optional = optional)
      for (mat in x)
        result[rownames(mat), colnames(mat)] <- mat
    }
    if (dataframe) {
      if (all.atomic || !is.data.frame(result))
        result <- as.data.frame(result)
      if (stringsAsFactors)
        for (i in which(vapply(result, is.character, NA)))
          result[, i] <- as.factor(result[, i])
    } else if (!all.atomic || is.data.frame(result)) {
      result <- as.matrix(result)
    }
    if (!optional)
      colnames(result) <- make.names(colnames(result))
    result
  }

  LL(min.cov, dataframe, optional, stringsAsFactors, keep.unnamed)
  if (verbose <- is.na(keep.unnamed))
    keep.unnamed <- FALSE
  case(match.arg(what),
    counts = note_in_matrix(x, TRUE, min.cov, dataframe, optional,
      stringsAsFactors, ...),
    occurrences = note_in_matrix(x, FALSE, min.cov, dataframe, optional,
      stringsAsFactors, ...),
    elements = insert_into_matrix(x, TRUE, keep.unnamed, dataframe, optional,
      stringsAsFactors, verbose, ...),
    values = insert_into_matrix(x, FALSE, keep.unnamed, dataframe, optional,
      stringsAsFactors, verbose, ...),
    datasets = collect_matrices(x, keep.unnamed, dataframe, optional,
      stringsAsFactors, verbose, FALSE, ...),
    rows = collect_matrices(x, TRUE, dataframe, optional,
      stringsAsFactors, verbose, TRUE, ...)
  )
}

collect.matrix <- function(x, what = c("columns", "rows"), empty = "?", ...) {

  assort_columns <- function(x, empty) {
    assort <- function(x) {
      result <- integer(nrow(x))
      repeat {
        if (all(result) || !(m <- max(x)))
          break
        pos <- which(x == m, TRUE)[1L, ]
        result[pos[[1L]]] <- pos[[2L]]
        x[pos[[1L]], ] <- x[, pos[[2L]]] <- 0
      }
      if (any(pos <- !result))
        result[pos] <- setdiff(seq_len(ncol(x)), result)[seq_along(which(pos))]
      result
    }
    stopifnot(is.matrix(x), is.character(x))
    x[!nzchar(x) | is.na(x)] <- empty
    n <- unique.default(x)
    n <- matrix(0L, length(n), ncol(x), FALSE, list(n, colnames(x)))
    for (i in seq_len(ncol(x))) {
      cnt <- table(x[, i])
      n[names(cnt), i] <- cnt[]
    }
    n <- sweep(n, 2L, colSums(n), "/")
    n[match(empty, rownames(n), 0L), ] <- 0
    for (i in seq_len(nrow(x)))
      x[i, assort(n[x[i, ], , drop = FALSE])] <- x[i, ]
    x[x == empty] <- ""
    x
  }

  LL(empty)
  case(match.arg(what),
    columns = assort_columns(x, empty),
    rows = t(assort_columns(t(x), empty))
  )
}

setGeneric("map_values",
  function(object, mapping, ...) standardGeneric("map_values"))

setMethod("map_values", c("list", "character"), function(object, mapping,
    coerce = character()) {
  if (isTRUE(coerce)) {
    if (is.null(coerce <- names(mapping)))
      return(object)
    mapfun <- function(item) as(item, map_values(class(item), mapping))
  } else
    mapfun <- if (length(coerce) == 0L || all(coerce == "character"))
        function(item) map_values(item, mapping)
      else
        function(item) {
          result <- map_values(as.character(item), mapping)
          mostattributes(result) <- attributes(item)
          result
        }
  map_values(object, mapping = mapfun, coerce = coerce)
}, sealed = SEALED)

setMethod("map_values", c("list", "function"), function(object, mapping,
    coerce = character(), ...) {
  rapply(object = object, f = mapping, classes = prepare_class_names(coerce),
    how = "replace", ...)
}, sealed = SEALED)

setMethod("map_values", c("list", "NULL"), function(object, mapping,
    coerce = character()) {
  clean_recursively <- function(x) {
    if (!is.list(x))
      return(x)
    x <- lapply(x, clean_recursively)
    x[lengths(x, FALSE) > 0L]
  }
  if (length(coerce))
    object <- rapply(object, as.character, prepare_class_names(coerce), NULL,
      "replace")
  clean_recursively(object)
}, sealed = SEALED)

setMethod("map_values", c("list", "missing"), function(object, mapping,
    coerce = character()) {
  if (isTRUE(coerce)) {
    classes <- "ANY"
    mapfun <- class
  } else {
    classes <- prepare_class_names(coerce)
    mapfun <- as.character
  }
  map_values(rapply(object, mapfun, classes))
}, sealed = SEALED)

setMethod("map_values", c("list", "expression"), function(object, mapping,
    coerce = parent.frame()) {
  e <- list2env(object, NULL, coerce)
  for (subexpr in mapping)
    eval(subexpr, e)
  e <- as.list(e) # return 'e' if the order of list elements doesn't matter
  novel <- setdiff(names(e), names(object))
  for (name in setdiff(names(object), names(e)))
    object[[name]] <- NULL
  object[novel] <- e[novel]
  object
}, sealed = SEALED)

setMethod("map_values", c("data.frame", "function"), function(object, mapping,
    coerce = character(), ...) {
  if (identical("ANY", coerce <- prepare_class_names(coerce)))
    coerce <- unique(unlist((lapply(object, class))))
  for (i in which(vapply(object, inherits, NA, coerce)))
    object[[i]] <- mapping(object[[i]], ...)
  object
}, sealed = SEALED)

setMethod("map_values", c("data.frame", "character"), function(object, mapping,
    coerce = character()) {
  if (isTRUE(coerce)) {
    if (is.null(coerce <- names(mapping)))
      return(object)
    mapfun <- function(item) as(item, map_values(class(item), mapping))
  } else {
    mapfun <- function(item) map_values(as.character(item), mapping)
  }
  map_values(object, mapping = mapfun, coerce = coerce)
}, sealed = SEALED)

setMethod("map_values", c("data.frame", "NULL"), function(object, mapping,
    coerce = character(), ...) {
  if (identical("ANY", coerce <- prepare_class_names(coerce)))
    coerce <- unique(unlist((lapply(object, class))))
  for (i in which(vapply(object, inherits, NA, coerce)))
    object[[i]] <- as.character(object[[i]])
  object
}, sealed = SEALED)

setMethod("map_values", c("data.frame", "missing"), function(object,
    coerce = character()) {
  if (isTRUE(coerce)) {
    result <- unlist(lapply(object, class))
  } else {
    coerce <- prepare_class_names(coerce)
    if (!"ANY" %in% coerce)
      object <- object[, vapply(object, inherits, NA, coerce),
        drop = FALSE]
    result <- unlist(lapply(object, as.character))
  }
  map_values(result)
}, sealed = SEALED)

setMethod("map_values", c("data.frame", "list"), function(object, mapping) {

  do_merge <- function(x, join) {
    join_them <- function(a, b) {
      ifelse(is.na(a) | !nzchar(a), b,
        ifelse(is.na(b) | !nzchar(b), a, paste0(a, join, b)))
    }
    Reduce(join_them, lapply(x, as.character))
  }

  can.map <- vapply(object, is.character, NA) | vapply(object, is.factor, NA)

  # template-ceation mode
  if (!length(mapping)) {
    mapping <- vector("list", sum(can.map))
    mapping[] <- list(structure(.Data = character(), names = character()))
    names(mapping) <- names(object)[can.map]
    return(mapping)
  }

  if (is.null(names(mapping)))
    stop("unnamed list used as 'mapping' argument")

  # separator for merging columns
  if (pos <- match("+", names(mapping), 0L)) {
    join <- unlist(mapping[[pos]], TRUE, FALSE)
    mapping <- mapping[-pos]
  } else {
    join <- "; "
  }

  # mapping of column names
  if (pos <- match(".", names(mapping), 0L)) {
    names(object) <- map_values(names(object), unlist(mapping[[pos]]))
    mapping <- mapping[-pos]
  }

  # merging columns with the same name
  if (anyDuplicated.default(names(object))) {
    last <- ncol(object)
    pos <- split.default(seq_along(object), names(object))
    pos <- pos[lengths(pos) > 1L]
    remove <- NULL
    for (wanted in pos) {
      remove <- c(remove, wanted[-1L])
      object[, wanted[[1L]]] <- do_merge(object[, wanted, drop = FALSE], join)
    }
    object <- object[, -remove, drop = FALSE]
    # must be calculated anew because of possible conversions to character
    can.map <- vapply(object, is.character, NA) | vapply(object, is.factor, NA)
    if (ncol(object) < last)
      message(sprintf("reduced table by merging from %i to %i column(s)",
        last, ncol(object)))
  }

  # addition of columns
  if (pos <- match("_", names(mapping), 0L)) {
    add <- mapping[[pos]]
    mapping <- mapping[-pos]
  } else {
    add <- NULL
  }

  # deletion of rows
  if (pos <- match("-", names(mapping), 0L)) {
    delete <- mapping[[pos]] # must refer to new column names
    mapping <- mapping[-pos]
  } else {
    delete <- NULL
  }

  # deletion of columns
  if (pos <- match("/", names(mapping), 0L)) {
    remove <- mapping[[pos]] # must refer to new column names
    mapping <- mapping[-pos]
  } else {
    remove <- NULL
  }
  if (length(remove)) { # actually delete columns
    last <- ncol(object)
    pos <- match(remove, names(object), 0L)
    pos <- -pos[pos > 0L] # silently skip columns that do not exist anyway
    if (length(pos)) {
      object <- object[, pos, drop = FALSE]
      can.map <- can.map[pos] # must fit to 'object'
    }
    if (ncol(object) < last)
      message(sprintf("reduced table by deletion from %i to %i column(s)",
        last, ncol(object)))
  }

  # mapping of columns
  pos <- match(names(mapping), names(object), 0L) > 0L
  assert(pos, names(mapping),
    "column '%s' present in mapping does not exist", NA)
  if (!all(pos))
    warning("available columns are: ",
      paste0(sprintf("'%s'", names(object)), collapse = ", "))
  wanted <- names(object)[can.map]
  assert(match(names(mapping)[pos], wanted, 0L) > 0L, names(mapping)[pos],
    "column '%s' present in mapping is not 'character'", NA)
  pos <- match(wanted, names(mapping), 0L)
  wanted <- wanted[assert(pos > 0L, wanted,
    "column '%s' exists but is not present in mapping", NA)]
  pos <- pos[pos > 0L]
  for (i in seq_along(wanted)) # map remaining columns
    object[, wanted[[i]]] <- map_values(object[, wanted[[i]]],
      unlist(mapping[[pos[[i]]]]))

  # deletion of rows based on certain values in certain columns
  delete <- delete[assert(match(names(delete), names(object), 0L) > 0L,
    names(delete),
    "column '%s' to be used for deleting rows does not exist", NA)]
  for (name in names(delete)) # actually delete rows
    if (any(pos <- object[, name] %in% delete[[name]]))
      object <- object[!pos, , drop = FALSE]

  # addition to columns if they do not yet exist
  add <- add[assert(match(names(add), names(object), 0L) == 0L,
    names(add), "column '%s' to add already exists", NA)]
  if (length(add)) # add columns with constant default value
    object <- cbind(object, as.data.frame(add))

  object

}, sealed = SEALED)

setMethod("map_values", c("array", "character"), function(object, mapping,
    coerce = TRUE) {
  if (isTRUE(coerce)) {
    storage.mode(object) <- map_values(storage.mode(object), mapping)
    object
  } else {
    coerce <- prepare_class_names(coerce)
    if (!identical("ANY", coerce) && !storage.mode(object) %in% coerce)
      stop("storage mode of 'object' not contained in 'coerce'")
    result <- map_values(as.character(object), mapping)
    attributes(result) <- attributes(object)
    result
  }
}, sealed = SEALED)

setMethod("map_values", c("array", "missing"), function(object, coerce = TRUE) {
  if (isTRUE(coerce)) {
    result <- storage.mode(object)
  } else {
    coerce <- prepare_class_names(coerce)
    if (!identical("ANY", coerce) && !storage.mode(object) %in% coerce)
      stop("storage mode of 'object' not contained in 'coerce'")
    result <- as.character(object)
  }
  map_values(result)
}, sealed = SEALED)

setMethod("map_values", c("array", "function"), function(object, mapping, ...) {
  result <- mapping(as.vector(object), ...)
  mostattributes(result) <- c(attributes(result), attributes(object))
  result
}, sealed = SEALED)

setMethod("map_values", c("character", "function"), function(object, mapping,
    ...) {
  result <- mapping(object, ...)
  mostattributes(result) <- attributes(object)
  result
}, sealed = SEALED)

setMethod("map_values", c("character", "character"), function(object, mapping) {
  mapped <- match(object, names(mapping), 0L)
  object[found] <- mapping[mapped[found <- mapped > 0L]]
  object
}, sealed = SEALED)

setMethod("map_values", c("character", "missing"), function(object) {
  object <- sort.int(unique.default(object))
  structure(.Data = object, names = object)
}, sealed = SEALED)

setMethod("map_values", c("character", "NULL"), function(object, mapping) {
  object
}, sealed = SEALED)

setMethod("map_values", c("character", "numeric"), function(object, mapping,
    ...) {
  adist2map(x = object, max.distance = mapping, ...)
}, sealed = SEALED)

setMethod("map_values", c("factor", "function"), function(object, mapping,
    ...) {
  levels(object) <- map_values(levels(object), mapping, ...)
  object
}, sealed = SEALED)

setMethod("map_values", c("factor", "character"), function(object, mapping) {
  levels(object) <- map_values(levels(object), mapping)
  object
}, sealed = SEALED)

setMethod("map_values", c("factor", "missing"), function(object) {
  map_values(levels(object))
}, sealed = SEALED)

setMethod("map_values", c("factor", "numeric"), function(object, mapping,
    ...) {
  adist2map(x = as.character(object), max.distance = mapping, ...)
}, sealed = SEALED)

setMethod("map_values", c("logical", "function"), function(object, mapping,
    ...) {
  result <- mapping(object, ...)
  mostattributes(result) <- attributes(object)
  result
}, sealed = SEALED)

setMethod("map_values", c("logical", "vector"), function(object, mapping) {
  result <- ifelse(object, mapping[[3L]], mapping[[1L]])
  result[is.na(result)] <- mapping[[2L]]
  attributes(result) <- attributes(object)
  result
}, sealed = SEALED)

setMethod("map_values", c("logical", "NULL"), function(object, mapping) {
  object
}, sealed = SEALED)

setMethod("map_values", c("logical", "missing"), function(object) {
  result <- object * 2L + 1L
  result[is.na(result)] <- 2L
  attributes(result) <- attributes(object)
  result
}, sealed = SEALED)

setMethod("map_values", c("NULL", "function"), function(object, mapping, ...) {
  NULL
}, sealed = SEALED)

setMethod("map_values", c("NULL", "character"), function(object, mapping) {
  NULL
}, sealed = SEALED)

setMethod("map_values", c("NULL", "missing"), function(object, mapping) {
  map_values(character())
}, sealed = SEALED)

setGeneric("map_names",
  function(object, mapping, ...) standardGeneric("map_names"))

setMethod("map_names", c("list", "function"), function(object, mapping, ...) {
  map_names_recursively <- function(item) {
    if (is.list(item)) {
      names(item) <- map_values(names(item), mapping, ...)
      return(lapply(item, map_names_recursively))
    }
    item
  }
  map_names_recursively(object)
}, sealed = SEALED)

setMethod("map_names", c("list", "character"), function(object, mapping) {
  map_names_recursively <- function(item) {
    if (is.list(item)) {
      names(item) <- map_values(names(item), mapping)
      return(lapply(item, map_names_recursively))
    }
    item
  }
  map_names_recursively(object)
}, sealed = SEALED)

setMethod("map_names", c("list", "missing"), function(object) {
  get_names_recursively <- function(item) {
    if (is.list(item))
      c(names(item), unlist(lapply(item, get_names_recursively)))
    else
      character()
  }
  map_values(get_names_recursively(object))
}, sealed = SEALED)

setMethod("map_names", c("data.frame", "function"), function(object, mapping,
    ...) {
  names(object) <- map_values(names(object), mapping, ...)
  object
}, sealed = SEALED)

setMethod("map_names", c("data.frame", "character"), function(object, mapping) {
  names(object) <- map_values(names(object), mapping)
  object
}, sealed = SEALED)

setMethod("map_names", c("data.frame", "missing"), function(object) {
  map_values(dimnames(object))
}, sealed = SEALED)

setMethod("map_names", c("array", "function"), function(object, mapping, ...) {
  dimnames(object) <- map_values(dimnames(object), mapping, ...)
  object
}, sealed = SEALED)

setMethod("map_names", c("array", "character"), function(object, mapping) {
  dimnames(object) <- map_values(dimnames(object), mapping)
  object
}, sealed = SEALED)

setMethod("map_names", c("array", "missing"), function(object) {
  map_values(dimnames(object))
}, sealed = SEALED)

setMethod("map_names", c("ANY", "function"), function(object, mapping, ...) {
  names(object) <- map_values(names(object), mapping, ...)
  object
}, sealed = SEALED)

setMethod("map_names", c("ANY", "character"), function(object, mapping) {
  names(object) <- map_values(names(object), mapping)
  object
}, sealed = SEALED)

setMethod("map_names", c("ANY", "missing"), function(object) {
  map_values(names(object))
}, sealed = SEALED)

setGeneric("contains",
  function(object, other, ...) standardGeneric("contains"))

setMethod("contains", c("list", "list"), function(object, other,
    values = TRUE, exact = FALSE, ...) {
  query.keys <- names(other)
  if (length(query.keys) == 0L && length(other) > 0L)
    return(FALSE)
  found <- match(query.keys, names(object), incomparables = "")
  if (anyNA(found))
    return(FALSE)
  for (idx in seq_along(query.keys)) {
    query.subset <- other[[idx]]
    data.subset <- object[[found[idx]]]
    result <- if (is.list(query.subset)) {
      if (is.list(data.subset))
        Recall(object = data.subset, other = query.subset, values = values,
          exact = exact, ...)
      else if (values)
        FALSE
      else
        is.null(names(query.subset))
    } else if (values) {
      if (exact)
        identical(x = data.subset, y = query.subset, ...)
      else
        all(data.subset %in% query.subset)
    } else
      TRUE
    if (!result)
      return(FALSE)
  }
  TRUE
}, sealed = SEALED)

setGeneric("check", function(object, against, ...) standardGeneric("check"))

setMethod("check", c("list", "character"), function(object, against) {
  # additional tests
  is.available <- function(x) !anyNA(x)
  is.nonempty <- function(x) !is.character(x) ||
    all(nzchar(x, TRUE), na.rm = TRUE)
  is.unique <- function(x) !anyDuplicated.default(x[!is.na(x)])
  is.positive <- function(x) is.numeric(x) && all(x > 0, na.rm = TRUE)
  is.natural <- function(x) is.numeric(x) && all(x >= 0, na.rm = TRUE)
  # main part
  element_is <- function(x, name, checkfun) checkfun(x[[name]])
  ok <- names(against) %in% names(object)
  result <- sprintf("element '%s' is missing", names(against)[!ok])
  against <- against[ok]
  ok <- mapply(checkfun = lapply(sprintf("is.%s", against), match.fun),
    FUN = element_is, name = names(against), MoreArgs = list(x = object))
  c(result, sprintf("element '%s' fails test 'is.%s'",
    names(against)[!ok], against[!ok]))
}, sealed = SEALED)

setMethod("check", c("character", "missing"), function(object, against) {
  if (length(object))
    stop(paste0(object, collapse = "\n"))
  invisible(TRUE)
}, sealed = SEALED)

match_parts <- function(x, pattern, ignore.case = FALSE) {
  m <- regexpr(pattern, x, ignore.case, TRUE)
  f <- m > 0L
  if (is.null(attr(m, "capture.start"))) {
    result <- rep.int(NA_character_, length(x))
    result[f] <- substr(x[f], m[f], m[f] + attr(m, "match.length")[f] - 1L)
    names(result) <- names(x)
    return(result)
  }
  cs <- attr(m, "capture.start")[f, , drop = FALSE]
  cl <- attr(m, "capture.length")[f, , drop = FALSE]
  result <- matrix(NA_character_, length(x), ncol(cs), FALSE,
    list(names(x), attr(m, "capture.names")))
  for (i in seq_len(ncol(result)))
    result[f, i] <- substr(x[f], cs[, i], cs[, i] + cl[, i] - 1L)
  result
}

set <- function(name, expr, template = "%s.Rda", env = parent.frame(),
    inherits = TRUE) {
  if (exists(name, NULL, env, NULL, "any", inherits))
    return(invisible(0L))
  if (!length(template)) {
    assign(name, expr, NULL, env, inherits)
    return(invisible(1L))
  }
  if (dirname(rda <- sprintf(template, name)) == ".")
    rda <- file.path(getOption("rda_store", getwd()), rda)
  if (file.exists(rda)) {
    assign(name, readRDS(rda), NULL, env, inherits)
    return(invisible(2L))
  }
  result <- expr
  saveRDS(result, rda)
  assign(name, result, NULL, env, inherits)
  invisible(3L)
}

sql <- function(x, ...) UseMethod("sql")

sql.data.frame <- function(x, where, table, set = setdiff(colnames(x), where),
    ...) {

  dquote <- function(x) {
    ifelse(grepl("^[a-z][a-z_0-9]+$", x, FALSE, TRUE), x,
      sprintf('"%s"', gsub('"', '""', x, FALSE, FALSE, TRUE)))
  }

  sql_array <- function(x) {
    if (is.list(x))
      x <- vapply(x, sql_array, "")
    else if (is.character(x))
      x <- ifelse(is.na(x), "NULL",
        sprintf('"%s"', gsub('"', '\"', x, FALSE, FALSE, TRUE)))
    else if (is.atomic(x))
      x <- ifelse(is.na(x), "NULL", as.character(x))
    else
      stop("conversion of mode '", typeof(x), "' is not implemented")
    sprintf("{%s}", paste0(x, collapse = ","))
  }

  squote <- function(x, modify) {
    if (is.character(x))
      return(ifelse(is.na(x), "NULL", sprintf("'%s'",
        gsub("'", "''", x, FALSE, FALSE, TRUE))))
    if (is.atomic(x))
      return(ifelse(is.na(x), "NULL", x))
    if (is.list(x)) {
      if (modify)
        return(sprintf("'%s'", vapply(x, sql_array, "")))
      x <- lapply(x, squote)
      x <- vapply(X = x, FUN = paste0, FUN.VALUE = "", collapse = ", ")
      x <- sprintf("(%s)", ifelse(nzchar(x), x, "NULL"))
      attr(x, "list") <- TRUE
      return(x)
    }
    stop("conversion of mode '", typeof(x), "' is not implemented")
  }

  equals <- function(what, value, modify) {
    value <- squote(value, modify)
    sep <- if (modify)
        "="
      else if (isTRUE(attr(value, "list")))
        "IN"
      else
        ifelse(value == "NULL", "IS", "=")
    paste(what, sep, value)
  }

  join <- function(x, modify) {
    if (!ncol(x))
      stop("no columns chosen for ", if (modify) "update" else "selection")
    x <- mapply(FUN = equals, what = dquote(colnames(x)), value = x,
      MoreArgs = list(modify = modify), SIMPLIFY = FALSE, USE.NAMES = FALSE)
    do.call(paste, c(x, list(sep = if (modify) ", " else " AND ")))
  }

  create_map <- function(x) {
    x <- x[grepl("^new\\.(?<=.)", x, FALSE, TRUE) &
      match(substr(x, 5L, nchar(x)), x, 0L)]
    structure(.Data = substr(x, 5L, nchar(x)), names = x)
  }

  for (i in which(vapply(x, is.factor, NA)))
    x[, i] <- as.character(x[, i])

  map <- create_map(colnames(x))

  sprintf("UPDATE %s SET %s WHERE %s;", dquote(table),
    join(map_names(x[, set, drop = FALSE], map), TRUE),
    join(map_names(x[, where, drop = FALSE], map), FALSE))

}

sql.formula <- function(x, ...) {

  double_quote <- function(x) {
    if (any(bad <- !grepl("^[a-z][a-z_0-9]*$", x, FALSE, TRUE)))
      x[bad] <- sprintf('"%s"', gsub('"', '""', x[bad], FALSE, FALSE, TRUE))
    x
  }

  identifier_or_literal <- function(x) {
    if (is.character(x))
      return(sprintf("'%s'", gsub("'", "''", x, FALSE, FALSE, TRUE)))
    if (is.null(x))
      return("NULL")
    if (is.atomic(x))
      return(as.character(x))
    if (!is.name(x))
      stop("expected symbol or atomic vector, got ", typeof(x))
    double_quote(as.character(x))
  }

  operator <- function(x) {
    infix_operator <- function(x)
      if (grepl("^[a-z]+(\\s+[a-z]+)*$", x, TRUE, TRUE))
        toupper(x)
      else if (grepl("^[!#%&*+/<=>?@^|~`-]+$", x, FALSE, TRUE) &&
          !grepl("--|/[*]", x, FALSE, TRUE) &&
          !grepl("[^~!@#%^&|`?][+-]$", x, FALSE, TRUE))
        x
      else
        stop("invalid infix operator ", x)
    switch(x,
      # high-precedence operators that get another meaning
      `:::` =, `::` =, `@` =, `$` = x,
      # high-precedence operators with similar meaning in PostgreSQL
      `:` =, `(` =, `{` =, `[` =, `[[` =, `^` =, `-` =, `+` =, `*` =, `/` =,
        `<` =, `>` =, `<=` =, `>=` =, `!=` =, `||` =, `&&` = x,
      # equality, as it is likely to be used in R code
      `==` = "=",
      # logical operators
      `!` = "NOT", `&` = "AND", `|` = "OR",
      # low-precedence operators unlikely to be used but possible
      `~` =, `->` =, `<-` =, `->>` =, `<<-` =, `=` =, `?` = x,
      # reserved words that are kept
      `function` =, `if` = x,
      # infix operators enclosed in '%'
      `%%` = "%",
      if (grepl("^%.+%$", x, FALSE, TRUE))
        infix_operator(substr(x, 2L, nchar(x) - 1L))
      else
        NULL
    )
  }

  rec_sql <- function(x) {

    convert_pairlist <- function(x) {
      if (bad <- match("...", names(x), 0L)) {
        warning("removing '...' argument")
        x <- x[-bad]
      }
      if (!length(x))
        return(NULL)
      keys <- double_quote(names(x))
      if (any(present <- vapply(x, typeof, "") != "symbol" | nzchar(x))) {
        groups <- unclass(rev.default(pkgutils::sections(rev.default(present))))
        groups[is.na(groups)] <- seq_along(which(is.na(groups))) +
          max(groups, na.rm = TRUE)
        keys <- split.default(keys, match(groups, groups))
        keys <- vapply(keys, paste0, "", collapse = " OR ")
        values <- unlist(lapply(x[present], rec_sql), FALSE, FALSE)
        if (length(keys) > length(values))
          values <- c(values, "NULL")
      } else {
        keys <- paste0(keys, collapse = " OR ")
        values <- "NULL"
      }
      paste0(sprintf("WHEN %s THEN %s", keys, values), collapse = " ")
    }

    join <- function(x) paste0(unlist(x, FALSE, FALSE), collapse = ", ")

    named_join <- function(x, n) paste0(ifelse(nzchar(n), sprintf("%s := ",
      double_quote(n)), n), unlist(x, FALSE, FALSE), collapse = ", ")

    if (!is.call(x))
      return(identifier_or_literal(x))

    if (is.null(op <- operator(as.character(x[[1L]]))))
      return(sprintf("%s(%s)", rec_sql(x[[1L]]),
        if (is.null(names(x)))
          join(lapply(x[-1L], rec_sql))
        else
          named_join(lapply(x[-1L], rec_sql), names(x)[-1L])))

    switch(op,
      `if` = if (length(x) > 3L)
        sprintf("CASE WHEN %s THEN %s ELSE %s END", rec_sql(x[[2L]]),
          rec_sql(x[[3L]]), rec_sql(x[[4L]]))
      else
        sprintf("CASE WHEN %s THEN %s END", rec_sql(x[[2L]]), rec_sql(x[[3L]])),
      `function` = if (is.null(args <- convert_pairlist(x[[2L]])))
          rec_sql(x[[3L]])
        else
          sprintf("CASE %s ELSE %s END", args, rec_sql(x[[3L]])),
      `::` = sprintf("%s.%s", rec_sql(x[[2L]]), rec_sql(x[[3L]])),
      `:::` = sprintf("(%s).%s", rec_sql(x[[2L]]), rec_sql(x[[3L]])),
      `$` = sprintf("%s :: %s", rec_sql(x[[2L]]), rec_sql(x[[3L]])),
      `@` = sprintf("CAST(%s AS %s)", rec_sql(x[[2L]]), rec_sql(x[[3L]])),
      `(` = sprintf("(%s)", rec_sql(x[[2L]])), # always only one argument
      `{` = if (length(x) > 1L)
        sprintf("(%s)", join(lapply(x[-1L], rec_sql)))
      else
        "NULL",
      `[` = sprintf("%s[%s]", rec_sql(x[[2L]]),
        join(lapply(x[-c(1L, 2L)], rec_sql))),
      `[[` = sprintf("%s[[%s]]", rec_sql(x[[2L]]),
        join(lapply(x[-c(1L, 2L)], rec_sql))),
      if (length(x) > 2L) {
        if (identical(right <- rec_sql(x[[3L]]), "NULL"))
          switch(op, `=` = op <- "IS", `!=` = op <- "IS NOT",
            warning("NULL on right side of operator ", op))
        sprintf("%s %s %s", rec_sql(x[[2L]]), op, right)
      } else {
        sprintf("%s %s", op, rec_sql(x[[2L]]))
      }
    )

  }

  selection <- function(x) {
    if (is.call(x)) {
      tablename <- identifier_or_literal(x[[1L]])
      if (length(x) > 1L) {
        columns <- unlist(lapply(x[-1L], rec_sql), FALSE, FALSE)
        map <- allNames(x)[-1L]
        map <- ifelse(nzchar(map), paste0(" AS ", double_quote(map)), map)
        columns <- paste0(columns, map, collapse = ", ")
      } else {
        columns <- "*"
      }
    } else {
      tablename <- identifier_or_literal(x)
      columns <- "*"
    }
    sprintf("SELECT %s FROM %s", columns, tablename)
  }

  if (length(x) > 2L)
    sprintf("%s WHERE %s;", selection(x[[2L]]), rec_sql(x[[3L]]))
  else
    sprintf("%s;", selection(x[[2L]]))
}

Try the pkgutils package in your browser

Any scripts or data that you put into this service are public.

pkgutils documentation built on May 2, 2019, 5:49 p.m.