R/parse_input.R

Defines functions parse_dataframe parse_list

parse_list <- function(combinations)
{
  if (is.null(attr(combinations, "names")))
    stop("when `combinations` is a list, all vectors in that list must be named")

  if (any(names(combinations) == ""))
    stop("all elements of `combinations` must be named")

  if (!all(sapply(combinations, anyDuplicated) == 0))
    stop("vectors in `combinations` cannot contain duplicates")

  if (any(duplicated(names(combinations))))
    stop("names of elements in `combinations` must be unique")

  sets <- names(combinations)
  n <- length(sets)

  id <- bit_indexr(n)

  out <- integer(nrow(id))
  rownames(id) <- apply(id, 1L, function(x) paste(sets[x], collapse = "&"))

  intersect_sets <- as.list(rep(-1, nrow(id)))
  names(intersect_sets) <- rownames(id)
  compute_intersect <- function(bool) {
    ind <- which(bool)
    nm <- paste(sets[ind], collapse = "&")
    if (identical(intersect_sets[[nm]], -1)) { # not computed yet
      if (length(ind) == 1) {
        intersect_sets[[nm]] <<- combinations[[ind]]
      } else {
        bool[] <- FALSE
        bool[ind[1]] <- TRUE
        part1 <- compute_intersect(bool)
        bool[ind] <- TRUE
        bool[ind[1]] <- FALSE
        part2 <- compute_intersect(bool)
        intersect_sets[[nm]] <<- intersect(part1, part2)
      }
    }
    intersect_sets[[nm]]
  }

  apply(id, 1, function(x) length(compute_intersect(x)))
}

parse_dataframe <- function(combinations,
                            weights = NULL,
                            by = NULL,
                            facs,
                            sep = "_",
                            factor_names = TRUE,
                            ...)
{
  if (any(grepl("&", colnames(combinations), fixed = TRUE)))
    stop("names of columns in `combinations` must not contain '$'")

  if (!is.null(facs)) {
    if (is.list(facs)) {
      if (!is.null(names(facs)))
        nms <- names(facs)
      else {
        nms <- sapply(by[-1L], deparse)
      }
    } else
      nms <- deparse(by)

    if (is.list(facs))
      stopifnot(length(facs) < 3)
    else
      facs <- list(facs)

    dd <- as.data.frame(facs, col.names = nms, stringsAsFactors = TRUE)
    groups <- unique(dd)
    rownames(groups) <- NULL

    out <- g <- vector("list", NROW(groups))

    by_ind <- match(nms, colnames(combinations))

    for (i in seq_len(NROW(groups))) {
      ind <- apply(dd, 1, function(x) all(x == groups[i, ]))
      out[[i]] <- combinations[ind, -by_ind, drop = FALSE]
      names(out)[[i]] <- paste(unlist(groups[i, , drop = TRUE]),
                               collapse = ".")
    }

    attr(out, "groups") <- groups
  } else {
    is_factor <- vapply(combinations,
                        function(x) is.factor(x) || is.character(x),
                        logical(1))

    reals <- vapply(combinations,
                    is_real,
                    FUN.VALUE = logical(1))

    if (any(reals))
      stop("you cannot use non-integer numeric variables.")

    if (any(is_factor))
      combinations <- dummy_code(combinations,
                                 sep = sep,
                                 factor_names = factor_names)

    if (is.null(weights))
      weights <- rep.int(1L, NROW(combinations))

    out <- tally_combinations(combinations, weights)
  }
  out
}

Try the eulerr package in your browser

Any scripts or data that you put into this service are public.

eulerr documentation built on Dec. 10, 2022, 1:08 a.m.