R/idxv.R

Defines functions idxv CJI

Documented in CJI idxv

#' @title Precalculate data.table indices
#' @description Precalculate indices for data.table, also known as \emph{Nth key}.
#' @param DT data.table for which generate indices.
#' @param Idx list of character vectors, indices to generate.
#' @param grp logical (default \emph{FALSE}), when \emph{TRUE} it will include groups id which can be used to speedup aggregations.
#' @return list of data.table, including some meta data in attributes.
#' @seealso \link{CJI}
#' @export
#' @example tests/example-idxv.R
idxv <- function(DT, Idx, grp = FALSE){
  stopifnot(is.data.table(DT), is.list(Idx))
  IDX <- lapply(Idx, function(idx){
    res = DT[, idx, with=FALSE]
    res[, `__dwtools_idx` := .I
        ]
    if(is.integer(idx)) setkeyv(res, names(DT)[idx])
    else setkeyv(res, idx)
    if(grp){
      res[, `__dwtools_grp` := .GRP, by=key(res)]
      setattr(res,"grp",res[,.SD[1],keyby=c("__dwtools_grp"),.SDcols=key(res)])
    }
    res[]
  })
  setattr(IDX,"DT.key",lapply(IDX,key))
  setattr(IDX,"DT.names",copy(names(DT)))
  setattr(IDX,"DT.nrow",nrow(DT))
}

#' @title Indexed cross join / filter for data.table
#' @description To be used inside data.table, same as \link{CJ} function. In case of not filtering on particular fields pass \code{TRUE} value to \dots, see examples in \link{idxv}.
#' @param IDX list generated by \link{idxv}, must be produced on the same data.table on which \emph{CJI} is used.
#' @param \dots values to filter data.table. For the columns to skip filtering use \code{TRUE}, see examples in \link{idxv}.
#' @param nomatch integer \emph{0} or \emph{NA}. Default \emph{0} (inner join) to filter data.table.
#' @return Subset of data.table according to arguments passed to \dots.
#' @seealso \link{idxv}
#' @export
#' @examples
#' # see ?idxv examples
CJI <- function(IDX, ..., nomatch = 0){
  DT.names <- attr(IDX,"DT.names",TRUE)
  DT.key <- attr(IDX,"DT.key",TRUE)
  values <- as.list(substitute(list(...)))[-1L]
  idx_skip <- rep(TRUE,length(DT.names))
  idx_skip[0:length(values)] <- sapply(values, isTRUE, USE.NAMES = FALSE)
  args.match <- DT.names[!idx_skip]
  which_IDX <- which(sapply(DT.key, function(idx) identical(idx,args.match)))
  if(!which_IDX){
    stop(paste("You cannot use CJ or CJI because IDX argument does not contain index for: ",paste(args.match,collapse=", "),sep=""))
  }
  IDX[[which_IDX]][do.call(what = CJ, args = setNames(values[!idx_skip],args.match)), nomatch=nomatch][,`__dwtools_idx`][]
}
jangorecki/dwtools documentation built on May 18, 2019, 12:24 p.m.