#' @title Unwrap Nested Data Frames
#'
#' @description
#' Some functions (e.g., \code{\link{getJobPars}}, \code{\link{getJobResources}} or \code{\link{reduceResultsDataTable}}
#' return a \code{data.table} with columns of type \code{list}.
#' These columns can be unnested/unwrapped with this function.
#' The contents of these columns will be transformed to a \code{data.table} and \code{\link[base]{cbind}}-ed
#' to the input data.frame \code{x}, replacing the original nested column.
#'
#' @note
#' There is a name clash with function \code{flatten} in package \pkg{purrr}.
#' The function \code{flatten} is discouraged to use for this reason in favor of \code{unwrap}.
#'
#' @param x [\code{\link{data.frame}} | \code{\link[data.table]{data.table}}]\cr
#' Data frame to flatten.
#' @param cols [\code{character}]\cr
#' Columns to consider for this operation. If set to \code{NULL} (default),
#' will operate on all columns of type \dQuote{list}.
#' @param sep [\code{character(1)}]\cr
#' If \code{NULL} (default), the column names of the additional columns will re-use the names
#' of the nested \code{list}/\code{data.frame}.
#' This may lead to name clashes.
#' If you provide \code{sep}, the variable column name will be constructed as
#' \dQuote{[column name of x][sep][inner name]}.
#' @return [\code{\link{data.table}}].
#' @export
#' @examples
#' x = data.table::data.table(
#' id = 1:3,
#' values = list(list(a = 1, b = 3), list(a = 2, b = 2), list(a = 3))
#' )
#' unwrap(x)
#' unwrap(x, sep = ".")
unwrap = function(x, cols = NULL, sep = NULL) {
assertDataFrame(x)
if (!is.data.table(x))
x = as.data.table(x)
if (is.null(cols)) {
cols = names(x)[vlapply(x, is.list)]
} else {
assertNames(cols, "unique", subset.of = names(x))
qassertr(x[, cols, with = FALSE], "l")
}
assertString(sep, null.ok = TRUE)
res = data.table(.row = seq_row(x), key = ".row")
extra.cols = chsetdiff(names(x), cols)
if (length(extra.cols))
res = cbind(res, x[, extra.cols, with = FALSE])
for (col in cols) {
xc = x[[col]]
new.cols = lapply(xc, function(x) {
if (!is.null(x)) {
ii = !vlapply(x, qtest, c("l", "d", "v1")) # FIXME: add parameter `which` to qtestr
x[ii] = lapply(x[ii], list)
na = which(is.na(names2(x)))
if (length(na) > 0L)
names(x)[na] = sprintf("%s.%i", col, seq_along(na))
}
x
})
new.cols = rbindlist(new.cols, fill = TRUE, idcol = ".row", use.names = TRUE)
if (ncol(new.cols) > 1L) {
if (nrow(new.cols) > nrow(x) || anyDuplicated(new.cols, by = ".row") > 0L)
stopf("Some rows are unsuitable for unnesting. Unwrapping row in column '%s' leads to multiple rows", col)
if (!is.null(sep)) {
nn = setdiff(names(new.cols), ".row")
setnames(new.cols, nn, stri_paste(col, nn, sep = sep))
}
clash = chsetdiff(chintersect(names(res), names(new.cols)), ".row")
if (length(clash) > 0L)
stopf("Name clash while unwrapping data.table: Duplicated column names: %s", stri_flatten(clash, ", "))
res = merge(res, new.cols, all.x = TRUE, by = ".row")
}
}
res[, ".row" := NULL]
kx = key(x)
if (!is.null(kx) && all(kx %chin% names(res)))
setkeyv(res, kx)
res[]
}
#' @rdname unwrap
#' @export
flatten = function(x, cols = NULL, sep = NULL) { #nocov start
"!DEBUG Call of soon-to-be deprecated function flatten. Use unwrap() instead!"
unwrap(x, cols, sep)
} #nocov end
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.