#
# Rdata.R
#Mon 27 Jun 2005 10:49:06 AM CEST
#system("~/src/Rprivate/exportR.sh");
#system("~/src/Rprivate/exportR.sh"); source("RgenericAll.R"); source("Rgenetics.R"); loadLibraries();
#system('~/src/Rprivate/exportR.sh ; cp ~/src/Rprivate/RgenericAllRaw.R .');
# <!> copied to Rmeta.R as being the first file to be exported by now (26.3.2017)
#
# <ยง> abstract data functions
#
defined = function(x) exists(as.character(substitute(x)));
defined.by.name = function(name) { class(try(get(name), silent = TRUE)) != 'try-error' }
# equivalent to i %in% v
is.in = function(i, v)(length((1:length(v))[v == i])>0)
rget = function(name, default = NULL, ..., pos = -1, envir = as.environment(pos)) {
#obj = try(get(name, ...), silent = TRUE);
#r = if(class(obj) == 'try-error') default else obj;
#r = if (exists(name, where = pos, envir = envir)) get(name, ..., pos = pos, envir = envir) else default;
r = if (exists(name, envir = envir)) get(name, ..., envir = envir) else default;
r
}
# .fdE: use notE
firstDef = function(..., .fdInterpolate = FALSE, .fdIgnoreErrors = FALSE, .fdE = FALSE) {
l = if (.fdInterpolate) c(...) else list(...);
for (i in l) {
if ((!is.null(i) && (!.fdE || notE(i))) && (!.fdIgnoreErrors || class(i) != 'try-error'))
return(i)
};
NULL
}
FirstDef = function(..., .fdInterpolate = FALSE, .fdIgnoreErrors = FALSE, .fdE = TRUE)
firstDef(..., .fdInterpolate = .fdInterpolate, .fdIgnoreErrors = .fdIgnoreErrors, .fdE = .fdE)
firstDefNA = function(..., .fdInterpolate = FALSE){
l = if (.fdInterpolate) c(...) else list(...);
for (i in l) { if (!is.na(i)) return(i)};
NULL
}
# <N> NULL behaviour
to.list = function(..., .remove.factors = TRUE){
r = if(is.null(...)) NULL else if (is.list(...)) c(...) else list(...);
if (.remove.factors) {
r = sapply(r, function(e)ifelse(is.factor(e), levels(e)[e], e));
}
r
}
# clean list/vector
Avu = function(v)as.vector(unlist(v))
# pretty much force everything to be a vector
avu = function(v, recursive = TRUE, toNA = TRUE) {
transform = if (toNA)
function(e, condition)(if (condition) NA else avu(e, toNA = TRUE, recursive = TRUE)) else
function(e, ...)avu(e, toNA = FALSE, recursive = TRUE);
r = if (is.list(v)) {
nls = sapply(v, is.null); # detects nulls
# unlist removes NULL values -> NA
unlist(sapply(seq_along(v), function(i)transform(v[[i]], nls[i])));
} else as.vector(v);
if (!length(r)) return(NULL);
r
}
#pop = function(v)rev(rev(v)[-1]);
assign.list = function(l, pos = -1, envir = as.environment(pos), inherits = FALSE, immediate = TRUE) {
for (n in names(l)) {
assign(n, l[[n]], pos, envir, inherits, immediate);
}
}
eval.text = function(text, envir = parent.frame())eval(parse(text = text), envir= envir);
nullomit = function(r)r[!sapply(r, is.null)]
# replace elements base on list
# l may be a list of lists with elements f (from) and t (to), when f is replaced with t
# if both, f and t arguments are not NULL, l will be ignored and f is replaced with t
vector.replace = function(v, l, regex = FALSE, ..., f = NULL, t = NULL) {
# if (!is.null(f) & !is.null(t)) l = list(list(f = f, t = t));
# # replacments are given in f/t pairs
# if (all(sapply(l, length) == 2)) {
# from = list.key(l, "f");
# to = list.key(l, "t");
# } else {
# from = names(l);
# to = unlist(l);
# }
# for (i in 1:length(from)) {
# if (regex) {
# idcs = which(sapply(v, function(e)(length(fetchRegexpr(from[i], e, ...)) > 0)));
# v[idcs] = sapply(v[idcs], function(e)gsub(from[i], to[i], e));
# } else v[which(v == from[i])] = to[i];
# }
repl = if (!is.null(f) & !is.null(t)) listKeyValue(f, t) else l;
# <!> tb tested
v = if (!regex) {
raw = repl[v];
unlist(ifelse(sapply(repl[v], is.null), v, raw))
} else {
sapply(v, function(e){
# first match takes precedent
j = which(sapply(names(repl), function(f)length(fetchRegexpr(f, e, ...)) > 0))[1];
if (is.na(j)) e else gsub(names(repl)[j], repl[[j]], e)
})
}
v
}
vector.with.names = function(v, all_names, default = 0) {
r = rep(default, length(all_names));
names(r) = all_names;
is = which.indeces(names(v), all_names, ret.na = TRUE);
r[is[!is.na(is)]] = v[!is.na(is)];
r
}
vector.named = function(v, Names) {
names(v) = Names;
v
}
# dir: direction of selection: 1: select rows, 2: select columns
mat.sel = function(m, v, dir = 1) {
r = if (dir == 1)
sapply(1:length(v), function(i)m[v[i], i]) else
sapply(1:length(v), function(i)m[i, v[i]]);
r
}
# rbind on list
simplify = sapplyId = function(l)sapply(l, identity);
Simplify = function(l)unlist(simplify(l));
listFind = function(lsed, lsee) {
values = sapply(names(lsee), function(n)list.key(lsed, n), simplify = FALSE, USE.NAMES = FALSE);
values = sapply(values, identity);
found = apply(values, 1, function(r) all(r == lsee));
r = unlist.n(lsed[found], 1);
r
}
#same.vector = function(v)(unique(v) == 1)
same.vector = function(v)all(v == v[1])
# in vector v, find index min j \in 1, ..., N so that v[1:j] contains at least U unique elements
uniqueIndex = function(v, U) {
#Nu = sapply(seq_along(v), function(i)length(unique(data$chr[1:i])));
# more efficient version
u = c();
for (i in seq_along(v)) {
u = unique(c(u, v[i]));
if (length(u) == U) return(i);
}
return(NA);
}
#
# <ยง> string manipulation
#
#join = function(v, sep = " ")if (length(v) == 0) '' else paste(v, collapse = sep);
join = function(v, sep = " ")paste(v, collapse = sep);
con = function(..., Sep_ = '')paste(..., sep = Sep_);
Con = function(..., Sep_ = '')paste(unlist(list(...)), collapse = Sep_);
# pastem = function(a, b, ..., revsort = TRUE) {
# if (revsort)
# as.vector(apply(merge(data.frame(a = b), data.frame(b = a), sort = FALSE), 1,
# function(e)paste(e[2], e[1], ...))) else
# as.vector(apply(merge(data.frame(a = a), data.frame(b = b), sort = FALSE), 1,
# function(e)paste(e[1], e[2], ...)))
# }
pastem = function(a, b, ..., revsort = TRUE) {
df = merge.multi.list(list(Df(a = a), Df(b = b)), .first.constant = revsort);
paste(df[, 1], df[, 2], ...)
}
r.output.to.vector.int = function(s) {
matches = gregexpr("(?<![\\[\\d])\\d+", s, perl=TRUE);
starts = as.vector(matches[[1]]);
lengthes = attr(matches[[1]], "match.length");
v = sapply(1:length(starts), function(i){ substr(s, starts[i], starts[i] + lengthes[i] -1) });
as.integer(v)
}
r.output.to.vector.numeric = function(s) {
matches = gregexpr("\\d*\\.\\d+", s, perl=TRUE);
starts = as.vector(matches[[1]]);
lengthes = attr(matches[[1]], "match.length");
v = sapply(1:length(starts), function(i){ substr(s, starts[i], starts[i] + lengthes[i] -1) });
as.numeric(v)
}
readFile = function(path) { join(scan(path, what = "raw", sep = "\n", quiet = TRUE), sep = "\n") };
circumfix = function(s, post = NULL, pre = NULL) {
if (is.null(s) || length(s) == 0) return('');
sapply(s, function(s)if (s == '') s else con(pre, s, post))
}
abbr = function(s, Nchar = 20, ellipsis = '...') {
ifelse(nchar(s) > Nchar, paste(substr(s, 1, Nchar - nchar(ellipsis)), ellipsis, sep = ''), s)
}
wrapStr = function(s, Nchar = 60, regex = '\\s+', indent = "\n") {
r = '';
while (nchar(s) > Nchar) {
R = gregexpr('\\s+', s, perl = TRUE);
Iws = R[[1]][R[[1]] <= Nchar];
Ichr = max(Iws);
# <i> handle Ichr = 1
r = con(r, substr(s, 1, Ichr - 1), indent);
s = substr(s, Ichr + attr(R[[1]], 'match.length')[length(Iws)], nchar(s));
}
r = con(r, s);
return(r);
}
Which.max = function(l, last.max = TRUE, default = NA) {
if (is.logical(l) && all(!l)) return(default);
r = if (last.max) (length(l) - which.max(rev(l)) + 1) else which.max(l);
r
}
Which.min = function(l, last.min = FALSE, default = NA) {
if (is.logical(l) && all(!l)) return(default);
r = if (last.min) (length(l) - which.min(rev(l)) + 1) else which.min(l);
r
}
# capturesN: named captures; for each name in captureN put the captured value assuming names to be ordered
# captures: fetch only first capture per match <!> deprecated
# capturesAll: fetch all caputers for each match
fetchRegexpr = function(re, str, ..., ret.all = FALSE, globally = TRUE, captures = FALSE, captureN = c(),
capturesAll = FALSE, maxCaptures = 9, returnMatchPositions = FALSE) {
if (length(re) == 0) return(c());
r = if (globally)
gregexpr(re, str, perl = TRUE, ...)[[1]] else
regexpr(re, str, perl = TRUE, ...);
if (all(r < 0)) return(NULL);
l = sapply(1:length(r), function(i)substr(str, r[i], r[i] + attr(r, "match.length")[i] - 1));
if (captures) {
l = sapply(l, function(e)gsub(re, '\\1', e, perl = TRUE, fixed = FALSE));
} else if (length(captureN) > 0) {
l = lapply(l, function(e) {
r = sapply(1:length(captureN), function(i) {
list(gsub(re, sprintf('\\%d', i), e, perl = TRUE, fixed = FALSE))
});
names(r) = captureN;
r
});
} else if (capturesAll) {
l = lapply(l, function(e) {
cs = c(); # captures
# <!> hack to remove zero-width assertions (no nested grouping!)
#re = gsub('(\\(\\?<=.*?\\))|(\\(\\?=.*?\\))', '', re, perl = TRUE, fixed = FALSE);
for (i in 1:maxCaptures) {
n = gsub(re, sprintf('\\%d', i), e, perl = TRUE, fixed = FALSE);
cs = c(cs, n);
}
cs
});
# trim list
#maxEls = maxCaptures - min(c(maxCaptures + 1, sapply(l, function(e)Which.max(rev(e != ''))))
# , na.rm = TRUE) + 1;
maxEls = max(c(sapply(l, function(e)Which.max(e != '', default = 1)), 1));
l = lapply(l, function(e)(if (maxEls > 0) e[1:maxEls] else NULL));
}
if (!ret.all) l = l[l != ""];
ret = if (returnMatchPositions) list(match = l, positions = r) else l;
ret
}
# improved multistring version
FetchRegexpr = function(re, str, ..., ret.all = FALSE, globally = TRUE, captures = FALSE, captureN = c(),
capturesAll = FALSE, maxCaptures = 9, returnMatchPositions = FALSE) {
if (length(re) == 0) return(c());
r = if (globally)
gregexpr(re, str, perl = TRUE, ...) else
list(regexpr(re, str, perl = TRUE, ...));
if (all(unlist(r) < 0)) return(NULL);
l = sapply(seq_along(r),
function(j) {
r0 = r[[j]];
sapply(1:length(r0),
function(i)substr(str[j], r0[i], r0[i] + attr(r0, "match.length")[i] - 1))
});
if (captures) {
l = sapply(l, function(e)gsub(re, '\\1', e, perl = TRUE, fixed = FALSE));
#print(l);
} else if (length(captureN) > 0) {
l = lapply(l, function(e) {
r = sapply(1:length(captureN), function(i) {
list(gsub(re, sprintf('\\%d', i), e, perl = TRUE, fixed = FALSE))
});
names(r) = captureN;
r
});
} else if (capturesAll) {
l = lapply(l, function(e) {
cs = c(); # captures
# <!> hack to remove zero-width assertions (no nested grouping!)
#re = gsub('(\\(\\?<=.*?\\))|(\\(\\?=.*?\\))', '', re, perl = TRUE, fixed = FALSE);
for (i in 1:maxCaptures) {
n = gsub(re, sprintf('\\%d', i), e, perl = TRUE, fixed = FALSE);
cs = c(cs, n);
}
cs
});
# trim list
#maxEls = maxCaptures - min(c(maxCaptures + 1, sapply(l, function(e)Which.max(rev(e != ''))))
# , na.rm = TRUE) + 1;
maxEls = max(c(sapply(l, function(e)Which.max(e != '', default = 1)), 1));
l = lapply(l, function(e)(if (maxEls > 0) e[1:maxEls] else NULL));
}
if (!ret.all) l = l[l != ""];
ret = if (returnMatchPositions) list(match = l, positions = r) else l;
ret
}
regex = Vectorize(fetchRegexpr, 'str', SIMPLIFY = TRUE, USE.NAMES = TRUE);
Regex = Vectorize(FetchRegexpr, 're', SIMPLIFY = TRUE, USE.NAMES = TRUE);
RegexL = Vectorize(FetchRegexpr, 're', SIMPLIFY = FALSE, USE.NAMES = TRUE);
regexIdcs = function(re, s, ...)vectorIdcs(regex(re, s, ...), is.null, not = TRUE)
# unify capture extraction for gregexpr, regexpr
# pos == 0: grexepr, regexpr else by iterating pos as index into str
matchRegexCapture = function(reg, str, pos = NULL) {
if (is.null(attr(reg, 'capture.start'))) return(NULL);
if (!is.null(pos)) str = str[pos] else pos = seq_along(reg);
captures = lapply(1:ncol(attr(reg, 'capture.start')), function(i) {
vs = sapply(pos, function(j)Substr(str,
attr(reg, 'capture.start')[j, i], attr(reg, 'capture.length')[j, i]))
vs
});
names(captures) = attr(reg, 'capture.names');
captures
}
MatchRegexExtract = function(m, s, pos = seq_along(m)) {
matches = ifelse(m[pos] < 0, character(0),
sapply(pos, function(i)Substr(s[i], m[i], attr(m, 'match.length')[i])));
matches
}
matchRegexExtract = function(reg, str, pos = NULL) {
if (!is.null(pos)) str = str[pos] else pos = seq_along(reg);
matches = ifelse(reg[pos] < 0, character(0),
sapply(pos, function(i)Substr(str, reg[i], attr(reg, 'match.length')[i])));
matches
}
# <i> re nested list with sub-res for named captures
# <!> globally == FALSE, removeNonMatch == FALSE
matchRegex = function(re, str, ..., globally = TRUE, simplify = TRUE,
positions = FALSE, removeNonMatch = FALSE) {
if (length(re) == 0) return(NULL);
reg = if (globally) gregexpr(re, str, perl = TRUE, ...) else regexpr(re, str, perl = TRUE, ...);
ms = if (globally)
lapply(seq_along(reg), function(i)matchRegexExtract(reg[[i]], str[i])) else
lapply(seq_along(str), function(i)matchRegexExtract(reg, str, pos = i));
# regmatches(str, reg);
captures = if (globally)
lapply(seq_along(reg), function(i)matchRegexCapture(reg[[i]], str[i])) else
lapply(seq_along(str), function(i)matchRegexCapture(reg, str, pos = i));
if (removeNonMatch) {
nonmatch = sapply(ms, length) == 0 | is.na(ms);
ms = ms[!nonmatch];
captures = captures[!nonmatch];
reg = reg[!nonmatch];
}
if (simplify && length(str) == 1) {
ms = ms[[1]];
captures = captures[[1]];
reg = reg[[1]];
}
r = if(positions) list(match = ms, capture = captures, positions = reg) else
list(match = ms, capture = captures);
r
}
#
# <p> final interface as of 2016/04
#
MatchRegex = function(re, str, mode = 'return') {
r = regexpr(re, str);
if (mode == 'return') {
r = str[which(r > 0)];
}
r
}
# handle attributes
# As.list assumes attributes and vector elements to be paired
# corresponding values/attributes will be put into the list
As.list = function(v) {
as = Recycle(attributes(v));
l = lapply(seq_along(v), function(i) {
# does not preserve matrices
#attrs = list.kp(as, Sprintf('[[%{i}d]]'));
# should become <i>
#attrs = list.kp(as, Sprintf('[[%{i}d]]', accessor = function(e)accessIdx(e, i)));
attrs = lapply(seq_along(as), function(j)accessIdx(as[[j]], i));
Attr(v[i], SetNames(attrs, names(as)))
});
l
}
# transform results from Regexpr captures = TRUE
list.transpose = function(l)lapply(seq_along(l[[1]]), function(i)list.kp(l, Sprintf('[[%{i}d]]')));
# interface as of 2018/06
# if re is vector, iterate over
# by default, return matches
RegexprSingle = function(re, s, captures = FALSE, global = TRUE, simplify = TRUE, concatMatches = TRUE, drop = TRUE) {
matches = if (global) gregexpr(re, s, perl = TRUE) else As.list(regexpr(re, s, perl = TRUE));
#print(gregexpr(re, s, perl = TRUE));
#print(regexpr(re, s, perl = TRUE));
#print(matches);
#matches = if (global) gregexpr(re, s, perl = TRUE) else list(regexpr(re, s, perl = TRUE));
r = pairslapply(matches, s, function(m, s) { # iterate strings
if (captures) {
r = matchRegexCapture(m, s);
if (concatMatches) r = apply(do.call(cbind, r), 1, join, sep = '');
} else {
r = MatchRegexExtract(m, s);
if (drop) r = r[!is.na(r)];
}
r
});
if (simplify && (
(length(s) == 1 && captures && concatMatches)
)) r = r[[1]];
if (simplify && !global) r = Simplify(r);
return(r);
}
Regexpr = function(re, s, ..., reSimplify = TRUE) {
r = lapply(re, RegexprSingle, s = unlist(s), ...);
if (length(re) == 1 && reSimplify) r = r[[1]];
return(r);
}
RegexprM = function(re, s, ..., reSimplify = TRUE) {
r = sapply(Regexpr(re, s, ..., reSimplify = reSimplify), function(e)length(e) > 0);
return(r);
}
splitString = function(re, str, ..., simplify = TRUE) {
l = lapply(str, function(str) {
if (is.na(str)) return(NA);
r = gregexpr(re, str, perl = TRUE, ...)[[1]];
if (r[1] < 0) return(str);
l = sapply(1:(length(r) + 1), function(i) {
substr(str, ifelse(i == 1, 1, r[i - 1] + attr(r, "match.length")[i - 1]),
ifelse(i > length(r), nchar(str), r[i] - 1))
});
});
if (length(l) == 1 && simplify) l = l[[1]];
l
}
# modeled after perl's qq
reString = '(?:([_\\/\\-a-zA-Z0-9.]+)|(?:\\"((?:\\\\\\\\.)*(?:[^"\\\\]+(?:\\\\\\\\.)*)*)\\"))';
# use reSep = '\\s+' to split based on a separator RE
qw = function(s, re = reString, reSep = NULL, names = NULL, byrow = TRUE) {
r = if (notE(reSep)) unlist(splitString(reSep, s)) else {
#r = if (TRUE) unlist(splitString('\\s+', s)) else
unlist(Regexpr(re, unlist(s), captures = TRUE));
}
if (notE(names)) r = Df_(matrix(r, ncol = length(names), byrow = byrow), names = names);
r
}
quoteString = function(s)sprintf('"%s"', s)
trimString = function(s) {
sapply(s, function(e)
if (is.na(e)) NA else FetchRegexpr('^\\s*(.*?)\\s*$', e, captures = TRUE)
)
}
qwi = function(...)as.integer(qw(...))
qwn = function(...)as.numeric(qw(...))
valueMapperRaw = function(n, d)d[[n]]
valueMapperStandard = function(n, d) {
if (is.na(d[[n]])) '{\\bf Value missing}' else (if (is.null(d[[n]])) n else d[[n]])
}
# <N> maxIterations needs to be large as a new iteration is entered after each successful substitution
# this is necessary, as
mergeDictToString = function(d, s,
valueMapper = valueMapperStandard,
#valueMapper = function(s)ifelse(is.na(d[[n]]), '{\\bf Value missing}', d[[n]]),
iterative = FALSE, re = FALSE, maxIterations = 1e4, doApplyValueMap = TRUE, doOrderKeys = TRUE, maxLength = 1e7) {
ns = names(d);
# proceed in order of decreasing key lengthes
if (doOrderKeys) ns = ns[rev(order(sapply(ns, nchar)))];
for (i in 1:maxIterations) {
s0 = s;
for (n in ns) {
# counteract undocumented string interpolation
subst = if (doApplyValueMap)
gsub("[\\\\]", "\\\\\\\\", valueMapper(n, d), perl = TRUE)
else d[[n]];
# <!> quoting
if (!re) n = sprintf("\\Q%s\\E", n);
s = gsub(n, firstDef(subst, ""), s, perl = TRUE, fixed = FALSE);
# <A> if any substitution was made, it is nescessary to reiterate ns to preserver order
# of substitutions
if (iterative && s != s0) break;
}
if (!iterative || s == s0 || nchar(s) > maxLength) break;
}
s
}
mergeDictToStringV = Vectorize(mergeDictToString, 's', SIMPLIFY = TRUE, USE.NAMES = TRUE);
mergeDictToVector = function(d, v) { unlist(ifelse(is.na(names(d[v])), v, d[v])) }
mergeDictToDict = function(dMap, dValues, ..., recursive = TRUE) {
r = lapply(dValues, function(v) {
r = if (class(v) == 'list') {
if (recursive) mergeDictToDict(dMap, v, ...) else v
} else if (class(v) == 'character') mergeDictToString(dMap, v, ...) else v;
r
});
r
}
# double quote if needed
qsSingle = function(s, force = FALSE) {
# <N> better implementation possible: detect unquoted white-space
if (force || length(fetchRegexpr('[ \t"()\\[\\]:,]', s)) > 0) {
s = gsub('([\\"])', '\\\\\\1', s);
s = sprintf('"%s"', s);
} else {
s0 = gsub("([\\'])", '\\\\\\1', s);
if (s0 != s) s = sprintf("$'%s'", s0);
}
s
}
qs = function(s, ...)sapply(s, qsSingle, ...)
# single quote if needed
qssSingle = function(s, force = FALSE) {
# <N> better implementation possible: detect unquoted white-space
if (force || nchar(s) == 0 || length(fetchRegexpr("[ \t'\"()\\[\\]:,]", s)) > 0) {
s = gsub("(['])", "'\"'\"'", s);
s = sprintf("'%s'", s);
}
s
}
qss = function(s, ...)sapply(s, qssSingle, ...)
# include special case for home folder expansion: do not quote initial '~'
qsSinglePath = function(s, ...) {
if (s == '~')
s else
if (nchar(s) >= 2 && substring(s, 1, 2) == '~/')
con('~/', qsSingle(substring(s, 3), ...)) else
qsSingle(s, ...)
}
# include special case for home folder expansion"
qsPath = function(s, ...)sapply(s, qsSinglePath, ...)
#' Return sub-strings indicated by positions or produce a string by substituting those strings with
#' replacements
#'
#' The function behaves similar to sprintf, except that character sequences to be substituted are
#' indicated by name.
#'
#' @param s template string
#' @param start vector of start positions of substrings to substitute
#' @param length vector of lengthes of substrings to substitute
#' @param replacement vector of strings to subsitute. If missing, \code{Substr} returns sub-strings indicated
#' by start/length
#' @return character vector containing extracted sub-strings
#'
# #' @examples
# #' \dontrun{
# #' print(Substr("abc", c(2, 3), c(1, 1), c("def", 'jkl')));
# #' print(Substr("abcdef", c(2, 3, 5), c(1, 1, 1), c("123", '456', '789')));
# #' print(Substr("abcdef", c(1, 3, 5), c(1, 1, 1), c("123", '456', '789')));
# #' print(Substr("abcdef", c(1, 3, 5), c(0, 1, 0), c("123", '456', '789')));
# #' }
Substr = function(s, start, length, replacement) {
if (missing(replacement)) return(substr(s, start, start + length - 1));
start = c(start, nchar(s) + 1);
l = sapply(seq_along(replacement), function(i)c(
replacement[i],
substr(s, start[i] + length[i], start[i + 1] - 1)
));
l = c(substr(s, 1, start[1] - 1), as.vector(l));
r = join(as.vector(l), sep = '');
r
}
sprintfIgnoreEscapes = function(r) {
m = r$match;
L = attr(r$positions, 'capture.length');
if (!(any(L[, 1] == 0 & L[, 2] == 0))) return(r);
Is = which(L[, 1] == 0 & L[, 2] == 0);
r0 = r;
r$match = r0$match[-Is];
r$positions = r0$positions[-Is];
attr(r$positions, 'match.length') = attr(r0$positions, 'match.length')[-Is];
attr(r$positions, 'capture.start') = attr(r0$positions, 'capture.start')[-Is, , drop = FALSE];
attr(r$positions, 'capture.length') = attr(r0$positions, 'capture.length')[-Is, , drop = FALSE];
attr(r$positions, 'capture.names') = attr(r0$positions, 'capture.names')[-Is];
return(r);
}
# <!> quoting
#' Produce string by substituting placeholders
#'
#' The function behaves similar to sprintf, except that character sequences to be substituted are
#' indicated by name. To be implemented: *-specifications
#'
#' #@param s template string
#' #@param d values to substitute into \code{s}
#' #@param template template for substitution pattern. Within this pattern \code{__DICT_KEY__} is
#' # substituted for a key in \code{d}. This string \code{k} is substituted in \code{s} with \code{d[[k]]}.
#' @param .fmt formatting string into which values are interpolated (see details)
#' @param values list or vector of values to be used for interpolation
#' @param sprintf_cartesian boolean to indicate whether cartesian product of values should be used.
#' Otherwise standard recyling rules apply.
#' @param envir environment in which values are to be evaluated
#' @return Interpolated character string
#'
# #' @examples
# #' \dontrun{
# #' Sprintf('These are N %{N} characters.', list(N = 10));
# #' Sprintf('These are N %{N}d characters.', list(N = 10));
# #' Sprintf('These are N %{N}02d characters.', list(N = 10));
# #' }
Sprintfl = function(.fmt, values, sprintf_cartesian = FALSE, envir = parent.frame()) {
dict = extraValues = list();
for (i in seq_along(values)) {
if (is.list(values[[i]]))
dict = merge.lists(dict, values[[i]]) else
if (!is.null(names(values)[i]) && names(values)[i] != '')
dict = merge.lists(dict, values[i]) else
extraValues = c(extraValues, values[i]);
}
# re = '(?x)(?:
# (?:^|[^%]|(?:%%)+)\\K
# [%]
# (?:[{]([^{}\\*\'"]*)[}])?
# ((?:[-]?[*\\d]*[.]?[*\\d]*)?(?:[sdfegG]|))(?=[^%sdfegG]|$)
# )';
# <!> new, untested regexpr as of 22.5.2014
# un-interpolated formats do no longer work
# re = '(?xs)(?:
# (?:[^%]+|(?:%%)+)*\\K
# [%]
# (?:[{]([^{}\\*\'"]*)[}])?
# ((?:[-]?[*\\d]*[.]?[*\\d]*)?(?:[sdfegGDQqu]|))(?=[^sdfegGDQqu]|$)
# )';
re = '(?xs)(?:
(?:[^%]+|(?:%%)+)*
\\K[%]
(?:[{]([^{}\\*\'"]*)[}])?
((?:[-]?[*\\d]*[.]?[*\\d]*)?(?:[stdfegGDQqu]|))(?=[^stdfegGDQqu]|$)
)';
# re = '(?xs)(?:
# (?:(?:[^%]+)(?:(?:%%)+(?:[^%]+))*)
# [%]
# (?:[{]([^{}\\*\'"]*)[}])?
# ((?:[-]?[*\\d]*[.]?[*\\d]*)?(?:[sdfegGDQqu]|))(?=[^sdfegGDQqu]|$)
# )';
r = fetchRegexpr(re, .fmt, capturesAll = TRUE, returnMatchPositions = TRUE);
r = sprintfIgnoreEscapes(r);
# <p> nothing to format
if (length(r$match) == 0) return(.fmt);
typesRaw = sapply(r$match, function(m)ifelse(m[2] == '', 's', m[2]));
types = ifelse(typesRaw %in% c('D', 'Q'), 's', typesRaw);
fmts = sapply(r$match, function(m)sprintf('%%%s',
ifelse(m[2] %in% c('', 'D', 'Q', 'q', 't', 'u'), 's', m[2])));
fmt1 = Substr(.fmt, r$positions, attr(r$positions, 'match.length'), fmts);
keys = sapply(r$match, function(i)i[1]);
nonKeysI = cumsum(keys == ''); # indeces of values not passed by name
nonKeysIdcs = which(keys == '');
# <p> collect all values
allValues = c(extraValues, dict);
# get interpolation variables
interpolation = nlapply(keys[keys != ''], function(k)
if (!is.null(allValues[[k]])) NULL else rget(k, default = NA, envir = envir)
);
# <p> handle %D: current day
keys[typesRaw == 'D'] = '..Sprintf.date..';
dateValue = if (sum(typesRaw == 'D'))
list(`..Sprintf.date..` = format(Sys.time(), '%Y%m%d')) else
list();
allValues = c(allValues, dateValue, List_(interpolation, rm.null = TRUE));
# 14.9.2015 -> convert to indeces
# build value combinations
listedValues = lapply(keys, function(k)allValues[[k]]);
dictDf = if (!sprintf_cartesian) Df_(listedValues) else merge.multi.list(listedValues);
# fill names of anonymous formats
keys[keys == ''] = names(dictDf)[Seq(1, sum(nonKeysI != 0))];
# due to repeat rules of R vectors might have been converted to factors
#dictDf = Df_(dictDf, as_character = unique(keys[types == 's']));
dictDf = Df_(dictDf, as_character = which(types == 's'));
# <p> conversion <i>: new function
#colsQ = keys[typesRaw == 'Q'];
# <!> switch to index based transformation on account of duplicate keys
colsQ = which(typesRaw == 'Q');
dictDf[, colsQ] = apply(dictDf[, colsQ, drop = FALSE], 2, qsPath, force = TRUE);
#colsq = keys[typesRaw == 'q'];
colsq = which(typesRaw == 'q');;
dictDf[, colsq] = apply(dictDf[, colsq, drop = FALSE], 2, qss);
colst = which(typesRaw == 't');;
dictDf[, colst] = apply(dictDf[, colst, drop = FALSE], 2, qss, force = TRUE);
colsu = which(typesRaw == 'u');;
dictDf[, colsu] = apply(dictDf[, colsu, drop = FALSE], 2, uc.first);
colsd = which(typesRaw == 'd');;
dictDf[, colsd] = apply(dictDf[, colsd, drop = FALSE], 2, as.integer);
s = sapply(1:nrow(dictDf), function(i) {
valueDict = as.list(dictDf[i, , drop = FALSE]);
# sprintfValues = lapply(seq_along(keys), function(i)
# ifelse(keys[i] == '', extraValues[[nonKeysI[i]]],
# firstDef(valueDict[[keys[i]]], rget(keys[i], default = '__no value__'), pos = -2)));
# sprintfValues = lapply(seq_along(keys), function(i)
# firstDef(valueDict[[keys[i]]], rget(keys[i], default = '__no value__', envir = envir)));
#sprintfValues = lapply(seq_along(keys), function(i)valueDict[[keys[i]]]);
#do.call(sprintf, c(list(fmt = fmt1), sprintfValues))
# <!> simplify above two lines, now robust against duplicated entries -> <i> needs unit tests
names(valueDict) = NULL;
do.call(sprintf, c(list(fmt = fmt1), valueDict))
});
s
}
# 18.10.2019: fmt -> .fmt to avoid confusion with abbreviated named arguments (e.g. f = x substitutes fmt)
Sprintf = sprintd = function(.fmt, ..., sprintf_cartesian = FALSE, envir = parent.frame(),
resetNames = TRUE, drop = TRUE) {
r = sapply(.fmt, function(.fmt)
Sprintfl(.fmt, list(...), sprintf_cartesian = sprintf_cartesian, envir = envir),
USE.NAMES = !resetNames);
# <!> special case when a single .fmt is provided -> do not return matrix for several values
if (drop && length(.fmt) == 1) r = avu(r);
r
}
#r = getPatternFromStrings(DOC, '(?:\\nDOCUMENTATION_BEGIN:)([^\\n]+)\\n(.*?)(?:\\nDOCUMENTATION_END\\n)');
getPatternFromStrings = function(strings, pattern, keyIndex = 1) {
r = lapply(strings, function(s) {
ps = fetchRegexpr(pattern, s, capturesAll = TRUE);
listKeyValue(sapply(ps, function(e)e[[keyIndex]]), sapply(ps, function(e)e[-keyIndex]));
});
r
}
getPatternFromFiles = function(files, locations = NULL, ...) {
strings = sapply(files, function(f)readFile(f, prefixes = locations));
getPatternFromStrings(strings, ...);
}
#
# hex strings
#
asc = function(x)strtoi(charToRaw(x), 16L);
character.as.characters = function(str) {
sapply(str, function(s) sapply(1:nchar(s), function(i)substr(str, i, i)));
}
# bit_most_sig in bits
hex2int = function(str, bit_most_sig = 32) {
cs = rev(sapply(character.as.characters(tolower(str)), asc));
cms = bit_most_sig / 4; # character containing most significant bit
is = ifelse(cs >= asc('a'), cs - asc('a') + 10, cs - asc('0'));
flipSign = (length(is) >= cms && is[cms] >= 8);
if (flipSign) is[cms] = is[cms] - 8;
r = sum(sapply(1:length(is), function(i)(is[i] * 16^(i-1))));
if (flipSign) r = r - 2^(bit_most_sig - 1);
r = if (r == - 2^(bit_most_sig - 1)) NA else as.integer(r);
r
}
# chunk_size in bits
hex2ints = function(str, chunk_size = 32) {
l = nchar(str);
csc = chunk_size / 4; # chunk_size in characters
chunks = (l + csc - 1) %/% csc;
r = sapply(1:chunks, function(i)hex2int(substr(str, (i - 1)*csc + 1, min(l, i*csc))));
r
}
#
# <ยง> binary numbers/n-adic numbers
#
ord2base = dec2base = function(o, digits = 5, base = 2) {
sapply(1:digits, function(i){(o %/% base^(i-1)) %% base})
}
base2ord = base2dec = function(v, base = 2) {
sum(sapply(1:length(v), function(i)v[i] * base^(i-1)))
}
ord2bin = dec.to.bin = function(number, digits = 5) ord2base(number, digits, base = 2);
bin2ord = bin.to.dec = function(bin) base2ord(bin, base = 2);
# mixed base calculations
#sapply(1:length(base), function(i)((n %/% div[i]) %% base[i]));
cumprod1 = function(v)c(1, cumprod(pop(v)))
# ord2adic = function(n, base = rep(2, 5)) {
# div = cumprod1(base);
# (n %/% div) %% base
# }
# adic2ord = function(v, base = rep(2, 5)) {
# mult = cumprod1(base);
# (v %*% mult)[1, 1]
# }
ord2adic = function(n, base = rep(2, 5))((n %/% cumprod1(base)) %% base)
adic2ord = function(v, base = rep(2, 5))((v %*% cumprod1(base))[1, 1])
#
# <Par> sequences
#
#' Produce constrained sequences
#'
#' This is a wrapper around seq that adds constraints. Setting ascending, descending to NA reverts to
#' standard \code{seq} behaviour.
#'
#' @param ascending restrict sequences to be ascending; return empty list if to < from
#' @param descending restrict sequences to be descending; return empty list if from < to
#' @param from starting value
#' @param to ending value
#' @param neg boolean to indicate wheter sequence should be negated before return
#' @param ... parameters passed on to \code{seq}
#' @return sequence from \code{from} to \code{to}
# #' @examples
# #' \dontrun{
# #' Seq(1, 10, ascending = TRUE)
# #' Seq(1, 10, descending = TRUE)
# #' Seq(10, 1, ascending = NA)
# #' }
Seq = function(from, to, ..., ascending = TRUE, descending = !ascending, neg = FALSE) {
# <!> order matters: if called with only descending == TRUE
if (nif(descending) && to > from) return(if (neg) TRUE else c()) else
if (nif(ascending) && from > to) return(if (neg) TRUE else c());
s = seq(from, to, ...);
r = if (neg) -s else s;
r
}
SeqRows = function(o)Seq(1, nrow(o))
#' Produce index pairs for vector of counts
#'
#' @param counts vector of integers specifying counts
#' @return vector of pairs of indeces indicating the first and last element in a vector for the blocks
#' specified by \code{counts}
#' @keywords internal
# #' @examples
# #' \dontrun{
# #' count2blocks(c(1, 5, 3))
# #' }
count2blocks = function(counts) {
ccts = cumsum(counts);
fidcs = c(1, ccts[-length(ccts)] + 1);
blks = as.vector(rbind(fidcs, fidcs + counts - 1));
blks
}
#
# expand a block list - for example as from count2blocks - to a list of integers
#
expandBlocks = function(blks) {
applyL(matrix(blks, ncol = 2, byrow = TRUE), 1, function(r) { r[1]:r[2] } )
}
# split 1:M into N partitions, return row-wise range
splitListIndcs = function(M, N = 1, .compact = FALSE, .truncate = TRUE) {
if (.truncate & M < N) N = M;
if (.compact) {
n = rep(ceiling(M / N), N); # size of parts
idcs = c(0, cumsum(n));
idcs = idcs[idcs < M];
idcs = c(idcs, M);
} else {
n = rep(floor(M / N), N); # size of parts
R = M - n[1] * N;
n = n + c(rep(1, R), rep(0, N - R));
idcs = c(0, cumsum(n));
}
idcs = cbind(idcs + 1, c(idcs[-1], 0))[-length(idcs), ]; # from:to in a row
# <!> usual R degeneracy
if (!is.matrix(idcs)) idcs = matrix(idcs, nrow = 1);
idcs
}
splitListEls = function(l, N, returnElements = FALSE) {
idcs = splitListIndcs(length(l), N);
li = apply(idcs, 1, function(r)(if (returnElements) l[r[1]:r[2]] else r[1]:r[2]));
# <!> R ambiguity of apply return type
if (is.matrix(li)) li = lapply(1:(dim(li)[2]), function(i)li[, i]);
if (is.vector(li)) li = as.list(li);;
li
}
# @param l list of index positions from another object
# @return return vector indicating to which list element an index was assigned
# Example: glmnet accepts fold numbers per index (as opposed to a partitioning of elements)
index2listPosition = function(l) {
N = sum(sapply(l, length));
na = rep(NA, N);
m = sapply(1:length(l), function(i)vector.assign(na, l[[i]], i, na.rm = NA));
r = apply(m, 1, na.omit);
r
}
# idcs start positions in ragged list, converted to ranges
idcsStart2range = function(idcs, N = max(idcs)) {
if (length(idcs) == 0) return(NULL);
vector.intercalate(idcs, c(shift(idcs - 1), N))
}
# splitting based on fractions
# voting percentages to seats
# simple algorithm based on size of residuals
# tiePreferHigh: for tied residuals add/subtract seats to high indeces (TRUE) or low ones (FALSE)
splitSeatsForFractions = function(Nseats, fractions = vn(rep(1, Nfractions)), Nfractions,
tiePreferHigh = TRUE) {
# number of parties
Nparties = length(fractions);
# fractional seats
Nseats0 = fractions * Nseats;
# garuantee one seat, otherwise round to nearest
Nseats1 = ifelse (Nseats0 < 1, 1, round(Nseats0));
# individual mismatch
Nresid = Nseats0 - Nseats1;
# mismatch total
diff = sum(Nseats1) - Nseats;
# redistribute deficit/overshoot
if (diff != 0) {
Nresid1 = ifelse(Nresid < 0, 1, Nresid); # too few vs too many, too few -> maximal value of 1
# take seats from whom? We need abs(diff) seats.
#subtr = order(Nresid1, decreasing = diff < 0)[1:abs(diff)];
prio = if (tiePreferHigh) 1:Nparties else rev(1:Nparties);
subtr = Order(Df(Nresid1, prio))[1:abs(diff)];
# assume one round of correction is always sufficient <!>
Nseats1[subtr] = Nseats1[subtr] - sign(diff);
}
Nseats1
}
# tranform number of elements (as from splitSeatsForFractions) into from:to per row in a matrix
counts2idcs = function(counts) {
idcs = c(0, cumsum(counts));
idcs = cbind(idcs + 1, c(idcs[-1], 0))[-length(idcs), ];
if (is.null(counts)) return(idcs); # matrix w/ 0 rows
t2r(idcs) # fails on counts == NULL
}
# N is partitioned into fractions from p, where each element of p partitions the remaining part of N
# procedure makes sure to leave space for length(p) elements
cumpartition = function(N, p) {
I = c(); # indeces within 1:N
for (i in 1:length(p)) {
# partition remaining space (ifelse), leave room for subsequent indeces
Ii = floor(p[i] * (ifelse(i == 1, N, N - I[i - 1]) - (length(p) - i))) + 1;
I = c(I, ifelse(i == 1, Ii, I[i - 1] + Ii));
}
as.integer(I)
}
#' Extract parts of a nested structure based on the range from..to
#'
#'
#' @param Ns Vector of integers that specify the size of the substructures
#' @param from absolute index where to start extraction
#' @param to absolute index where to stop extraction
#' @return Return list of lists, where each basic list contains key \code{segment}
#' (which of the elements of Ns) and key \code{range}, a list with elements \code{from} and \code{to},
#' specifying which elements to use from
#' that segment.
# #' @examples
# #' \dontrun{
# #' # TestMe: TRUE1
# #' subListFromRaggedIdcs(c(2, 4, 10, 15), 1, 20)
# #' }
subListFromRaggedIdcs = function(Ns, from = 1, to) {
NsCS = cumsum(Ns);
NsCSs = c(0, pop(NsCS)); # shifted cumsum
segments = which(from <= NsCS & to > NsCSs);
if (missing(to)) to = sum(segments);
r = lapply(segments, function(segment){
N = Ns[segment]; # list-call
from_ = 1;
to_ = N;
if (segment == segments[1]) from_ = from - NsCSs[segment];
if (segment == rev(segments)[1]) to_ = to - NsCSs[segment];
r = list(segment = segment, range = list(from = from_, to = to_));
r
});
r
}
#' Extract parts of nested lists based on the range from..to
#'
#'
#' @param from absolute index where to start extraction
#' @param to absolute index where to stop extraction
#' @param ls nested list structure (currently only two levels supported)
#' @return Return list of list, where each basic list contains key \code{segment}
#' (which of the elements of Ns) and key \code{range}, a list with elements \code{from} and \code{to},
#' specifying which elements to use from
#' that segment.
subListFromRaggedLists = function(ls, from = 1, to = sum(sapply(ls, length))) {
sl = subListFromRaggedIdcs(sapply(ls, length), from = from, to = to);
r = lapply(sl, function(s) with(s, {
r = ls[[segment]][range$from: range$to];
r
}));
r = unlist.n(r, 1);
r
}
#
# <ยง> vector functions
#
# does the position exists in vector v
exists.pos = function(v, i)(is.vector(v) && !is.na(v[i]))
# for a vector blocked by blockSize N, return indeces of elements of block i
rangeBlock = function(i, N)(((i - 1)*N + 1):(i * N))
#
# <par> lists
#
merge.lists = function(..., ignore.nulls = TRUE, listOfLists = FALSE, concat = FALSE, useIndeces = FALSE) {
lists = if (listOfLists) c(...) else list(...);
l1 = lists[[1]];
if (length(lists) > 1) for (i in 2:length(lists)) {
l2 = lists[[i]];
ns = if (useIndeces) 1L:length(l2) else names(l2);
for(n in ns) {
if (is.null(n)) print("Warning: tried to merge NULL key");
if (!is.null(n) & (!ignore.nulls | !is.null(l2[[n]]))) {
if (concat) l1[[n]] = c(l1[[n]], l2[[n]]) else l1[[n]] = l2[[n]];
}
}
}
l1
}
merge.lists.recursive = function(..., ignore.nulls = TRUE, listOfLists = FALSE) {
lists = if (listOfLists) c(...) else list(...);
l1 = lists[[1]];
if (length(lists) > 1) for (i in 2:length(lists)) {
l2 = lists[[i]];
for(n in names(l2)) {
if (is.null(n)) print("Warning: tried to merge NULL key");
if (!is.null(n) & (!ignore.nulls | !is.null(l2[[n]])))
l1[[n]] = if (is.list(l1[[n]]))
merge.lists.recursive(l1[[n]], l2[[n]]) else
l2[[n]]
}
}
l1
}
unshift = function(l, listOfList = TRUE) {
if (!listOfList) l = list(l);
e1 = lapply(l, function(l0)if (is.list(l0)) l0[[1]] else l0[1]);
r1 = lapply(l, function(l0)l0[-1]);
r = list(elements = e1, remainder = r1);
r
}
Merge.lists.raw = function(lists, ignore.nulls = TRUE, recursive = FALSE, keys = NULL) {
if (!is.null(keys)) keys = unshift(keys);
l1 = lists[[1]];
if (length(lists) > 1) for (i in 2:length(lists)) {
l2 = lists[[i]];
for(n in names(l2)) {
if (is.null(n)) print("Warning: tried to merge NULL key");
if (!is.null(n) & (!ignore.nulls | !is.null(l2[[n]])))
l1[[n]] = if (recursive && is.list(l1[[n]]) && (is.null(keys) || n %in% keys$elements))
Merge.lists.raw(list(l1[[n]], l2[[n]]), ignore.nulls, recursive,
if (is.null(keys)) NULL else keys$remainder) else
l2[[n]]
}
}
l1
}
Merge.lists = function(..., ignore.nulls = TRUE, listOfLists = FALSE, recursive = FALSE, keyPathes = NULL) {
lists = if (listOfLists) c(...) else list(...);
keys = if (!is.null(keyPathes)) splitString("[$]", keyPathes, simplify = FALSE) else NULL;
l = Merge.lists.raw(lists, ignore.nulls = ignore.nulls, recursive = recursive, keys = keys);
l
}
# l: list of lists
# take parallel elements from l (1, ...) after recycling
list.combine = function(l, byRow = TRUE, names = NULL, doMerge = FALSE) {
lR = Recycle(l, byRow = byRow);
# <p> number of final elements
N = length(lR[[1]]);
lC = lapply(1:N, function(i) {
lol = list.kp(lR, Sprintf('[[%{i}d]]'));
if (notE(names)) names(lol) = names;
return(if (doMerge) merge.lists(lol, listOfLists = TRUE) else lol);
});
return(lC);
}
# inverse of unlist.n(, 1)
list.embed = function(l, key = 'key')lapply(l, function(e)SetNames(list(e), key));
# use.names preserves names and concatenates with lower level names
# reset sets names to top level names
unlist.n = function(l, n = 1, use.names = TRUE, reset = FALSE) {
if (n > 0) for (i in 1:n) {
ns = names(l);
#names(l) = rep(NULL, length(l)); # <!> untested removal Tue Oct 19 17:11:53 2010
l = unlist(l, recursive = FALSE, use.names = use.names);
if (reset) names(l) = ns;
}
l
}
# <N> obsolete, better: with(l, { ...})
instantiate.list = function(l, n = 1) {
for (nm in names(l)) {
eval.parent(parse(file = "", text = sprintf("%s = %s", nm, deparse(l[[nm]]))), n = n);
# if (is.integer(l[[nm]])) {
# eval.parent(parse(file = "", text = sprintf("%s = %d", nm, l[[nm]])), n = n);
# } else if (is.numeric(l[[nm]])) {
# eval.parent(parse(file = "", text = sprintf("%s = %f", nm, l[[nm]])), n = n);
# } else {
# eval.parent(parse(file = "", text = sprintf("%s = \"%s\"", nm, l[[nm]])), n = n);
# };
}
}
# for use in testing code
instantiate = function(l, ..., envir = parent.frame()) {
l0 = c(l, list(...));
for (i in seq_along(l0)) assign(names(l0)[i], l0[[i]], envir = envir);
invisible(l0)
}
# assume a list of lists (aka vector of dicts) and extract a certain key from each of the lists
list.key = function(v, key, unlist = TRUE, template = NULL, null2na = FALSE) {
l = lapply(v, function(i){
if (is.list(i)) {
if (is.null(i[[key]])) { if (null2na) NA else NULL } else i[[key]]
} else template});
if (unlist) l = unlist(l);
l
}
# iterative gsub
# substs: list with pairs re, sub
gsubI = function(substs, s, ...) {
for (r in substs) s = gsub(r[[1]], r[[2]], s, ...);
s
}
# concatenate lists, leave out lists only containing a single NULL
cList = function(...) {
r = list(...);
isNull = sapply(r, is.null);
do.call(c, r[!isNull])
}
keyPathParse = function(kp) {
kp = gsubI(
list(c('\\*', '.'), c('\\[\\[(\\d+)\\]\\]', 'INDEX__\\1'), c('(\\$|^)\\s*[(]', '\\1PAR__('))
, kp);
Parse(kp)
}
keyPathExpression2key = function(e) {
s = as.character(e);
i = FetchRegexpr('INDEX__(\\d+)', s, captures = TRUE);
r = if (s == '.') '*' else
if (s == 'PAR__') NULL else
if (!is.null(i)) return(as.integer(i)) else s;
if (is.null(r)) NULL else list(r)
}
# level: level of nesting for parallel key pathes
# output: list or vector, a parallel pattern induces two more levels of list nesting
# the parallel keyPathes each of which is forced to be a list
keyPathAstRaw = function(e, level = 0) {
r = if (is.call(e)) {
isPar = e[[1]] == '|';
isPlain = e[[1]] == '$';
levelN = ifelse(isPar, level + 1, 0);
# simple walk through
r = if (isPlain | isPar)
cList(keyPathAstRaw(e[[2]], level = levelN), keyPathAstRaw(e[[3]], level = levelN))
# this is a fake call generated by the PAR__ construct, the path leading to PAR__ is seen as an
# anonymous function
else cList(keyPathAstRaw(e[[1]]), list(keyPathAstRaw(e[[2]], level = 1)));
if (level & isPlain) list(r) else r
} else if (is.name(e)) keyPathExpression2key(e) else {
stop('malformed keyPath');
}
r
}
keyPathAst = function(kp) {
unlist.n(keyPathAstRaw(keyPathParse(kp)[[1]]), n = 0);
}
list.kp.unquote = function(key) {
# un-quote: remove single backslashes
key = sub('(?<![\\\\])[\\\\](?![\\\\])', '', key, perl = TRUE);
# de-quote: double backslashes become single backslashes
key = sub('\\\\', '\\', key, fixed = TRUE);
as.character(key)
}
# extract key path from list, general, recursive version
# key path recursive worker
list.kprw = function(l, keys, unlist.pats, template, null2na, carryNames, test) {
key = keys[1];
# <p> extract key
r = if (key != "*") {
index = fetchRegexpr("\\A\\[\\[(\\d+)\\]\\]\\Z", key, captures = TRUE);
if (length(index) > 0) key = as.integer(index[[1]]);
if (is.list(l)) {
# <N> logical(0) seen as NULL by second condition
r = if (is.null(l[[key]]) || length(l[[key]]) == 0) {
if (null2na) { NA } else firstDef(template, NULL)
} else l[[key]];
if (length(keys) > 1)
list.kprw(r, keys[-1], unlist.pats[-1], template, null2na, carryNames, test) else
if (test) !(is.null(r) || all(is.na(r))) else r;
} else if (class(l) %in% c('character')) {
if (notE(names(l))) l[names(l) %in% key] else l[key]
} else if (class(l) %in% c('data.frame', 'matrix')) {
l[, key]
} else if (class(l) %in% c('numeric', 'integer')) {
l[key]
} else return(template);
# {
# r = template;
# attr(r, 'names') = keys[last(keys)];
# print(c(keys, r));
# return(r);
# }
} else {
if (length(keys) > 1)
lapply(l, function(sl)
list.kprw(sl, keys[-1], unlist.pats[-1], template, null2na, carryNames, test)
) else l;
}
# <p> unlisting
if (notE(unlist.pats)) if (unlist.pats[1]) r = unlist.n(r, 1, reset = carryNames);
r
}
# extract key path from list, general, recursive version
# key path recursive worker: parallel keys
# iterate over recursive keys
list.kprwPar = function(l, keys, ...) {
key = keys[1];
r = if (length(fetchRegexpr("\\|", key)) > 0) {
parKeys = sapply(splitString('\\|', key), list.kp.unquote);
r = lapply(parKeys, function(key)list.kprwkp(l, c(key, keys[-1]), ...));
unlist.n(r, 1);
} else list.kprw(l, keys, ...);
r
}
# worker: keypath
list.kprwkp = function(l, keyPath, ...) {
keysNew = fetchRegexpr("(?:[a-zA-Z0-9_.|\\[\\]*]+(?:\\\\[$])?)+", keyPath[1]);
keys = c(keysNew, keyPath[-1]);
r = list.kprwPar(l, keys, ...);
r
}
list.kp.keys = function(keyPath) fetchRegexpr("[^$]+", keyPath);
# wrapper for list.kprw
# keyPath obeys EL1 $ EL2 $ ..., where ELn is '*' or a literal
# unlist.pat is pattern of truth values TR1 $ TR2 $..., where TRn is in 'T|F' and specifies unlist actions
# carryNames determines names to be carried over from the top level in case of unlist
list.kpr = function(l, keyPath, do.unlist = FALSE, template = NULL,
null2na = FALSE, unlist.pat = NULL, carryNames = TRUE, as.matrix = FALSE, test = FALSE) {
keys = list.kp.keys(keyPath);
unlist.pats = if (notE(unlist.pat)) as.logical(fetchRegexpr("[^$]+", unlist.pat)) else NULL;
# parallel keys
#r = list.kprwkp(l, keyPath, unlist.pats, template, null2na, carryNames, test = test);
r = list.kprw(l, keys, unlist.pats, template, null2na, carryNames, test = test);
if (do.unlist) { r = unlist(r); }
if (as.matrix) r = t(sapply(r, function(e)e));
r
}
# extract key path from list
# <!> interface change: unlist -> do.unlist (Wed Sep 29 18:16:05 2010)
# test: test existance instead of returning value
list.kp = function(l, keyPath, do.unlist = FALSE, template = NULL, null2na = FALSE, test = FALSE, n) {
r = list.kpr(l, sprintf("*$%s", keyPath), do.unlist = do.unlist,
template = template, null2na = null2na, test = test);
if (!missing(n)) r = unlist.n(r, n);
r
}
list.kpu = function(..., do.unlist = TRUE)list.kp(..., do.unlist = do.unlist);
list.keys = function(l, keys, default = NA) {
l = as.list(l);
r = lapply(unlist(keys), function(key) if (is.null(l[[key]])) default else l[[key]]);
r
}
# make A > B into B > A
listReverseHierarchy = function(l, unlist = FALSE) {
ns = names(l[[1]]);
r = lapply(ns, function(n)list.kp(l, n, do.unlist = unlist));
names(r) = ns;
return(r);
}
null2na = function(l) {
if (!length(l)) return(l);
l[sapply(l, is.null)] = NA;
return(l);
}
# return list without listed keys
list.min = function(l, keys) {
l[-which.indeces(keys, names(l))]
}
# list generation on steroids (wraps other functions)
.list = function(l, .min = NULL) {
if (!is.null(.min)) l = list.min(l, .min);
l
}
# get apply
gapply = function(l, key, unlist = FALSE)list.key(l, key, unlist)
# construct list as a dictionary for given keys and values
listKV = listKeyValue = function(keys, values, doRecycle = TRUE) {
if (length(keys) != length(values) && doRecycle) {
r = recycle(keys, values);
keys = r[[1]];
values = r[[2]];
}
if (length(keys) != length(values))
stop("listKeyValue: number of provided keys does not match that of values");
l = as.list(values);
names(l) = keys;
l
}
listNamed = function(l, names)setNames(l, names)
vectorNamed = function(v, names) {
if (length(names) > length(v)) stop("vectorNamed: more names than vector elements");
names(v) = names;
v
}
vn = vectorNormed = function(v, type = 'O') {
v0 = as.matrix(v);
v0n = apply(v0, 2, function(v)norm(as.matrix(v), type = type));
r = if (!is.matrix(v)) v/v0n else sapply(1:ncol(v), function(i) v[, i] / v0n[i]);
r
}
#listInverse = function(l)listKeyValue(avu(l), names(l));
listInverse = function(l, toNA = FALSE) {
n = sapply(l, length);
# <p> values of inverse map
vs = rep.each(names(l), n);
# <p> construct list
r = listKeyValue(avu(l, recursive = FALSE, toNA = toNA), vs);
r
}
# name the list elements by the iterated vector elements ns (names)
nlapply = function(ns, f, ...) {
if (is.list(ns)) ns = names(ns);
r = lapply(ns, f, ...);
names(r) = ns;
r
}
nelapply = function(l, f, ...) {
ns = names(l);
if (is.null(ns)) ns = rep('', length(l));
r = lapply(seq_along(l), function(i, ...)f(ns[i], l[[i]], ...), ...);
names(r) = ns;
r
}
ilapply = function(l, f, ...) {
r = lapply(1:length(l), function(i)f(l[[i]], i, ...));
if (!is.null(names(l))) names(r) = names(l);
r
}
einXapply = function(v, f, ..., einXapplyIterator = lapply) {
l = as.list(v);
ns = names(l);
r = einXapplyIterator(seq_along(l), function(i)f(l[[i]], i, ns[i], ...));
if (length(r) > 0) names(r) = ns;
r
}
# pass element, index, name
einlapply = function(l, f = Identity, ...)einXapply(l, f, ..., einXapplyIterator = lapply);
# pass element, index
eilapply = function(l, f, ...) {
r = lapply(seq_along(l), function(i)f(l[[i]], i, ...));
names(r) = names(l);
r
}
eisapply = function(v, f, ...) {
l = as.list(v);
r = sapply(seq_along(l), function(i)f(l[[i]], i, ...));
names(r) = names(v);
r
}
ensapply = function(l0, f, ...) {
l = as.list(l0);
ns = names(l);
r = sapply(seq_along(l), function(i, ...)f(l[[i]], ns[i], ...), ...);
names(r) = ns;
r
}
einsapply = function(v, f = Identity, ...)einXapply(v, f, ..., einXapplyIterator = sapply)
kvlapply = function(l, f, ...) {
ns = names(l);
r = lapply(1:length(l), function(i)f(ns[i], l[[i]], ...));
names(r) = ns;
r
}
pairsapply = pairsapplyVL = function(l1, l2, f, ..., simplify = TRUE, USE.NAMES = TRUE) {
if (length(l1) != length(l2)) stop('pairsapply: pair of collections of unequal length.');
r = sapply(seq_along(l1), function(i)f(l1[i], l2[[i]], ...),
simplify = simplify, USE.NAMES = USE.NAMES);
r
}
pairsapplyLV = function(l1, l2, f, ..., simplify = TRUE, USE.NAMES = TRUE) {
if (length(l1) != length(l2)) stop('pairsapply: pair of collections of unequal length.');
r = sapply(seq_along(l1), function(i)f(l1[[i]], l2[i], ...),
simplify = simplify, USE.NAMES = USE.NAMES);
r
}
pairslapply = function(l1, l2, f, ...) {
if (length(l1) != length(l2)) stop('pairslapply: pair of collections of unequal length.');
r = lapply(seq_along(l1), function(i)f(l1[[i]], l2[[i]], ...));
names(r) = names(l1);
r
}
sapplyWoI = function(v, f, ...)sapply(v, function(i, ...)f(...), ...)
lapplyWoI = function(v, f, ...)lapply(v, function(i, ...)f(...), ...)
dfapply = function(Df__, f__) {
r = lapply(1:nrow(Df__), function(i) {
r = Df__[i, ];
return(Df_(f__(as.list(r))));
});
Dfr = do.call(rbind, r);
return(Dfr);
}
filterList = function(o, f, ...) {
l = sapply(o, f, ...);
if (length(l) == 0) l = NULL; #list corner case
r = o[l];
return(r);
}
# <i> copy MARGIN handling from apply (aperm)
lapplyDir = function(m, MARGIN, f_, ..., drop = FALSE) {
selector = if (MARGIN == 1)
function(m, i)m[i, , drop = drop] else
function(m, i)m[, i, drop = drop];
setNames(lapply(1:dim(m)[MARGIN], function(i)f_(selector(m, i), ...)), Dimnames(m, MARGIN))
}
# <!> as matrix to avoid warning
#lapplyRows = function(m, ...)lapply(split(as.matrix(m), row(m)), ...)
# lapplyRows = function(m, f_, ..., drop = FALSE)
# setNames(lapply(1:nrow(m), function(i)f_(m[i, , drop = drop], ...), ...), Row.names(m))
lapplyRows = function(m, f_, ..., drop = FALSE)lapplyDir(m, 1, f_ = f_, ..., drop = drop)
lapplyCols = function(m, f_, ..., drop = FALSE)lapplyDir(m, 2, f_ = f_, ..., drop = drop)
getElement = function(v, i)if (is.list(v)) v[[i]] else v[i];
# unify w/ list.takenFrom -> tests
List.takenFrom = function(listOfLists, v)
lapply(1:length(listOfLists), function(j)getElement(listOfLists[[j]], v[j]));
# tuple-apply
tuapply = function(..., fct = Identity, args = list(), names = NULL) {
tupels = list(...);
M = length(tupels);
Ns = sapply(tupels, length);
N = Ns[1];
if (any(Ns != N)) stop('Indexable elements not of same length');
r = lapply(1:N, function(i)do.call(fct, c(List.takenFrom(tupels, rep(i, M)), args)));
if (is.null(names) && !is.null(base::names(tupels[[1]]))) names = base::names(tupels[[1]]);
if (!is.null(names)) base::names(r) = names;
r
}
undrop2row = function(e)(if (is.vector(e)) matrix(e, ncol = length(e)) else e);
Lundrop2row = function(l)lapply(l, undrop2row);
undrop2col = function(e)(if (is.vector(e)) matrix(e, nrow = length(e)) else e);
Lundrop2col = function(l)lapply(l, undrop2col);
# return list from apply (undo simplify)
applyL = function(X, MARGIN, FUN, ...) {
r = apply(X, MARGIN, FUN, ...);
if (is.matrix(r)) return(lapply(1:ncol(r), function(i)r[, i]));
if (!is.list(r) && is.vector(r)) return(lapply(1:length(r), function(i)r[i]));
return(r);
}
# USE.NAMES logic reversed for sapply
sapplyn = function(l, f, ...)sapply(l, f, ..., USE.NAMES = FALSE);
list.with.names = function(..., .key = 'name') {
l = list(...);
ns = names(l);
r = nlapply(l, function(n) c(l[[n]], listKeyValue(.key, n)));
r
}
#
# <p> names
#
Row.names = function(o, vivify = TRUE) {
rn = row.names(o);
if (is.null(rn) && vivify) 1:nrow(o) else rn
}
Col.names = function(o, vivify = TRUE) {
rn = if (is.matrix(o)) dimnames(o)[[2]] else names(o);
if (is.null(rn) && vivify) 1:ncol(o) else rn
}
# <i> implement general MARGINs
Dimnames = function(o, MARGIN, vivify = TRUE) {
if (MARGIN == 1) Row.names(o, vivify) else Col.names(o, vivify)
}
SetNames = function(o, names, rnames, cnames, Dimnames, embed = FALSE) {
if (!missing(Dimnames)) dimnames(o) = Dimnames;
if (!missing(rnames)) row.names(o) = rnames;
if (!missing(cnames)) dimnames(o)[[2]] = cnames;
if (!missing(names)) {
if (class(o) == 'matrix') {
if (embed) dimnames(o)[[2]][seq_along(names)] = names else {
ns = if (is.list(names)) vector.replace(dimnames(o)[[2]], names) else names;
if (is.null(dimnames(o))) dimnames(o) = list(NULL, ns) else dimnames(o)[[2]] = ns;
}
} else {
if (embed) names(o)[seq_along(names)] = names else {
names(o) = if (is.list(names)) vector.replace(names(o), names) else names;
}
}
}
o
}
#
# <p> attributes
#
Attr = function(o, plus_, min_ = NULL) {
if (!missing(plus_)) for (n in names(plus_)) { attr(o, n) = plus_[[n]]; }
if (Nif(min_)) for (a in min_) { attr(o, a) = NULL; }
o
}
#
# <par> data type conversions
#
# assure m has at least 1 column
to.col = function(m) { if (is.null(dim(m))) t(t(m)) else m }
col.frame = function(l, col.name = 'value', minus = NULL, ignore.null = TRUE,
do.paste = NULL, do.format = TRUE, digits = 3, plus = NULL) {
if (ignore.null) { for (n in names(l)) { if (is.null(l[[n]])) l[[n]] = NULL; } }
if (!is.null(minus)) { for (n in minus) { l[[n]] = NULL; } }
my.names = if (!is.null(plus)) plus else names(l);
digits = if (length(digits) > 1) digits else rep(digits, length(l));
if (!is.null(do.paste)) {
if (do.format) {
i = 1;
for (n in my.names) { if (is.vector(l[[n]])) {
l[[n]] = paste(sapply(l[[n]],
function(e){if (is.numeric(e)) sprintf("%.*f", digits[i], e) else e}
), collapse = do.paste)
i = i + 1;
}}
} else {
for (n in my.names) { if (is.vector(l[[n]])) l[[n]] = paste(l[[n]], collapse = do.paste) }
}
}
f = as.data.frame(l);
if (dim(f)[2] > length(col.name) && length(col.name) == 1)
row.names(f) = paste(col.name, 1:dim(f)[1], sep = "")
else row.names(f) = c(col.name);
t(f)
}
# <i> collect recursively until list or data.frame
# convert list of lists to data frame (assuming identical keys for each sub list)
# also works on list of vectors
listOfLists2data.frame = function(l, idColumn = "id", .names = NULL) {
# collect keys
keys = if (is.list(l[[1]]))
sort(unique(as.vector(unlist(sapply(l, function(e)names(e)))))) else 1:length(l[[1]]);
if (is.null(.names)) .names = keys;
# row names
rows = names(l);
if (is.null(rows)) rows = 1:length(l);
# build df
#df = t(sapply(rows, function(r) { unlist(l[[r]][keys]) }));
df = t(sapply(rows, function(r)list2df(l[[r]], keys)));
df = if (!is.null(idColumn)) {
data.frame.types(data.frame(..idColumn.. = rows, df),
row.names = 1:length(rows), names = c(idColumn, .names));
} else {
data.frame.types(df, row.names = rows, names = .names);
}
df
}
# resetColNames: reset column names to names of first data frame
# colsFromFirstDf: take columns from the first data frame
# <i> improved algorithm: unlist everything, bind together: cave: data types,
# strictly valid only for matrices
# Use cases:
# list with named vectors: get data frame that contains all vectors with all possible names represented
# listOfDataFrames2data.frame(cfs, colsFromUnion = TRUE, do.transpose = TRUE, idColumn = NULL);
listOfDataFrames2data.frame = function(l, idColumn = "id", do.unlist = TRUE, direction = rbind,
resetColNames = TRUE, colsFromFirstDf = FALSE, colsFromUnion = FALSE, do.transpose = FALSE, idAsFactor = FALSE,
row.names = FALSE) {
# row names
# <!> 2009-11-20 changed from: rows = firstDef(names(l), list(1:length(l)));
rows = firstDef(names(l), 1:length(l));
# columns
ns = NULL;
if (colsFromUnion) {
ns = unique(unlist(lapply(l, names)));
# get data.frame names
ns = names(do.call(data.frame, listKeyValue(ns, rep(NA, length(ns)))));
resetColNames = FALSE; # <!> mutually exclusive
}
# build df
df = NULL;
for (i in 1:length(rows)) {
if (is.null(l[[i]])) next; # ignore empty entries
# <p> force to data frame
df0 = if (do.transpose) as.data.frame(t(l[[i]])) else as.data.frame(l[[i]]);
# <p> homogenize columns
if (colsFromUnion) {
# add missing columns
ns0 = setdiff(ns, names(df0));
df0 = do.call(data.frame, c(list(df0), listKeyValue(ns0, rep(NA, length(ns0)))));
# correct order of columns
df0 = df0[, ns];
}
if (!is.null(df)) {
if (colsFromFirstDf) df0 = df0[, names(df)] else
if (resetColNames) {
names(df0) = if (is.null(idColumn)) names(df) else names(df)[-1];
}
}
# <p> add id column
df0 = if (is.null(idColumn)) df0 else cbind(rep(rows[i], dim(df0)[1]), df0);
# <A> case differentiation should not me necessary
df = if (i == 1) df0 else direction(df, df0);
}
if (!is.null(idColumn)) names(df)[1] = idColumn;
if (do.unlist) for (n in names(df)) { df[[n]] = unlist(df[[n]]); }
if (idAsFactor) df[[idColumn]] = as.factor(df[[idColumn]]);
if (!row.names) row.names(df) = NULL;
df
}
cbindDataFrames = function(l, do.unlist = FALSE, colsFromUnion = FALSE) {
listOfDataFrames2data.frame(l, idColumn = NULL, do.unlist = do.unlist, direction = cbind,
resetColNames = FALSE, colsFromUnion = colsFromUnion)
}
# @param embed corresponds to colsFromUnion in listOfDataFrames2data.frame
RbindDfs = function(dfl, namesFromFirst = TRUE, embed = FALSE) {
if (namesFromFirst && !embed) dfl = lapply(dfl, setNames, nm = names(dfl[[1]]));
if (embed) {
ns = unique(unlist(sapply(dfl, names)));
df0 = Df_(listKeyValue(ns, rep(NA, length(ns))));
dfl = lapply(dfl, function(d)cbind(d, df0[, setdiff(ns, names(d)), drop = FALSE]));
}
do.call(rbind, dfl)
}
rbindDataFrames = function(l, do.unlist = FALSE, useDisk = FALSE, idColumn = NULL, transpose = FALSE,
resetColNames = FALSE, colsFromFirstDf = FALSE, idAsFactor = FALSE) {
r = if (useDisk) {
tempTable = tempfile();
for (i in 1:length(l)) {
d0 = l[[i]];
if (class(d0) != 'data.frame') d0 = as.data.frame(d0);
if (transpose) d0 = t(d0);
if (!is.null(idColumn)) {
d0 = data.frame(idColumn = names(l)[i], d0);
names(d0)[1] = idColumn;
}
write.table(d0, file = tempTable, col.names = i == 1, append = i != 1, row.names = FALSE);
}
read.table(tempTable, header = TRUE, as.is = TRUE);
} else {
listOfDataFrames2data.frame(l, idColumn = idColumn, do.unlist = do.unlist,
direction = rbind, resetColNames = resetColNames, colsFromFirstDf = colsFromFirstDf,
idAsFactor = idAsFactor)
}
r
}
# names2col assigns names of the list to a column of the data frame and values to the valueCol
list2df = function(l, cols = names(l), row.name = NULL, names2col = NULL, valueCol = 'value') {
idcs = if (is.null(cols)) 1:length(l) else
if (all(is.integer(cols))) cols else which.indeces(names(l), cols);
if (is.null(cols) || all(is.integer(cols))) cols = paste('C', 1:length(l), sep = '');
r = as.list(rep(NA, length(cols)));
names(r) = cols;
r[idcs] = l;
r = as.data.frame(r, stringsAsFactors = FALSE);
if (!is.null(row.name)) row.names(r)[1] = row.name;
if (!is.null(names2col)) {
r = data.frame(name = names(r), value = unlist(r[1, ]), row.names = NULL, stringsAsFactors = FALSE);
names(r) = c(names2col, valueCol);
}
r
}
be.numeric = function(v)
sapply(v, function(e)grepl('^-?\\d*(\\.\\d+)?(e-?\\d+)?$', e, ignore.case = TRUE, perl = TRUE));
list2df.print = function(l, valueCol = 'value', names2col = NULL, ..., digits = 3, scientific = 3) {
l1 = list2df(l, valueCol = valueCol, names2col = names2col, ...);
numericRows = be.numeric(l1[[valueCol]]);
numbers = as.numeric(l1[[valueCol]][numericRows]);
log10range = max(floor(log10(numbers))) - min(floor(log10(numbers)));
#fmt = if (log10range > digits + 1) '%.*e' else '%.*f';
numbers = sprintf(ifelse(abs(floor(log10(numbers))) > scientific, '%.*e', '%.*f'), digits, numbers);
#numbers = sapply(numbers, function(n)sprintf(fmt, digits, n));
separators = as.vector(names(l) == '' & is.na(l));
l1[separators, names2col] = '-';
l1[separators, valueCol] = '';
l1[numericRows, valueCol] = numbers;
print(l1);
}
rbind.list2df = function(d, l, row.name = NULL) {
d = as.data.frame(d);
r = list2df(l, names(d), row.name);
r0 = rbind(d, r);
r0
}
# take list of lists
# names of list elements become column-names
listOfLists2df = function(l, columnNames = names(l[[1]])) {
colV = lapply(columnNames, function(n)Df_(list.kp(l, n, do.unlist = TRUE)));
r = Df_(do.call(cbind, colV), names = columnNames);
r
}
ListOfLists2df_extract = function(l, kp, template) {
l1 = list.kp(l, kp, null2na = TRUE, do.unlist = FALSE, template = template);
do.call(rbind, l1);
}
# advanced version of the above
ListOfLists2df = function(l,
keyPath = '*', columnNames = names(list.kp(l[1], keyPath)[[1]]),
reverseKeys = FALSE, keySep = '-', template = NA) {
colV = lapply(columnNames, function (n) {
kp = Sprintf('%{keyPath}s$%{n}s');
# <A> robustly choose name (assume first element is proper template)
#name = if (collapse) names(ListOfLists2df_extract(l[1], kp, template, collapse)) else NULL;
r = ListOfLists2df_extract(l, kp, template);
# names
kpk = list.kp.keys(Sprintf('%{n}s'));
cns = Col.names(r, vivify = FALSE);
if (is.null(cns)) keySep = '';
ns = if (reverseKeys)
paste(cns, join(rev(kpk), keySep), sep = keySep) else
paste(join(kpk, keySep), cns, sep = keySep);
r = SetNames(r, ns);
r
});
r = do.call(cbind, colV);
# <!> Df_ applies as.data.frame -> normalization of column names
#r = if (collapse == 0) Df_(r0, names = columnNames) else r0;
r
}
# # d: data frame, l: list with names corresponding to cols, values to be searched for in columns
searchDataFrame = function(d, l, .remove.factors = TRUE) {
ns = names(l);
d = d[, ns, drop = FALSE];
if (.remove.factors) {
l = sapply(l, function(e)ifelse(is.factor(e), levels(e)[e], e));
#d = apply(d, 2, function(col)(if (is.factor(col)) levels(col)[col] else col));
}
rs = which(as.vector(apply(apply(d, 1, function(r)(r == l)), 2, all)));
rs
}
.df.cols = which.cols = function(d, cols, regex = FALSE) {
cols[is.numeric(cols)] = as.integer(cols[is.numeric(cols)]);
cols[is.character(cols)] = which.indeces(cols[is.character(cols)], names(d), regex = regex);
as.integer(cols)
}
# select columns by name
.df = function(d, names, regex = TRUE, as.matrix = FALSE) {
cols = which.indeces(names, names(d), regex = regex);
d0 = d[, cols, drop = FALSE];
# <t> simpler version:
# d0 = d[, .df.cols(d, names, regex)];
if (as.matrix) d0 = as.matrix(d0);
d0
}
.df.reorder = function(d, names, regex = TRUE) {
cols = .df.cols(d, names, regex);
d0 = d[, c(cols, setdiff(1:dim(d)[2], cols))];
d0
}
# remove columns by name
.dfm = function(d, names, regex = FALSE, as.matrix = FALSE) {
cols = if (all(is.numeric(names))) as.integer(names) else which.indeces(names, names(d), regex = regex);
d0 = d[, -cols, drop = FALSE];
if (as.matrix) d0 = as.matrix(d0);
d0
}
# remove rows by name
.dfrmr = function(d, names, regex = FALSE, as.matrix = FALSE) {
rows = if (all(is.numeric(names)))
as.integer(names) else
which.indeces(names, row.names(d), regex = regex);
d0 = d[-rows, , drop = FALSE];
if (as.matrix) d0 = as.matrix(d0);
d0
}
# remove rows/columns by name
.dfrm = function(d, rows = NULL, cols = NULL, regex = FALSE, as.matrix = FALSE) {
d = as.data.frame(d); # enforce data frame
rows = if (is.null(rows)) 1:dim(d)[1] else
-(if (all(is.numeric(rows))) as.integer(rows) else which.indeces(rows, row.names(d), regex = regex));
cols = if (is.null(cols)) 1:dim(d)[2] else
-(if (all(is.numeric(cols))) as.integer(cols) else which.indeces(cols, names(d), regex = regex));
d0 = d[rows, cols, drop = FALSE];
if (as.matrix) d0 = as.matrix(d0);
d0
}
# alignByRowNames: logical: use row.names from first element, else use provided vector
Cbind = function(..., stringsAsFactors = FALSE, deparse.level = 0, alignByRowNames = NULL) {
l = list(...);
if (notE(alignByRowNames)) {
if (is.null(row.names(l[[1]]))) stop('Cbind[alignByRowNames]: No row names @ 1');
ref = if (is.logical(alignByRowNames) && alignByRowNames)
row.names(l[[1]]) else
alignByRowNames;
l = pairslapply(l, seq_along(l), function(e, i) {
if (is.null(row.names(e))) stop('Cbind[alignByRowNames]: No row names @ %{i}d');
e[order_align(ref, row.names(e)), , drop = FALSE]
});
}
if (length(l) == 1)
# <p> special case vector
t_(l[[1]]) else
# <p> standard invocation
do.call(cbind, c(l, list(deparse.level = deparse.level)))
}
Rbind = function(..., stringsAsFactors = FALSE) {
l = list(...);
r = if (length(l) == 1) t_(t_(l[[1]])) else {
if (class(l[[1]]) == 'data.frame')
rbind(..., stringsAsFactors = stringsAsFactors) else
rbind(...);
}
r
}
subsetTop = function(obj, sel, N = 1) {
d0 = subset(obj, sel);
d1 = d0[1:min(nrow(d0), N), ];
d1
}
# transpose to create column vector for vector
t_ = function(m)(if (is.vector(m)) t(t(m)) else t(m))
# double transpose aka transpose to row -> vector to 1 x N matrix, otherwise identity
t2r = function(m)t(t_(m))
# convert strings to data frame names
# <i> create a data frame and extract names
.dfns = function(ns)gsub(':', '.', ns);
# manipulate list of vectors
# vectors i = 1,.., n with entries v_ij are represented as vector v_11, ..., v_n1, v_21, ...
vector.intercalate = meshVectors = function(...) {
l = list(...);
if (length(l) == 1) l = l[[1]];
v = as.vector(t(sapply(l, function(v)unlist(v))));
# <N> preferred implementation
# No unlist -> should be part of input sanitization
# v = as.vector(do.call(rbind, l));
v
}
is.sorted = function(...)(!is.unsorted(...))
is.ascending = function(v) {
if (length(v) < 2) return(TRUE);
for (i in 2:length(v)) if (v[i] <= v[i - 1]) return(FALSE);
return(TRUE);
}
# pad a vector to length N
pad = function(v, N, value = NA)c(v, rep(value, N - length(v)));
#
# <par> number sequences
#
rep.each.vector = function(v, n)as.vector(matrix(rep(v, n), n, byrow = TRUE))
rep.each = function(l, n, simplify = unlist) {
l = Avu(l);
if (length(n) == 1) rep.each.vector(l, n) else simplify(pairsapply(l, n, rep))
}
factorWithLevels = function(f, levels_) {
f = as.factor(f);
levels(f) = levels_;
f
}
Rep.each = function(v, n) {
r = rep.each(v, n);
return(if (is.factor(v)) factorWithLevels(r, levels(v)) else r)
}
copyFactorStructure = function(dS, dD) {
factors = which(lapply(dS, class) == 'factor');
for (f in factors) dD[[f]] = factorWithLevels(dD[[f]], levels(dS[[f]]));
dD
}
rep.each.row = function(m, n) {
# r = matrix(rep.each(m, n), ncol = ncol(m));
# if (class(m) == 'data.frame') {
# r = Df_(r, names = names(m));
# r = copyFactorStructure(m, r);
# }
r = if (is.data.frame(m))
Df_(lapply(m, Rep.each, n = n)) else
m[rep.each(Seq(1, nrow(m)), n), , drop = FALSE]
r
}
#rep.list = function(l, n) lapply(1:length(l), function(e)l);
# <!> changed as of 23.8.2016; n not used before
rep.each.list = rep.list = function(l, n) lapply(1:n, function(e)l);
matrix.intercalate = function(..., direction = 1, listOfMatrices = FALSE) {
l = list(...);
if (listOfMatrices) l = l[[1]];
# <!> assume same dimension
d = dim(l[[1]]);
N = prod(d);
# <p> create new matrix
v = c(if (direction == 1) sapply(l, as.vector) else sapply(sapply(l, t), as.vector), recursive = TRUE);
vN = as.vector(matrix(v, ncol = N, byrow = TRUE));
r = if (direction == 1)
matrix(vN, nrow = d[1] * length(l)) else
matrix(vN, ncol = d[2] * length(l), byrow = TRUE);
# <p> return value
if (class(l[[1]]) == 'data.frame') r = Df_(r, names = names(l[[1]]));
r
}
matrixSearch = function(mSearch, mSearched, cols = 1:ncol(mSearch)) {
df1 = Df_(mSearch, names = paste0('c', cols));
df2 = Df_(mSearched[, cols, drop = FALSE], names = paste0('c', cols));
return(DfSearch(df1, df2, returnIdcs = TRUE));
}
arrayFromRowPairs = function(m, halves = FALSE) {
if (halves)
aperm(array(t(m), dim = c(2, dim(m)[1]/2, dim(m)[2])), c(2, 1, 3)) else
# adjecent pairs
aperm(array(t(m), dim = c(2, dim(m)[2], dim(m)[1]/2)), c(3, 1, 2))
}
data.frame.expandWeigths = function(data, weights = 'weights') {
w = data[[weights]];
weightsCol = which(names(data) == weights);
df0 = lapply(1:length(w), function(i) {
if (w[i] > 0) rep.each.row(data[i, -weightsCol], w[i]) else list();
});
df1 = rbindDataFrames(df0);
df1
}
# spread/fill vector to indeces
vector.spread = function(v, idcs, N, default = 0) {
r = rep(default, N);
r[idcs] = v;
r
}
# search vector for value, fill value elements with elements prior to it
# e.g. 1, NA, NA, 2, NA -> 1, 1, 1, 2, 2
vector.propagateValuesForward = function(v, value = NA, vs) {
idcs = if (is.na(value)) which(!is.na(v)) else which(v != value);
Idcs = c(idcs, length(v) + 1); # padded version
# assign positions
iA = lapply(seq_along(idcs), function(i)Seq(idcs[i] + 1, Idcs[i + 1] - 1));
# indeces of values tb assigned
iV = lapply(seq_along(idcs), function(i)rep(idcs[i], length(iA[[i]])));
# fill in values
#r = vector.assign(v, unlist(iA), v[unlist(iV)]);
v[unlist(iA)] = v[unlist(iV)];
return(v);
}
# create new vector with length == length(v) + length(idcs)
# idcs are positions in the final vector
vector.embed = function(v, idcs, e, idcsResult = TRUE) {
if (!idcsResult) idcs = idcs + (1:length(idcs)) - 1;
N = length(v) + length(idcs);
r = rep(NA, N);
r[setdiff(1:N, idcs)] = v;
r[idcs] = e;
# <p> names
if (!is.null(names(v)) || !is.null(names(e))) {
ns = rep(NA, N);
if (!is.null(names(v))) ns[setdiff(1:N, idcs)] = names(v);
if (!is.null(names(e))) ns[idcs] = names(e);
names(r) = ns;
}
r
}
# set values at idcs
vector.assign = function(v, idcs, e, na.rm = 0, N) {
if (!missing(N)) v = rep(v, N);
v[idcs] = e;
if (!is.na(na.rm)) v[is.na(v)] = na.rm;
v
}
# names based assignment
Vector.assign = function(v, e, na.rm = NA) {
idcs = which.indeces(names(e), names(v));
vector.assign(v, idcs, e, na.rm = na.rm)
}
matrix.assign = function(m, idcs, e, byrow = TRUE) {
if (length(dim(idcs)) > 1) {
m[as.matrix(idcs)] = e
} else if (byrow)
m[idcs, ] = e else
m[, idcs] = e
m
}
# extract elements from array/matrix indexed in a row-wise manner by ...
# array.extract(m, c(1, 2), c(1, 2)) -> c(m[1, 1], m[2, 2])
array.extract = function(a, ...) {
r = mapply(function(...)do.call('[', c(list(a), list(...))), ...);
return(r);
}
# are columns/rows same values in matrix
matrix.same = function(m, direction = 1) {
apply(m, direction, function(e)all(e[1] == e))
}
vectorIdcs = function(v, f, ..., not = FALSE) {
r = sapply(v, f, ...);
which(if (not) !r else r)
}
is.seq = function(v, offset = 1)all( (v - offset + 1) == seq_along(v))
# produce indeces for indeces positioned into blocks of blocksize of which count units exists
# example: expand.block(2, 10, 1:2) == c(1, 2, 11, 12)
expand.block = function(count, blocksize, indeces) {
blks = Seq(1,count);
if (is.null(blks)) return(NULL);
as.vector(apply(to.col(blks), 1,
function(i){ (i - 1) * blocksize + t(to.col(indeces)) }
));
}
search.block = function(l, s) {
b.sz = length(s);
which(sapply(
1:(length(l)/b.sz), function(i){all(l[((i - 1) * b.sz + 1):(i * b.sz)] == s)}
));
}
#
# <par> matrix functions
#
# <!> assumes same indeces for rows/columns
matrixFromIndexedDf = function(df, idx.r = 'idx.r', idx.c = 'idx.c', value = 'value', referenceOrder = NULL) {
id = unique(c(df[[idx.r]], df[[idx.c]]));
# matrix indeces
# <A> canonical order is by repeating vector id for row index, constant for columns within repetition
# -> matrix filled by columns
midcs = merge(data.frame(id = id), data.frame(id = id), by = NULL);
midcs = data.frame(midcs, mfid.i = 1:nrow(midcs));
map = merge(df[, c(idx.r, idx.c, value)], midcs,
by.x = c(idx.r, idx.c), by.y = c('id.x', 'id.y'), all.y = TRUE);
# return to midcs order
map = map[order(map$mfid.i), ];
# filled by rows
m = matrix(map[[value]], nrow = length(id));
# reorder matrix
o = order_align(firstDef(referenceOrder, id), id);
# reorder in two steps -> out of mem otherwise
m1 = m[o, ];
m2 = m1[, o];
m2
}
symmetrizeMatrix = function(m) {
m[is.na(m)] = t(m)[is.na(m)];
m
}
which.row = function(m, row) {
cols = names(as.list(row));
if (is.null(cols)) cols = 1:length(row);
rows = 1:(dim(m)[1]);
rows.found = rows[sapply(rows, function(i){ all(m[i, cols] == row) })];
rows.found
}
# lsee: list with searchees
# lsed: list with searched objects
# inverse: lsed are regexes matched against lsee; pre-condition: length(lsee) == 1
# ret.list: for match.multi return list by lsee
# <!><t> cave: semantics changed as of 17.8.2009: return NA entries for unfound lsee-entries
# <!> match multi only implemented for merge = TRUE
which.indeces = function(lsee, lsed, regex = FALSE, ret.na = FALSE, merge = TRUE, match.multi = FALSE, ...,
inverse = FALSE, ret.list = FALSE) {
if (!length(lsed) || !length(lsee)) return(c());
v = if (is.list(lsed)) names(lsed) else lsed;
idcs = if (regex) {
which(sapply(lsed, function(e)(
if (inverse) length(fetchRegexpr(e, lsee, ...)) > 0 else
any(sapply(lsee, function(see)(length(fetchRegexpr(see, e, ...)) > 0)))
)))
} else if (merge) {
d0 = merge(
data.frame(d = lsed, ix = 1:length(lsed)),
data.frame(d = lsee, iy = 1:length(lsee)), all.y = TRUE);
d0 = d0[order(d0$iy), ];
idcs = if (match.multi) {
#d0$ix[unlist(sapply(lsee, function(e)which(d0$d == e)))]
#na.omit(sort(d0$ix))
r = if (ret.list)
unlist.n(by(d0, d0$d, function(d)list(na.omit(d$ix)), simplify = FALSE)) else
na.omit(d0$ix);
r
} else {
d0$ix[pop(which(c(d0$iy, 0) - c(0, d0$iy) != 0))];
}
# less efficient version
# } else d0$ix[unlist(sapply(lsee, function(e)which(d0$d == e)[1]))];
# } else d0$ix[order(d0$iy)]
if (!ret.na) idcs = idcs[!is.na(idcs)];
idcs
} else {
unlist(as.vector(sapply(lsee, function(e){
w = which(e == v);
if (!ret.na) return(w);
ifelse(length(w), w, NA)
})))
};
r = if (ret.list) idcs else as.integer(idcs);
r
}
grep.vector = function(lsee, lsed, regex = FALSE, ret.na = FALSE, merge = TRUE, match.multi = FALSE, ..., inverse = FALSE) {
lsed[which.indeces(lsee, lsed, regex, ret.na, merge, match.multi, ..., inverse = inverse)]
}
grep.infixes = function(lsee, lsed, ...) {
r = grep.vector(sapply(lsee, function(v)sprintf('^%s.*', v)), lsed, regex = TRUE, inverse = FALSE, ... );
r
}
# force structure to be matrix (arrange vector into a row)
MR = function(m) {
if (!is.matrix(m)) m = matrix(m, byrow = TRUE, ncol = length(m));
m
}
# force structure to be matrix (arrange vector into a columns)
MC = function(m) {
if (!is.matrix(m)) m = matrix(m, byrow = FALSE, nrow = length(m));
m
}
#
# <par> data processing
#
# like table but produce columns for all numbers 1..n (not only for counts > 0)
# cats are the expected categories
table.n = function(v, n, min = 1, categories = NULL) {
if (is.null(categories)) categories = min:n;
t = as.vector(table(c(categories, v)) - rep(1, length(categories)));
t
}
table.freq = function(v) {
t0 = table(v);
r = t0 / sum(t0);
r
}
table.n.freq = function(...) {
t0 = table.n(...);
r = t0 / sum(t0);
r
}
Table = function(v, min, max, ..., cats) {
if (missing(min) && missing(max) && missing(cats)) return(table(v, ...));
if (!missing(cats)) {
d = Df_(lapply(v, Avu));
catsV = Df_(merge.multi.list(cats));
return(table(rbind(d, catsV)) - 1);
} else {
if (missing(min)) min = min(v);
if (missing(max)) max = max(v);
return(table.n(v, n = max, min = min));
}
}
v2freq = function(v)(v/sum(v))
#
# <p> numeric function
#
to.numeric = function(x) { SetNames(suppressWarnings(as.numeric(x)), names(x)) }
minFloor = function(x)(x - floor(x))
#
# <par> data types
#
# set types for columns: numeric: as.numeric
data.frame.types = function(df, numeric = c(), character = c(), factor = c(), integer = c(),
do.unlist = TRUE, names = NULL, row.names = NULL, reset.row.names = FALSE, do.rbind = FALSE, do.transpose = FALSE,
stringsAsFactors = FALSE) {
if (do.rbind) {
#old code: df = t(sapply(df, function(e)e));
lengthes = sapply(df, length);
maxL = max(lengthes);
df = t(sapply(1:length(df), function(i)c(df[[i]], rep(NA, maxL - lengthes[i]))));
}
if (do.transpose) df = t(df);
df = as.data.frame(df, stringsAsFactors = stringsAsFactors);
# set or replace column names
if (!is.null(names)) {
if (class(names) == "character") names(df)[1:length(names)] = names;
if (class(names) == "list") names(df) = vector.replace(names(df), names);
}
if (do.unlist) for (n in names(df)) { df[[n]] = unlist(df[[n]]); }
for (n in numeric) { df[[n]] = as.numeric(df[[n]]); }
for (n in integer) { df[[n]] = as.integer(df[[n]]); }
for (n in character) { df[[n]] = as.character(df[[n]]); }
for (n in factor) { df[[n]] = as.factor(df[[n]]); }
if (reset.row.names) row.names(df) = NULL;
if (length(row.names) > 0) row.names(df) = row.names;
df
}
DfStack = function(df0, N)do.call(rbind, rep.list(df0, N));
DfClasses = function(dataFrame)nlapply(dataFrame, function(n)class(dataFrame[[n]]));
DfAsInteger = function(dataFrame, as_integer) {
#dfn = apply(dataFrame[, as_integer, drop = FALSE], 2, function(col)as.integer(avu(col)));
# <!> 6.6.2016 as.integer first needed to retain factor status on factors
dfn = nlapply(as_integer, function(col)avu(as.integer(dataFrame[[col]])));
dataFrame[, as_integer] = as.data.frame(do.call(cbind, dfn));
dataFrame
}
DfAsLogical = function(dataFrame, as_logical) {
dfn = nlapply(as_logical, function(n) {
col = dataFrame[[n]];
if (is.factor(col)) (col == levels(col)[1]) else avu(as.logical(col));
});
dataFrame[, as_logical] = as.data.frame(do.call(cbind, dfn));
dataFrame
}
DfAsCharacter = function(dataFrame, as_character) {
#dfn = apply(dataFrame[, as_character, drop = FALSE], 2, function(col)as.character(avu(col)));
#dataFrame[, as_character] = as.data.frame(dfn, stringsAsFactors = FALSE);
dfn = nlapply(as_character, function(col)avu(as.character(dataFrame[[col]])));
dataFrame[, as_character] = as.data.frame(do.call(cbind, dfn), stringsAsFactors = FALSE);
dataFrame
}
DfFac2num = function(dataFrame) {
return(do.call(data.frame, lapply(dataFrame, function(e)if (is.factor(e)) as.numeric(e) else e)))
}
DfApplyValueMap = function(r, valueMap, Df_doTrimValues = FALSE,
Df_mapping_value = '__df_mapping_value__',
Df_mapping_empty = '__DF_EMPTY__', Do_Df_mapping_empty = TRUE) {
for (n in names(valueMap)) {
vs = if (Df_doTrimValues && class(r[[n]]) %in% c('character', 'factor'))
nina(trimString(as.character(r[[n]])), Df_mapping_value) else
as.character(r[[n]]);
vm = valueMap[[n]];
if (Do_Df_mapping_empty) {
vs = ifelse(nit(vs == ''), Df_mapping_empty, vs);
if (!(Df_mapping_empty %in% names(vm)))
vm = c(vm, listKeyValue(Df_mapping_empty, NA));
}
vs = nina(valueMap[[n]][vs], Df_mapping_value);
vs = ifelse(vs == Df_mapping_value, as.character(r[[n]]), vs);
r[[n]] = vs;
}
return(r);
}
# as of 22.7.2013 <!>: min_ applied before names/headerMap
# as of 19.12.2013 <!>: as.numeric -> as_numeric
# as of 22.5.2014 <!>: t -> t_
# as of 13.11.2014 <!>: sapply -> simplify_
# Create data frames with more options than \code{data.frame}
Df_ = function(df0, headerMap = NULL, names = NULL, min_ = NULL,
as_numeric = NULL, as_character = NULL, as_factor = NULL, as_integer = NULL, as_logical = NULL,
row.names = NA, valueMap = NULL, Df_as_is = TRUE, simplify_ = FALSE,
deep_simplify_ = FALSE, t_ = FALSE, unlist_cols = FALSE, transf_log = NULL, transf_m1 = NULL,
Df_doTrimValues = FALSE, Df_mapping_value = '__df_mapping_value__',
Df_mapping_empty = '__DF_EMPTY__', Do_Df_mapping_empty = TRUE, apply_ = FALSE) {
# <p> input sanitation
#r = as.data.frame(df0);
# for a vector with identical names for each entry, use this as a column name
if (length(unique(names(df0))) == 1 && !Nif(names)) names = unique(names(df0));
# sanitize row.names
dn = dimnames(df0);
if (Nif(dn) && any(duplicated(dn[[1]]))) dimnames(df0)[[1]] = NULL;
# <!> commented out on 4.4.2019, test implemented to fix this behavior
#if (length(row.names) == 0 || !all(is.na(row.names))) base::row.names(df0) = row.names;
if (apply_) df0 = as.data.frame(apply(df0, 2, identity));
#if (!Nif(Apply_)) df0 = as.data.frame(apply(df0, 2, Apply_));
if (t_) df0 = t(df0);
# reset_row_names breaks unit tests (27.9.2017)
#r = data.frame(df0, stringsAsFactors = !Df_as_is, row.names = if (reset_row_names) NA else NULL);
r = data.frame(df0, stringsAsFactors = !Df_as_is);
if (notE(min_)) {
is = which.indeces(min_, names(r));
if (length(is) > 0) r = r[, -is, drop = FALSE];
}
if (simplify_) r = as.data.frame(sapply(r, identity));
if (deep_simplify_) r = as.data.frame(
nlapply(r, function(col)sapply(r[[col]], unlist)), stringsAsFactors = !Df_as_is
);
#
# <p> column names
#
if (notE(names)) {
if (class(names) == 'character') names(r)[1:length(names)] = names;
if (class(names) == 'list') names(r) = vector.replace(names(r), names);
}
if (notE(headerMap)) names(r) = vector.replace(names(r), headerMap);
#
# <p> column types
#
#if (class(df0) == 'data.frame' && ncol(df0) >= 3) browser();
if (notE(as_numeric)) {
#dfn = apply(r[, as_numeric, drop = FALSE], 2, function(col)as.numeric(avu(col)));
dfn = lapply(r[, as_numeric, drop = FALSE], function(col)avu(as.numeric(col)));
r[, as_numeric] = as.data.frame(do.call(cbind, dfn));
}
if (notE(as_logical)) r = DfAsLogical(r, as_logical);
if (notE(as_integer)) r = DfAsInteger(r, as_integer);
if (notE(as_character)) r = DfAsCharacter(r, as_character);
if (notE(as_factor)) {
# <N> does not work
#dfn = apply(r[, as_factor, drop = FALSE], 2, function(col)as.factor(col));
#r[, as_factor] = dfn;
for (f in as_factor) r[, f] = as.factor(r[[f]]);
}
#
# <p> value map
#
if (notE(valueMap)) {
# for (n in names(valueMap)) {
# vs = if (Df_doTrimValues && class(r[[n]]) %in% c('character', 'factor'))
# nina(trimString(as.character(r[[n]])), Df_mapping_value) else
# as.character(r[[n]]);
# vm = valueMap[[n]];
# if (Do_Df_mapping_empty) {
# vs = ifelse(nit(vs == ''), Df_mapping_empty, vs);
# if (!(Df_mapping_empty %in% names(vm)))
# vm = c(vm, listKeyValue(Df_mapping_empty, NA));
# }
# vs = nina(valueMap[[n]][vs], Df_mapping_value);
# vs = ifelse(vs == Df_mapping_value, as.character(r[[n]]), vs);
# r[[n]] = vs;
# }
r = DfApplyValueMap(r, valueMap,
Df_doTrimValues, Df_mapping_value, Df_mapping_empty, Do_Df_mapping_empty);
}
#
# <p> transformations
#
if (notE(transf_log)) r[, transf_log] = log(r[, transf_log, drop = FALSE]);
if (notE(transf_m1)) r[, transf_m1] = r[, transf_m1, drop = FALSE] - 1;
if (length(row.names) == 0 || !all(is.na(row.names))) base::row.names(r) = row.names;
if (unlist_cols) for (n in names(r)) r[[n]] = avu(r[[n]]);
r
}
Df = function(..., headerMap = NULL, names = NULL, min_ = NULL, row.names = NA, Df_as_is = TRUE,
as_numeric = NULL, as_character = NULL, as_factor = NULL, t_ = FALSE, unlist_cols = FALSE) {
r = data.frame(...);
Df_(r, headerMap = headerMap, names = names, min_ = min_, row.names = row.names,
as_numeric = as_numeric,
as_character = as_character,
as_factor = as_factor,
Df_as_is = Df_as_is,
t_ = t_,
unlist_cols = unlist_cols
);
}
Df2list = function(df) {
df = as.data.frame(df);
nlapply(names(df), function(n)df[[n]]);
}
Dfselect = function(data, l, na.rm = nif) {
sel = apply(sapply(nlapply(l, function(n)data[[n]] == l[[n]]), identity), 1, all);
r = data[na.rm(sel), ];
r
}
DfSearch = function(dfSearch, dfSearched,
colNamesReset = 'col', colNameIdx = '.dfSearchIdx', returnIdcs = FALSE) {
if (is.null(dfSearched)) return(NULL);
nms = if (notE(colNamesReset)) {
nms = paste(colNamesReset, 1:ncol(dfSearched), sep = '');
names(dfSearch) = names(dfSearched) = nms;
} else names(dfSearched);
dfm = merge(
Df(1:nrow(dfSearched), dfSearched, names = colNameIdx),
Df(1:nrow(dfSearch), dfSearch, names = colNameIdx), by = nms);
if (returnIdcs)
return(dfm[