# Note: don't change the order of these arguments !!!
scv <- function(x, v, r, set = FALSE, inv = FALSE, vind1 = FALSE) .Call(C_setcopyv, x, v, r, inv, set, vind1)
# inspired by ?dplyr::recode
# Think about adopting this code for as_numeric_factor and as_character_factor
recode_num <- function(X, ..., default = NULL, missing = NULL, set = FALSE) {
if(missing(...)) stop("recode_num requires arguments of the form: value = replacement")
args <- list(...)
nam <- as.numeric(names(args))
# nzchar(names(args)) ... check non-empty names ? -> nah, this package is not for dummies
if(anyNA(nam)) stop(paste("Non-numeric arguments:", paste(names(args)[is.na(nam)], collapse = ", ")))
arglen <- length(args)
missingl <- !is.null(missing)
if(missingl && any(nam == missing)) warning(paste0("To improve performance missing values are replaced prior to recode, so this replaces all missing values with ",
missing, " and those are then again replaced with ", args[[which(nam == missing)]], ". If this is not desired, call replace_na after recode with missing = NULL."))
if(arglen == 1L) {
args <- args[[1L]]
if(is.null(default)) {
if(missingl) {
repfun <- function(y) if(is.numeric(y)) {
z <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
scv(z, nam, args, TRUE) # `[<-`(y, y == nam, value = args)
} else y
} else {
repfun <- function(y) if(is.numeric(y)) scv(y, nam, args, set) else y # `[<-`(y, y == nam, value = args)
}
} else {
nr <- if(is.atomic(X)) NROW(X) else fnrow(X)
if(missingl) {
repfun <- function(y) if(is.numeric(y)) {
nas <- is.na(y)
z <- scv(y, nas, missing, set, vind1 = TRUE)
ind <- whichv(z, nam)
scv(z, nas, default, TRUE, TRUE, vind1 = TRUE) # duplAttributes(alloc(default, nr), y)
scv(z, ind, args, TRUE, vind1 = TRUE) # y == nam
} else y
} else {
repfun <- function(y) if(is.numeric(y)) scv(scv(y, nam, default, set, TRUE), nam, args, TRUE) else y # `[<-`(duplAttributes(alloc(default, nr), y), y == nam, value = args)
}
}
} else {
seqarg <- seq_len(arglen)
if(is.null(default)) {
repfun <- function(y) if(is.numeric(y)) {
if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
else if(!set) y <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE)
y
} else y
# repfun <- function(y) if(is.numeric(y)) {
# if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
# if(set) { # Note: not strictly the way this should work...
# for(i in seqarg) scv(y, nam[i], args[[i]], TRUE)
# return(y)
# }
# z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
# for(i in seqarg) scv(z, whichv(y, nam[i]), args[[i]], TRUE, vind1 = TRUE)
# z
# } else y
} else {
nr <- if(is.atomic(X)) NROW(X) else fnrow(X)
if(missingl) {
repfun <- function(y) if(is.numeric(y)) {
nas <- is.na(y)
y <- scv(y, nas, missing, set, vind1 = TRUE)
z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy
scv(y, nas, default, TRUE, TRUE, vind1 = TRUE)
for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE)
y
} else y
} else {
repfun <- function(y) if(is.numeric(y)) {
z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy
y <- scv(y, nam[1L], default, set, TRUE) # duplAttributes(alloc(default, nr), y)
scv(y, nam[1L], args[[1L]], TRUE)
for(i in seqarg[-1L]) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE)
y
} else y
}
}
}
if(is.list(X)) {
if(set) {
lapply(unattrib(X), repfun)
return(invisible(X))
}
res <- duplAttributes(lapply(unattrib(X), repfun), X)
return(if(inherits(X, "data.table")) alc(res) else res)
}
if(!is.numeric(X)) stop("X needs to be numeric or a list")
res <- repfun(X)
return(if(set) invisible(res) else res)
}
recode_char <- function(X, ..., default = NULL, missing = NULL, regex = FALSE,
ignore.case = FALSE, fixed = FALSE, set = FALSE) {
if(missing(...)) stop("recode_char requires arguments of the form: value = replacement")
args <- list(...)
nam <- names(args)
arglen <- length(args)
missingl <- !is.null(missing)
if(missingl && any(nam == missing)) warning(paste0("To improve performance missing values are replaced prior to recode, so this replaces all missing values with ",
missing, " and those are then again replaced with ", args[[which(nam == missing)]], ". If this is not desired, call replace_na after recode with missing = NULL."))
if(regex) {
if(arglen == 1L) {
args <- args[[1L]]
if(is.null(default)) {
if(missingl) {
repfun <- function(y) if(is.character(y)) {
y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
scv(y, grepl(nam, y, ignore.case, FALSE, fixed), args, TRUE, vind1 = TRUE)
} else y
} else {
repfun <- function(y) if(is.character(y)) scv(y, grepl(nam, y, ignore.case, FALSE, fixed), args, set, vind1 = TRUE) else y
}
} else {
nr <- if(is.atomic(X)) NROW(X) else fnrow(X)
if(missingl) {
repfun <- function(y) if(is.character(y)) {
nas <- is.na(y)
z <- scv(y, nas, missing, set, vind1 = TRUE)
ind <- grepl(nam, z, ignore.case, FALSE, fixed)
scv(z, nas, default, TRUE, TRUE, vind1 = TRUE) # duplAttributes(alloc(default, nr), y)
scv(z, ind, args, TRUE, vind1 = TRUE)
} else y
} else {
repfun <- function(y) if(is.character(y)) {
ind <- grepl(nam, y, ignore.case, FALSE, fixed)
scv(scv(y, ind, default, set, TRUE, vind1 = TRUE), ind, args, TRUE, vind1 = TRUE)
} else y
}
}
} else {
seqarg <- seq_len(arglen)
if(is.null(default)) {
repfun <- function(y) if(is.character(y)) {
if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
else if(!set) y <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
for(i in seqarg) scv(y, grepl(nam[i], z, ignore.case, FALSE, fixed), args[[i]], TRUE, vind1 = TRUE)
y
} else y
} else {
nr <- if(is.atomic(X)) NROW(X) else fnrow(X)
if(missingl) {
repfun <- function(y) if(is.character(y)) {
nas <- is.na(y)
y <- scv(y, nas, missing, set, vind1 = TRUE)
z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy
scv(y, nas, default, TRUE, TRUE, vind1 = TRUE)
for(i in seqarg) scv(y, grepl(nam[i], z, ignore.case, FALSE, fixed), args[[i]], TRUE, vind1 = TRUE)
y
} else y
} else {
repfun <- function(y) if(is.character(y)) {
z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy
y <- scv(y, seq_along(y), default, set, vind1 = TRUE) # Initialize all to default
for(i in seqarg) scv(y, grepl(nam[i], z, ignore.case, FALSE, fixed), args[[i]], TRUE, vind1 = TRUE)
y
} else y
}
}
}
} else {
if(arglen == 1L) {
args <- args[[1L]]
if(is.null(default)) {
if(missingl) {
repfun <- function(y) if(is.character(y)) {
z <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
scv(z, nam, args, TRUE) # `[<-`(y, y == nam, value = args)
} else y
} else {
repfun <- function(y) if(is.character(y)) scv(y, nam, args, set) else y # `[<-`(y, y == nam, value = args)
}
} else {
nr <- if(is.atomic(X)) NROW(X) else fnrow(X)
if(missingl) {
repfun <- function(y) if(is.character(y)) {
nas <- is.na(y)
z <- scv(y, nas, missing, set, vind1 = TRUE)
ind <- whichv(z, nam)
scv(z, nas, default, TRUE, TRUE, vind1 = TRUE) # duplAttributes(alloc(default, nr), y)
scv(z, ind, args, TRUE, vind1 = TRUE) # y == nam
} else y
} else {
repfun <- function(y) if(is.character(y)) scv(scv(y, nam, default, set, TRUE), nam, args, TRUE) else y # `[<-`(duplAttributes(alloc(default, nr), y), y == nam, value = args)
}
}
} else {
seqarg <- seq_len(arglen)
if(is.null(default)) {
repfun <- function(y) if(is.character(y)) {
if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing
else if(!set) y <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy
for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE)
y
} else y
} else {
nr <- if(is.atomic(X)) NROW(X) else fnrow(X)
if(missingl) {
repfun <- function(y) if(is.character(y)) {
nas <- is.na(y)
y <- scv(y, nas, missing, set, vind1 = TRUE)
z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy
scv(y, nas, default, TRUE, TRUE, vind1 = TRUE)
for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE)
y
} else y
} else {
repfun <- function(y) if(is.character(y)) {
z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy
y <- scv(y, nam[1L], default, set, TRUE) # duplAttributes(alloc(default, nr), y)
scv(y, nam[1L], args[[1L]], TRUE)
for(i in seqarg[-1L]) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE)
y
} else y
}
}
}
}
if(is.list(X)) {
if(set) {
lapply(unattrib(X), repfun)
return(invisible(X))
}
res <- duplAttributes(lapply(unattrib(X), repfun), X)
return(if(inherits(X, "data.table")) alc(res) else res)
}
if(!is.character(X)) stop("X needs to be character or a list")
res <- repfun(X)
return(if(set) invisible(res) else res)
}
na_locf <- function(x, set = FALSE) .Call(C_na_locf, x, set)
na_focb <- function(x, set = FALSE) .Call(C_na_focb, x, set)
na_locf_ph <- function(x, ph1, ph2, set = FALSE) .Call(C_na_locf, x, set)
na_focb_ph <- function(x, ph1, ph2, set = FALSE) .Call(C_na_focb, x, set)
replace_na <- function(X, value = 0L, cols = NULL, set = FALSE, type = "const") {
FUN <- switch(type, const =, value = scv, locf = na_locf_ph, focb = na_focb_ph,
stop("Unknown type:", type))
if(set) {
if(is.list(X)) {
if(is.null(cols)) {
lapply(unattrib(X), FUN, NA, value, TRUE)
} else if(is.function(cols)) {
lapply(unattrib(X), function(y) if(cols(y)) FUN(y, NA, value, TRUE) else y)
} else {
cols <- cols2int(cols, X, attr(X, "names"), FALSE)
lapply(unattrib(X)[cols], FUN, NA, value, TRUE)
}
} else FUN(X, NA, value, TRUE) # `[<-`(X, is.na(X), value = value)
return(invisible(X))
}
if(is.list(X)) {
if(is.null(cols)) return(condalc(duplAttributes(lapply(unattrib(X), FUN, NA, value), X), inherits(X, "data.table"))) # function(y) `[<-`(y, is.na(y), value = value)
if(is.function(cols)) return(condalc(duplAttributes(lapply(unattrib(X),
function(y) if(cols(y)) FUN(y, NA, value) else y), X), inherits(X, "data.table")))
clx <- oldClass(X)
oldClass(X) <- NULL
cols <- cols2int(cols, X, names(X), FALSE)
X[cols] <- lapply(unattrib(X[cols]), FUN, NA, value) # function(y) `[<-`(y, is.na(y), value = value)
return(condalc(`oldClass<-`(X, clx), any(clx == "data.table")))
}
FUN(X, NA, value) # `[<-`(X, is.na(X), value = value)
}
replace_NA <- replace_na
# Remove Inf (Infinity) and NaN (Not a number) from vectors or data frames:
replace_inf <- function(X, value = NA, replace.nan = FALSE, set = FALSE) {
if(set) {
if(is.list(X)) {
lapply(unattrib(X), if(replace.nan) (function(y) if(is.numeric(y)) scv(y, is.infinite(y) | is.nan(y), value, TRUE, vind1 = TRUE) else y) else
(function(y) if(is.numeric(y)) scv(y, is.infinite(y), value, TRUE, vind1 = TRUE) else y))
}
if(!is.numeric(X)) stop("Infinite values can only be replaced in numeric objects!")
if(replace.nan) scv(X, is.infinite(X) | is.nan(X), value, TRUE, vind1 = TRUE) else scv(X, is.infinite(X), value, TRUE, vind1 = TRUE)
return(invisible(X))
}
if(is.list(X)) {
# if(!inherits(X, "data.frame")) stop("replace_non_finite only works with atomic objects or data.frames")
res <- duplAttributes(lapply(unattrib(X),
if(replace.nan) (function(y) if(is.numeric(y)) scv(y, is.infinite(y) | is.nan(y), value, vind1 = TRUE) else y) else
(function(y) if(is.numeric(y)) scv(y, is.infinite(y), value, vind1 = TRUE) else y)), X)
return(if(inherits(X, "data.table")) alc(res) else res)
}
if(!is.numeric(X)) stop("Infinite values can only be replaced in numeric objects!")
if(replace.nan) return(scv(X, is.infinite(X) | is.nan(X), value, vind1 = TRUE)) # !is.finite(X) also replaces NA
scv(X, is.infinite(X), value, vind1 = TRUE)
}
replace_Inf <- replace_inf
# replace_non_finite <- function(X, value = NA, replace.nan = TRUE) {
# .Deprecated("replace_Inf")
# replace_Inf(X, value, replace.nan)
# }
Crepoutl <- function(x, limits, value, single_limit, set = FALSE) .Call(C_replace_outliers, x, limits, value, single_limit, set)
sd_limits <- function(x, limits) {
st <- fbstatsCpp(x, stable.algo = FALSE, setn = FALSE)
st[2L] + st[3L] * c(-limits, limits)
}
mad_limits <- function(x, limits) {
med <- fmedian.default(x)
mad <- fmedian.default(abs(x - med))
med + mad * c(-limits, limits)
}
# scaling data using MAD
mad_trans <- function(x) {
if(inherits(x, c("pseries", "pdata.frame"))) {
g <- GRP(x)
tmp <- fmedian(x, g, TRA = "-")
tmp %/=% fmedian(if(is.list(tmp)) lapply(tmp, abs) else abs(tmp), g, TRA = "fill", set = TRUE)
return(tmp)
}
tmp <- fmedian(x, TRA = "-")
tmp %/=% fmedian(if(is.list(tmp)) dapply(tmp, abs) else abs(tmp), TRA = "fill", set = TRUE)
return(tmp)
}
replace_outliers <- function(X, limits, value = NA,
single.limit = c("sd", "mad", "min", "max"),
ignore.groups = FALSE,
set = FALSE) {
if(length(limits) == 1L) {
# "overall_" arguments are legacy, now accommodated via the ignore.groups argument
sl <- switch(single.limit[1L], SDs = 4L, min = 2L, max = 3L,
overall_SDs = 5L, sd = 4L, mad = 6L,
MADs = 6L, overall_MADs = 7L, # Just in case
stop("Unknown single.limit option: ", single.limit[1L]))
if(sl == 5L || sl == 7L) ignore.groups <- TRUE
} else sl <- 0L
if(sl > 3L) { # Outliers according to standard deviation or MAD threshold
if(is.list(X)) {
if(!ignore.groups && inherits(X, c("grouped_df", "pdata.frame"))) {
if(is.character(value)) stop("clipping is not yet supported with grouped/panel data and SDs/MADs thresholds.")
num <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE)
num <- if(inherits(X, "grouped_df")) num & !fgroup_vars(X, "logical") else
num & attr(findex(X), "names") %!in% attr(X, "names")
clx <- oldClass(X)
STDXnum <- if(sl > 5L) mad_trans(fcolsubset(X, num)) else fscale(fcolsubset(X, num))
oldClass(X) <- NULL
res <- .mapply(function(z, y) scv(z, abs(y) > limits, value, set, vind1 = TRUE),
list(unattrib(X[num]), unattrib(STDXnum)), NULL)
if(set) return(invisible(X))
X[num] <- res
res <- `oldClass<-`(X, clx)
} else {
limit_fun <- if(sl > 5L) mad_limits else sd_limits
res <- lapply(unattrib(X), function(y) if(is.numeric(y)) Crepoutl(y, limit_fun(y, limits), value, sl, set) else y)
if(set) return(invisible(X))
res <- duplAttributes(res, X)
}
return(if(inherits(res, "data.table")) alc(res) else res)
}
if(is.matrix(X)) {
if(is.character(value)) stop("clipping is not yet supported with matrices and SDs/MADs thresholds.")
res <- scv(X, abs(if(sl > 5L) mad_trans(X) else fscale(X)) > limits, value, set, vind1 = TRUE)
} else {
res <- Crepoutl(X, if(sl > 5L) mad_limits(X, limits) else sd_limits(X, limits), value, sl, set)
}
return(if(set) invisible(res) else res)
}
# Standard cases
if(set) {
if(is.list(X)) lapply(unattrib(X), function(y) if(is.numeric(y)) Crepoutl(y, limits, value, sl, set) else y) else
Crepoutl(X, limits, value, sl, set)
return(invisible(X))
}
if(is.list(X)) {
res <- duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) Crepoutl(y, limits, value, sl, set) else y), X)
return(if(inherits(res, "data.table")) alc(res) else res)
}
Crepoutl(X, limits, value, sl, set)
}
# pad or fpad? x is vector, matrix or data.frame
pad_atomic <- function(x, i, n, value) {
ax <- attributes(x)
tx <- typeof(x)
if(typeof(value) != tx) value <- as.vector(value, tx)
if(is.matrix(x)) {
k <- dim(x)[2L]
m <- .Call(C_alloc, value, n * k, TRUE) # matrix(value, n, k)
dim(m) <- c(n, k)
m[i, ] <- x
if(length(ax) == 1L) return(m)
ax[["dim"]] <- c(n, k)
# Could also pad row-names? perhaps with names of i ??
if(length(ax[["dimnames"]][[1L]])) ax[["dimnames"]] <- list(NULL, ax[["dimnames"]][[2L]])
if(is.object(x)) ax[["class"]] <- NULL
return(`attributes<-`(m, ax)) # fastest ??
}
r <- .Call(C_alloc, value, n, TRUE) # matrix(value, n) # matrix is faster than rep_len !!!!
r[i] <- x
if(is.null(ax)) return(r)
if(length(names(x))) {
if(length(ax) == 1L) return(r)
ax[["names"]] <- NULL
}
return(`attributes<-`(r, ax))
}
# microbenchmark::microbenchmark(x[-i] <- ri, x[i2] <- ri)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# x[-i] <- ri 255.16654 420.7083 491.7369 446.0340 476.3324 1290.7396 100 b
# x[i2] <- ri 80.18755 136.8012 157.0027 146.8156 166.7158 311.5526 100 a
# microbenchmark::microbenchmark(seq_along(x)[-i])
# Unit: milliseconds
# expr min lq mean median uq max neval
# seq_along(x)[-i] 506.0745 541.7975 605.0245 567.8115 585.8384 1341.035 100
pad <- function(X, i, value = NA, method = c("auto", "xpos", "vpos")) { # 1 - i is same length as X, fill missing, 2 - i is positive: insert missing values in positions
ilog <- is.logical(i)
ineg <- i[1L] < 0L
n <- if(is.list(X) || is.matrix(X)) fnrow(X) else length(X)
xpos <- switch(method[1L], auto = if(ilog) bsum(i) == n else if(ineg) FALSE else length(i) == n,
xpos = TRUE, vpos = FALSE, stop("Unknown method: ", method[1L]))
n <- if(ilog) length(i) else if(xpos && !ineg) bmax(i) else n + length(i)
if(is.atomic(X)) return(pad_atomic(X, if(xpos || ineg) i else if(ilog) !i else -i, n, value))
if(!is.list(X)) stop("X must be atomic or a list")
if(ilog) {
i <- if(xpos) which(i) else whichv(i, FALSE)
} else if(!xpos) {
i <- seq_len(n)[if(ineg) i else -i]
}
ax <- attributes(X)
attributes(X) <- NULL
res <- lapply(X, pad_atomic, i, n, value)
if(length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(n)
return(condalcSA(res, ax, any(ax[["class"]] == "data.table")))
}
# Something like this already exists?? -> should work with lists as well...
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.