#' Print message
#'
#' @param \dots Any number of arguments that could also be handed to
#' \code{\link[base]{cat}}
#'
#' @return \code{pr} prints its arguments pasted together followed by
#' a newline.
#'
#' @seealso \code{\link[base]{cat}}, \code{\link[base]{writeLines}}
#'
#' @examples
#' pr("Finished iteration ", 3, " out of ", 10)
#'
#' @export
pr <- function(...) {
cat(..., "\n", sep = "")
}
#' @rdname pr
#'
#' @return \code{pr1} prints its arguments pasted together but doesn't
#' add a trailing newline.
#'
#' @examples
#' pr1("Progress: ")
#' for (i in 1:10) pr1(".")
#'
#' @export
pr1 <- function(...) {
cat(..., sep = "")
}
#' Exit from running R job
#'
#' @param status Exit status
#' @param \dots Other arguments that could also be passed to \code{\link[base]{quit}}
#'
#' @seealso \code{\link[base]{quit}}
#'
#' @export
exit <- function(status = 0L, ...) {
quit(save = "no", status = status, ...)
}
#' Flip 1st and 2nd argument of a function
#'
#' @param f Function whose 1st and 2nd argument should be flipped
#' @return Same function with its 1st and 2nd argument flipped.
#'
#' @examples
#' f <- function(x, y, z) {
#' x + 2*y + 3*z
#' }
#' g <- flip(f)
#' f(1, 2, 3) == 14
#' g(1, 2, 3) == 13
#' g(z = 3, 1, 2) == 13
#'
#' @export
flip <- function(f) {
args <- formals(f)
if (is.null(args) || length(args) == 1L && names(args) != "...")
stop("Function must take at least 2 arguments.")
if ("..." %in% names(formals(f))[1:2])
stop("Function must not have \"...\" as 1st or 2nd argument.")
# Swap 1st and 2nd argument.
names(formals(f))[1:2] <- names(formals(f))[2:1]
f
}
#' Return number of unique elements
#'
#' @param x List or vector
#'
#' @export
nuniq <- function(x) {
length(unique(x))
}
#' Flatten list of strings of comma-separated elements
#'
#' @param x Character vector of strings of separated elements
#' @param sep Separator used to separate elements
#' @param fixed If TRUE (default), separator is interpreted as a
#' constant string, otherwise it is considered a regular
#' expression in perl flavor
#'
#' @details
#' The separator can also be an arbitrary perl regular expression in
#' which case `fixed' should be set to \code{FALSE}.
#'
#' @examples
#' stopifnot(all(1:5 == flatten_csv(c("1,2", "3,4,5"))))
#' stopifnot(all(c("a", "b", "c", "d") == flatten_csv(c("a1b", "c23d"), sep = "\\d+", fixed = FALSE)))
#'
#' @export
flatten_csv <- function(x, sep = ",", fixed = TRUE) {
if (length(x) == 0)
return(character())
unlist(strsplit(x, split = sep, fixed = fixed, perl = !fixed), use.names = FALSE)
}
#' Return number of columns in file
#'
#' @param con File or connection for which columns should be counted
#' @param sep Separator used to separate columns in \code{file}
#'
#' @examples
#' \dontrun{
#' nfields("data.csv", sep = ",")
#' }
#'
#' @export
nfields <- function(con, sep = "\t") {
length(scan(con, what = character(), sep = sep, nlines = 1L, quiet = TRUE))
}
#' Expand template into \code{\link[utils]{read.table}}s \code{colClasses} argument
#'
#' @param fmt Format string to be expanded and then used as the
#' \code{colClasses} argument to \code{\link[utils]{read.table}}.
#'
#' @details The following format letters are available:
#' \tabular{ll}{
#' c\tab "character"\cr
#' i\tab "integer"\cr
#' l\tab "logical"\cr
#' n\tab "numeric"\cr
#' N\tab "NULL"\cr
#' r\tab "raw"\cr
#' x\tab "complex"\cr
#' }
#' The digits say how many times the expansion of the character
#' should be included in the result.
#'
#' @return A vector that can be used as the \code{colClasses} argument
#' to \code{\link[utils]{read.table}}.
#'
#' @examples
#' stopifnot(colClasses(c("N 2 c i n")) ==
#' c("NULL", "character", "character", "integer", "numeric"))
#'
#' @export
colClasses <- function(fmt) {
if (!is.character(fmt) || is.na(fmt) || length(fmt) != 1 || grepl("^\\s*$", fmt))
stop("fmt must be a non-empty, non-NA character vector of length 1")
translate <- function(x) {
type <- switch(letter <- substr(x, nchar(x), nchar(x)),
N = "NULL", c = "character", i = "integer", n = "numeric",
l = "logical", r = "raw", x = "complex",
stop("unknown letter in format: ", letter))
if (nchar(x) == 1)
return(type)
length <- as.integer(substr(x, 1, nchar(x) - 1))
rep_len(type, length)
}
groups <- unlist(strsplit(fmt, "(?<=[[:alpha:]])", perl = TRUE), use.names = FALSE)
unlist(lapply(trimws(groups), translate), use.names = FALSE)
}
#' Apply function over level combinations of factors
#'
#' @description Group a vector into subsets according to level
#' combinations of one or several factors and apply a function
#' separately to every subset. This function differs from
#' \code{\link[stats]{ave}} in three respects. Firstly, factors
#' have to be supplied in an explicit list. Secondly, the
#' function is a positional parameter. Thirdly and most
#' importantly, there is now a way to supply any number of
#' additional arguments to the function via \code{\dots} as in the
#' apply-family of functions.
#'
#' @param x Atomic vector
#' @param factors List of factors of the same length as \code{x}.
#' @param f Function
#' @param \dots Further arguments to \code{f}
#' @return A vector of the same length as \code{x}. Subsets of the
#' result vector equal the result of applying \code{f} to the
#' corresponding subsets of \code{x}.
#'
#' @seealso \code{\link[stats]{ave}}
#'
#' @examples
#' stopifnot(identical(
#' ave2(c(1, 2, 3, 4), list(c(1, 1, 2, 2)),
#' function(x, sep) {
#' paste(x, rev(x), sep = sep)
#' }, "-"),
#' c("1-2", "2-1", "3-4", "4-3")))
#' @export
ave2 <- function (x, factors, f, ...) {
if (is.null(factors))
stop("Argument \"factors\" must not be NULL.")
if (length(factors) == 0)
stop("Argument \"factors\" must not be of length 0.")
grouping <- interaction(factors)
if (length(grouping) != length(x))
stop("Vectors in argument \"factors\" must have same length as argument \"x\".")
split(x, grouping) <- lapply(split(x, grouping), f, ...)
x
}
#' Return modified names of object
#'
#' @param x Object whose \code{names} should be modified
#' @param mapping Named character vector whose \"names\" attribute
#' matches all, some, or none of the names of \code{x} and whose
#' elements will be used to make up the new names of \code{x}.
#'
#' @return Return a character vector that can be used with
#' \code{\link[base]{names<-}} to change the names of \code{x}.
#'
#' @details The \code{rename} function does not modify \code{x} and
#' thereby avoids potentially expensive copies due to R's
#' copy-on-modify semantics. Use \code{\link[base]{names<-}} with
#' the result of \code{rename} to change the names of \code{x} in
#' a non-expensive way.
#'
#' Note that \code{mapping} does not have to contain new values
#' for all elements of \code{names(x)}. If an element of
#' \code{names(x)} is not among \code{names(mapping)}, it will be
#' unchanged. In particular, if \code{mapping} and \code{x} have
#' no names in common, \code{rename} will return \code{names(x)}.
#' It is an error for \code{names(mapping)} to contain duplicate
#' names. It is okay for \code{mapping} to contain a mixture of
#' named and unnamed elements. Unnamed elements of \code{x} will
#' never be renamed.
#'
#' @examples
#' rename(c(a = 1, b = 2, c = 3, 4, `9` = 9),
#' c(a = "A", b = NA, "foo", g = "G", `9` = "nine"))
#'
#' @export
rename <- function(x, mapping) {
if (is.null(names(x)))
stop("x must have names")
if (is.null(names(mapping)))
stop("mapping must have names")
if (anyDuplicated(names(mapping)))
stop("mapping must not have duplicate names")
result <- names(x)
matched <- names(x) %in% names(mapping)
result[matched] <- mapping[names(x)[matched]]
result[names(x) == ""] <- ""
result
}
#' Find intervals containing points
#'
#' @param point Points
#' @param start Lower boundaries of intervals
#' @param end Upper boundaries of intervals
#'
#' @return A list of the same length as \code{point} where element
#' \code{i} contains the indexes of closed intervals
#' [\code{start}, \code{end}] containing \code{point[k]}.
#' \code{NA} points result in an \code{NA} vector of indexes.
#' Points that don't belong to any interval result in a length-0
#' vector of indexes.
#'
#' @seealso \code{\link{find_matching_intervals}}
#'
#' @export
match_intervals <- function(point, start, end) {
lapply(point, function(x) {
if (is.na(x))
return(NA)
which(x >= start & x <= end)
})
}
#' Extract duplicated rows from data frame.
#'
#' @description
#' Extract all rows from a data frame that occur more than once when
#' compared in terms of the columns specified in \code{columns}. Two
#' rows are duplicates with respect to the columns specified in
#' \code{columns} if they agree element-wise in all of the specified
#' columns.
#'
#' @param data Data frame
#' @param columns Columns in \code{data} to be considered when looking
#' for duplicated rows. Can be a character vector of column names
#' or a numeric vector giving the column indexes.
#' @param select Columns to include in resulting data frame. By
#' default all columns will be included.
#'
#' @return A data frame containing all rows of \code{data} that had
#' duplicates with respect to the columns specified in
#' \code{columns}.
#'
#' @seealso \code{\link{is_duplicated_in}}
#'
#' @examples
#' d <- read.table(text = "
#' x y z
#' 1 2 3
#' 1 3 4
#' 1 2 4
#' ", header = TRUE)
#'
#' find_duplicates(d, "x")
#' find_duplicates(d, "x", c("y", "z"))
#' find_duplicates(d, c("x", "y"))
#' find_duplicates(d, c("x", "z"))
#'
#' @export
find_duplicates <- function(data, columns, select = NULL) {
if (is.null(select))
select <- colnames(data)
if (nrow(data) == 0L)
return(data[, select])
if (any(columns %not_in% colnames(data)))
stop("nonexistent variables in COLUMNS")
if (any(select %not_in% colnames(data)))
stop("nonexistent variables in SELECT")
key <- Reduce(function(x, y) paste(x, y, sep = "\r"), data[columns])
data[is_duplicated_in(key), select, drop = FALSE]
}
#' Duplicated elements
#'
#' @param x Vector or list
#'
#' @return A logical vector of the same length as \code{x} that is
#' TRUE if the corresponding element of \code{x} occurs more than
#' once in \code{x}, and otherwise FALSE. NAs always result in
#' FALSE.
#'
#' @seealso \code{\link[base]{duplicated}}
#'
#' @export
is_duplicated_in <- function(x) {
if (is.null(x))
return(logical())
result <- x %in% x[duplicated(x)]
result[is.na(x)] <- FALSE
result
}
#' Opposite of \code{\%in\%}
#'
#' @param x Values to be matched
#' @param table Values to be matched against
#'
#' @return Returns the logical negation of \code{\%in\%}.
#'
#' @seealso \code{\link[base]{\%in\%}}
#'
#' @export
`%not_in%` <- function(x, table) {
Negate(`%in%`)(x, table)
}
#' Inverting a map
#'
#' @description
#' Given a named list, that is, a map, return the inverse map.
#'
#' @param map A named list
#'
#' @details NA values are dropped during the inversion.
#'
#' @return A list representing the inverse of \code{map}
#'
#' @examples
#' map <- list(a = c("A", "C"), b = c("B", "C", NA))
#' invert_map(map)
#'
#' @export
invert_map <- function(map) {
values <- unlist(map, use.names = FALSE)
names(values) <- rep(names(map), times = sapply(map, length))
split(names(values), values)
}
#' One value per row
#'
#' @description
#'
#' In a data frame with a column or columns where entries are
#' comma-separated lists of values, split entries with multiple
#' values, and put them into separate rows. Eventually there will be
#' only "one value per row" in the specified column(s).
#'
#' @param data Data frame
#' @param columns Columns of data frame in which to spread multi-value
#' entries across multiple rows
#' @param sep Separator used in multi-value entries
#'
#' @return A data frame with the same columns as `d' but where there
#' is only one value per row in the columns `cols'.
#'
#' @examples
#' d <- read.table(text = "
#' x y z
#' 1 a,c A,C
#' 2 b,d B,D
#' 3 e E
#' ", header = TRUE, stringsAsFactors = FALSE)
#' one_per_row(d, c("y", "z"))
#'
#' @export
one_per_row <- function(data, columns, sep = ",") {
if (length(columns) == 0)
stop("COLUMNS must be of length >= 1")
col <- columns[1]
x <- strsplit(data[[col]], sep)
duplicate_some_rows <- rep(seq_len(nrow(data)), times = sapply(x, length))
data2 <- data[duplicate_some_rows, ]
data2[[col]] <- unlist(x, use.names = FALSE)
if (length(columns) > 1)
return(one_per_row(data2, columns[-1], sep))
rownames(data2) <- NULL
data2
}
#' One value per vector element
#'
#' @description
#' Split comma-separated values in a character vector across multiple
#' indexes such that eventually there is "one value per element".
#'
#' @param x Character vector
#' @param sep Separator used in multi-value vector elements
#' @param perl Consider `sep' as a perl regular expression. If FALSE
#' (default), `sep' is considered a literal (fixed) string.
#'
#' @return Returns a character vector where every element contains
#' exactly one string. Empty strings in `x' will not be part of the
#' return vector.
#'
#' @examples
#' x <- c("1,2", "3,4,5", "6", "", "7,8")
#' y <- c("1---2", "3--4----5", "6-7")
#' one_per_element(x)
#' one_per_element(y, sep = "-+", perl = TRUE)
#'
#' @export
one_per_element <- function(x, sep = ",", perl = FALSE) {
unlist(strsplit(x, split = sep, fixed = !perl, perl = perl), use.names = FALSE)
}
#' Test whether a binary predicate holds for all direct neighbors in a list
#'
#' @param f Binary function used for comparing neighboring elements of
#' `x'
#' @param \dots One or more lists or vectors (holding elements of the
#' same type)
#'
#' @return TRUE if all neighboring elements (even across lists)
#' compare TRUE, otherwise FALSE.
#'
#' @examples
#' all_neighbors(identical, c(1,1,1))
#' all_neighbors(identical, c(1,2,1))
#' all_neighbors(identical, c(NA,NA,NA))
#' all_neighbors(identical, c(1,NA,1))
#' all_neighbors(function(x,y) TRUE, 1:5)
#' all_neighbors(function(x,y) FALSE, c(1,1,1))
#' all_neighbors(function(x,y) y - x == 1L, 1:10)
#' all_neighbors(`<`, 1:10, 100:200)
#' all_neighbors(`<=`, list(a = 1, b = 3), list(c = 3, d = 5))
#'
#' @export
all_neighbors <- function(f, ...) {
force(f)
lists <- list(...)
x <- if (length(lists) == 1) lists[[1]] else do.call(c, lists)
tryCatch({
Reduce(function(x, y) if (f(x,y)) y else stop(), x)
# We got thru the Reduce without throwing an exception. In
# other words, all comparisons of neighboring elements
# returned TRUE. Hence we return TRUE.
TRUE
}, error = function(e) {
# An exception was thrown meaning that a pair of neighboring
# elements compared FALSE. Hence we return FALSE.
FALSE
})
}
#' Surround string(s) with double/single quotes
#'
#' @param x Character vector
#'
#' @return Surrounds every element of `x' with double/single quotes,
#' escaping nested double quotes as needed.
#'
#' @aliases single_quote
#'
#' @export
double_quote <- function(x) {
x <- gsub("\"", "\\\"", x, fixed = TRUE)
paste0("\"", x, "\"")
}
#' @rdname double_quote
single_quote <- function(x) {
x <- gsub("'", "\\\'", x, fixed = TRUE)
paste0("'", x, "'")
}
#' Do nothing
#'
#' @param \dots Ignored
#'
#' @return None.
#'
#' @examples
#' noop(ignore_whatever_is_in_here)
#'
#' @export
noop <- function(...) {
invisible(NULL)
}
#' Create function for keeping track of values
#'
#' @return A function with signature `function(x, show = FALSE)'. If
#' `show' is TRUE, the function returns a character vector with all
#' values (coerced to "character") that it has `seen' so far. If
#' `show' is FALSE and argument `x' was `seen' before, the function
#' returns "TRUE", otherwise it returns "FALSE". The argument `x'
#' must be a of length 1 and class "character", "numeric", or
#' "integer". If the argument is of class "numeric" or "integer",
#' conversion to class "character" is done via
#' \code{\link[base]{as.character}}. The order of element in the list
#' returned if `show' is TRUE is not guaranteed to be the same order
#' in which the values were `seen'.
#'
#' @examples
#' seen <- make_observer()
#' for (i in c(1:5, 1:5)) pr("Seen ", i, " -> ", seen(i))
#' seen(show = TRUE)
#'
#' @export
make_observer <- function() {
e <- new.env()
function (x, show = FALSE) {
if (show)
return(ls(envir = e))
if (length(x) != 1L || length(class(x)) != 1L
|| ! class(x) %in% c("character", "numeric", "integer"))
stop("Argument must be of length 1 and class must be ",
"\"character\", \"numeric\", or \"integer\".")
if (! class(x) == "character")
x <- as.character(x)
if (exists(x, e, inherits = FALSE))
return(TRUE)
assign(x, TRUE, envir = e, inherits = FALSE)
return(FALSE)
}
}
#' Group words in chunks of given length
#'
#' @param x Vector of strings to be split and regrouped
#' @param n Number of characters per chunk of regrouped words
#' @param split Pattern on which to split strings in `x' into words
#' @param perl TRUE if `split' and `special_sep' should be interpreted
#' as perl-style regular expressions. Takes precedence over
#' `fixed'
#' @param fixed TRUE if `split' and `special_sep' should be
#' interpreted as fixed strings
#' @param sep Separator used when combining words into chunks
#' @param special Pattern identifying words that should be combined in
#' a special way (see Details)
#' @param special_sep Separator used to combine two words if the first
#' matches `special'
#' @param trim_leading If TRUE, leading whitespace will be removed
#' from chunks
#' @param trim_trailing If TRUE, trailing whitespace will be removed
#' from chunks
#' @param tol Chunks will be allowed to be `tol' characters longer
#' than `n' if that makes their lengths more even in overall
#' @param punct If TRUE, words consisting of a single punctuation mark
#' will always be merged to the previous chunk
#'
#' @details
#' The meaning of `word' depends on the value of `split'. By default,
#' strings are split on whitespace resulting in a list of words in the
#' normal sense (maybe including some punctuation characters and the
#' like). Clever choice of `split', `sep', `special', and
#' `special_sep' let's you obtain a variety of effects (see
#' \code{wrap_lines}).
#'
#' @return A list of character vectors where every character vector
#' contains one or more chunks of words. Every chunk contains from 1
#' to `n + tol' characters unless it contains a single word of length
#' > `n' in which case its length is the length of the word.
#'
#' @seealso \code{\link{wrap_lines}}
#'
#' @examples
#' s <- "The quick yellow-orange-brown fox jumps over the 3-year-old, lazy dog."
#' group_words(s, 10, split = "\\\\s+|(?<=-)", special = "-$")
#'
#' @export
group_words <- function(x, n, split = "\\s+", perl = TRUE,
fixed = !perl, sep = " ", special = NULL, special_sep = "",
trim_leading = TRUE, trim_trailing = trim_leading, tol = 0L,
punct = TRUE) {
if (n < 1L)
stop("N must be an integer >= 1.")
if (!fixed && !perl)
stop("Either FIXED or PERL must be TRUE.")
if (fixed && perl) {
fixed <- FALSE
warning("FIXED conflicts with PERL: setting FIXED to FALSE.")
}
f <- function(xs) {
g <- function(acc, word) {
# Check if current line ends in a special way.
is_special <- FALSE
if (!is.null(special))
is_special <- grepl(special, acc[1L])
# Current chunk has enough space for word.
if (nchar(acc[1L]) + nchar(word) + nchar(sep) <= n + tol)
# Add word to current chunk.
acc[1L] <- paste(acc[1L], word,
sep = if (is_special) special_sep else sep)
# Add punctuation sign to previous chunk.
else if (grepl("^[[:punct:]]$", word))
acc[1L] <- paste(acc[1L], word, sep = "")
# Start new chunk.
else {
if (trim_leading)
word <- sub("^\\s+", "", word, perl = TRUE)
acc <- c(word, acc)
}
acc
}
xs <- rev(Reduce(g, xs))
# Trim whitespace.
if (trim_trailing)
xs <- sub("\\s+$", "", xs, perl = TRUE)
xs
}
lapply(strsplit(x, split, fixed, perl), f)
}
#' Wrap strings across lines
#'
#' @param x Strings to be wrapped
#' @param n Number of characters per line after wrapping
#' @param sep Separator used for combining lines
#' @param max_lines Wrap every string to at most `max_lines' lines
#' @param dots String to be inserted to indicate that a string was
#' truncated
#' @param hard TRUE if lines should have a fixed length of `n' even if
#' that means line breaks in the middle of a word
#' @param tol Lines will be allowed to be `tol' characters longer than
#' `n' if that makes the line lengths more even overall
#'
#' @return A list of strings each of which spans at most `max_lines'
#' physical lines of length 1 to `n + tol'.
#'
#' @seealso \code{\link{group_words}}
#'
#' @examples
#' s <- "The quick yellow-orange-red-brown fox jumps over a 3-year-old, lazy dog."
#' pr(wrap_lines(s, 12))
#' pr(wrap_lines(s, 12, hard = TRUE))
#'
#' @export
wrap_lines <- function(x, n, sep = "\n", max_lines = Inf,
dots = "...", hard = FALSE, tol = if (hard) 0L else 3L) {
if (nchar(dots) >= n)
stop("DOTS must have < N characters.")
# Split into lines.
if (hard)
y <- group_words(x, n, split = NULL, sep = "", tol = tol)
else
y <- group_words(x, n, split = "\\s+|(?<=-)", special = "-$", tol = tol)
# Truncate to at most `max_lines' lines and paste together using
# `sep' as line separator.
f <- function(lines) {
if (length(lines) <= 1L)
return(lines)
if (length(lines) > max_lines) { # too many lines
# Keep only the first max_lines lines.
lines <- lines[seq_len(max_lines)]
# Insert dots in last line.
last <- lines[max_lines]
substr(last, n - nchar(dots) + 1L, n) <- dots
lines[max_lines] <- substr(last, 1L, n)
}
paste(lines, collapse = sep)
}
sapply(y, f)
}
#' Test for directory
#'
#' @param fs Filenames to be tested for directory status
#'
#' @return TRUE for directories, otherwise FALSE.
#'
#' @export
is.directory <- function(fs) {
file.info(fs)$isdir
}
#' Read and concatenate tables
#'
#' @param files Files with tables to be read and concatenated
#' @param sep Field separator in input tables
#' @param ncore Number of cores to use in parallel
#' @param header See documentation of \code{\link[utils]{read.table}}
#' @param colClasses See documentation of \code{\link[utils]{read.table}}
#' @param verbose If TRUE, report progress while running
#' @param check.names See documentation of \code{\link[utils]{read.table}}
#' @param comment.char See documentation of \code{\link[utils]{read.table}}
#' @param \dots Passed on to \code{\link[utils]{read.table}}
#'
#' @details
#' If not explicitly specified, the field separator will be inferred
#' from the first non-comment, non-empty line of one of the input
#' files. Possible candidates are tab, comma, semicolon, single
#' space. They will be tested in the above order. The first
#' candidate found will become the field separator. The field
#' separator must be the same for all input files.
#'
#' @return A data frame that results from concatenating all input
#' tables.
#'
#' @examples
#' \dontrun{
#' read.tables(paste0("table", 1:100, ".txt"), ncore = 10L)
#' }
#'
#' @export
read.tables <- function(files, sep = NULL, ncore = 1L, header = TRUE,
colClasses = "character", verbose = TRUE, check.names = FALSE,
comment.char = "#", ...) {
# Drop empty files.
files <- Filter(function(x) file.info(x)$size > 0L, files)
stopifnot(length(files) > 0L)
stopifnot(all(vapply(files, file.exists, logical(1L))))
# Determine field separator from one of the files.
if (is.null(sep)) {
is_comment <- function(x) {
grepl(paste0("^", comment.char), x)
}
# Find a non-comment, non-empty line.
for (i in seq_along(files)) {
con <- file(files[i], "r")
tryCatch({
line <- readLines(con, 1L) # read 1st line
while (length(line) && nchar(line) && is_comment(line))
line <- readLines(con, n = 1L) # read another line
}, finally = close(con))
if (length(line) && nchar(line))
break # found a non-comment, non-empty line
}
candidate_separators <- c("\t", ",", ";", " ")
sep <- match1of(candidate_separators, line, fixed = TRUE)
if (is.na(sep))
stop("Unable to figure out field separator in ", files[i])
}
# Read and concatenate tables.
f <- function(filename) {
if (verbose)
pr("Reading ", filename)
utils::read.table(filename, sep = sep, colClasses = colClasses,
header = header, comment.char = comment.char, ...)
}
do.call(rbind, parallel::mclapply(files, f, mc.preschedule = FALSE,
mc.cores = ncore, mc.silent = FALSE))
}
#' Suspend execution until deadline
#'
#' @param date_string String specifying when to continue execution
#'
#' @details
#' The date string must have the format "%d-%m-%y %H:%M", basically
#' `day-month-year hour:minute'. For more details see
#' \code{\link[base]{strptime}}.
#'
#' It is an error to specify a date string that designates a time
#' point that, at the moment \code{wait_until} is called, lies in the
#' past.
#'
#' The accuracy of \code{wait_until} is +/- 1 minute.
#'
#' @examples
#' \dontrun{
#' wait_until("26-4-34 17:03")
#' }
#'
#' @export
wait_until <- function(date_string) {
deadline <- strptime(date_string, format = "%d-%m-%y %H:%M")
if (is.na(deadline))
stop("Unable to convert to date: ", date_string)
# Check whether deadline has arrived.
past_deadline <- function() {
difftime(Sys.time(), deadline) > 0
}
if (past_deadline())
stop("We're already past the deadline.")
cat("Waiting until", date_string, "")
# Wait until deadline.
while (!past_deadline()) {
Sys.sleep(60)
cat(".")
}
pr()
}
#' Find matching candidate
#'
#' @param candidates Character vector of candidates to be matched
#' against `x'
#' @param x Character vector
#' @param \dots Further arguments passed on to \code{\link[base]{grepl}}
#'
#' @return A character vector of the same length as `x' where element
#' i contains the first element of `candidates' that matches in
#' `x[i]', or NA if none of the candidates match.
#'
#' @examples
#' match1of(c("foo", "bar", "xxx", "baz", "quux"), c("foo", "bar", "quux"))
#'
#' @export
match1of <- function(candidates, x, ...) {
if (length(candidates) == 0L || !is.character(candidates))
stop("`candidates' must be a character vector of length >= 1.")
if (length(x) == 0L || !is.character(x))
stop("`x' must be a character vector of length >= 1.")
# If length(x) > 1, create a length(x)-by-length(candidates)
# matrix such that the entry in row i and column j is TRUE iff
# candidate[j] matches in x[i]. If length(x) == 1, return a
# logical vector of the same length as `candidates'.
hit_matrix <- vapply(candidates, grepl, logical(length(x)), x, ...)
# Convert `hit_matrix' to matrix, if necessary.
if (length(x) == 1L)
hit_matrix <- t(as.matrix(hit_matrix))
apply(hit_matrix, 1L, function(hits) {
if (!any(hits))
NA_character_
else
candidates[which.max(hits)]
})
}
#' Find first matching candidate
#'
#' @param candidates Character vector of candidates to be matched
#' against `x'
#' @param x Character vector
#' @param \dots Further arguments passed on to \code{\link[base]{grepl}}
#'
#' @return #' Index of the element of `candidates' that matches the
#' element of `x' with the smallest index. In case of ties, the
#' smallest of the tied indexes is returned. If none of the
#' candidates match any of the elements in `x', NA is returned.
#'
#' @examples
#' find_first_match(c("foo", "bar", "baz", "quux"), c("baz", "quux", "bar"))
#'
#' @export
find_first_match <- function(candidates, x, ...) {
if (length(x) == 0L)
stop("`x' must be a character vector of length >= 1.")
if (length(candidates) == 0L)
stop("`candidates' must be a character vector of length >= 1.")
choices <- vapply(lapply(candidates, grepl, x, ...), function(ys) {
if (any(ys))
which.max(ys)
else
NA
}, integer(1L))
if (all(is.na(choices)))
return(NA_integer_)
min(choices, na.rm = TRUE)
}
#' Count number of parenthesized subexpressions
#'
#' @param x Character vector
#'
#' @examples
#' nsubexp("a_[(string)]_with_(2)_parenthesized_(sub)expressions")
#'
#' @export
nsubexp <- function(x) {
# Remove any character classes since parentheses in character
# classes don't create submatches.
y <- gsub("\\[.*?\\]", "", x)
# Remove any escaped opening parentheses since those also cannot
# create submatches.
y <- gsub("\\\\\\(", "", y)
# Count open parentheses.
nchar(gsub("[^(]", "", y))
}
#' Extract submatches from strings
#'
#' @param pattern An extended regular expression (see
#' \link[base]{regex}) including at least one parenthesized
#' subexpression
#' @param xs Character vector with strings from which to extract the
#' subexpressions matched by `pattern'
#' @param drop If TRUE instead of returning a single-column or
#' single-row matrix a vector is returned
#'
#' @return Returns a matrix containing submatches extracted from `xs'
#' according to `pattern'. The entry in row i and column j
#' corresponds to the jth submatch extracted from `x[i]'. If
#' `pattern' did not match an element of `xs', the corresponding row
#' in the returned matrix will consist entirely of NA_character_
#' values. If `drop = TRUE' single-column and single-row matrices are
#' returned as vectors.
#'
#' @examples
#' submatch(".*chr(\\\\d+)_(\\\\d+)",
#' c("path_chr1_2/to/the_chr10_3_foo.txt",
#' "path_chr1_2/to/the_chr7_5_bar.txt",
#' "path_chrA_B/to/the_chrX_Y_baz.txt"))
#'
#' # Submatch an expression within parentheses.
#' submatch("[(](.*)[)]", "foo (bar) baz")
#'
#' @export
submatch <- function(pattern, xs, drop = FALSE) {
nsubmatch <- nsubexp(pattern)
if (nsubmatch == 0L)
stop("`pattern' must contain a parenthesized subexpression.")
# Extract submatches.
matches <- regexec(pattern, xs)
lengths <- lapply(matches, attr, "match.length")
extract1 <- function(s, pos, len) {
vapply(seq_along(pos)[-1L], function(i) {
substr(s, pos[i], pos[i] + len[i] - 1L)
}, character(1L))
}
z <- unname(Map(extract1, xs, matches, lengths))
# Fill non-matches with `nsubmatch' NAs.
z <- lapply(z, function(x) {
if (length(x) == 0L)
rep(NA_character_, nsubmatch)
else
x
})
# Convert to matrix.
do.call(rbind, z)[, , drop = drop]
}
#' Create directories
#'
#' @param dirs Character vector with paths of directories to be
#' created
#' @param recursive If TRUE then \code{mkdir} works like `mkdir -p' in
#' unix and makes parent directories as needed
#' @param verbose If TRUE show a message for each newly created
#' directory
#'
#' @details
#' If a path in `dirs' points to an existing directory, there is
#' nothing left to do and the directory will just be ignored. If
#' there is already a file that has the same name as the directory to
#' be created, an error is thrown. Therefore \code{mkdir} only
#' returns if every directory in `dirs' either already exists or was
#' successfully created. Failing early---instead of, for example,
#' returning a logical vector indicating which of the directories in
#' `dirs' could be created---seems a desirable behavior since later
#' code depending on the existence of the directories in `dirs' would
#' fail anyway: There is no need to start possibly long-running
#' computations just to get an error when trying to write the precious
#' results to a file in a non-existent directory.
#'
#' @export
mkdir <- function(dirs, recursive = TRUE, verbose = TRUE) {
f <- function(d) {
if (!file.exists(d)) {
if (!dir.create(d, recursive = recursive))
stop("Failed to create directory: ", d)
if (verbose)
pr("Created directory: ", d)
}
else if (!is.directory(d))
stop("Can't create directory: ", d,
"\nThere is already a file of the same name.")
}
invisible(lapply(dirs, f))
}
#' Maybe apply function to value
#'
#' @param f Function to be applied to `x'
#' @param x Object to which `f' should be applied
#' @param y Alternative value to return if applying `f' to `x' leads
#' to an error or a warning
#'
#' @details
#' Errors or warnings caused by evaluating `x' will not be caught.
#' The purpose of \code{maybe} is only to catch errors and warning
#' caused by evaluating `f(x)'.
#'
#' @return `f(x)' unless that results in an error or a warning,
#' otherwise `y'.
#'
#' @examples
#' x <- "12"
#' y <- "foo"
#' maybe(as.integer, x)
#' maybe(as.integer, y)
#' maybe(as.integer, y, 0L)
#' tryCatch(maybe(as.integer, stop(), 0L),
#' error = function(e) pr("Evaluating `x' caused an error."))
#'
#' @export
maybe <- function(f, x, y = x) {
# We don't want to catch errors or warnings resulting from
# evaluating the expression that yields the value of `x'. The
# concern of `maybe' is to catch warnings and errors caused by
# applying `f' to `x', not errors due to evaluating `x'.
force(x)
# If evaluating `f(x)' gives an error or a warning, return `y'.
alternative <- function(ignored) y
tryCatch(f(x), warning = alternative, error = alternative)
}
#' Output the first part of gzip-files
#'
#' @param files Character vector with paths to gzip-files
#' @param n Number of lines to print
#' @param simplify If TRUE and `n = 1L', return result as character vector
#'
#' @return In case `simplify = FALSE', returns the first `n' lines of
#' each gzip-file in `files' as a list of character vectors. If
#' `simplify = TRUE' and `n = 1' returns the results as a character
#' vector of the same length as `files'.
#'
#' @export
gzhead <- function(files, n = 1L, simplify = TRUE) {
not_there <- ! file.exists(files)
if (any(not_there))
stop("Some `files' don't exist:\n", paste(files[not_there], collapse = ", "))
x <- lapply(files, readLines, n)
if (simplify && n == 1L)
do.call(c, x)
else
unname(x)
}
#' Group one-dimensional data into clusters of contiguous points
#'
#' @param x Numeric vector of points to be grouped into clusters
#' @param gap Size of gap that separates two neighboring clusters
#' @param frac Size of gap that separates two neighboring clusters
#' expressed as a fraction of the range of `x'
#'
#' @return Returns an integer vector that is aligned with `x' and
#' contains the cluster for the associated element in `x'. NA values
#' in `x' will have NA values in the resulting vector.
#'
#' @examples
#' x <- c(9, 12, 3, 4, 17, 10, 12, NA, 6, 1)
#' cluster1d(x, gap = 3)
#' cluster1d(x, frac = .15)
#'
#' @export
cluster1d <- function(x, gap, frac = NULL) {
if ((missing(gap) && is.null(frac))
|| (!missing(gap) && !is.null(frac)))
stop("You must supply either `gap' or `frac'.")
if (!is.null(frac) && (frac <= 0 || 1 <= frac))
stop("`frac' must be in (0,1).")
ord <- order(x)
xs <- x[ord]
if (length(x) == 0L)
stop("`x' must contain at least one non-NA value.")
if (!missing(frac))
gap <- diff(range(xs, na.rm = TRUE)) * frac
breaks <- c(0L, which(diff(xs) >= gap), length(xs))
cluster <- as.integer(cut(seq_along(xs), breaks))
cluster[is.na(xs)] <- NA
cluster[order(ord)]
}
#' Count how many points fall into each interval
#'
#' @param pos Points
#' @param start Left endpoints of interval
#' @param end Right endpoints of interval
#'
#' @return An integer vector of the same length as `start` and `end`
#' containing the number of points in `pos` that fall into each of the
#' intervals.
#'
#' @examples
#' count_points_per_interval(0:1, 2:1, 3:4)
#'
#' @export
count_points_per_interval <- function(pos, start, end) {
if (length(start) != length(end))
stop("START and END must have the same length.")
pos <- as.double(pos)
start <- as.double(start)
end <- as.double(end)
inf_intervals <- is.infinite(start) | is.infinite(end)
start[inf_intervals] <- 0
end[inf_intervals] <- 0
inf_points <- is.infinite(pos)
pos[inf_points] <- NA_real_
intervals <- rcpp_find_matching_intervals(pos, start, end)$interval
stopifnot(all(intervals %in% seq_along(start)))
counts <- factor(intervals, levels = seq_along(start))
n <- as.integer(table(counts, useNA = "no"))
n[inf_intervals] <- NA
n
}
#' Find intervals containing a set of points
#'
#' @param pos Double vector of positions
#' @param start Double vector with left endpoints of intervals
#' @param end Double vector with right endpoints of intervals
#'
#' @return A list with elements "position" and "interval". Both are
#' integer vectors containing indexes. The "position" vector
#' indexes the \code{pos} vector and the "interval" vector the
#' \code{start} and \code{end} vectors. The kth index in the
#' "position" vector forms a pair with the kth index in the
#' "interval" vector. A pair of indexes (i,j) means that
#' \code{pos[i]} belongs to the interval defined by
#' \code{start[j]} and \code{end[j]}.
#'
#' @examples
#' x <- find_matching_intervals(c(2, 7), c(5, 0, 2), c(9, 1, 3))
#' stopifnot(identical(x$position, c(1L, 2L)))
#' stopifnot(identical(x$interval, c(3L, 1L)))
#' @export
find_matching_intervals <- function(pos, start, end) {
if (length(start) != length(end))
stop("START and END must have the same length.")
.Call('miscFun_rcpp_find_matching_intervals', PACKAGE = 'miscFun', pos, start, end)
}
#' Order intervals in increasing order
#'
#' @param start Left endpoints of interval
#' @param end Right endpoints of interval
#'
#' @return An integer vector `x' which, when used as an index to will
#' sort `start` and `end` into increasing order just as if
#' \code{\link[miscFun]{sort_intervals}} was used.
#'
#' @seealso \code{\link[miscFun]{sort_intervals}}
#'
#' @examples
#' start <- 2:1
#' end <- 3:4
#' ord <- order_intervals(start, end)
#' start_sorted <- start[ord]
#' end_sorted <- end[ord]
#' old_order <- order(ord)
#' stopifnot(all(start_sorted[old_order] == start))
#' stopifnot(all(end_sorted[old_order] == end))
#'
#' @export
order_intervals <- function(start, end) {
order(start > end, is.na(start), is.na(end), start, end)
}
#' Sort intervals in increasing order
#'
#' @param start Left endpoints of interval
#' @param end Right endpoints of interval
#'
#' @return A list with two vectors named "start" and "end" of the same
#' length as `start` and `end` and representing the left and right
#' endpoints of the sorted intervals.
#'
#' @examples
#' sort_intervals(2:1, 3:4)
#'
#' @export
sort_intervals <- function(start, end) {
new_order <- order_intervals(start, end)
list(start = start[new_order], end = end[new_order])
}
#' Test whether all arguments have the same length
#'
#' @param \dots Objects that will be compared in terms of their length
#'
#' @return TRUE if all objects have the same length, otherwise FALSE.
#'
#' @examples
#' stopifnot(
#' same_length(NA, 0),
#' same_length(NULL, integer()),
#' same_length(list(), integer()),
#' same_length(letters, letters, letters),
#' !same_length(letters[1], letters[1:2], letters[1:3]))
#'
#' @export
same_length <- function(...) {
all_neighbors(`==`, vapply(list(...), length, 1L))
}
#' Create all [un]ordered pairs from two vectors
#'
#' @param x,y Atomic vectors to be paired off
#'
#' @return A data frame with column names "x" and "y" where every row
#' corresponds to a pair.
#'
#' @name make_pairs
NULL
#' @rdname make_pairs
#'
#' @examples
#' make_ordered_pairs(1:3, 1:2)$x == c(1, 2, 3, 1, 2, 3)
#' make_ordered_pairs(1:3, 1:2)$y == c(1, 1, 1, 2, 2, 2)
#'
#' @export
make_ordered_pairs <- function(x, y = NULL) {
make_pairs(x, y, ordered = TRUE)
}
#' @rdname make_pairs
#'
#' @examples
#' make_unordered_pairs(1:3, 1:2)$x == c(1, 2, 3, 2, 3)
#' make_unordered_pairs(1:3, 1:2)$y == c(1, 1, 1, 2, 2)
#'
#' @export
make_unordered_pairs <- function(x, y = NULL) {
make_pairs(x, y, ordered = FALSE)
}
make_pairs <- function(x, y = NULL, ordered = TRUE) {
if (is.null(y))
y <- x
xy <- outer(seq_along(x), seq_along(y), paste)
if (!ordered) # Remove duplicates.
xy <- xy[lower.tri(xy, diag = TRUE)]
dim(xy) <- NULL
pairs <- submatch("(\\d+) (\\d+)", xy)
data.frame(
x = x[as.integer(pairs[, 1L])],
y = y[as.integer(pairs[, 2L])],
stringsAsFactors = FALSE)
}
#' Eval a piece of code supplied in a C-style string format
#'
#' @param fmt C-style string format in length-1 character vector
#' @param \dots values to be passed into \code{fmt}
#'
#' @return Returns the result of evaluating the code specified by
#' \code{fmt} in the calling environment.
#'
#' @examples
#' x <- 1; y <- 2
#' evalf("%s + y", "x")
#'
#' @export
evalf <- function(fmt, ...) {
eval(parse(text = sprintf(fmt, ...)), envir = parent.frame())
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.