# merge multi-line expressions to single line:
match_brackets <- function (x, curly = FALSE) {
open_sym <- "\\("
close_sym <- "\\)"
both_sym <- "\\((.+)?\\)"
collapse_sym <- " "
if (curly) {
open_sym <- "\\{"
close_sym <- "\\}"
both_sym <- "\\{(.+)?\\}"
collapse_sym <- "; "
}
brseq <- bracket_sequences (x, open_sym, close_sym, both_sym)
br_open <- brseq$br_open
br_closed <- brseq$br_closed
if (length (br_open) == 0 & length (br_closed) == 0)
return (x)
else if (any (is.na (br_open)) & any (is.na (br_closed)))
return (NULL) # error in parsing brackets
x <- match_one_brackets (x, br_open, br_closed, collapse_sym)
# catch instances where curly brackets are only used on first condition,
# with second condition being a single line
if (curly)
x <- catch_curly_else (x)
return (x)
}
match_one_brackets <- function (x, br_open, br_closed, collapse_sym) {
has_gg_pluses <- FALSE
for (i in seq_along (br_open)) {
xmid <- x [br_open [i]:br_closed [i]]
if (grepl ("\\{\\s?$", xmid [1])) {
# join line after opening curly bracket
xmid <- c (paste0 (xmid [1:2], collapse = " "),
xmid [3:length (xmid)])
}
if (grepl ("^\\s?\\}", xmid [length (xmid)])) {
# join line before closing curly
if (length (xmid) > 2)
xmid <- c (xmid [1:(length (xmid) - 2)],
paste0 (xmid [(length (xmid) - 1):length (xmid)],
collapse = " "))
else
xmid <- paste0 (xmid, collapse = " ")
}
# plus any ggplot-type lines with terminal "+". Formulae can also end
# with "+", so presume only "ggplot" commands will have this, and grep
# for that also
index <- grep ("\\+\\s?$", xmid)
if (length (index) > 0 & any (grepl ("gg", xmid))) {
has_gg_pluses <- TRUE
rms <- NULL
for (j in index) {
if (j < length (xmid)) {
xmid [j + 1] <- paste0 (xmid [j],
xmid [j + 1],
collapse = " ")
rms <- c (rms, j)
}
}
if (!is.null (rms))
xmid <- xmid [-rms]
}
x [br_closed [i]] <- paste0 (xmid, collapse = collapse_sym)
}
x <- rm_intervening_lines (x, br_open, br_closed)
x <- gsub ("\\s+", " ", x)
x <- rm_final_ggplus_lines (x, has_gg_pluses)
return (x)
}
#' remove intervening lines, making sure to remove any
#' pipes into ggplot2 expression from preceding lines:
#' @noRd
rm_intervening_lines <- function (x, br_open, br_closed) {
if (length (br_open) > 0) {
index <- unlist (lapply (seq_along (br_open), function (i)
br_open [i]:(br_closed [i] - 1)))
index2 <- (index - 1) [index > 1]
pipe_sym <- "\\\\%>\\\\%$"
terminal_pipe <- grep (pipe_sym, x [index2])
if (length (terminal_pipe) > 0) {
terminal_pipe <- index2 [terminal_pipe]
x [terminal_pipe] <- gsub (pipe_sym, "", x [terminal_pipe])
}
x <- x [-index]
}
return (x)
}
rm_final_ggplus_lines <- function (x, has_gg_pluses) {
if (has_gg_pluses) {
index <- grep ("\\+\\s?$", x)
index2 <- cumsum (c (FALSE, diff (index) > 1))
index <- lapply (split (index, f = as.factor (index2)), function (i)
c (i, max (i) + 1))
for (i in index) {
x [i [1]] <- paste0 (x [i], collapse = " ")
}
x <- x [-unlist (lapply (index, function (i) i [-1]))]
}
return (x)
}
catch_curly_else <- function (x) {
index <- rev (grep ("else\\s?$", x))
if (length (index) > 0) {
for (i in index) {
x [i] <- paste0 (x [i], " ", x [i + 1])
x <- x [- (i + 1)]
}
}
return (x)
}
#' Get sequences of line & character numbers within quotations
#'
#' @param x Lines of code
#' @return List of sequence indices, one for each line of `x`, identifying
#' characters within quotations
#' @noRd
quote_sequences <- function (x) {
qts <- gregexpr ("\\\"|\\\'", x)
qts_not_esc <- gregexpr ("'", x)
qts <- lapply (seq_along (qts), function (i) {
if (qts [[i]] [1] > 0) {
qts [[i]] <- qts [[i]] [which (!qts [[i]] %in%
qts_not_esc [[i]])]
}
if (length (qts [[i]]) == 0)
qts [[i]] <- -1L
return (qts [[i]])
})
ln_nums <- lapply (seq_along (qts), function (i)
rep (i, length (qts [[i]])))
qts <- cbind (unlist (ln_nums), unlist (qts))
qts <- qts [which (qts [, 2] > 0), ]
if (nrow (qts) == 0L)
return (NULL)
index <- seq (nrow (qts) / 2) * 2 - 1
qts <- cbind (qts [index, , drop = FALSE],
qts [index + 1, , drop = FALSE])
# split sequences which extend across multiple lines:
index <- which (qts [, 1] != qts [, 3])
if (length (index) > 0) {
reps <- rep (1, nrow (qts))
reps [index] <- 2
reps <- rep (seq (nrow (qts)), times = reps)
qts <- qts [reps, ]
index <- which (duplicated (qts))
qts [index - 1, 3] <- qts [index - 1, 1]
qts [index - 1, 4] <- nchar (x [qts [index - 1, 1]])
qts [index, 1] <- qts [index, 3]
qts [index, 2] <- 1
}
linenums <- apply (qts, 1, function (i) i [1])
qts <- apply (qts, 1, function (i) as.vector (seq (i [2], i [4])))
if (!is.list (qts)) # apply when all vecs have same length
qts <- lapply (apply (qts, 2, function (i) list (i)), unlist)
names (qts) <- linenums
return (qts)
}
bracket_sequences <- function (x, open_sym, close_sym, both_sym) {
# `gregexpr` return -1 for no match; these are removed here
br_open <- lapply (gregexpr (open_sym, x), function (i)
as.integer (i [i >= 0]))
br_closed <- lapply (gregexpr (close_sym, x), function (i)
as.integer (i [i >= 0]))
# remove any that are inside quotations, like L#44 in stats::spline
qts <- quote_sequences (x)
quotes <- gregexpr ("\"|\'", x)
multi_line_quote <- start_multi_line_quote <- FALSE
for (i in seq (x)) {
if (multi_line_quote && !start_multi_line_quote) {
if (any (quotes [[i]]) > 0L) {
qindex <- seq (1L, quotes [[i]] [1])
if (length (br_open [[i]]) > 0L) {
br_open [[i]] <- br_open [[i]] [!br_open [[i]] %in% qindex]
}
if (length (br_closed [[i]]) > 0L) {
br_closed [[i]] <- br_closed [[i]] [!br_closed [[i]] %in% qindex]
}
multi_line_quote <- FALSE
} else {
br_open [[i]] <- br_closed [[i]] <- integer (0L)
}
} else if (any (quotes [[i]] > 0)) {
index <- seq (length (quotes [[i]]) / 2) * 2
qstart <- quotes [[i]] [index - 1]
qend <- quotes [[i]] [index]
qstart_1 <- utils::tail (qstart, 1L)
qend_1 <- utils::tail (qend, 1L)
if (!multi_line_quote && !is.na (qstart_1) && is.na (qend_1)) {
multi_line_quote <- TRUE
start_multi_line_quote <- TRUE
} else if (is.na (qstart_1) && !is.na (qend_1)) {
multi_line_quote <- FALSE
}
if (!multi_line_quote || (multi_line_quote && start_multi_line_quote)) {
if (start_multi_line_quote) {
qindex <- seq (max (qstart), nchar (x) [i])
start_multi_line_quote <- FALSE
} else {
qindex <- unlist (lapply (seq_along (qstart), function (i)
qstart [i]:qend [i]))
}
if (length (br_open [[i]]) > 0L) {
br_open [[i]] <- br_open [[i]] [!br_open [[i]] %in% qindex]
}
if (length (br_closed [[i]]) > 0L) {
br_closed [[i]] <- br_closed [[i]] [!br_closed [[i]] %in% qindex]
}
}
}
}
# examples may have rogue brackets, like in stats::spline, where it arises
# in a plot axis label (line#62)
if (length (unlist (br_open)) != length (unlist (br_closed))) {
return (list (br_open = NA,
br_closed = NA))
}
# Remove all instances of matched brackets on one line
for (i in seq (x)) {
len <- min (c (length (br_open [[i]]), length (br_closed [[i]])))
index <- which (br_open [[i]] [seq (len)] < br_closed [[i]] [seq (len)])
if (length (index) > 0) {
br_open [[i]] <- br_open [[i]] [-index]
br_closed [[i]] <- br_closed [[i]] [-index]
}
}
# convert to sequences of line numbers where brackets close, noting that
# there may be multiple matched closing brackets on one line, hence the
# `length` function here. There may also be values of -1 from the initial
# `gregexpr` above; these need to be ignored here
br_open2 <- br_closed2 <- NULL
for (i in seq (br_open))
br_open2 <- c (br_open2, rep (i, length (br_open [[i]])))
for (i in seq (br_closed))
br_closed2 <- c (br_closed2, rep (i, length (br_closed [[i]])))
# no matching brackets just gives empty lines for all that follows:
nested <- nested_sequences (br_open2, br_closed2)
# rev to ensure lines are sequentially joined
br_open <- rev (nested$br_open)
br_closed <- rev (nested$br_closed)
index <- which (!duplicated (cbind (br_open, br_closed)))
br_open <- br_open [index]
br_closed <- br_closed [index]
list (br_open = br_open,
br_closed = br_closed)
}
# return positions of paris of outer matching brackets on one line, as [open1,
# close1, open2, close2, ...]. Matching brackets enclosed within others are
# removed, and can be found by iterating over the inner portion once the outer
# brakcets have been identified.
bracket_sequences_one_line <- function (x,
open_sym = "\\(",
close_sym = "\\)") {
br_open <- lapply (gregexpr (open_sym, x), function (i)
as.integer (i [i >= 0])) [[1]]
br_closed <- lapply (gregexpr (close_sym, x), function (i)
as.integer (i [i >= 0])) [[1]]
if (length (br_open) > 1 & length (br_closed) > 1) {
while (br_open [2] < br_closed [1]) {
br_open <- br_open [-2]
br_closed <- br_closed [-1]
if (length (br_open) < 2)
break
}
}
# unparseable junk lines need not have matching brackets:
if (length (br_open) != length (br_closed)) {
len <- min (c (length (br_open), length (br_closed)))
br_open <- br_open [seq (len)]
br_closed <- br_closed [seq (len)]
}
return (as.vector (rbind (br_open, br_closed)))
}
# check for nesting where another bracket opens before current one has
# been closed.
# open: x x + 1
# closed: x + 2 x + 3
# the actual grouping should be (x, x + 3). The following moves the x to 2nd
# position and deletes the first.
nested_sequences <- function (br_open, br_closed) {
i2 <- seq_along (br_open) [-1]
i1 <- i2 - 1
index <- which (br_open [i2] < br_closed [i1])
if (length (index) > 0) {
for (i in index)
br_open [i + 1] <- br_open [i]
br_open <- br_open [-index]
br_closed <- br_closed [-index]
}
list (br_open = br_open,
br_closed = br_closed)
}
# Expressions are multiple lines of code embedded within curly brackets. When
# individual components of these span multiple lines, they must first be
# concatenated to single lines, then the whole thing concatenated with each of
# these single lines terminated with a semi-colon. This function must be run
# prior to standard "match_brackets" calls.
# example: stats::approx
parse_expressions <- function (x) {
brseq <- bracket_sequences (x,
open_sym = "\\{",
close_sym = "\\}",
both_sym = "\\{(.+)?\\}")
br_open <- brseq$br_open
br_closed <- brseq$br_closed
for (i in seq_along (br_open)) {
xmid <- x [br_open [i]:br_closed [i]]
if (length (xmid) > 2) {
# rm content up to first curly
cstart <- which (vapply (gregexpr ("\\{", xmid), function (i)
any (i > 0), logical (1))) [1]
j <- regexpr ("\\{", xmid [cstart])
xstart <- substring (xmid [cstart], 1, j)
xmid [cstart] <- gsub (".*\\{", "", xmid [cstart])
# rm content after last curly
cend <- which (vapply (gregexpr ("\\}", xmid), function (i)
any (i > 0), logical (1)))
cend <- utils::tail (cend, 1)
j <- regexpr ("\\}", xmid [cend])
xend <- substring (xmid [cend], j, nchar (xmid [cend]))
xmid [cend] <- gsub ("\\}.*", "", xmid [cend])
# rm blank lines
xmid <- xmid [which (!grepl ("^\\s?$", xmid))]
# join any `if ... else ...` lines
index <- rev (grep ("^\\s*else", xmid))
if (length (index) > 0) {
xmid [index - 1] <- paste0 (xmid [index - 1], xmid [index])
xmid <- xmid [-index]
}
index <- rev (grep ("^\\s*if\\s*\\(", xmid))
if (length (index) > 0) {
# check that if clauses do not continue to next line without
# "{", and concatenate any which do
br1 <- gregexpr ("\\(", xmid [index])
br2 <- gregexpr ("\\)", xmid [index])
br_end <- grep (NA, length (index))
for (j in seq_along (br1)) {
brseq <- nested_sequences (br1 [[j]], br2 [[j]])
br_end [j] <- brseq$br_closed [1]
}
xmid_after <- substring (xmid [index],
br_end + 1,
nchar (xmid [index]))
index2 <- grep ("^\\s*$", xmid_after)
if (length (index2) > 0) {
xmid [index [index2]] <- paste0 (xmid [index [index2]],
xmid [index [index2] + 1])
xmid <- xmid [- (index [index2] + 1)]
}
}
xmid <- match_brackets (c (xstart, match_brackets (xmid), xend),
curly = TRUE)
xfirst <- xlast <- NULL
if (br_open [i] > 1)
xfirst <- x [1:(br_open [i] - 1)]
if (br_closed [i] < length (x))
xlast <- x [(br_closed [i] + 1):length (x)]
x <- c (xfirst, xmid, xlast)
}
}
return (x)
}
# Join function defititions within examples into single lines. This presumes
# `match_brackets` has already been run, so merging is only ever between
# isolation "f <- function (...)" lines and subsequent definitions with or
# without curly brackets.
# example: stats::binomial
join_function_lines <- function (x) {
fns <- rev (grep ("\\sfunction\\s?\\(", x))
if (length (fns) > 0) {
fn_defs <- x [fns]
# remove everything prior to "function(":
fn_start <- regexpr ("function\\s?\\(", fn_defs)
fn_defs <- substring (fn_defs, fn_start, nchar (fn_defs))
# Then everything up to closing bracket of fn def:
br_open <- gregexpr ("\\(", fn_defs)
br_closed <- gregexpr ("\\)", fn_defs)
for (i in seq_along (br_open)) {
temp <- nested_sequences (br_open [[i]], br_closed [[i]])
br_open [[i]] <- temp$br_open
br_closed [[i]] <- temp$br_closed
}
br_open <- vapply (br_open, function (i) i [1], integer (1))
br_closed <- vapply (br_closed, function (i) i [1], integer (1))
fn_defs <- substring (fn_defs, br_closed + 1, nchar (fn_defs))
defs_on_next_line <- rev (grep ("^\\s?$", fn_defs))
if (length (defs_on_next_line) > 0) {
index <- fns [defs_on_next_line]
x [index] <- paste0 (x [index], x [index + 1])
x <- x [- (index + 1)]
}
}
return (x)
}
# strip any if conditionals from any example lines which include the focal
# function, returning the functional lines alone from all (if + else) conditions
# x here is a single line only
strip_if_cond <- function (x) {
if (grepl ("^\\s?if", x)) {
br_open <- gregexpr ("\\(", x) [[1]]
br_closed <- gregexpr ("\\)", x) [[1]]
ns <- nested_sequences (br_open, br_closed)
br_open <- ns$br_open
br_closed <- ns$br_closed
# strip first conditional:
xi <- substring (x, br_closed [1] + 1, nchar (x))
if (grepl ("^\\s?\\{", xi)) {
br_open <- gregexpr ("\\{", xi) [[1]]
br_closed <- gregexpr ("\\}", xi) [[1]]
ns <- nested_sequences (br_open, br_closed)
br_open <- ns$br_open
br_closed <- ns$br_closed
x1 <- substring (xi, br_open [1] + 1, br_closed [1] - 1)
x2 <- substring (xi, br_closed [1] + 1, nchar (x))
if (grepl ("else", x2)) {
x2 <- gsub ("\\s?else\\s?", "", x2)
br_open <- gregexpr ("\\{", x2) [[1]]
if (br_open [1] > 0) {
br_closed <- gregexpr ("\\}", x2) [[1]]
ns <- nested_sequences (br_open, br_closed)
br_open <- ns$br_open
br_closed <- ns$br_closed
x2 <- substring (x2, br_open [1] + 1, br_closed [1] - 1)
}
}
x <- c (x1, x2)
}
}
return (x)
}
# crude un-piping operation to split marittr-piped expressions into multiple,
# distinct base-R lines. Note that the matching of brackets is currently
# inadequate, and will only work if each expression contains only one primary
# parenthesised expression.
unpipe <- function (x) {
x <- strsplit (x, "%>%") [[1]]
for (i in seq_along (x)) {
x [i] <- paste0 ("var", i, " <- ", x [i])
if (i > 1) {
br1 <- gregexpr ("\\(", x [i]) [[1]] [1]
br2 <- max (gregexpr ("\\)", x [i]) [[1]])
not_empty <- grepl ("[A-Za-z0-9]", substring (x [i], br1, br2))
comma <- ""
if (not_empty)
comma <- ", "
x [i] <- paste0 (substring (x [i], 1, br1),
"var", i - 1, comma,
substring (x [i], br1 + 1, nchar (x [i])))
}
}
return (x)
}
split_piped_lines <- function (x) {
index <- rev (grep ("(.*)%>%(.*)", x))
for (i in index) {
xinsert <- unpipe (x [i])
if (i == 1) {
if (length (x) == 1) {
x <- xinsert
} else {
x <- c (xinsert, x [2:length (x)])
}
} else if (i == length (x)) {
if (length (x) == 1) {
x <- xinsert
} else {
x <- c (x [1:(i - 1)], xinsert)
}
} else {
x <- c (x [1:(i - 1)], xinsert, x [(i + 1):length (x)])
}
}
return (x)
}
# match opening and corresponding closing curly brackets in terms of line
# numbers of input character vector, `x`.
match_curlies <- function (x) {
opens <- vapply (gregexpr ("\\{", x), function (i) {
if (all (i <= 0))
return (0L)
else
return <- length (i)
}, integer (1))
closes <- vapply (gregexpr ("\\}", x), function (i) {
if (all (i <= 0))
return (0L)
else
return <- length (i)
}, integer (1))
oc <- cumsum (opens) - cumsum (closes)
return (which (oc == 0) [1] - 1)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.