Nothing
# Copyright (C) 2021 Brodie Gaslam
#
# This file is part of "diffobj - Diffs for R Objects"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
# Split by guides; used by nested structures to retrieve contents within
# guides. Each element has an attribute indicating the indices from the
# text element it was drawn from
#
# @param drop.leading keeps the section preceding guides; originally this was
# always dropped, but caused problems with lists of depth > 1
split_by_guides <- function(txt, guides, drop.leading=TRUE) {
stopifnot(
is.character(txt), !anyNA(txt), is.integer(guides),
all(guides %in% seq_along(txt))
)
empty <- list(`attr<-`(txt, "idx", seq_along(txt)))
if(!length(guides)) {
empty
} else {
guide.l <- logical(length(txt))
guide.l[guides] <- TRUE
sections <- cumsum(c(if(guides[1L] == 1L) 1L else 0L, diff(guide.l) == 1L))
ids <- seq_along(txt)
# remove actual guidelines
ids.net <- ids[-guides]
sec.net <- sections[-guides]
txt.net <- txt[-guides]
# split and drop leading stuff if it exists (those with section == 0)
dat <- unname(split(txt.net, sec.net))
ind <- unname(split(ids.net, sec.net))
if(drop.leading) {
dat <- tail(dat, max(sec.net))
ind <- tail(ind, max(sec.net))
}
# Generate indices and attach them to each element of list
Map(`attr<-`, dat, "idx", ind )
}
}
# Detect which rows are likely to be meta data rows (e.g. headers) in tabular
# data (data.frames, timeseries with freq > 1).
#
# note due to ts use, can't use rownames, colnames, etc.
#
# Also, right now we're overloading a bunch of different formats (data.table,
# data.frame, etc. Probably would be better to separate the regexes into
# different functions and keep the wrapping logic in here).
detect_2d_guides <- function(txt) {
stopifnot(is.character(txt))
# Start by looking for first row that leads spaces, this should be the
# beginning of the actual data, typically the column headers. This ways we can
# skip the meta data in tibbles and the like
res <- integer(0L)
if(any(crayon::has_style(txt))) txt <- crayon::strip_style(txt)
first.spaces <- grep("^\\s+\\S+", txt)
if(length(first.spaces)) {
# Now look for data; space.rows are rows that start with spaces, and thus
# likely contain the column headers.
first.space <- min(first.spaces)
space.rows <-
!grepl("^\\S+|^\\s+[0-9]+|^\\s+---\\s*$", txt) &
seq_along(txt) >= first.space
if(!any(space.rows) || all(space.rows)) {
if(length(space.rows)) res <- 1L
} else {
head.row <- min(which(space.rows))
first.row <- min(which(!space.rows & seq_along(space.rows) > head.row))
last.row <- max(which(!space.rows))
# Between first.row and last.row, look for repeating sequences of head rows
# and non head rows; should have the same number of each for each block in
# a wrapped 2d object
if(last.row > head.row) {
space.bw <- space.rows[head.row:last.row]
seq.dat <- vapply(
split(space.bw, cumsum(c(TRUE, diff(space.bw) == 1L))),
FUN=function(x) c(sum(x), sum(!x)),
integer(2L)
)
# Which of the sets of true and false head rows have the same repeating
# sequence as the first? One thing to think about is what happens when
# print gets truncated; should allow last in sequence to have fewer rows,
# but we don't do that yet...
valid.grps <- colSums(seq.dat - seq.dat[,1L] == 0L) == 2L
if(any(valid.grps)) {
# Figure out which rows the headers correspond to by cumsuming the
# header and non-header rows, and then adding the initial offset.
res <- array(cumsum(seq.dat), dim=dim(seq.dat))[1L, valid.grps] +
head.row - 1L
# If there is more than one row for each header, expand out the header
if(seq.dat[1L, 1L] > 1L)
# sequence only gained `from` param in R4.x, so this is our
# "backport"
res <- base::unname(
sequence(seq.dat[1L,]) + rep(res - seq.dat[1L,], seq.dat[1L,])
)
}
} } }
res
}
# Definitely approximate matching, we are lazy in matching the `$` versions
# due to the possibility of pathological names (e.g., containing `)
detect_list_guides <- function(txt) {
stopifnot(is.character(txt))
res <- integer(0L)
if(length(txt)) {
# match stuff like "[[1]][[2]]" or "$ab[[1]]$cd" ...
square.brkt <- "(\\[\\[\\d+\\]\\])"
dollar.simple <- sprintf("(\\$%s)", .reg.r.ident)
pat <- sprintf("^(%s|%s)*(\\$`.*`.*)?$", square.brkt, dollar.simple)
# Only keep those that are first, preceded by an empty string, or by
# another matching pattern
has.pat <- grepl(pat, txt) & nzchar(txt)
has.chars <- c(FALSE, head(nzchar(txt), -1L))
has.pat.prev <- c(FALSE, head(has.pat, -1L))
valid.pat <- has.pat & (!has.chars | has.pat.prev)
# For any sequence of matching patterns, only keep the last one since
# the other ones are redundant
if(any(valid.pat)) {
v.p.rle <- rle(valid.pat)
valid.pat[-with(v.p.rle, cumsum(lengths)[values])] <- FALSE
}
res <- which(valid.pat)
}
res
}
# Matrices
detect_matrix_guides <- function(txt, dim.n) {
stopifnot(
is.character(txt), !anyNA(txt),
is.null(dim.n) || (is.list(dim.n) && length(dim.n) == 2L)
)
n.d.n <- names(dim.n)
row.n <- n.d.n[1L]
col.n <- n.d.n[2L]
# try to guard against dimnames that contain regex
# identify which lines could be row and col headers
n.p <- "(\\[|\\]|\\(|\\)|\\{|\\}|\\*|\\+|\\?|\\.|\\^|\\$|\\\\|\\|)"
c.h <- if(!is.null(col.n) && nzchar(col.n)) {
col.pat <- sprintf("^\\s{2,}%s$", gsub(n.p, "\\\1", col.n))
grepl(col.pat, txt)
} else {
rep(FALSE, length(txt))
}
r.h <- if(!is.null(row.n) && nzchar(row.n)) {
# a bit lazy, should include col headers as well
row.pat <- sprintf("^%s\\s+\\S+", gsub(n.p, "\\\1", row.n))
grepl(row.pat, txt)
} else {
pat.extra <- if(!is.null(dim.n[[2L]]) && is.character(dim.n[[2L]])) {
paste0(c("", gsub(n.p, "\\\1", dim.n[[2L]])), collapse="|")
}
grepl(paste0("^\\s+(\\[,[1-9]+[0-9]*\\]", pat.extra, ")(\\s|$)"), txt)
}
# Classify each line depending on what pattern it matches so we can then
# analyze sequences and determine which are valid
row.types <- integer(length(txt))
row.types[r.h] <- 1L # row meta / col headers
row.types[c.h] <- 2L # col meta
mx.starts <- integer(0L)
if(is.null(n.d.n)) {
mx.start.num <- 1L
mx.starts <- which(row.types == mx.start.num)
} else {
mx.start.num <- 2L
tmp <- which(row.types == mx.start.num)
if(sum(r.h) == sum(c.h) && identical(which(c.h) + 1L, which(r.h))) {
mx.starts <- tmp
}
}
mx.start <- head(mx.starts, 1L)
res <- integer(0L)
if(length(mx.start)) {
# Now try to see if pattern repeats to identify the full list of wrapped
# guides, and return the indices that are part of repeating pattern
mx.end <- head(mx.starts[which(mx.starts > mx.start)], 1L) - 1L
if(!length(mx.end)) mx.end <- length(txt)
pat.inds <- mx.start:(mx.end)
template <- rep(
row.types[pat.inds],
floor((length(txt) - mx.start + 1L) / length(pat.inds))
)
res <- which(head(row.types, length(template)) == template & !!template) +
mx.start - 1L
}
res
}
# Here we want to get the high dimension counter as well as the column headers
# of each sub-dimension
detect_array_guides <- function(txt, dim.n) {
n.d.n <- names(dim.n)
stopifnot(
is.character(txt),
is.list(dim.n) || is.null(dim.n),
(is.character(n.d.n) && length(n.d.n) > 2L) || is.null(n.d.n)
)
# Detect patterns for higher dimensions, and then use the matrix guide
# finding functions to get additional guides
dim.guides <- which(grepl("^, ,", txt))
blanks <- which(txt == "")
res <- integer(0L)
if(
length(dim.guides) && length(blanks) &&
all(dim.guides + 1L %in% blanks) &&
(length(dim.guides) == 1L || length(unique(diff(dim.guides)) == 1L))
) {
# Make sure within each array section there is a matrix representation
dim.guide.fin <- sort(c(dim.guides, dim.guides + 1L))
sub.dat <- split_by_guides(txt, dim.guide.fin)
heads <- lapply(sub.dat, detect_matrix_guides, head(dim.n, 2L))
if(
all(vapply(heads, identical, logical(1L), heads[[1L]])) &&
all(vapply(heads, length, integer(1L)))
)
res <- dim.guide.fin
}
res
}
# Utility fun to determin whether an object would be shown with the default show
# method
is_default_show_obj <- function(obj) {
stopifnot(isS4(obj))
s.m <- selectMethod("show", class(obj))
identical(
class(s.m),
structure("derivedDefaultMethod", package = "methods")
)
}
# Basic S4 guide detection, does not handle nesting or anything fancy like that
# and could easily be fooled
detect_s4_guides <- function(txt, obj) {
stopifnot(isS4(obj))
# Only try to do this if relying on default S4 show method
if(is_default_show_obj(obj)) {
# this could be an issue if they start using curly quotes or whatever...
guides <- c(
sprintf("An object of class \"%s\"", class(obj)),
sprintf("Slot \"%s\":", slotNames(obj))
)
guides.loc <- which(txt %in% guides)
guides.txt <- txt[guides.loc]
if(!identical(guides, guides.txt)) {
integer() # nocov really no way to test this, and harmless
} else {
guides.loc
}
} else integer()
}
#' Generic Methods to Implement Flexible Guide Line Computations
#'
#' Guides are context lines that would normally be omitted from the
#' diff because they are too far from any differences, but provide particularly
#' useful contextual information. Column headers are a common example.
#' Modifying guide finding is an advanced feature intended for package
#' developers that want special treatment for the display output of their
#' objects.
#'
#' \code{Diff} detects these important context lines by looking for patterns in
#' the text of the diff, and then displays these lines in addition to the
#' normal diff output. Guides are marked by a tilde in the gutter, and
#' are typically styled differently than normal context lines, by default in
#' grey. Guides may be far from the diff hunk they are juxtaposed to. We
#' eschew the device of putting the guides in the hunk header as \code{git diff}
#' does because often the column alignment of the guide line is meaningful.
#'
#' Guides are detected by the \code{guides*} methods documented here.
#' Each of the \code{diff*} methods (e.g. \code{\link{diffPrint}}) has a
#' corresponding \code{guides*} method (e.g.
#' \code{\link{guidesPrint}}), with the exception of \code{\link{diffCsv}}
#' since that method uses \code{diffPrint} internally. The \code{guides*}
#' methods expect an R object as the first parameter and the captured display
#' representation of the object in a character vector as the second. The
#' function should then identify which elements in the character representation
#' should be treated as guides, and should return the numeric indices for them.
#'
#' The original object is passed as the first argument so that the generic can
#' dispatch on it, and so the methods may adjust their guide finding behavior
#' to data that is easily retrievable from the object, but less so from the
#' character representation thereof.
#'
#' The default method for \code{guidesPrint} has special handling for 2D
#' objects (e.g. data frames, matrices), arrays, time series, tables, lists, and
#' S4 objects that use the default \code{show} method. Guide finding is on a
#' best efforts basis and may fail if your objects contain \dQuote{pathological}
#' display representations. Since the diff will still work with failed
#' \code{guides} finding we consider this an acceptable compromise. Guide
#' finding is more likely to fail with nested recursive structures. A known
#' issue is that list-like S3 objects without print methods [reset the tag
#' buffers](https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17610) so the
#' guides become less useful for them.
#'
#' \code{guidesStr} highlights top level objects. The default methods for the
#' other \code{guide*} generics do not do anything and exist only as a mechanism
#' for providing custom guide line methods.
#'
#' If you dislike the default handling you can also define your own methods for
#' matrices, arrays, etc., or alternatively you can pass a guide finding
#' function directly via the \code{guides} parameter to the \code{diff*}
#' methods.
#'
#' If you have classed objects with special patterns you can define your own
#' methods for them (see examples), though if your objects are S3 you will need
#' to use \code{\link{setOldClass}} as the \code{guides*} generics are S4.
#'
#' @note The mechanism for identifying guides will almost certainly change in
#' the future to allow for better handling of nested guides, so if you do
#' implement custom guideline methods do so with the understanding that they
#' will likely be deprecated in one of the future releases.
#'
#' @aliases guidesPrint, guidesStr, guidesChr, guidesDeparse
#' @rdname guides
#' @name guides
#' @param obj an R object
#' @param obj.as.chr the character representation of \code{obj} that is used
#' for computing the diffs
#' @return integer containing values in \code{seq_along(obj.as.chr)}
#' @examples
#' ## Roundabout way of suppressing guides for matrices
#' setMethod("guidesPrint", c("matrix", "character"),
#' function(obj, obj.as.chr) integer(0L)
#' )
#' ## Special guides for "zulu" S3 objects that match lines
#' ## starting in "zulu###" where ### is a nuber
#' setOldClass("zulu")
#' setMethod("guidesPrint", c("zulu", "character"),
#' function(obj, obj.as.chr) {
#' if(length(obj) > 20) grep("^zulu[0-9]*", obj.as.chr)
#' else integer(0L)
#' } )
NULL
#' @export
#' @rdname guides
setGeneric(
"guidesPrint",
function(obj, obj.as.chr) standardGeneric("guidesPrint")
)
#' @rdname guides
setMethod(
"guidesPrint", c("ANY", "character"),
function(obj, obj.as.chr) {
if(anyNA(obj.as.chr))
stop("Cannot compute guides if `obj.as.chr` contains NAs")
if(is.matrix(obj)) {
detect_matrix_guides(obj.as.chr, dimnames(obj))
} else if(
length(dim(obj)) == 2L ||
(is.ts(obj) && frequency(obj) > 1)
) {
detect_2d_guides(obj.as.chr)
} else if (is.array(obj)) {
detect_array_guides(obj.as.chr, dimnames(obj))
} else if (is.list(obj)) {
detect_list_guides(obj.as.chr)
} else if (isS4(obj)) {
detect_s4_guides(obj.as.chr, obj)
} else integer(0L)
}
)
#' @export
#' @rdname guides
setGeneric(
"guidesStr",
function(obj, obj.as.chr) standardGeneric("guidesStr")
)
#' @rdname guides
setMethod("guidesStr", c("ANY", "character"),
function(obj, obj.as.chr) {
if(anyNA(obj.as.chr))
stop("Cannot compute guides if `obj.as.chr` contains NAs")
starts.w.dollar <- grepl("^ \\$", obj.as.chr)
which(starts.w.dollar & !c(tail(starts.w.dollar, -1L), FALSE))
} )
#' @export
#' @rdname guides
setGeneric(
"guidesChr",
function(obj, obj.as.chr) standardGeneric("guidesChr")
)
#' @rdname guides
setMethod("guidesChr", c("ANY", "character"),
function(obj, obj.as.chr) integer(0L)
)
#' @export
#' @rdname guides
setGeneric(
"guidesDeparse",
function(obj, obj.as.chr) standardGeneric("guidesDeparse")
)
#' @rdname guides
setMethod("guidesDeparse", c("ANY", "character"),
function(obj, obj.as.chr) integer(0L)
)
#' @export
#' @rdname guides
setGeneric(
"guidesFile",
function(obj, obj.as.chr) standardGeneric("guidesFile")
)
#' @rdname guides
setMethod("guidesFile", c("ANY", "character"),
function(obj, obj.as.chr) integer(0L)
)
# Helper function to verify guide line computation worked out
apply_guides <- function(obj, obj.as.chr, guide_fun) {
guide <- try(guide_fun(obj, obj.as.chr))
msg.extra <- paste0(
"If you did not specify a `guides` function or define custom `guides*` ",
"methods contact maintainer (see `?guides`). Proceeding without guides."
)
if(inherits(guide, "try-error")) {
warning(
"`guides*` method produced an error when attempting to compute guide ",
"lines ; ", msg.extra
)
guide <- integer()
}
if(
!is.integer(guide) || anyNA(guide) || anyDuplicated(guide) ||
!all(guide %in% seq_along(obj.as.chr))
)
stop(
"`guides*` method must produce an integer vector containing unique ",
"index values for the `obj.as.chr` vector; ", msg.extra
)
guide
}
make_guides <- function(target, tar.capt, current, cur.capt, guide_fun) {
tar.guides <- apply_guides(target, tar.capt, guide_fun)
cur.guides <- apply_guides(current, cur.capt, guide_fun)
GuideLines(target=tar.guides, current=cur.guides)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.