Nothing
# Copyright (C) 2021 Brodie Gaslam
#
# This file is part of "diffobj - Diffs for R Objects"
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
#' Diffs for R Objects
#'
#' Generate a colorized diff of two R objects for an intuitive visualization of
#' their differences. See `vignette(package="diffobj", "diffobj")` for details.
#'
#' @import crayon
#' @import methods
#' @importFrom utils capture.output file_test packageVersion read.csv
#' @importFrom stats ave frequency is.ts setNames
#' @importFrom grDevices rgb
#' @name diffobj-package
#' @docType package
NULL
# Because all these functions are so similar, we have constructed them with a
# function factory. This allows us to easily maintain consistent formals during
# initial development process when they have not been set in stone yet.
make_diff_fun <- function(capt_fun) {
# nocov start
function(
target, current,
mode=gdo("mode"),
context=gdo("context"),
format=gdo("format"),
brightness=gdo("brightness"),
color.mode=gdo("color.mode"),
word.diff=gdo("word.diff"),
pager=gdo("pager"),
guides=gdo("guides"),
trim=gdo("trim"),
rds=gdo("rds"),
unwrap.atomic=gdo("unwrap.atomic"),
max.diffs=gdo("max.diffs"),
disp.width=gdo("disp.width"),
ignore.white.space=gdo("ignore.white.space"),
convert.hz.white.space=gdo("convert.hz.white.space"),
tab.stops=gdo("tab.stops"),
line.limit=gdo("line.limit"),
hunk.limit=gdo("hunk.limit"),
align=gdo("align"),
style=gdo("style"),
palette.of.styles=gdo("palette"),
frame=par_frame(),
interactive=gdo("interactive"),
term.colors=gdo("term.colors"),
tar.banner=NULL,
cur.banner=NULL,
strip.sgr=gdo("strip.sgr"),
sgr.supported=gdo("sgr.supported"),
extra=list()
) {
# nocov end
frame # force frame so that `par_frame` called in this context
call.dat <- extract_call(sys.calls(), frame)
target # force target/current so if one missing we get an error here
current # and not later
# Check args and evaluate all the auto-selection arguments
etc.proc <- check_args(
call=call.dat$call, tar.exp=call.dat$tar, cur.exp=call.dat$cur,
mode=mode, context=context, line.limit=line.limit, format=format,
brightness=brightness, color.mode=color.mode, pager=pager,
ignore.white.space=ignore.white.space, max.diffs=max.diffs,
align=align, disp.width=disp.width,
hunk.limit=hunk.limit, convert.hz.white.space=convert.hz.white.space,
tab.stops=tab.stops, style=style, palette.of.styles=palette.of.styles,
frame=frame, tar.banner=tar.banner, cur.banner=cur.banner, guides=guides,
rds=rds, trim=trim, word.diff=word.diff, unwrap.atomic=unwrap.atomic,
extra=extra, interactive=interactive, term.colors=term.colors,
strip.sgr=strip.sgr, sgr.supported=sgr.supported,
call.match=match.call()
)
# If in rds mode, try to see if either target or current reference an RDS
if(rds) {
target <- get_rds(target)
current <- get_rds(current)
}
# Force crayon to whatever ansi status we chose; note we must do this after
# touching vars in case someone passes `options(crayon.enabled=...)` as one
# of the arguments
# old.crayon.opt <- options(
# crayon.enabled=
# is(etc.proc@style, "StyleAnsi") ||
# (!is(etc.proc@style, "StyleHtml") && etc.proc@sgr.supported)
# )
# on.exit(options(old.crayon.opt), add=TRUE)
err <- make_err_fun(sys.call())
# Compute gutter values so that we know correct widths to use for capture,
# etc. If not a base text type style, assume gutter and column padding are
# zero even though that may not always be correct
etc.proc@gutter <- gutter_dat(etc.proc)
col.pad.width <-
nchar2(etc.proc@style@text@pad.col, sgr.supported=etc.proc@sgr.supported)
gutt.width <- etc.proc@gutter@width
half.width <- as.integer((etc.proc@disp.width - col.pad.width) / 2)
etc.proc@line.width <-
max(etc.proc@disp.width, .min.width + gutt.width)
etc.proc@text.width <- etc.proc@line.width - gutt.width
etc.proc@line.width.half <- max(half.width, .min.width + gutt.width)
etc.proc@text.width.half <- etc.proc@line.width.half - gutt.width
# If in side by side mode already then we know we want half-width, and if
# width is less than 80 we know we want unitfied
if(etc.proc@mode == "auto" && etc.proc@disp.width < 80L)
etc.proc@mode <- "unified"
if(etc.proc@mode == "sidebyside") etc.proc <- sideBySide(etc.proc)
# Capture and diff
diff <- capt_fun(target, current, etc=etc.proc, err=err, extra)
diff
}
}
#' Diff \code{print}ed Objects
#'
#' Runs the diff between the \code{print} or \code{show} output produced by
#' \code{target} and \code{current}. Given the extensive parameter list, this
#' documentation page is intended as a reference for all the \code{diff*}
#' methods. For a high level introduction see \code{vignette("diffobj")}.
#'
#' Almost all aspects of how the diffs are computed and displayed are
#' controllable through the \code{diff*} methods parameters. This results in a
#' lengthy parameter list, but in practice you should rarely need to adjust
#' anything past the \code{color.mode} parameter. Default values are specified
#' as options so that users may configure diffs in a persistent manner.
#' \code{\link{gdo}} is a shorthand function to access \code{diffobj} options.
#'
#' Parameter order after \code{color.mode} is not guaranteed. Future versions
#' of \code{diffobj} may add parameters and re-order existing parameters past
#' \code{color.mode}.
#'
#' This and other \code{diff*} functions are S4 generics that dispatch on the
#' \code{target} and \code{current} parameters. Methods with signature
#' \code{c("ANY", "ANY")} are defined and act as the default methods. You can
#' use this to set up methods to pre-process or set specific parameters for
#' selected classes that can then \code{callNextMethod} for the actual diff.
#' Note that while the generics include \code{...} as an argument, none of the
#' methods do.
#'
#' Strings are re-encoded to UTF-8 with \code{\link{enc2utf8}} prior to
#' comparison to avoid encoding-only differences.
#'
#' The text representation of `target` and `current` should each have no more
#' than ~INT_MAX/4 lines.
#'
#' @section Matrices and Data Frames:
#'
#' While \code{diffPrint} attempts to handle the default R behavior that wraps
#' wide tables, the results are often sub-optimal. A better approach is to set
#' the \code{disp.width} parameter to a large enough value such that wrapping is
#' not necessary, and a browser-based \code{pager}. In the future we will add
#' the capability to specify different capture widths and wrap widths so that
#' this is an option for terminal output (see
#' \href{https://github.com/brodieG/diffobj/issues/109}{issue 109}).
#'
#' One thing to keep in mind is that \code{diffPrint} is not designed to work
#' with very large data frames.
#'
#' @export
#' @seealso \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{diffDeparse}} to compare deparsed objects, \code{\link{ses}}
#' for a minimal and fast diff @param target the reference object
#' @param target the reference object
#' @param current the object being compared to \code{target}
#' @param mode character(1L), one of:
#' \itemize{
#' \item \dQuote{unified}: diff mode used by \code{git diff}
#' \item \dQuote{sidebyside}: line up the differences side by side
#' \item \dQuote{context}: show the target and current hunks in their
#' entirety; this mode takes up a lot of screen space but makes it easier
#' to see what the objects actually look like
#' \item \dQuote{auto}: default mode; pick one of the above, will favor
#' \dQuote{sidebyside} unless \code{getOption("width")} is less than 80,
#' or in \code{diffPrint} and objects are dimensioned and do not fit side
#' by side, or in \code{diffChr}, \code{diffDeparse}, \code{diffFile} and
#' output does not fit in side by side without wrapping
#' }
#' @param context integer(1L) how many lines of context are shown on either side
#' of differences (defaults to 2). Set to \code{-1L} to allow as many as
#' there are. Set to \dQuote{auto} to display as many as 10 lines or as few
#' as 1 depending on whether total screen lines fit within the number of lines
#' specified in \code{line.limit}. Alternatively pass the return value of
#' \code{\link{auto_context}} to fine tune the parameters of the auto context
#' calculation.
#' @param format character(1L), controls the diff output format, one of:
#' \itemize{
#' \item \dQuote{auto}: to select output format based on terminal
#' capabilities; will attempt to use one of the ANSI formats if they
#' appear to be supported, and if not or if you are in the Rstudio console
#' it will attempt to use HTML and browser output if in interactive mode.
#' \item \dQuote{raw}: plain text
#' \item \dQuote{ansi8}: color and format diffs using basic ANSI escape
#' sequences
#' \item \dQuote{ansi256}: like \dQuote{ansi8}, except using the full range
#' of ANSI formatting options
#' \item \dQuote{html}: color and format using HTML markup; the resulting
#' string is processed with \code{\link{enc2utf8}} when output as a full
#' web page (see docs for \code{html.output} under \code{\link{Style}}).
#' }
#' Defaults to \dQuote{auto}. See \code{palette.of.styles} for details
#' on customization, \code{\link{style}} for full control of output format.
#' See `pager` parameter for more discussion of Rstudio behavior.
#' @param brightness character, one of \dQuote{light}, \dQuote{dark},
#' \dQuote{neutral}, useful for adjusting color scheme to light or dark
#' terminals. \dQuote{neutral} by default. See \code{\link{PaletteOfStyles}}
#' for details and limitations. Advanced: you may specify brightness as a
#' function of \code{format}. For example, if you typically wish to use a
#' \dQuote{dark} color scheme, except for when in \dQuote{html} format when
#' you prefer the \dQuote{light} scheme, you may use
#' \code{c("dark", html="light")} as the value for this parameter. This is
#' particularly useful if \code{format} is set to \dQuote{auto} or if you
#' want to specify a default value for this parameter via options. Any names
#' you use should correspond to a \code{format}. You must have one unnamed
#' value which will be used as the default for all \code{format}s that are
#' not explicitly specified.
#' @param color.mode character, one of \dQuote{rgb} or \dQuote{yb}.
#' Defaults to \dQuote{yb}. \dQuote{yb} stands for \dQuote{Yellow-Blue} for
#' color schemes that rely primarily on those colors to style diffs.
#' Those colors can be easily distinguished by individuals with
#' limited red-green color sensitivity. See \code{\link{PaletteOfStyles}} for
#' details and limitations. Also offers the same advanced usage as the
#' \code{brightness} parameter.
#' @param word.diff TRUE (default) or FALSE, whether to run a secondary word
#' diff on the in-hunk differences. For atomic vectors setting this to
#' FALSE could make the diff \emph{slower} (see the \code{unwrap.atomic}
#' parameter). For other uses, particularly with \code{\link{diffChr}}
#' setting this to FALSE can substantially improve performance.
#' @param pager one of \dQuote{auto} (default), \dQuote{on},
#' \dQuote{off}, a \code{\link{Pager}} object, or a list; controls whether and
#' how a pager is used to display the diff output. If you require a
#' particular pager behavior you must use a \code{\link{Pager}}
#' object, or \dQuote{off} to turn off the pager. All other settings will
#' interact with other parameters such as \code{format}, \code{style}, as well
#' as with your system capabilities in order to select the pager expected to
#' be most useful.
#'
#' \dQuote{auto} and \dQuote{on} are the same, except that in non-interactive
#' mode \dQuote{auto} is equivalent to \dQuote{off}. \dQuote{off} will always
#' send output to the console. If \dQuote{on}, whether the output
#' actually gets routed to the pager depends on the pager \code{threshold}
#' setting (see \code{\link{Pager}}). The default behavior is to use the
#' pager associated with the \code{Style} object. The \code{Style} object is
#' itself is determined by the \code{format} or \code{style} parameters.
#'
#' Depending on your system configuration different styles and corresponding
#' pagers will get selected, unless you specify a \code{Pager} object
#' directly. On a system with a system pager that supports ANSI CSI SGR
#' colors, the pager will only trigger if the output is taller than one
#' window. If the system pager is not known to support ANSI colors then the
#' output will be sent as HTML to the IDE viewer if available or to the web
#' browser if not. Even though Rstudio now supports ANSI CSI SGR at the
#' console output is still formatted as HTML and sent to the IDE viewer.
#' Partly this is for continuity of behavior, but also because the default
#' Rstudio pager does not support ANSI CSI SGR, at least as of this writing.
#'
#' If \code{pager} is a list, then the same as with \dQuote{on}, except that
#' the \code{Pager} object associated with the selected \code{Style} object is
#' re-instantiated with the union of the list elements and the existing
#' settings of that \code{Pager}. The list should contain named elements that
#' correspond to the \code{\link{Pager}} instantiation parameters. The names
#' must be specified in full as partial parameter matching will not be carried
#' out because the pager is re-instantiated with \code{\link{new}}.
#'
#' See \code{\link{Pager}}, \code{\link{Style}}, and
#' \code{\link{PaletteOfStyles}} for more details and for instructions on how
#' to modify the default behavior.
#' @param guides TRUE (default), FALSE, or a function that accepts at least two
#' arguments and requires no more than two arguments. Guides
#' are additional context lines that are not strictly part of a hunk, but
#' provide important contextual data (e.g. column headers). If TRUE, the
#' context lines are shown in addition to the normal diff output, typically
#' in a different color to indicate they are not part of the hunk. If a
#' function, the function should accept as the first argument the object
#' being diffed, and the second the character representation of the object.
#' The function should return the indices of the elements of the
#' character representation that should be treated as guides. See
#' \code{\link{guides}} for more details.
#' @param trim TRUE (default), FALSE, or a function that accepts at least two
#' arguments and requires no more than two arguments. Function should compute
#' for each line in captured output what portion of those lines should be
#' diffed. By default, this is used to remove row meta data differences
#' (e.g. \code{[1,]}) so they alone do not show up as differences in the
#' diff. See \code{\link{trim}} for more details.
#' @param rds TRUE (default) or FALSE, if TRUE will check whether
#' \code{target} and/or \code{current} point to a file that can be read with
#' \code{\link{readRDS}} and if so, loads the R object contained in the file
#' and carries out the diff on the object instead of the original argument.
#' Currently there is no mechanism for specifying additional arguments to
#' \code{readRDS}
#' @param unwrap.atomic TRUE (default) or FALSE. Relevant primarily for
#' \code{diffPrint}, if TRUE, and \code{word.diff} is also TRUE, and both
#' \code{target} and \code{current} are \emph{unnamed} one-dimension atomics ,
#' the vectors are unwrapped and diffed element by element, and then
#' re-wrapped. Since \code{diffPrint} is fundamentally a line diff, the
#' re-wrapped lines are lined up in a manner that is as consistent as possible
#' with the unwrapped diff. Lines that contain the location of the word
#' differences will be paired up. Since the vectors may well be wrapped with
#' different periodicities this will result in lines that are paired up that
#' look like they should not be paired up, though the locations of the
#' differences should be. If is entirely possible that setting this parameter
#' to FALSE will result in a slower diff. This happens if two vectors are
#' actually fairly similar, but their line representations are not. For
#' example, in comparing \code{1:100} to \code{c(100, 1:99)}, there is really
#' only one difference at the \dQuote{word} level, but every screen line is
#' different. \code{diffChr} will also do the unwrapping if it is given a
#' character vector that contains output that looks like the atomic vectors
#' described above. This is a bug, but as the functionality could be useful
#' when diffing e.g. \code{capture.output} data, we now declare it a feature.
#' @param line.limit integer(2L) or integer(1L), if length 1 how many lines of
#' output to show, where \code{-1} means no limit. If length 2, the first
#' value indicates the threshold of screen lines to begin truncating output,
#' and the second the number of lines to truncate to, which should be fewer
#' than the threshold. Note that this parameter is implemented on a
#' best-efforts basis and should not be relied on to produce the exact
#' number of lines requested. In particular do not expect it to work well for
#' for values small enough that the banner portion of the diff would have to
#' be trimmed. If you want a specific number of lines use \code{[} or
#' \code{head} / \code{tail}. One advantage of \code{line.limit} over these
#' other options is that you can combine it with \code{context="auto"} and
#' auto \code{max.level} selection (the latter for \code{diffStr}), which
#' allows the diff to dynamically adjust to make best use of the available
#' display lines. \code{[}, \code{head}, and \code{tail} just subset the text
#' of the output.
#' @param hunk.limit integer(2L) or integer (1L), how many diff hunks to show.
#' Behaves similarly to \code{line.limit}. How many hunks are in a
#' particular diff is a function of how many differences, and also how much
#' \code{context} is used since context can cause two hunks to bleed into
#' each other and become one.
#' @param max.diffs integer(1L), number of \emph{differences} (default 50000L)
#' after which we abandon the \code{O(n^2)} diff algorithm in favor of a naive
#' \code{O(n)} one. Set to \code{-1L} to stick to the original algorithm up to
#' the maximum allowed (~INT_MAX/4).
#' @param disp.width integer(1L) number of display columns to take up; note that
#' in \dQuote{sidebyside} \code{mode} the effective display width is half this
#' number (set to 0L to use default widths which are \code{getOption("width")}
#' for normal styles and \code{80L} for HTML styles. Future versions of
#' \code{diffobj} may change this to larger values for two dimensional objects
#' for better diffs (see details).
#' @param ignore.white.space TRUE or FALSE, whether to consider differences in
#' horizontal whitespace (i.e. spaces and tabs) as differences (defaults to
#' TRUE).
#' @param convert.hz.white.space TRUE or FALSE, whether modify input strings
#' that contain tabs and carriage returns in such a way that they display as
#' they would \bold{with} those characters, but without using those
#' characters (defaults to TRUE). The conversion assumes that tab stops are
#' spaced evenly eight characters apart on the terminal. If this is not the
#' case you may specify the tab stops explicitly with \code{tab.stops}.
#' @param tab.stops integer, what tab stops to use when converting hard tabs to
#' spaces. If not integer will be coerced to integer (defaults to 8L). You
#' may specify more than one tab stop. If display width exceeds that
#' addressable by your tab stops the last tab stop will be repeated.
#' @param align numeric(1L) between 0 and 1, proportion of
#' words in a line of \code{target} that must be matched in a line of
#' \code{current} in the same hunk for those lines to be paired up when
#' displayed (defaults to 0.25), or an \code{\link{AlignThreshold}} object.
#' Set to \code{1} to turn off alignment which will cause all lines in a hunk
#' from \code{target} to show up first, followed by all lines from
#' \code{current}. Note that in order to be aligned lines must meet the
#' threshold and have at least 3 matching alphanumeric characters (see
#' \code{\link{AlignThreshold}} for details).
#' @param style \dQuote{auto}, a \code{\link{Style}} object, or a list.
#' \dQuote{auto} by default. If a \code{Style} object, will override the
#' the \code{format}, \code{brightness}, and \code{color.mode} parameters.
#' The \code{Style} object provides full control of diff output styling.
#' If a list, then the same as \dQuote{auto}, except that if the auto-selected
#' \code{Style} requires instantiation (see \code{\link{PaletteOfStyles}}),
#' then the list contents will be used as arguments when instantiating the
#' style object. See \code{\link{Style}} for more details, in particular the
#' examples.
#' @param palette.of.styles \code{\link{PaletteOfStyles}} object; advanced
#' usage, contains all the \code{\link{Style}} objects or
#' \dQuote{classRepresentation} objects extending \code{\link{Style}} that are
#' selected by specifying the \code{format}, \code{brightness}, and
#' \code{color.mode} parameters. See \code{\link{PaletteOfStyles}} for more
#' details.
#' @param frame an environment to use as the evaluation frame for the
#' \code{print/show/str}, calls and for \code{diffObj}, the evaluation frame
#' for the \code{diffPrint} / \code{diffStr} calls. Defaults to the return
#' value of \code{\link{par_frame}}.
#' @param interactive TRUE or FALSE whether the function is being run in
#' interactive mode, defaults to the return value of
#' \code{\link{interactive}}. If in interactive mode, pager will be used if
#' \code{pager} is \dQuote{auto}, and if ANSI styles are not supported and
#' \code{style} is \dQuote{auto}, output will be send to viewer/browser as
#' HTML.
#' @param term.colors integer(1L) how many ANSI colors are supported by the
#' terminal. This variable is provided for when
#' \code{\link[=num_colors]{crayon::num_colors}} does not properly detect how
#' many ANSI colors are supported by your terminal. Defaults to return value
#' of \code{\link[=num_colors]{crayon::num_colors}} and should be 8 or 256 to
#' allow ANSI colors, or any other number to disallow them. This only
#' impacts output format selection when \code{style} and \code{format} are
#' both set to \dQuote{auto}.
#' @param tar.banner character(1L), language, or NULL, used to generate the
#' text to display ahead of the diff section representing the target output.
#' If NULL will use the deparsed \code{target} expression, if language, will
#' use the language as it would the \code{target} expression, if
#' character(1L), will use the string with no modifications. The language
#' mode is provided because \code{diffStr} modifies the expression prior to
#' display (e.g. by wrapping it in a call to \code{str}). Note that it is
#' possible in some cases that the substituted value of \code{target} actually
#' is character(1L), but if you provide a character(1L) value here it will be
#' assumed you intend to use that value literally.
#' @param cur.banner character(1L) like \code{tar.banner}, but for
#' \code{current}
#' @param strip.sgr TRUE, FALSE, or NULL (default), whether to strip ANSI CSI
#' SGR sequences prior to comparison and for display of diff. If NULL,
#' resolves to TRUE if `style` resolves to an ANSI formatted diff, and
#' FALSE otherwise. The default behavior is to avoid confusing diffs where
#' the original SGR and the SGR added by the diff are mixed together.
#' @param sgr.supported TRUE, FALSE, or NULL (default), whether to assume the
#' standard output device supports ANSI CSI SGR sequences. If TRUE, strings
#' will be manipulated accounting for the SGR sequences. If NULL,
#' resolves to TRUE if `style` resolves to an ANSI formatted diff, and
#' to `crayon::has_color()` otherwise. This only controls how the strings are
#' manipulated, not whether SGR is added to format the diff, which is
#' controlled by the `style` parameter. This parameter is exposed for the
#' rare cases where you might wish to control string manipulation behavior
#' directly.
#' @param extra list additional arguments to pass on to the functions used to
#' create text representation of the objects to diff (e.g. \code{print},
#' \code{str}, etc.)
#' @param ... unused, for compatibility of methods with generics
#' @return a \code{Diff} object; this object has a \code{show}
#' method that will display the diff to screen or pager, as well as
#' \code{summary}, \code{any}, and \code{as.character} methods.
#' If you store the return value instead of displaying it to screen, and
#' display it later, it is possible for the display to be thrown off if
#' there are environment changes (e.g. display width changes) in between
#' the time you compute the diff and the time you display it.
#' @rdname diffPrint
#' @name diffPrint
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffPrint(letters, letters[-5], pager="off")
setGeneric(
"diffPrint", function(target, current, ...) standardGeneric("diffPrint")
)
#' @rdname diffPrint
setMethod("diffPrint", signature=c("ANY", "ANY"), make_diff_fun(capt_print))
#' Diff Object Structures
#'
#' Compares the \code{str} output of \code{target} and \code{current}. If
#' the \code{max.level} parameter to \code{str} is left unspecified, will
#' attempt to find the largest \code{max.level} that fits within
#' \code{line.limit} and shows at least one difference.
#'
#' Due to the seemingly inconsistent nature of \code{max.level} when used with
#' objects with nested attributes, and also due to the relative slowness of
#' \code{str}, this function simulates the effect of \code{max.level} by hiding
#' nested lines instead of repeatedly calling \code{str} with varying values of
#' \code{max.level}.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{diffDeparse}} to compare deparsed objects,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @rdname diffStr
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' with(mtcars, diffStr(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))
setGeneric("diffStr", function(target, current, ...) standardGeneric("diffStr"))
#' @rdname diffStr
setMethod("diffStr", signature=c("ANY", "ANY"), make_diff_fun(capt_str))
#' Diff Character Vectors Element By Element
#'
#' Will perform the diff on the actual string values of the character vectors
#' instead of capturing the printed screen output. Each vector element is
#' treated as a line of text. NA elements are treated as the string
#' \dQuote{NA}. Non character inputs are coerced to character and attributes
#' are dropped with \code{\link{c}}.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffDeparse}} to compare deparsed objects,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffChr
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffChr(LETTERS[1:5], LETTERS[2:6], pager="off")
setGeneric("diffChr", function(target, current, ...) standardGeneric("diffChr"))
#' @rdname diffChr
setMethod("diffChr", signature=c("ANY", "ANY"), make_diff_fun(capt_chr))
#' Diff Deparsed Objects
#'
#' Perform diff on the character vectors produced by \code{\link{deparse}}ing
#' the objects. Each element counts as a line. If an element contains newlines
#' it will be split into elements new lines by the newlines.
#'
#' @export
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffDeparse
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffDeparse(matrix(1:9, 3), 1:9, pager="off")
setGeneric(
"diffDeparse", function(target, current, ...) standardGeneric("diffDeparse")
)
#' @rdname diffDeparse
setMethod("diffDeparse", signature=c("ANY", "ANY"), make_diff_fun(capt_deparse))
#' Diff Files
#'
#' Reads text files with \code{\link{readLines}} and performs a diff on the
#' resulting character vectors.
#'
#' @export
#' @param target character(1L) or file connection with read capability; if
#' character should point to a text file
#' @param current like \code{target}
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffFile
#' @examples
#' \dontrun{
#' url.base <- "https://raw.githubusercontent.com/wch/r-source"
#' f1 <- file.path(url.base, "29f013d1570e1df5dc047fb7ee304ff57c99ea68/README")
#' f2 <- file.path(url.base, "daf0b5f6c728bd3dbcd0a3c976a7be9beee731d9/README")
#' diffFile(f1, f2)
#' }
setGeneric(
"diffFile", function(target, current, ...) standardGeneric("diffFile")
)
#' @rdname diffFile
setMethod("diffFile", signature=c("ANY", "ANY"), make_diff_fun(capt_file))
#' Diff CSV Files
#'
#' Reads CSV files with \code{\link{read.csv}} and passes the resulting data
#' frames onto \code{\link{diffPrint}}. \code{extra} values are passed as
#' arguments are passed to both \code{read.csv} and \code{print}. To the
#' extent you wish to use different \code{extra} arguments for each of those
#' functions you will need to \code{read.csv} the files and pass them to
#' \code{diffPrint} yourself.
#'
#' @export
#' @param target character(1L) or file connection with read capability;
#' if character should point to a CSV file
#' @param current like \code{target}
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} functions,
#' \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @rdname diffCsv
#' @examples
#' iris.2 <- iris
#' iris.2$Sepal.Length[5] <- 99
#' f1 <- tempfile()
#' f2 <- tempfile()
#' write.csv(iris, f1, row.names=FALSE)
#' write.csv(iris.2, f2, row.names=FALSE)
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffCsv(f1, f2, pager="off")
#' unlink(c(f1, f2))
setGeneric(
"diffCsv", function(target, current, ...) standardGeneric("diffCsv")
)
#' @rdname diffCsv
setMethod("diffCsv", signature=c("ANY", "ANY"), make_diff_fun(capt_csv))
#' Diff Objects
#'
#' Compare either the \code{print}ed or \code{str} screen representation of
#' R objects depending on which is estimated to produce the most useful
#' diff. The selection process tries to minimize screen lines while maximizing
#' differences shown subject to display constraints. The decision algorithm is
#' likely to evolve over time, so do not rely on this function making
#' a particular selection under specific circumstances. Instead, use
#' \code{\link{diffPrint}} or \code{\link{diffStr}} if you require one or the
#' other output.
#'
#' @inheritParams diffPrint
#' @seealso \code{\link{diffPrint}} for details on the \code{diff*} methods,
#' \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly
#' \code{\link{diffDeparse}} to compare deparsed objects,
#' \code{\link{ses}} for a minimal and fast diff
#' @return a \code{Diff} object; see \code{\link{diffPrint}}.
#' @export
#' @examples
#' ## `pager="off"` for CRAN compliance; you may omit in normal use
#' diffObj(letters, c(letters[1:10], LETTERS[11:26]), pager="off")
#' with(mtcars, diffObj(lm(mpg ~ hp)$qr, lm(mpg ~ disp)$qr, pager="off"))
setGeneric("diffObj", function(target, current, ...) standardGeneric("diffObj"))
diff_obj <- make_diff_fun(identity) # we overwrite the body next
body(diff_obj) <- quote({
if(length(extra))
stop("Argument `extra` must be empty in `diffObj`.")
# frame # force frame so that `par_frame` called in this context
# Need to generate calls inside a new child environment so that we do not
# pollute the environment and create potential conflicts with ... args
# used to run this inside a `local` call, but issues cropped up with the
# advent of JIT, and can't recall why just storing arguments at first
# was a problem
args <- as.list(environment())
call.dat <- extract_call(sys.calls(), frame)
err <- make_err_fun(call.dat$call)
if(is.null(args$tar.banner)) args$tar.banner <- call("quote", call.dat$tar)
if(is.null(args$cur.banner)) args$cur.banner <- call("quote", call.dat$cur)
call.print <- as.call(c(list(quote(diffobj::diffPrint)), args))
call.str <- as.call(c(list(quote(diffobj::diffStr)), args))
call.str[["extra"]] <- list(max.level="auto")
res.print <- try(eval(call.print, frame), silent=TRUE)
res.str <- try(eval(call.str, frame), silent=TRUE)
if(inherits(res.str, "try-error"))
err(
"Error in calling `diffStr`: ",
conditionMessage(attr(res.str, "condition"))
)
if(inherits(res.print, "try-error"))
err(
"Error in calling `diffPrint`: ",
conditionMessage(attr(res.print, "condition"))
)
# Run both the print and str versions, and then decide which to use based
# on some weighting of various factors including how many lines needed to be
# omitted vs. how many differences were reported
diff.p <- count_diff_hunks(res.print@diffs)
diff.s <- count_diff_hunks(res.str@diffs)
diff.l.p <- diff_line_len(
res.print@diffs, res.print@etc, tar.capt=res.print@tar.dat$raw,
cur.capt=res.print@cur.dat$raw
)
diff.l.s <- diff_line_len(
res.str@diffs, res.str@etc, tar.capt=res.str@tar.dat$raw,
cur.capt=res.str@cur.dat$raw
)
# How many lines of the input are in the diffs, vs how many lines of input
diff.line.ratio.p <- lineCoverage(res.print)
diff.line.ratio.s <- lineCoverage(res.str)
# Only show the one with differences
res <- if(!diff.s && diff.p) {
res.print
} else if(!diff.p && diff.s) {
res.str
# If one fits in full and the other doesn't, show the one that fits in full
} else if(
!res.str@trim.dat$lines[[1L]] &&
res.print@trim.dat$lines[[1L]]
) {
res.str
} else if(
res.str@trim.dat$lines[[1L]] &&
!res.print@trim.dat$lines[[1L]]
) {
res.print
} else if (diff.l.p <= console_lines() / 2) {
# Always use print if print output is reasonable size
res.print
} else {
# Calculate the trade offs between the two options
s.score <- diff.s / diff.l.s * diff.line.ratio.s
p.score <- diff.p / diff.l.p * diff.line.ratio.p
if(p.score >= s.score) res.print else res.str
}
res
})
#' @export
setMethod("diffObj", signature=c("ANY", "ANY"), diff_obj)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.