#' @title INWT's own linters
#'
#' @name INWTLinters
#'
#' @description Linters added by INWT. Usually not called directly but used
#' with \code{\link[lintr]{lint}}.
#'
#' @examples \dontrun{
#' writeLines(con = "lintExample.txt",
#' # nolint start
#' text = c("# Example script to demonstrate INWT's own linters",
#' "",
#' "source(anotherScript.R)",
#' "",
#' "foo <- function(x = 1, y) {",
#' " 2 * x + 1 ",
#' "}",
#' "",
#' "# This line contains double spaces",
#' "",
#' "print(INWTUtils:::scriptLinters())"))
#' # nolint end
#' lintr::lint("lintExample.txt",
#' linters = list(argsWithoutDefault = function_argument_linter,
#' doubeWhitespace = double_space_linter,
#' sourceLinter = source_linter))
#' }
#'
NULL
#' @describeIn INWTLinters Are there double whitespaces?
#' @export
double_space_linter <- function() {
# X-Path logic to find the deepest node with in-text double spaces. Either
# COMMENT or STR_CONST.
comment_string_xpath <- "//descendant::*[contains(., ' ') and not(descendant::*[contains(., ' ')])]"
# Same logic for non-in-text double spaces (in remaining script content).
code_xpath <- "//descendant::*[not(descendant::*) and @end and not(ancestor-or-self::COMMENT)]"
Linter(function(source_expression) {
if (!is_lint_level(source_expression, level = "file")) {
return(list())
}
# Set the correct format of the xml-parsed script.
xml <- source_expression$full_xml_parsed_content
xml_df <- source_expression$full_parsed_content
# Type 1: Lints in strings and comments
comment_string_nodes <- xml_find_all(xml, comment_string_xpath)
comment_string_nodes_filt <- extract_double_space_nodes(comment_string_nodes)
adjust_lint_location(xml_df, comment_string_nodes_filt)
# The `range_start_xpath` is altered such that we correct for the loc_diff
comment_string_lints <- xml_nodes_to_lints(
comment_string_nodes_filt,
source_expression = source_expression,
lint_message = "Inappropriate spacing between elements in non-code content detected.",
type = "style",
range_start_xpath = "number(./@lint_loc) + number(./@loc_diff)",
range_end_xpath = "number(./@col2)"
)
# Type 2: Lints in code
code_nodes <- xml_find_all(xml, code_xpath)
code_lines <- xml_attr(code_nodes, "line1") # code lines with same line number
unique_lines <- unique(code_lines)
# Nodes have already been converted to lints in `get_code_lints()`.
code_lints <- get_code_lints(unique_lines, code_nodes, source_expression)
# Type 3: code and comment in same line
code_before_comment_nodes <- get_comment_nodes_with_prior_code(comment_string_nodes, code_nodes)
code_before_comment_lints <- xml_nodes_to_lints(
code_before_comment_nodes,
source_expression = source_expression,
lint_message = "Inappropriate spacing between elements in code-before-comment content detected.",
type = "style",
range_start_xpath = "number(./@col1) + 1",
range_end_xpath = "number(./@col2)"
)
# Put collected lints of all three types together.
lintlist <- c(comment_string_lints, code_lints, code_before_comment_lints)
})
}
#' @title Extract nodes with incorrect double spaces
#' @param comment_nodes Node list extracted with the XPath for Comments/Strings
#' @returns Node list of valid lints, so invalid double spaces
#' @description First collects the locations of the in-text double spaces in
#' a dataframe. Distinguishes between "#" and "#'" in the beginning.
extract_double_space_nodes <- function(comment_nodes) {
valid_nodes <- list()
for (node in comment_nodes) {
text <- xml_text(node)
double_spaces <- str_locate_all(text, "\\s{2,}")[[1]]
if (nrow(double_spaces) > 0) {
has_invalid_match <- FALSE
starts <- double_spaces[, "start"]
ends <- double_spaces[, "end"]
# This loop picks out the problematic double spaces and neglects the other
for (position in seq_along(starts)) {
start_pos <- starts[position]
end_pos <- ends[position]
char_before <- ifelse((start_pos > 1), substr(text, start_pos - 1, start_pos - 1), "")
char_after <- ifelse((end_pos < nchar(text)), substr(text, end_pos + 1, end_pos + 1), "")
# First condition to flag general invalid patterns. Double spaces after
# "#" could be an indented line which was commented out, whereas double
# spaces before "#" are often used in headers as a measure of style.
if (char_before != "#" && char_after != "#") {
has_invalid_match <- TRUE
}
# Second condition to handle exceptions for "#'". This refers to roxygen2
# type documentation, where we use "#'" in the beginning. Using only a
# single position character as before, "' CHAR" would be categorized
# a lint, which is not true.
# roxygen2 correction
if (substr(text, start_pos - 2, start_pos - 1) == "#'" && char_after != "#") {
has_invalid_match <- FALSE
}
# Rule out cases where line breaks produce double spaces
if (xml_attr(node, "line1") != xml_attr(node, "line2") && grepl("\n", xml_text(node))) {
has_invalid_match <- FALSE
}
if (has_invalid_match) {
# Stops at the first lint detection.
break
}
}
if (has_invalid_match) {
valid_nodes[[length(valid_nodes) + 1]] <- node
xml2::xml_set_attr(node, "lint_loc", as.character(start_pos))
# "lint_loc" is used in the `range_start_xpath` for `comment_string_lints`.
}
}
}
return(valid_nodes)
}
#' @title Correcting Lint Locations
#' @param df A dataframe of the full parsed content of the underlying script
#' @param valid_nodes A node list containing detected lints from comments/strings
#' @returns An node list extended by the loc_diff
#' @description The lint location must be corrected and the
#' following code seeks to add the amount of spaces that was overlooked
#' when interpreting a COMMENT with code in front of it (code is invisible).
adjust_lint_location <- function(df, valid_nodes) {
unique_lines <- unique(df[, "line1"])
for (line_number in unique_lines) {
line_data <- df[df[, "line1"] == line_number, ]
if (any(line_data[, "token"] %in% c("STR_CONST", "COMMENT"))) {
target_col1 <-
line_data[line_data[, "token"] %in% c("STR_CONST", "COMMENT"), "col1"][1]
# Subtracting by 1 gives us the amount of space before comments. It consists of
# code and white space.
diff <- target_col1 - 1
for (node in valid_nodes) {
if (as.integer(xml_attr(node, "line1")) == line_number) {
xml2::xml_set_attr(node, "loc_diff", as.character(diff))
}
}
}
}
}
#' @title Extract lints from code
#' @param unique_lines A vector of line numbers for each instance of written code
#' @param code_nodes Node list extracted with the XPath for code
#' @param source_expression Contains the XML parsed content. Needed for
#' transforming nodes to lints
#' @returns List of lints containing invalid double spaces
#' @description The function logic is to compare locations of nodes in the
#' same `line1` value, which stands for the line number of a node in a Script.
#' A node in code can be, for example, a comma, an assignment arrow or a number.
#' F.e., consider these three nodes: "(", "," and ")". If the first node ends at
#' position "4" and the second node starts at a position larger than "7", we
#' have at least a double space in between (position 5 + 6). The second node
#' `i+1` (the right part of a double space) is directly converted into a lint
#' list.
get_code_lints <- function(unique_lines, code_nodes, source_expression) {
# Extract attributes for each node
code_lints <- list()
code_lines <- xml_attr(code_nodes, "line1") # code lines with same line number
code_starts <- as.integer(xml_attr(code_nodes, "start"))
code_ends <- as.integer(xml_attr(code_nodes, "end"))
for (line in unique_lines) {
indices_on_line <- which(code_lines == line)
for (index in indices_on_line[-length(indices_on_line)]) {
current_end <- code_ends[index]
next_start <- code_starts[index + 1]
if (next_start - current_end > 2) {
lint <- xml_nodes_to_lints(
code_nodes[index + 1],
source_expression = source_expression,
lint_message = "Inappropriate spacing between elements in code content detected.",
type = "style",
range_start_xpath = "number(./@col1) - 1",
range_end_xpath = "number(./@col2)"
)
code_lints <- c(code_lints, lint)
}
}
}
return(code_lints)
}
#' @title Extract lints between code and comments
#' @param comment_nodes Node list extracted with the XPath for comments/strings
#' @param code_nodes Node list extracted with the XPath for code
#' @returns Node List containing lints between code and comments
#' @description We need to account for special cases like
#' "x <- 3 # Hello world", where both double spaces are invalid and
#' should therefore be detected as lint. This function first pairs nodes
#' that have code and text on the same line and then compares the amount of
#' spaces
#' - between code and "#",
#' - between "#" and text.
get_comment_nodes_with_prior_code <- function(comment_nodes, code_nodes) {
valid_comments <- list()
for (node in comment_nodes) {
comment_node <- node
comment_line <- xml_attr(comment_node, "line1")
comment_start <- as.integer(xml_attr(comment_node, "start"))
comment_text <- xml_text(comment_node)
hash_pos <- regexpr("#", comment_text)
if (hash_pos != -1) { # Default if there is no "#" equals -1.
substring_after_hash <- substring(comment_text, hash_pos + 1)
space_count_after_hash <- str_count(substring_after_hash, "\\G\\s")
num_spaces_after_hash <- ifelse((space_count_after_hash > 0),
space_count_after_hash,
nchar(substring_after_hash)
)
code_on_same_line <- code_nodes[xml_attr(code_nodes, "line1") == comment_line]
if (length(code_on_same_line) > 0) {
code_ends <- as.integer(xml_attr(code_on_same_line, "end"))
valid_code_ends <- code_ends[code_ends < comment_start]
if (length(valid_code_ends) > 0) {
last_code_end <- max(valid_code_ends)
# The node is written in a node list if at least one case is
# met ...
space_between_code_and_comment <- comment_start - last_code_end - 1
if (space_between_code_and_comment > 1 ||
num_spaces_after_hash > 1) {
valid_comments[[length(valid_comments) + 1]] <- comment_node
}
}
}
}
}
return(valid_comments)
}
#' @describeIn INWTLinters Internal functions should not be used since there is
#' in general a reason why they have not been exported by the package author.
#' They may not have been tested outside the context of the function they are
#' used in.
#' @export
internal_function_linter <- function() {
Linter(function(source_file) {
# nolint start
ids <- grep(paste0(exceptInComments(), ":::"), source_file$file_lines)
# nolint end
lapply(ids, function(id) {
Lint(filename = source_file$filename,
line_number = id,
column_number = 1L,
type = "style",
# nolint start
message = "Internal functions (addressed via :::) should not be used.")
# nolint end
})},
"internal_function_linter")
}
#' @describeIn INWTLinters Changing the working directory in package functions
#' can have unexpected side effects. (only for package functions)
#' @export
setwd_linter <- function() {
Linter(function(source_file) {
ids <- grep(paste0(exceptInComments(), "setwd\\("), source_file$file_lines)
# Starting with arbitrary number of characters which are NOT # or quotes (to
# exclude comments and quoted text)
# String "setwd("
lapply(ids, function(id) {
Lint(filename = source_file$filename,
line_number = id,
column_number = 1L,
type = "style",
message = "Avoid side effects caused by setwd.")
})},
"setwd_linter")
}
#' @describeIn INWTLinters Sourcing files in package functions can have
#' unexpected side effects. (only for package functions)
#' @export
source_linter <- function() {
Linter(function(source_file) {
ids <- grep(paste0(exceptInComments(), "source\\("), source_file$file_lines)
# Starting with arbitrary number of characters which are NOT # or quotes (to
# exclude comments and quoted text)
# String "source("
lapply(ids, function(id) {
Lint(filename = source_file$filename,
line_number = id,
column_number = 1L,
type = "style",
message = "Don't use source in package functions.")
})},
"source_linter")
}
#' @describeIn INWTLinters Changing options in package functions can have
#' unexpected side effects and is not visible from the outside. (only for
#' package functions)
#' @export
options_linter <- function() {
Linter(function(source_file) {
ids <- grep(paste0(exceptInComments(), "options\\("), source_file$file_lines)
# Starting with arbitrary number of characters which are NOT # or quotes (to
# exclude comments and quoted text)
# String "options("
lapply(ids, function(id) {
Lint(filename = source_file$filename,
line_number = id,
column_number = 1L,
type = "style",
message = "Don't use options() in package functions.")
})},
"options_linter")
}
#' @describeIn INWTLinters The automatic simplification performed by
#' \code{\link[base]{sapply}} introduces uncertainty. If the input changes, the
#' output can change unexpectedly and the code crashes. Replace it with
#' \code{\link[base]{sapply}}. If you use \code{sapply} with
#' \code{simplify = FALSE}, it is equivalent to \code{lapply} anyway.
#' @export
sapply_linter <- function() {
Linter(function(source_file) {
ids <- grep(paste0(exceptInComments(), "sapply\\("), source_file$file_lines)
lapply(ids, function(id) {
Lint(filename = source_file$filename,
line_number = id,
column_number = 1L,
type = "style",
message = paste("Don't use sapply. It can simplify the output in an",
"unexpected way. Choose lapply."))
})},
"sapply_linter")
}
#### trailing whitespaces existiert im lintr repo und kann importiert werden,
#### wenn es die funktion erfüllt
#' @describeIn INWTLinters Trailing whitespaces are superfluos. In contrast to
#' \code{\link[lintr]{trailing_whitespace_linter}}, this function detects
#' whitespaces after \code{\link[dplyr]{\%>\%}} only if there are at least two
#' (since one whitespace is inserted automatically after
#' \code{\link[dplyr]{\%>\%}}).
#' @export
trailing_whitespaces_linter <- function() {
Linter(function(source_file) {
ids <- grep("([^(%>%)(#') ] +$)|(%>% {2,})|(#' {2,})$", source_file$file_lines)
# Space at the end of string
# Or: two spaces after pipe operator (at the end of the string)
lapply(ids, function(id) {
Lint(filename = source_file$filename,
line_number = id,
column_number = 1L,
type = "style",
message = "Trailing whitespaces.")
})},
"trailing_whitespaces_linter")
}
#' First part of a regular expression to exclude the pattern if it appears in
#' a comment
#' @description Must be pasted together with the actual pattern that should be
#' found only outside comment (and roxygen comments).
exceptInComments <- function() {
"^[^#\'\"]*"
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.