#' @describeIn INWTLinters Are there double whitespaces?
#' @export
double_space_linter <- function() {
# X-Path logic to find the deepest node with in-text double spaces. Either
# COMMENT or STR_CONST.
comment_string_xpath <- "//descendant::*[contains(., ' ') and not(descendant::*[contains(., ' ')])]"
# Same logic for non-in-text double spaces (in remaining script content).
code_xpath <- "//descendant::*[not(descendant::*) and @end and not(ancestor-or-self::COMMENT)]"
Linter(function(source_expression) {
if (!is_lint_level(source_expression, level = "file")) {
return(list())
}
# Set the correct format of the xml-parsed script.
xml <- source_expression$full_xml_parsed_content
xml_df <- source_expression$full_parsed_content
# Type 1: Lints in strings and comments
comment_string_nodes <- xml_find_all(xml, comment_string_xpath)
comment_string_nodes_filt <- extract_double_space_nodes(comment_string_nodes)
adjust_lint_location(xml_df, comment_string_nodes_filt)
# The `range_start_xpath` is altered such that we correct for the loc_diff
comment_string_lints <- xml_nodes_to_lints(
comment_string_nodes_filt,
source_expression = source_expression,
lint_message = "Inappropriate spacing between elements in non-code content detected.",
type = "style",
range_start_xpath = "number(./@lint_loc) + number(./@loc_diff)",
range_end_xpath = "number(./@col2)"
)
# Type 2: Lints in code
code_nodes <- xml_find_all(xml, code_xpath)
code_lines <- xml_attr(code_nodes, "line1") # code lines with same line number
unique_lines <- unique(code_lines)
# Nodes have already been converted to lints in `get_code_lints()`.
code_lints <- get_code_lints(unique_lines, code_nodes, source_expression)
# Type 3: code and comment in same line
code_before_comment_nodes <- get_comment_nodes_with_prior_code(comment_string_nodes, code_nodes)
code_before_comment_lints <- xml_nodes_to_lints(
code_before_comment_nodes,
source_expression = source_expression,
lint_message = "Inappropriate spacing between elements in code-before-comment content detected.",
type = "style",
range_start_xpath = "number(./@col1) + 1",
range_end_xpath = "number(./@col2)"
)
# Put collected lints of all three types together.
lintlist <- c(comment_string_lints, code_lints, code_before_comment_lints)
})
}
#' @title Extract nodes with incorrect double spaces
#' @param comment_nodes Node list extracted with the XPath for Comments/Strings
#' @returns Node list of valid lints, so invalid double spaces
#' @description First collects the locations of the in-text double spaces in
#' a dataframe. Distinguishes between "#" and "#'" in the beginning.
extract_double_space_nodes <- function(comment_nodes) {
valid_nodes <- list()
for (node in comment_nodes) {
text <- xml_text(node)
double_spaces <- str_locate_all(text, "\\s{2,}")[[1]]
if (nrow(double_spaces) > 0) {
has_invalid_match <- FALSE
starts <- double_spaces[, "start"]
ends <- double_spaces[, "end"]
# This loop picks out the problematic double spaces and neglects the other
for (position in seq_along(starts)) {
start_pos <- starts[position]
end_pos <- ends[position]
char_before <- ifelse((start_pos > 1), substr(text, start_pos - 1, start_pos - 1), "")
char_after <- ifelse((end_pos < nchar(text)), substr(text, end_pos + 1, end_pos + 1), "")
# First condition to flag general invalid patterns. Double spaces after
# "#" could be an indented line which was commented out, whereas double
# spaces before "#" are often used in headers as a measure of style.
if (char_before != "#" && char_after != "#") {
has_invalid_match <- TRUE
}
# Second condition to handle exceptions for "#'". This refers to roxygen2
# type documentation, where we use "#'" in the beginning. Using only a
# single position character as before, "' CHAR" would be categorized
# a lint, which is not true.
# roxygen2 correction
if (substr(text, start_pos - 2, start_pos - 1) == "#'" && char_after != "#") {
has_invalid_match <- FALSE
}
# Rule out cases where line breaks produce double spaces
if (xml_attr(node, "line1") != xml_attr(node, "line2") && grepl("\n", xml_text(node))) {
has_invalid_match <- FALSE
}
if (has_invalid_match) {
# Stops at the first lint detection.
break
}
}
if (has_invalid_match) {
valid_nodes[[length(valid_nodes) + 1]] <- node
xml2::xml_set_attr(node, "lint_loc", as.character(start_pos))
# "lint_loc" is used in the `range_start_xpath` for `comment_string_lints`.
}
}
}
return(valid_nodes)
}
#' @title Correcting Lint Locations
#' @param df A dataframe of the full parsed content of the underlying script
#' @param valid_nodes A node list containing detected lints from comments/strings
#' @returns An node list extended by the loc_diff
#' @description The lint location must be corrected and the
#' following code seeks to add the amount of spaces that was overlooked
#' when interpreting a COMMENT with code in front of it (code is invisible).
adjust_lint_location <- function(df, valid_nodes) {
unique_lines <- unique(df[, "line1"])
for (line_number in unique_lines) {
line_data <- df[df[, "line1"] == line_number, ]
if (any(line_data[, "token"] %in% c("STR_CONST", "COMMENT"))) {
target_col1 <-
line_data[line_data[, "token"] %in% c("STR_CONST", "COMMENT"), "col1"][1]
# Subtracting by 1 gives us the amount of space before comments. It consists of
# code and white space.
diff <- target_col1 - 1
for (node in valid_nodes) {
if (as.integer(xml_attr(node, "line1")) == line_number) {
xml2::xml_set_attr(node, "loc_diff", as.character(diff))
}
}
}
}
}
#' @title Extract lints from code
#' @param unique_lines A vector of line numbers for each instance of written code
#' @param code_nodes Node list extracted with the XPath for code
#' @param source_expression Contains the XML parsed content. Needed for
#' transforming nodes to lints
#' @returns List of lints containing invalid double spaces
#' @description The function logic is to compare locations of nodes in the
#' same `line1` value, which stands for the line number of a node in a Script.
#' A node in code can be, for example, a comma, an assignment arrow or a number.
#' F.e., consider these three nodes: "(", "," and ")". If the first node ends at
#' position "4" and the second node starts at a position larger than "7", we
#' have at least a double space in between (position 5 + 6). The second node
#' `i+1` (the right part of a double space) is directly converted into a lint
#' list.
get_code_lints <- function(unique_lines, code_nodes, source_expression) {
# Extract attributes for each node
code_lints <- list()
code_lines <- xml_attr(code_nodes, "line1") # code lines with same line number
code_starts <- as.integer(xml_attr(code_nodes, "start"))
code_ends <- as.integer(xml_attr(code_nodes, "end"))
for (line in unique_lines) {
indices_on_line <- which(code_lines == line)
for (index in indices_on_line[-length(indices_on_line)]) {
current_end <- code_ends[index]
next_start <- code_starts[index + 1]
if (next_start - current_end > 2) {
lint <- xml_nodes_to_lints(
code_nodes[index + 1],
source_expression = source_expression,
lint_message = "Inappropriate spacing between elements in code content detected.",
type = "style",
range_start_xpath = "number(./@col1) - 1",
range_end_xpath = "number(./@col2)"
)
code_lints <- c(code_lints, lint)
}
}
}
return(code_lints)
}
#' @title Extract lints between code and comments
#' @param comment_nodes Node list extracted with the XPath for comments/strings
#' @param code_nodes Node list extracted with the XPath for code
#' @returns Node List containing lints between code and comments
#' @description We need to account for special cases like
#' "x <- 3 # Hello world", where both double spaces are invalid and
#' should therefore be detected as lint. This function first pairs nodes
#' that have code and text on the same line and then compares the amount of
#' spaces
#' - between code and "#",
#' - between "#" and text.
get_comment_nodes_with_prior_code <- function(comment_nodes, code_nodes) {
valid_comments <- list()
for (node in comment_nodes) {
comment_node <- node
comment_line <- xml_attr(comment_node, "line1")
comment_start <- as.integer(xml_attr(comment_node, "start"))
comment_text <- xml_text(comment_node)
hash_pos <- regexpr("#", comment_text)
if (hash_pos != -1) { # Default if there is no "#" equals -1.
substring_after_hash <- substring(comment_text, hash_pos + 1)
space_count_after_hash <- str_count(substring_after_hash, "\\G\\s")
num_spaces_after_hash <- ifelse((space_count_after_hash > 0),
space_count_after_hash,
nchar(substring_after_hash)
)
code_on_same_line <- code_nodes[xml_attr(code_nodes, "line1") == comment_line]
if (length(code_on_same_line) > 0) {
code_ends <- as.integer(xml_attr(code_on_same_line, "end"))
valid_code_ends <- code_ends[code_ends < comment_start]
if (length(valid_code_ends) > 0) {
last_code_end <- max(valid_code_ends)
# The node is written in a node list if at least one case is
# met ...
space_between_code_and_comment <- comment_start - last_code_end - 1
if (space_between_code_and_comment > 1 ||
num_spaces_after_hash > 1) {
valid_comments[[length(valid_comments) + 1]] <- comment_node
}
}
}
}
}
return(valid_comments)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.