OhdsiRTools: Tools Used by Observational Health Data Science and Informatics (OHDSI)

Documented in fixHadesLogo formatRFile formatRFolder formatRText

# @file OhdsiRTools.R
#
# Copyright 2022 Observational Health Data Sciences and Informatics
#
# This file is part of OhdsiRTools
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

.mySplit <- function(fullLine,
                     anchors = c("\\("),
                     separators = c(","),
                     offsets = c(-3),
                     width.cutoff) {
  bestSolution <- c(fullLine)
  minLength <- nchar(fullLine)
  minLines <- 1

  for (i in 1:length(anchors)) {
    anchor <- anchors[i]
    separator <- separators[i]
    offset <- offsets[i]
    potentialAnchors <- gregexpr(anchor, fullLine)
    if (potentialAnchors[[1]][1] != -1) {
      quotationMark <- regexpr("\"", fullLine)
      for (potentialAnchor in potentialAnchors[[1]]) {
        if (quotationMark[[1]][1] == -1 || quotationMark[[1]][1] > potentialAnchor) {
          indent <- potentialAnchor + 1
          newX <- c()
          depth <- 0
          quote <- FALSE
          start <- 1
          for (j in indent:nchar(fullLine)) {
            char <- substr(fullLine, j, j)
            if (char == "\"") {
              quote <- !quote
            } else if (!quote) {
              if (char == "(") {
                depth <- depth + 1
              } else if (char == ")") {
                if (depth == 0) {
                  break
                }
                depth <- depth - 1
              } else if (depth == 0 & char == separator) {
                part <- substr(fullLine, start, j)
                if (start != 1) {
                  part <- paste(paste(rep(" ", indent + offset), collapse = ""), part)
                }
                newX <- c(newX, part)
                start <- j + 1
              }
            }
          }
          part <- substr(fullLine, start, nchar(fullLine))
          if (start != 1) {
            part <- paste(paste(rep(" ", indent + offset), collapse = ""), part)
          }
          newX <- c(newX, part)
          maxLength <- 0
          for (x in newX) {
            if (nchar(x) > maxLength) {
              maxLength <- nchar(x)
            }
          }
          lines <- length(newX)
          better <- FALSE
          if (maxLength <= width.cutoff && minLength > width.cutoff) {
            better <- TRUE
          } else if (maxLength <= width.cutoff && minLength <= width.cutoff) {
            if (lines < minLines) {
              better <- TRUE
            }
          } else {
            if (maxLength < minLength) {
              better <- TRUE
            }
          }
          if (better) {
            bestSolution <- newX
            minLength <- maxLength
            minLines <- lines
          }
        }
      }
    }
  }
  return(bestSolution)
}

.reWrapLines <- function(x, width.cutoff) {
  # Unwrap lines:
  newX <- c()
  i <- 1
  while (i <= length(x)) {
    if (regexpr("[,+-] $", x[i]) != -1 && regexpr("^\\s*#", x[i]) == -1) {
      fullLine <- c(x[i])
      i <- i + 1
      while (regexpr("[,+-] $", x[i]) != -1) {
        fullLine <- c(fullLine, substr(x[i], regexpr("[^ ]", x[i]), nchar(x[i])))
        i <- i + 1
      }
      fullLine <- c(fullLine, substr(x[i], regexpr("[^ ]", x[i]), nchar(x[i])))
      fullLine <- paste(fullLine, collapse = "")
      newX <- c(newX, fullLine)
    } else {
      newX <- c(newX, x[i])
    }
    i <- i + 1
  }
  x <- newX

  # Rewrap lines:
  newX <- c()
  i <- 1
  while (i <= length(x)) {
    if (nchar(x[i]) > width.cutoff && regexpr("^\\s*#", x[i]) == -1) {
      newSplit <- .mySplit(x[i],
        anchors = c("<- ", "\\("),
        separators = c("+", ","),
        offsets = c(-1, -3),
        width.cutoff
      )
      for (j in 1:length(newSplit)) {
        if (nchar(newSplit[j]) <= width.cutoff) {
          newX <- c(newX, newSplit[j])
        } else {
          newerSplit <- .mySplit(newSplit[j],
            anchors = c("\\("),
            separators = c(","),
            offsets = c(-3),
            width.cutoff
          )
          newX <- c(newX, newerSplit)
        }
      }
    } else {
      newX <- c(newX, x[i])
    }
    i <- i + 1
  }
  return(newX)
}

.findStartOfBody <- function(text) {
  i <- 1
  while (i <= length(text)) {
    if (regexpr("^ *(#([^']|$)|$)", text[i]) == -1) {
      return(i)
    }
    i <- i + 1
  }
  return(-1)
}
.tidyExample <- function(text, width.cutoff) {
  start <- 1
  newText <- c()
  snippet <- c()
  inDontRun <- FALSE
  level <- 0
  for (i in 1:length(text)) {
    if (!inDontRun && regexpr("\\\\dont(run|test) *\\{", text[i]) != -1) {
      if (i > start) {
        dontrun <- regexpr("\\\\dont(run|test) *\\{", text[i])
        snippet <- c(snippet, text[start:(i - 1)])
        if (dontrun[[1]] > 1) {
          snippet <- c(snippet, substr(text[i], 1, dontrun[[1]] - 1))
        }
        newText <- c(newText, .myTidy(snippet, width.cutoff))
        snippet <- c(substr(text[i], dontrun[[1]] + attr(dontrun, "match.length"), nchar(text[i])))
        if (gsub("\\s", "", snippet[1]) == "") {
          snippet <- c()
        }
      }
      if (regexpr("\\\\dontrun *\\{", text[i]) != -1) {
        newText <- c(newText, "\\dontrun{")
      } else {
        newText <- c(newText, "\\donttest{")
      }
      start <- i + 1
      inDontRun <- TRUE
    } else if (inDontRun) {
      for (j in 1:nchar(text[i])) {
        if (substr(text[i], j, j) == "{") {
          level <- level + 1
        } else if (substr(text[i], j, j) == "}") {
          level <- level - 1
          if (level == -1) {
            snippet <- c(snippet, text[start:(i - 1)])
            if (j > 2) {
              snippet <- c(snippet, substr(text[i], 1, j - 1))
            }
            newText <- c(newText, .myTidy(snippet, width.cutoff), "}")
            snippet <- c(substr(text[i], j + 1, nchar(text[i])))
            if (gsub("\\s", "", snippet[1]) == "") {
              snippet <- c()
            }
            inDontRun <- FALSE
            start <- i + 1
            level <- 0
            break
          }
        }
      }
    }
  }
  if (length(text) >= start) {
    snippet <- c(snippet, text[start:length(text)])
  }
  if (length(snippet) != 0) {
    newText <- c(newText, .myTidy(snippet, width.cutoff))
  }
  return(newText)
}

.wrapRoxygenLine <- function(line, width.cutoff) {
  newText <- c()
  start <- 1
  i <- 1
  level <- 0
  itemize <- FALSE
  while (i <= nchar(line)) {
    if (!itemize && substr(line, i, i + nchar("\\itemize{") - 1) == "\\itemize{") {
      if (i > start) {
        newText <- c(newText, strwrap(substr(line, start, i - 1), width.cutoff))
      }
      newText <- c(newText, "\\itemize{")
      start <- i + nchar("\\itemize{")
      # start = start + regexpr('[^ ]', substr(line,start, nchar(line))) - 1
      i <- start
      level <- 0
      itemize <- TRUE
    }
    if (itemize) {
      if (substr(line, i, i + nchar("\\item") - 1) == "\\item") {
        if (i > start && regexpr("[^ ]", substr(line, start, i - 1)) != -1) {
          text <- strwrap(substr(line, start, i - 1), width.cutoff - nchar("  \\item"))
          text[1] <- paste("  \\item", text[1])
          if (length(text) > 1) {
            text[2:length(text)] <- paste(
              paste(rep(" ", nchar("  \\item")), collapse = ""),
              text[2:length(text)]
            )
          }
          newText <- c(newText, text)
        }
        start <- i + nchar("\\item")
        i <- start
      }
      if (substr(line, i, i) == "{") {
        level <- level + 1
      }
      if (substr(line, i, i) == "}") {
        level <- level - 1
        if (level == -1) {
          text <- strwrap(substr(line, start, i - 1), width.cutoff - nchar("  \\item"))
          text[1] <- paste("  \\item", text[1])
          if (length(text) > 1) {
            text[2:length(text)] <- paste(
              paste(rep(" ", nchar("  \\item")), collapse = ""),
              text[2:length(text)]
            )
          }
          newText <- c(newText, text)
          newText <- c(newText, "}")
          start <- i + 1
          itemize <- FALSE
        }
      }
    }
    i <- i + 1
  }
  if (i >= start) {
    newText <- c(newText, strwrap(substr(line, start, i - 1), width.cutoff))
  }
  return(newText)
}

.tidyRoxygenBlock <- function(text, width.cutoff) {
  # Remove #' and unwrap lines:
  newText <- c()
  line <- ""
  examples <- FALSE
  for (i in 1:length(text)) {
    chunk <- sub("^\\s*#'\\s*", "", text[i]) # Remove leading spaces and #'
    chunk <- sub("\\s*$", "", chunk) # Remove trailing spaces

    if (regexpr("^@|$)", chunk) != -1) {
      examples <- FALSE
    }

    if (chunk == "" || regexpr("^@|$)", chunk) != -1 || examples) {
      if (!(length(newText) == 0 && line == "")) {
        newText <- c(newText, line)
      }
      line <- ""
    }
    if (regexpr("^@examples", chunk) != -1) {
      examples <- TRUE
    }
    if (chunk != "") {
      line <- paste(line, chunk, " ", sep = "")
    }
  }
  newText <- c(newText, line)
  text <- newText

  # Put most keywords on their own line:
  newText <- c()
  for (i in 1:length(text)) {
    if ((regexpr(
      "^@",
      text[i]
    ) != -1) && (regexpr(
      "^(@param|@template|@export|@keyword|@docType|@importFrom|@import|@useDynLib|@name)",
      text[i]
    ) == -1)) {
      keyword <- regexpr("^@[a-zA-Z0-9]*", text[i])
      newText <- c(newText, substr(text[i], 1, attr(keyword, "match.length")))
      if (attr(keyword, "match.length") + 2 < nchar(text[i])) {
        newText <- c(newText, substr(text[i], attr(keyword, "match.length") + 2, nchar(text[i])))
      }
    } else {
      newText <- c(newText, text[i])
    }
  }
  text <- newText

  # Perform wrapping
  maxParamLength <- 0
  for (i in 1:length(text)) {
    keyword <- regexpr("^@param\\s+[a-zA-Z0-9.]+", text[i])
    if (attr(keyword, "match.length") > maxParamLength) {
      maxParamLength <- attr(keyword, "match.length")
    }
  }
  examples <- FALSE
  example <- c()
  newText <- c()
  for (i in 1:length(text)) {
    if (examples) {
      if (regexpr("^@", text[i]) == -1) {
        example <- c(example, text[i])
      } else {
        examples <- FALSE
        if (length(example) != 0) {
          example <- .tidyExample(example, width.cutoff = width.cutoff - 3)
          newText <- c(newText, example)
        }
        example <- c()
      }
    }
    if (!examples) {
      if (regexpr("^@param", text[i]) == -1) {
        newText <- c(newText, .wrapRoxygenLine(text[i], width.cutoff = width.cutoff))
      } else {
        param <- regexpr("^@param\\s+[a-zA-Z0-9._]+", text[i])
        definition <- regexpr("^@param\\s+[a-zA-Z0-9._]+\\s+", text[i])
        part1 <- substr(text[i], 1, attr(param, "match.length"))
        part2 <- substr(text[i], attr(definition, "match.length") + 1, nchar(text[i]))
        part2Wrapped <- .wrapRoxygenLine(part2, width.cutoff = width.cutoff - maxParamLength - 2)
        line1 <- paste(part1, paste(rep(" ", 3 + maxParamLength - attr(param, "match.length")),
          collapse = ""
        ), part2Wrapped[1], sep = "")
        newText <- c(newText, line1)
        if (length(part2Wrapped) > 1) {
          otherLines <- paste(
            paste(rep(" ", 2 + maxParamLength), collapse = ""),
            part2Wrapped[2:length(part2Wrapped)]
          )
          newText <- c(newText, otherLines)
        }
      }
      if (regexpr("^@examples", text[i]) != -1) {
        examples <- TRUE
      }
    }
  }
  if (length(example) != 0) {
    example <- .tidyExample(example, width.cutoff = width.cutoff - 3)
    newText <- c(newText, example)
  }
  text <- paste("#'", newText)
  return(text)
}

.roxygenTidy <- function(text, width.cutoff) {
  start <- -1
  toAdd <- 1
  newText <- c()
  i <- 1
  while (i <= length(text)) {
    if (regexpr("^ *#'", text[i]) != -1) {
      if (start == -1) {
        start <- i
      }
    } else {
      if (start != -1) {
        if (start > toAdd) {
          newText <- c(newText, text[toAdd:(start - 1)])
        }
        newText <- c(newText, .tidyRoxygenBlock(text[start:(i - 1)], width.cutoff = width.cutoff))
        toAdd <- i
      }
      start <- -1
    }
    i <- i + 1
  }
  if (length(text) >= toAdd) {
    if (start != -1) {
      newText <- c(
        newText,
        .tidyRoxygenBlock(text[start:length(text)], width.cutoff = width.cutoff)
      )
    } else {
      newText <- c(newText, text[toAdd:length(text)])
    }
  }
  return(newText)
}

.trimTrailingWhiteSpace <- function(text) {
  text <- sub("\\s+$", "", text)
  return(text)
}

.formatRblock <- function(text, width.cutoff) {
  formatROutput <- capture.output(formatR::tidy_source(
    text = text,
    width.cutoff = width.cutoff,
    arrow = TRUE,
    indent = 2,
    output = TRUE
  ))
  if (length(grep("^\\$text.tidy$", formatROutput)) == 0) {
    return(formatROutput)
  } else {
    return(text)
  }
}

.applyFormatR <- function(text, width.cutoff) {
  # Skip all roxygen lines, apply formatR to the rest. Reason: formatR changes the Roxygen blocks,
  # replacing double quotes with single quotes
  newText <- c()
  start <- 1
  for (i in 1:length(text)) {
    if (regexpr("^ *#'", text[i]) != -1) {
      if (i > start) {
        newText <- c(newText, .formatRblock(text[start:(i - 1)], width.cutoff))
      }
      newText <- c(newText, text[i])
      start <- i + 1
    }
  }
  if (length(text) >= start) {
    newText <- c(newText, .formatRblock(text[start:length(text)], width.cutoff))
  }
  return(newText)
}

.myTidy <- function(text, width.cutoff) {
  text <- gsub("\\t", "", text) # Remove all tabs
  text <- .applyFormatR(text, width.cutoff = width.cutoff)
  text <- .reWrapLines(text, width.cutoff = width.cutoff)
  text <- .roxygenTidy(text, width.cutoff = width.cutoff)
  text <- .trimTrailingWhiteSpace(text)
}

#' Format R code
#'
#' @param text           A character vector with the R code to be formatted.
#' @param width.cutoff   Number of characters that each line should be limited to.
#'
#' @details
#' DEPRECRATED. Please use \code{styler::style_text} instead.
#'
#' @return
#' A character vector with formatted R code.
#'
#' @export
formatRText <- function(text, width.cutoff = 100) {
  .Deprecated("styler::style_text")
  startOfBody <- .findStartOfBody(text)
  if (startOfBody == 1) {
    header <- c()
  } else {
    header <- text[1:(startOfBody - 1)]
  }
  body <- text[startOfBody:length(text)]
  body <- .myTidy(body, width.cutoff = width.cutoff)
  text <- c(header, body)
  return(text)
}

#' Format an R file
#'
#' @param file           The path to the file.
#' @param width.cutoff   Number of characters that each line should be limited to.
#'
#' @details
#' DEPRECRATED. Please use \code{styler::style_file} instead.
#'
#' @export
formatRFile <- function(file, width.cutoff = 100) {
  .Deprecated("styler::style_file")
  # Note: Github code window width is 130 characters, but 100 fits better on my laptop
  text <- readLines(file)
  text <- formatRText(text, width.cutoff)
  writeLines(text, con = file)
}

#' Format all R files in a folder
#'
#' @param path                Path to the folder containing the files to format. Only files with the .R
#'                            extension will be formatted.
#' @param recursive           Include all subfolders?
#' @param skipAutogenerated   Skip auto-generated files such as RcppExports.R?
#' @param ...                 Parameters to be passed on the the formatRFile function
#'
#' @details
#' DEPRECRATED. Please use \code{styler::style_dir} instead.
#'
#' @export
formatRFolder <- function(path = ".", recursive = TRUE, skipAutogenerated = TRUE, ...) {
  .Deprecated("styler::style_dir")
  flist <- list.files(path, pattern = "\\.[Rr]$", full.names = TRUE, recursive = recursive)
  for (f in flist) {
    if (skipAutogenerated && regexpr("RcppExports.R$", f) != -1) {
      message("Skipping ", f)
    } else {
      message("Auto code formatting ", f)
      formatRFile(f)
    }
  }
}

#' Fix HADES logo in pkgdown output
#'
#' @description
#' In all HTML files in the docs folder, each occurrence of 'hadesLogo' is replaced with an HTML image
#' tag referring to the HADES logo.
#'
#' @param path   Path to the root of the package for which the pkgdown output needs to be fixed.
#'
#' @return
#' This function returns nothing.
#'
#' @export
fixHadesLogo <- function(path = ".") {
  for (file in list.files(file.path(path, "docs"), ".html", recursive = TRUE, full.names = TRUE)) {
    text <- readChar(file, file.info(file)$size)
    text <- gsub(
      "hadesLogo",
      "<img src='https://ohdsi.github.io/Hades/images/hadesMini.png' width=80 height=17 style='vertical-align: top;'>",
      text
    )
    sink(file)
    cat(gsub("\r", "", text))
    sink()
  }
}
OHDSI/OhdsiRTools documentation built on June 7, 2022, 12:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
OHDSI/OhdsiRTools
Tools Used by Observational Health Data Science and Informatics (OHDSI)

R/CodeFormatting.R
In OHDSI/OhdsiRTools: Tools Used by Observational Health Data Science and Informatics (OHDSI)

Defines functions fixHadesLogo formatRFolder formatRFile formatRText .myTidy .applyFormatR .formatRblock .trimTrailingWhiteSpace .roxygenTidy .tidyRoxygenBlock .wrapRoxygenLine .tidyExample .findStartOfBody .reWrapLines .mySplit

Documented in fixHadesLogo formatRFile formatRFolder formatRText

R Package Documentation

Browse R Packages

We want your feedback!

OHDSI/OhdsiRTools Tools Used by Observational Health Data Science and Informatics (OHDSI)

R/CodeFormatting.R In OHDSI/OhdsiRTools: Tools Used by Observational Health Data Science and Informatics (OHDSI)

Defines functions fixHadesLogo formatRFolder formatRFile formatRText .myTidy .applyFormatR .formatRblock .trimTrailingWhiteSpace .roxygenTidy .tidyRoxygenBlock .wrapRoxygenLine .tidyExample .findStartOfBody .reWrapLines .mySplit

Documented in fixHadesLogo formatRFile formatRFolder formatRText

R Package Documentation

Browse R Packages

We want your feedback!

OHDSI/OhdsiRTools
Tools Used by Observational Health Data Science and Informatics (OHDSI)

R/CodeFormatting.R
In OHDSI/OhdsiRTools: Tools Used by Observational Health Data Science and Informatics (OHDSI)