#===============================================================================
#
# generate_function_documentation.R
#
#===============================================================================
# Library references:
# - Uses gtools::odd()
# - Uses sourcetools::tokenize_string()
# Part of the mainline was derived by reading:
# From: http://www.numbertheory.nl/2013/03/24/parsing-complex-text-files-using-regular-expressions-and-vectorization/
#===============================================================================
#' Report the token type that begins a line
#'
#' Runs the sourcetools tokenizer over the full line and hands back only the
#' type of the first token it produced. The leading token is what lets the
#' parser tell apart a block marker, the start of a function or variable,
#' and a continuation line.
#'
#' The entire line is tokenized even though only its first token is used.
#'
#' @inheritParams decode_var_first_line_of_desc
#'
#' @return character string naming the type of the line's first token,
#'     e.g., "whitespace" or "symbol"
get_first_token_type_of_line <- function (data_line)
    {
    tokens_on_line <- sourcetools::tokenize_string (data_line)

        #  Row 1 of the token table describes the first token on the line.
    tokens_on_line [1, "type"]
    }
#===============================================================================
#' Write closing brackets for previous subsection if open brackets remain
#'
#' Variables are written out as subsections, which means their output starts
#' with two opening brackets. When the previous line belonged to a variable
#' description (first line or continuation line), those brackets are still
#' open and this function closes them by writing "}}" on a new roxygen
#' comment line. For any other previous state nothing is written.
#'
#' @inheritParams decode_var_first_line_of_desc
#'
#' @return Returns nothing (invisible NULL)
close_open_subsection_if_necessary <- function (prev_state)
    {
        #  The only states that leave a variable subsection open.
    states_with_open_subsection <- c ("state__var_first_line_of_desc",
                                      "state__var_desc_cont_line")

        #  A membership test yields a single TRUE/FALSE for a scalar state
        #  and, wrapped in isTRUE(), simply evaluates to FALSE for NULL or
        #  zero-length input instead of making if() fail.
    if (isTRUE (prev_state %in% states_with_open_subsection))
        {
        cat ("\n#\' }}")
        }

    invisible (NULL)
    }
#===============================================================================
#' Clean up any subsection left open when end of file is found
#'
#' Closes the variable subsection that is still open (if any) once the last
#' input line has been processed, then returns normally so that the caller
#' can carry on with its own cleanup (writing the final newline, restoring
#' the sink, closing the output file).
#'
#' @inheritParams decode_var_first_line_of_desc
#' @seealso \code{\link{close_open_subsection_if_necessary}}
#' @return Returns nothing (invisible NULL)
finish_up <- function (prev_state)
    {
    close_open_subsection_if_necessary (prev_state)

        #  Reaching the end of the input is the normal outcome, so do not
        #  signal an error here: raising one would abort the caller before
        #  it could write its trailing newline or release the output sink.
    invisible (NULL)
    }
#===============================================================================
#' Quit since an unexpected token was found
#'
#' Builds an error message containing the unexpected token, the input line
#' number that was passed in, and the text of that line, and then raises it
#' as an error.
#'
#' @inheritParams decode_var_first_line_of_desc
#' @param token character string indicating the token type, e.g.,
#'     "whitespace" or "symbol"
#'
#' @return Does not return; always raises an error
stop_due_to_unexpected_token <- function (token,
                                          data_line_num,
                                          data_line)
    {
        #  Assemble the full message first, then raise it.
    error_text <- paste0 ("\n\nUnexpected token at start of line ",
                          data_line_num,
                          ". Token = '", token,
                          "'. Line = \n'", data_line,
                          "'\n\n")

    stop (error_text)
    }
#===============================================================================
#' Start new function block and clean up previous block if necessary
#'
#' Called when sitting on the marker line that opens a new block of variable
#' descriptions for a function. Any variable subsection still open from the
#' previous block gets its closing brackets written first; then a separator
#' is written so the new block's output is easy to tell apart from what came
#' before it.
#'
#' @inheritParams decode_var_first_line_of_desc
#' @param visual_separator character string to write between the end of the
#'     previous block and the new block to make it easier to pick out where
#'     one ends and the other begins
#'
#' @seealso \code{\link{close_open_subsection_if_necessary}}
#' @inherit decode_var_first_line_of_desc return
decode_block_start <- function (prev_state,
                                visual_separator="\n\n\n\n")
    {
        #  Finish off the previous block before separating it from this one.
    close_open_subsection_if_necessary (prev_state)
    cat (visual_separator)

        #  A block start line is always followed by the first line of a
        #  function declaration.
    "state__func_first_line_of_decl"
    }
#===============================================================================
#' Write section header and first line of a function declaration
#'
#' Handles the first line of a function declaration inside a block. It
#' closes any variable subsection left open by the previous state, writes the
#' roxygen section header that introduces the local variable listings, and
#' then echoes the declaration line with the function name (the first token
#' on the line) wrapped in a "strong" command so that it shows in bold.
#'
#' The next state is chosen from the type of the first token on the next
#' line; "EOF" maps to "state__finished". Any other token type raises an
#' error through \code{\link{stop_due_to_unexpected_token}}.
#'
#' @inheritParams decode_var_first_line_of_desc
#'
#' @inherit decode_var_first_line_of_desc return
decode_func_first_line_of_decl <- function (data_line,
                                            data_line_num,
                                            prev_state,
                                            first_token_of_next_line)
    {
        #  If the last thing done before this was working on a variable's
        #  subsection, that has to be closed out before anything else.
    close_open_subsection_if_necessary (prev_state)

    cat ("\n\n\n\n\n#\'@section Local Variable Structures and examples:")
    cat ("\n#\'Here is the output of str() for each variable visible in the function.")
    cat ("\n#\'Note that the particular counts and values given are just examples to show")
    cat ("\n#\'what the data might look like.")
    cat ("\n#\'")

        #  First thing on the line is a symbol, so treat it as the function
        #  name and display it in bold.
    cur_line_tokens <- sourcetools::tokenize_string (data_line)
    func_name <- cur_line_tokens [1, "value"]
    cat ("\n#\' \\strong{FUNCTION: ",
         func_name,
         "}",
         sep='')

        #  Echo the rest of the line, not in bold, starting at the column of
        #  the second token.  When the name is the only token there is no
        #  second row to read a column from, so there is nothing to echo.
    if (nrow (cur_line_tokens) >= 2)
        {
        cat (stringr::str_sub (data_line,
                               cur_line_tokens [2, "column"],
                               stringr::str_length (data_line)))
        }

        #  The fallback of switch() has to be an UNNAMED argument; an
        #  argument tagged "..." is only a named alternative that never
        #  matches a token type.
        #  NOTE: the line number and text given to the error are those of
        #  the current line; the offending token starts the following line.
    next_state <-
        switch (first_token_of_next_line,
                whitespace = "state__func_decl_cont_line",
                symbol     = "state__var_first_line_of_desc",
                operator   = "state__block_start",
                EOF        = "state__finished",
                stop_due_to_unexpected_token (first_token_of_next_line,
                                              data_line_num,
                                              data_line)
                )

    return (next_state)
    }
#===============================================================================
#' Echo a continuation line of a function declaration
#'
#' Handles a line that starts with whitespace and follows either the first
#' line of a function declaration or another continuation line of it. The
#' line is echoed unchanged behind a roxygen comment prefix.
#'
#' The next state is chosen from the type of the first token on the next
#' line; "EOF" maps to "state__finished". Any other token type raises an
#' error through \code{\link{stop_due_to_unexpected_token}}.
#'
#' @inheritParams decode_var_first_line_of_desc
#'
#' @inherit decode_var_first_line_of_desc return
decode_func_decl_cont_line <- function (data_line,
                                        data_line_num,
                                        first_token_of_next_line)
    {
    cat ("\n#\' ", data_line, sep='')    #  Just echo this line.

        #  The fallback of switch() has to be an UNNAMED argument; an
        #  argument tagged "..." is only a named alternative that never
        #  matches a token type.
        #  NOTE: the line number and text given to the error are those of
        #  the current line; the offending token starts the following line.
    next_state <-
        switch (first_token_of_next_line,
                whitespace = "state__func_decl_cont_line",
                symbol     = "state__var_first_line_of_desc",
                operator   = "state__block_start",
                EOF        = "state__finished",
                stop_due_to_unexpected_token (first_token_of_next_line,
                                              data_line_num,
                                              data_line)
                )

    return (next_state)
    }
#===============================================================================
#' Open a subsection for a variable and echo its first description line
#'
#' Handles the first line of a variable's description. It closes the
#' subsection of the preceding variable if one is still open, then opens a
#' new subsection titled with the variable's name (the first token on the
#' line), starts a preformatted block inside it, and echoes the whole line.
#'
#' The next state is chosen from the type of the first token on the next
#' line; "EOF" maps to "state__finished". Any other token type raises an
#' error through \code{\link{stop_due_to_unexpected_token}}.
#'
#' @param data_line character string containing one full line from the input
#'     file
#' @param data_line_num integer line number of data_line in original input file
#' @param prev_state character string indicating the previous state, e.g.,
#'     "state__block_start"
#' @param first_token_of_next_line character string containing the first
#'     token type of the next line in the input file, e.g., "whitespace" or
#'     "symbol", or "EOF" when the current line is the last one
#'
#' @return character string indicating the next state to transition to, e.g.,
#'     "state__block_start"
decode_var_first_line_of_desc <- function (data_line,
                                           data_line_num,
                                           prev_state,
                                           first_token_of_next_line)
    {
        #  If the last thing done before this was working on a variable's
        #  subsection, that has to be closed out before anything else.
    close_open_subsection_if_necessary (prev_state)

        #  First thing on the line is a symbol, so treat it as the variable
        #  name and use it as the subsection header.
    cur_line_tokens <- sourcetools::tokenize_string (data_line)
    variable_name <- cur_line_tokens [1, "value"]

    cat ("\n#\' \\subsection{", variable_name, "}{", sep='')
    cat ("\n#\' \\preformatted{")
    cat ("\n#\' ", data_line, sep='')    #  Echo the whole line in subsection.

        #  The fallback of switch() has to be an UNNAMED argument; an
        #  argument tagged "..." is only a named alternative that never
        #  matches a token type.
        #  NOTE: the line number and text given to the error are those of
        #  the current line; the offending token starts the following line.
    next_state <-
        switch (first_token_of_next_line,
                whitespace = "state__var_desc_cont_line",
                symbol     = "state__var_first_line_of_desc",
                operator   = "state__block_start",
                EOF        = "state__finished",
                stop_due_to_unexpected_token (first_token_of_next_line,
                                              data_line_num,
                                              data_line)
                )

    return (next_state)
    }
#===============================================================================
#' Echo a continuation line of a variable description
#'
#' Handles a line that starts with whitespace and follows either the first
#' line of a variable's description or another continuation line of it. The
#' line is echoed unchanged behind a roxygen comment prefix.
#'
#' The next state is chosen from the type of the first token on the next
#' line; "EOF" maps to "state__finished". Any other token type raises an
#' error through \code{\link{stop_due_to_unexpected_token}}.
#'
#' @inheritParams decode_var_first_line_of_desc
#'
#' @inherit decode_var_first_line_of_desc return
decode_var_desc_cont_line <- function (data_line,
                                       data_line_num,
                                       first_token_of_next_line)
    {
    cat ("\n#\' ", data_line, sep='')    #  Just echo this line.

        #  The fallback of switch() has to be an UNNAMED argument; an
        #  argument tagged "..." is only a named alternative that never
        #  matches a token type.
        #  NOTE: the line number and text given to the error are those of
        #  the current line; the offending token starts the following line.
    next_state <-
        switch (first_token_of_next_line,
                whitespace = "state__var_desc_cont_line",
                symbol     = "state__var_first_line_of_desc",
                operator   = "state__block_start",
                EOF        = "state__finished",
                stop_due_to_unexpected_token (first_token_of_next_line,
                                              data_line_num,
                                              data_line)
                )

    return (next_state)
    }
#===============================================================================
#' Parse reduced input text array and write outputs depending on state
#'
#' This function takes an input array of strings that were lines in the
#' input file and parses it into chunks corresponding to functions and
#' variables. As it encounters each chunk, it writes out a properly
#' formatted roxygen subsection block that can be pasted into the function's
#' documentation. It also writes a bit of header text for the section.
#' Each variable is written as a subsection.
#'
#' @section Assumptions:
#' \describe{
#' \item{Operator begins START and END lines}{Each relevant section of the
#' input file begins with a line containing
#' ">>>> START doc_vars_in_this_func >>>>" and ends with a line
#' containing "<<<< END doc_vars_in_this_func <<<<". It doesn't matter
#' how many "greater than" or "less than" characters are in the line.
#' All that matters is that the line begins with an operator so that the
#' tokenizer will return "operator" as the first token on the line to
#' flag it as the start of a block. }
#' }
#'
#' @section Function declaration in output:
#' For each function block that is parsed, the function's name and argument list
#' are also written to the output, even though they are not intended to be
#' included in what is pasted into the function's documentation. (That
#' information is already in the documentation.) The information is only
#' included to make it easier to identify which function the variable list
#' belongs to when cutting it out of a big file full of outputs.
#'
#' @section Parameter declarations in output:
#' The output also includes descriptions for variables that are a part of the
#' function's parameter list since the output is for all variables known in
#' the function. These variables should already appear in the @@params section
#' of the function's documentation, so they can be deleted from the output.
#' However, they're included here for 2 reasons. First, this code is pretty
#' quick and dirty and it would take more coding to parse out the function's
#' argument list to determine overlapping variables. Second, the information
#' written out here can be helpful in building the @@param and @@return
#' sections of the function's documentation before removing them from the
#' sections generated here.
#'
#' @section States Used In The Parsing:
#' \describe{
#' \item{state__start_of_file}{Before parsing begins.}
#' \item{state__block_start}{Sitting on a ">>> START..." line at the head
#' of a new function block.}
#' \item{state__func_first_line_of_decl}{Sitting on the first line of a
#' function declaration. Starts with a symbol, not whitespace.
#' Immediately follows the block start line with no intervening lines.}
#' \item{state__func_decl_cont_line}{Sitting on a continuation line of a
#' multi-line function declaration. Line starts with whitespace.}
#' \item{state__var_first_line_of_desc}{Sitting on the first line of the
#' description of a variable. Starts with a symbol, not whitespace.
#' Immediately follows the end of a function declaration or variable
#' declaration with no intervening lines.}
#' \item{state__var_desc_cont_line}{Sitting on a continuation line of a
#' multi-line description of a variable. Line starts with whitespace.}
#' \item{state__finished}{Found end of file, ready to do final cleanup.}
#' }
#'
#' @section Legal Line Starts In The Parsing:
#' \describe{
#' \item{symbol}{An R function name or variable name with no preceding white
#' space.}
#' \item{whitespace}{Spaces and/or tabs.}
#' \item{operator}{An R operator; in this case the only one that should
#' occur is the ">" that is used on the block start lines.}
#' \item{EOF}{End of file; not returned by the tokenizer, but set in this
#' function and its subcalls.}
#' }
#'
#' @param all_data Vector of character strings, each entry containing the text
#' from the corresponding line in the original input file
#' @param doc_line_numbers Vector of integer line numbers serving as an index
#' into the reduced set of lines of interest in the original data. For
#' example, lines 1 and 2 of the original input may be irrelevant, while
#' line 3 of the original file is the first useful line for parsing, so
#' doc_line_numbers[1] = 3.
#'
#' @return nothing
state_transition <- function (all_data, doc_line_numbers)
    {
        #  Walks the lines of interest one at a time.  For each line, the
        #  current state selects the decode function that writes the output
        #  for that line and returns the state to use for the next line.
    num_doc_lines <- length (doc_line_numbers)

    prev_state <- "state__start_of_file"
    cur_state  <- "state__block_start"

        #  seq_len() gives an empty sequence when there are no lines of
        #  interest, whereas 1:0 would visit the bogus indices 1 and 0.
    for (cur_idx in seq_len (num_doc_lines))
        {
        cur_data_line_num <- doc_line_numbers [cur_idx]
        cur_data_line     <- all_data [cur_data_line_num]

            #---------------------------------------------------------
            #  Get first token of next line since it will affect the
            #  setting of the state at the end of operating on the
            #  current line.
            #---------------------------------------------------------

        next_idx <- cur_idx + 1
        if (next_idx <= num_doc_lines)
            {
                #  Not at end of file yet.
            next_line_num  <- doc_line_numbers [next_idx]
            next_data_line <- all_data [next_line_num]
            first_token_of_next_line <-
                get_first_token_type_of_line (next_data_line)

            } else
            {
                #  Current line is the last line of interest.
            first_token_of_next_line <- "EOF"
            }

            #------------------------------------------------
            #  Ready now to take action on the current line
            #  based on what state we're in.
            #  The final, UNNAMED argument is the fallback of
            #  switch(); an argument tagged "..." would only
            #  be one more named alternative and never run.
            #------------------------------------------------

        next_state <-
            switch (cur_state,

                    state__var_first_line_of_desc =
                        decode_var_first_line_of_desc (
                                                    cur_data_line,
                                                    cur_data_line_num,
                                                    prev_state,
                                                    first_token_of_next_line),

                    state__var_desc_cont_line =
                        decode_var_desc_cont_line (cur_data_line,
                                                   cur_data_line_num,
                                                   first_token_of_next_line),

                    state__func_first_line_of_decl =
                        decode_func_first_line_of_decl (cur_data_line,
                                                        cur_data_line_num,
                                                        prev_state,
                                                        first_token_of_next_line),

                    state__func_decl_cont_line =
                        decode_func_decl_cont_line (cur_data_line,
                                                    cur_data_line_num,
                                                    first_token_of_next_line),

                    state__block_start =
                        decode_block_start (prev_state),

                    state__finished =
                        finish_up (prev_state),

                    stop (paste0 ("\n\nFailed at line ", cur_data_line_num,
                                  ", i.e., no matching state found for state '",
                                  cur_state, "' at line '", cur_data_line, "'\n\n"))
                    )  #  end switch (cur_state)

        prev_state <- cur_state
        cur_state  <- next_state
        }

        #-----------------------------------------------------------------------
        #  After the last line of interest there may still be an open
        #  variable subsection in need of closing, so do that now if
        #  necessary.  prev_state holds the state of the last line processed.
        #  If you don't do this, then you get an error during the Build and
        #  roxygen phase of making the package.  Here's an example:
        #      Warning: @section [gen_bdprob.R#10]: mismatched braces or quote
        #-----------------------------------------------------------------------

    finish_up (prev_state)

        #-----------------------------------------------------------------------
        #  R CMD CHECK has a problem in using the output from this routine
        #  when it is written to a file and there is no newline at the end,
        #  so add one here.
        #  Here is the error message it was generating:
        #       Error: testthat unit tests failed
        #       In addition: Warning messages:
        #       1: In readLines(outfile_roxygen_correct) :
        #         incomplete final line found on 'correct_roxygen_comment_outfile.txt'
        #-----------------------------------------------------------------------

    cat ("\n")
    }
#===============================================================================
#' Parse and convert text generated by doc_vars_in_this_func() from file
#'
#' This is a wrapper function for
#' \code{\link{generate_func_var_roxygen_comments_from_vec}} to load the
#' strings to be parsed from a file into the vector of strings. While the
#' parsing runs, console output is redirected (and still echoed to the
#' console) into the output file. The redirection is removed and the output
#' file is closed when the function exits, whether it finishes normally or
#' stops with an error.
#'
#' @param infile character string giving path and file name for input file
#' @param sinkFilePath character string giving path and file name for output file
#'
#' @seealso \code{\link{generate_func_var_roxygen_comments_from_vec}}
#' @return Returns nothing (invisible NULL)
#' @examples \dontrun{
#' workdir = "/Users/bill/tzar/outputdata/biodivprobgen/default_runset/1837_marxan_simulated_annealing.completedTzarEmulation"
#' infile = file.path (workdir, "consoleSinkOutput.temp.txt")
#' outfile = file.path (workdir, "localvar_roxygen_comments.txt")
#' generate_func_var_roxygen_comments_from_file (infile, outfile)
#' }
#' @export
generate_func_var_roxygen_comments_from_file <- function (infile, sinkFilePath)
    {
    print (getwd())

        #  Open a file to echo console to and redirect output there.
        #  Register the cleanup as soon as each resource exists so that an
        #  error while reading or parsing cannot leave the sink active or
        #  the connection open.
    tempConsoleOutFile <- file (sinkFilePath, open="wt")
    on.exit (close (tempConsoleOutFile))

    sink (tempConsoleOutFile, split=TRUE)
        #  Replaces the handler above: the sink has to be removed before
        #  the connection it writes to is closed.
    on.exit ({
             sink ()
             close (tempConsoleOutFile)
             })

        #  Load the text file to be parsed.
    all_data <- readLines (infile)

        #  Parse the data and generate the roxygen comments.
    generate_func_var_roxygen_comments_from_vec (all_data)

    invisible (NULL)
    }
#===============================================================================
#' Parse and convert text generated by doc_vars_in_this_func() from vec of strings
#'
#' Picks out the lines that lie inside START/END marker pairs and hands them
#' to \code{\link{state_transition}}, which writes the roxygen formatting
#' for them.
#'
#' @param all_data vector of character strings containing output from
#'     doc_vars_in_this_func()
#'
#' @seealso \code{\link{generate_func_var_roxygen_comments_from_file}}
#' @return Returns nothing
#' @export
generate_func_var_roxygen_comments_from_vec <- function (all_data)
    {
        #  Sections of interest in the input are bracketed with a start line
        #  and an end line containing " doc_vars_in_this_func ", i.e.,
        #  ">>>> START doc_vars_in_this_func >>>>" and
        #  "<<<< END doc_vars_in_this_func <<<<".
        #  Searching for " doc_vars_in_this_func " finds both kinds of line.
    lines_containing_start_or_end <- grep (" doc_vars_in_this_func ", all_data)

        #  Assign every line in the original input to an interval number.
        #  findInterval() counts how many marker lines are at or before each
        #  line, so lines before the first marker get 0, lines from the
        #  first START up to (not including) its END get 1, lines from that
        #  END up to the next START get 2, and so on.
        #  seq_along() keeps this correct for empty input, where
        #  1:length (all_data) would produce the bogus indices 1 and 0.
    original_line_numbers <- seq_along (all_data)
    interval_nums <- findInterval (original_line_numbers,
                                   lines_containing_start_or_end)

        #  Lines with an odd interval number fall inside a START/END pair.
        #  The START line itself is included and the END line is not.
        #  Indexing the vector of line numbers directly (instead of
        #  subsetting a two-column matrix) also works when exactly one line
        #  is selected; a one-row matrix subset would collapse to a plain
        #  vector and could not be given column names.
    doc_line_numbers <- original_line_numbers [gtools::odd (interval_nums)]

        #  Ready now to parse the selected lines and spit out the appropriate
        #  roxygen formatting of the data in those lines.
        #  Do the parsing using a simple state-transition switching mechanism.
    state_transition (all_data, doc_line_numbers)
    }
#===============================================================================
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.