Nothing
# Helper functions for do_read_bib()
#' Parse (at)string keys
#'
#' Returns a data.frame with all the string keys and their value to be used on
#' the `bib` file.
#'
#' @param map_string data frame with at least one column. Used as index, the
#' only condition is that it must have one row per string.
#'
#' @param stringlines A vector of characters with the same length that
#' map_string (in terms of number of rows), containing the text of each
#' string.
#'
#' @details
#' Both inputs are generated on [do_read_bib()]. This function parses the
#' (at) string sequentially, i.e. if the first string is used for concatenation
#' in the second string it should work.
#'
#'
#' @examples
#'
#' map_string <- data.frame(
#' entry = c("@STRING", "@STRING", "@STRING")
#' )
#'
#'
#' stringlines <- c(
#' "@STRING{STOC-key = \"OX{\\singleletter{stoc}}\"}",
#' "@STRING{ACM = \"The OX Association for Computing Machinery\"}",
#' "@STRING{STOC = ACM # \" Symposium on the Theory of Computing\"}"
#' )
#'
#'
#' parse_strings(map_string, stringlines)
#' @noRd
parse_strings <- function(map_string,
stringlines) {
# This needs to be done in a loop, since strings can depend also
# on values of previous strings
parsed_string <- NULL
for (i in seq_along(stringlines)) {
row <- map_string[i, ]
singleline <- stringlines[i]
map_braces <- check_balanced_braces(singleline, row$init)
# Extract string between first and last brace
first_bracket_position <- map_braces$pos[1]
last_bracket_position <- min(map_braces[map_braces$cum_flag == 0, ]$pos)
singleline <- trimws(substr(
singleline, first_bracket_position,
last_bracket_position
))
# Remove also the braces
singleline <- trimws(gsub("^\\{|\\}$", "", singleline))
# Split on name/value
splitted <- unlist(strsplit(singleline, "="))
finalrow <- data.frame(
name_string = trimws(splitted[1]),
value = trimws(splitted[-1])
)
# As character to avoid compatibility issues with older R versions
finalrow$name_string <- as.character(finalrow$name_string)
finalrow$value <- as.character(finalrow$value)
parsed_string <- rbind(parsed_string, finalrow)
}
# Need to replace and concat values on loop
# Iterate over map_parsed and concat
init <- parsed_string[1, ]
# Trim leading and trailing commas
init$value <- trimws(init$value, "both")
init$value <- trimws(init$value, "both", whitespace = "\"")
# If only one string return
if (nrow(parsed_string) == 1) {
return(init)
}
for (j in seq(2, nrow(parsed_string))) {
# Search if need to replace and concat
row <- parsed_string[j, ]
value <- replace_string_and_concat(
row$value,
init$name_string,
init$value
)
row$value <- trimws(value, "both", "\"")
init <- rbind(init, row)
}
return(init)
}
#' Parse a single entry
#'
#'
#' Parses a character string to an R object, that would be used to create
#' a [bibentry()] object on a later stage.
#'
#' @return A named character object with two attributes: entry with the
#' BibTeX entry type and key with the assigned key on the file.
#'
#' @param init Number of line where the entry begins
#' @param end Number of line where the entry ends
#' @param lines The content of the .bib file to be parsed, `init` and `end`
#' are used to select the corresponding lines
#' @param map_string_end The output of [parse_strings()] or `NULL`.
#'
#'
#' @examples
#'
#' lines <- readLines(system.file("bib", "utils.bib", package = "bibtex"))
#' parse_single_entry(
#' init = 2,
#' end = 8,
#' lines = lines,
#' map_string_end = NULL
#' )
#' @noRd
parse_single_entry <- function(init, end, lines, map_string_end) {
entry_lines <- lines[seq(init, end)]
# Skipping empty lines
entry_lines <- entry_lines[entry_lines != ""]
# Guess lines outside of the entry
guess_eoentry <- max(grep("\\}$", entry_lines))
entry_lines <- entry_lines[seq(1, guess_eoentry)]
# Collapse to single line
entry_collapsed <- paste0(entry_lines, collapse = "\n")
# Check balanced braces
map_braces <- check_balanced_braces(entry_collapsed, init)
# Extract string from beginning to first balanced brace
lastbrace <- min(map_braces[map_braces$cum_flag == 0, ]$pos)
# This would remove the last brace, it is what we want
cleaned_entry <- trimws(substr(entry_collapsed, 1, lastbrace - 1))
# Identify fields
# Safest pattern I found:
# ,<space or newline>singleword<space or newline>=
# E.g:
# ...}, author={...}
# ...",\ntitle ="
# etc.
# As field entry could have -, _ or : need to replace when identifying the
# fields. This improve the regex matching pattern
cleaned_entry_sub <-
gsub("(?<=[[:alnum:]])_|(?<=[[:alnum:]])-|(?<=[[:alnum:]]):", "x",
cleaned_entry,
perl = TRUE
)
# Protect commas on brackets to avoid error on splitting
protected <- gsub(",(?![^\\}]*(\\{|$))", "|",
cleaned_entry_sub,
perl = TRUE
)
posfields <- unlist(gregexpr(",\\s*\\w+\\s*=", protected))
# Add init and last pos
posfields <- sort(unique(c(1, posfields, nchar(cleaned_entry))))
formatted_bib <- lapply(seq_len(length(posfields) - 1), function(x) {
str <- substr(cleaned_entry, posfields[x], posfields[x + 1])
# Cleanup leading and trailing comma and blanks
str <- trimws(gsub("^,|,$", "", str))
})
formatted_bib <- unlist(formatted_bib)
# Each field is now on a single line
# # Extract type of entry and key
# String to first comma
entry_key <- formatted_bib[1]
entry_key <- gsub("@", "", unlist(strsplit(entry_key, "\\{")))
entry_name <- trimws(entry_key[1])
key_name <- trimws(entry_key[2])
# Now treat fields
fields <- formatted_bib[-1]
treat_fields <- vapply(seq_along(fields), function(x) {
string <- fields[x]
equalsign <- grep("=", strsplit(string, "")[[1]])
field_name <- trimws(substr(string, 1, equalsign - 1))
field_value <- trimws(substr(string, equalsign + 1, nchar(string)))
c(field_name, field_value)
},
FUN.VALUE = character(2)
)
field_names <- trimws(treat_fields[1, ])
field_value <- trimws(treat_fields[2, ])
# Try hard to replace with string values
field_value <- lapply(field_value, replace_string_and_concat,
string_names = map_string_end$name_string,
string_values = map_string_end$value
)
field_value <- unlist(field_value)
# Lead/trailing curly braces ¿?
field_value <- gsub("^\\{|\\}$", "", field_value)
# Lead/trailing quotes
field_value <- gsub("^\"|\"$", "", field_value)
# Build final object
attr(field_value, "entry") <- trimws(entry_name)
names(field_value) <- trimws(field_names)
attr(field_value, "key") <- key_name
attr(field_value, "srcref") <- seq(init, end)
return(field_value)
}
#' Replaces a word with a (at)string value and concatenate
#'
#' Helper function
#'
#'
#' @noRd
#'
#' @param x values to evaluate
#' @param string_names Names of the strings detected on the .bib file
#' @param string_values Values of the string
#'
#' @return A string with the specified treatment
#'
#' @examples
#' x <- "\"CIA means \" # CIA"
#' string <- c("CIA", "FBI")
#' value <- c("Central Intelligence Agency", "Federal Bureau of Investigation")
#'
#' # Replace and concat
#' replace_string_and_concat(x, string, value)
#' x_alt <- "CIA # \" can be abbreviated as CIA, and FBI stands for \" # FBI"
#'
#' replace_string_and_concat(x_alt, string, value)
#'
#'
#' # Full replacement
#' x2 <- "CIA"
#' replace_string_and_concat(x2, string, value)
#'
#' # No replacement
#' x3 <- "\"CIA\""
#' replace_string_and_concat(x3, string, value)
#'
#' # Protected
#' x4 <- "{CIA}"
#' replace_string_and_concat(x3, string, value)
replace_string_and_concat <- function(x, string_names, string_values) {
# As characters instead of factors
string_names <- as.character(string_names)
string_values <- as.character(string_values)
# If has braces then return the same value
if (grepl("\\{|\\}", x)) {
return(x)
}
# If has a concat try to replace
# Use lower for avoid caps mismatching
if (tolower(x) %in% tolower(string_names)) {
# Case of full replacement
index <- match(tolower(x), tolower(string_names))
return(paste0("\"", string_values[index], "\""))
} else if (grepl(" # ", x)) {
# Hardest case, replace and concat
parts <- unlist(strsplit(x, " # "))
parts <- lapply(parts, function(y) {
if (tolower(y) %in% tolower(string_names)) {
index2 <- match(tolower(y), tolower(string_names))
string_values[index2]
} else {
y
}
})
# Flatten and clean
newval <- paste(trimws(parts, "both", "\""), collapse = "")
return(trimws(newval))
} else {
return(x)
}
}
#' Error catching
#'
#' Evaluates if braces \{ are balanced or not and returns an error instead.
#'
#' @param x The string or entry to be evaluated
#' @param line The number of line of the string/entry evaluated. This is used
#' for generating an informative error only.
#'
#' @examples
#' if (FALSE) {
#' # Dont run, throws an error
#' error <- paste0(
#' "@misc{murdoch:2010, author = {Duncan Murdoch}",
#' "title = {I {am} unbalanced",
#' "year = 2010",
#' "url = {http://developer.r-project.org/parseRd.pdf}}"
#' )
#'
#' check_balanced_braces(error, 3)
#' }
#'
#'
#' ok <- paste0(
#' "@misc{murdoch:2010, author = {Duncan Murdoch}",
#' "title = {I {am} balanced}",
#' "year = 2010",
#' "url = {http://developer.r-project.org/parseRd.pdf}}"
#' )
#'
#' check_balanced_braces(ok, 3)
#' @noRd
check_balanced_braces <- function(x, line) {
# Check if braces are balanced ----
# Split the line into chars, extract the position of
# braces (left and right) and manipulate
to_chars <- unlist(strsplit(x, ""))
# Create table with brace map
pos_leftbrace <- grep("\\{", to_chars)
pos_rightbrace <- grep("\\}", to_chars)
map_braces <- data.frame(
brace = c(
rep("{", length(pos_leftbrace)),
rep("}", length(pos_rightbrace))
),
pos = c(pos_leftbrace, pos_rightbrace)
)
map_braces <- map_braces[order(map_braces$pos), ]
# Asign index to each brace: left = 1 and right = -1
# Then cumsum. This reveals the structure: e.g:
# @aa{x, a=1, b={s{S}}, f="x{x}x"}
# This is translated as
# { { { } } { } }
# 1 1 1 -1 -1 1 -1 -1
# 1 2 3 2 1 2 1 0
map_braces$flag <- ifelse(map_braces$brace == "{", 1, -1)
map_braces$cum_flag <- cumsum(map_braces$flag)
# If there is no 0 the braces are unbalanced: Throw an error
if (!(0 %in% map_braces$cum_flag)) {
message(
"Error: Unbalanced braces on entry (line ",
line,
"). Invalid .bib file"
)
# Trigger an error for tryCatch(es)
stop()
}
return(map_braces)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.