#' Convert Word in-text citations to LaTeX \\textcite and \\footcite citations using a .bib file.
#'
#' @param path The path of the folder containing your Word document.
#' @param bibName The name of the .bib file containing all bib entries.
#' @param texName The name of the .tex file containing in-text citations to be replaced.
#' @param fromWord2grattex TRUE if the bib2grattex function is nested within word2grattex.
#' @param titleLength The number of characters of the document title to be used in the reference key. The reference key is formatted as: AuthorYearTitle. Eg. Norton2018Droppingoutthecostsa when titleLength = 20 (the default).
#' @param variousYears Phrases that have been used to denote various years (in place of a numerical year within the in-text citation).
#'
#' @importFrom readr read_lines write_lines
#' @importFrom stringr str_count
#' @import dplyr
#'
#' @export
#'
#'
## Define global variables (from word2grattex)
## The numbered names are generated rather than spelled out; the resulting
## character vector is the same, in the same order.
globalVariables(
  c(
    ".",
    paste0("findCitation", 1:6),
    paste0("findCitationBrackets", 1:6),
    "replaceCitation",
    "shortAuthor",
    "title",
    "year",
    "yearCode"
  )
)
# This function requires the yearcode of the bib file to MATCH the year code of the Word reference list.
# this isn't _really_ ideal; but is the system for the time being.
# The function requires proper testing, which has yet to be done.
# bib2grattex
#
# Rewrites the keys of a .bib file and converts Word-style in-text citations in
# a .tex file into biblatex commands.
#
# Steps:
#   1. Lint and read the .bib file; give every entry a temporary numeric key.
#   2. Extract year, author (falling back to editor, then title) and title for
#      each entry.
#   3. Build the strings Word would have printed for each entry, e.g.
#      "Norton (2018a)" and "(Norton, 2018a)", plus an AuthorYearTitle cite key.
#   4. Write the new keys back into the .bib file (in place).
#   5. Replace every in-text citation in the .tex file with
#      \textcite[][]{key}, attach page/chapter/section postnotes, collapse runs
#      of \textcite into \textcites and turn citation-only footnotes into
#      \footcite(s). The .tex file is overwritten.
#
# Arguments:
#   path             Folder containing the files. Unless fromWord2grattex is
#                    TRUE the working directory is temporarily set to it.
#   bibName          Name of the .bib file (".bib" is appended if missing).
#   texName          Name of the .tex file (".tex" is appended if missing).
#   fromWord2grattex TRUE when called from word2grattex; the working directory
#                    is then left untouched.
#   titleLength      Number of title characters used in the cite key.
#   variousYears     Phrases accepted in place of a numeric year.
#
# Returns invisible NULL (the value of the final message() call). Stops with an
# error if the .bib file has no entries or an entry has no author, editor or
# title.
bib2grattex <- function(path = ".",
                        bibName,
                        texName,
                        fromWord2grattex = FALSE,
                        titleLength = 20,
                        variousYears = c("multiple years", "Various years", "various years")
                        ) {

  # Tidy path and file names if needed
  path <- gsub("\\/$", "", path)
  if (!grepl("\\.bib$", bibName)) bibName <- paste0(bibName, ".bib")
  if (!grepl("\\.tex$", texName)) texName <- paste0(texName, ".tex")

  # Preserve the user's working directory; add = TRUE so other exit handlers
  # are not overwritten.
  if (!fromWord2grattex) {
    current_wd <- getwd()
    setwd(normalizePath(path.expand(path)))
    on.exit(setwd(current_wd), add = TRUE)
  }

  # Lint the .bib file and read in lines
  lint_bib(bibName)
  bib.all <- read_lines(bibName)

  # ---- Temporary keys ---- #
  # Every line starting with "@" opens an entry. Replace its existing key with
  # a running number so entries can be addressed unambiguously.
  entryStarts <- grep("^\\@", bib.all)
  bibTotal <- length(entryStarts)
  if (bibTotal == 0) {
    stop("No bib entries (lines starting with '@') found in ", bibName,
         call. = FALSE)
  }
  for (i in seq_along(entryStarts)) {
    bib.all[entryStarts[i]] <- gsub("(\\@[a-zA-Z]*\\{).*",
                                    paste0("\\1", i),
                                    bib.all[entryStarts[i]])
  }

  # Tag each line with the number of the entry it belongs to. Lines before the
  # first entry get 0 and are never selected below.
  bibNumber <- cumsum(grepl("^\\@", bib.all))

  # Field masks are loop-invariant, so compute them once.
  isYearLine   <- grepl("^\\s*year\\s*\\=", bib.all)
  isAuthorLine <- grepl("^\\s*author\\s*\\=", bib.all)
  isEditorLine <- grepl("^\\s*editor\\s*\\=", bib.all)
  isTitleLine  <- grepl("^\\s*title\\s*\\=", bib.all)

  # ---- Year (or "9999" if missing) ---- #
  yearList <- character(bibTotal)
  for (bib in seq_len(bibTotal)) {
    thisYear <- gsub(".*year.*?\\{([0-9]{2,4})\\}\\,", "\\1",
                     bib.all[bibNumber == bib & isYearLine])
    # Accept 'various years' or variants in place of a number
    for (phrase in variousYears) {
      thisYear <- gsub(paste0(".*year.*?\\{(([0-9]{2,4})|(", phrase, "))\\}\\,"),
                       "\\1", thisYear)
    }
    if (length(thisYear) == 0) {
      thisYear <- "9999"
      print(paste0("Year not found for bib ", bib, ", using 9999 instead"))
    }
    # Keep exactly one value per entry so the vectors stay aligned
    yearList[bib] <- thisYear[1]
  }

  # ---- Author (editor if no author; title if neither, e.g. Acts) ---- #
  # The patterns skip an optional outer pair of braces ({{ABS}} -> ABS).
  authorList <- character(bibTotal)
  for (bib in seq_len(bibTotal)) {
    inBib <- bibNumber == bib
    thisAuthor <- gsub(".*author.*?\\{?\\{([^\\}]*)\\}?\\}\\,", "\\1",
                       bib.all[inBib & isAuthorLine])
    if (length(thisAuthor) == 0) {
      # Word uses editors for the in-text citation if no author is present
      thisAuthor <- gsub(".*editor.*?\\{?\\{([^\\}]*)\\}?\\}\\,", "\\1",
                         bib.all[inBib & isEditorLine])
      if (length(thisAuthor) > 0) {
        print(paste0("Author not found for bib ", bib, ", using editor instead"))
      } else {
        thisAuthor <- gsub(".*title.*?\\{?\\{([^\\}]*)\\}?\\}\\,", "\\1",
                           bib.all[inBib & isTitleLine])
        if (length(thisAuthor) > 0) {
          print(paste0("Author not found for bib ", bib, ", using title instead (is this an Act? If not, there should be an author or editor)"))
        }
      }
    }
    if (length(thisAuthor) == 0) {
      stop("No author, editor or title found for bib ", bib, " in ", bibName,
           call. = FALSE)
    }
    authorList[bib] <- thisAuthor[1]
  }

  # ---- 'findAuthor' variants, chosen by the number of commas ---- #
  # Each transformation below only touches entries with a given comma count and
  # never increases it, so the counts can be taken once from the raw field.
  nCommas <- str_count(authorList, ",")
  findAuthor <- authorList
  authorKey <- authorList

  ## NO commas: institutional authors are left as-is ("ABS", "Treasury");
  ## the key just loses its spaces ("DepartmentofEducationandTraining")
  authorKey <- ifelse(nCommas == 0, gsub("\\s", "", authorKey), authorKey)

  ## ONE comma: single author, keep the last name: "Norton"
  findAuthor <- ifelse(nCommas == 1,
                       gsub("^([^,]*),.*", "\\1", findAuthor),
                       findAuthor)
  authorKey <- ifelse(nCommas == 1,
                      gsub("^([^,]*),.*", "\\1", authorKey),
                      authorKey)

  ## TWO commas: two authors: "Norton and Cherastidtham"; key is "Norton"
  findAuthor <- ifelse(nCommas == 2,
                       gsub("^([^,]*),.*and\\s([^,]*).*", "\\1 and \\2", findAuthor),
                       findAuthor)
  authorKey <- ifelse(nCommas == 2,
                      gsub("^([^,]*),.*and\\s([^,]*).*", "\\1", authorKey),
                      authorKey)

  ## THREE or more commas: several ways Word may have written it
  ### "Norton, et al." [variant 1]
  findAuthor1 <- ifelse(nCommas > 2,
                        gsub("^([^,]*),.*", "\\1, et al\\.", findAuthor),
                        findAuthor)
  ### "Norton et al." [variant 2]
  findAuthor2 <- ifelse(nCommas > 2,
                        gsub("^([^,]*),.*", "\\1 et al\\.", findAuthor),
                        findAuthor)
  ### "Norton\emph{, et al.}" [variant 3]
  findAuthor3 <- ifelse(nCommas > 2,
                        gsub("^([^,]*),.*", "\\1\\\\emph{, et al.}", findAuthor),
                        findAuthor)
  ### Three authors in full: "Norton, Cherastidtham and Mackey" [variant 4]
  findAuthor4 <- ifelse(nCommas == 3,
                        gsub("^([^,]*),.*and\\s([^,]*).*and\\s([^,]*).*", "\\1, \\2 and \\3", findAuthor),
                        findAuthor)
  ### Four authors in full [variant 5]
  findAuthor5 <- ifelse(nCommas == 4,
                        gsub("^([^,]*),.*and\\s([^,]*).*and\\s([^,]*).*and\\s([^,]*).*", "\\1, \\2, \\3 and \\4", findAuthor),
                        findAuthor)
  ### Five authors in full [variant 6]
  findAuthor6 <- ifelse(nCommas == 5,
                        gsub("^([^,]*),.*and\\s([^,]*).*and\\s([^,]*).*and\\s([^,]*).*and\\s([^,]*).*", "\\1, \\2, \\3, \\4 and \\5", findAuthor),
                        findAuthor)
  ### authorKey: first author's last name
  authorKey <- ifelse(nCommas > 2,
                      gsub("^([^,]*),.*", "\\1", authorKey),
                      authorKey)

  ## Remove punctuation and spaces from author key
  authorKey <- gsub("[[:punct:]]", "", authorKey)
  authorKey <- gsub("\\s", "", authorKey)

  # ---- Title, for sorting and key generation ---- #
  titleList <- character(bibTotal)
  for (bib in seq_len(bibTotal)) {
    thisTitle <- gsub(".*title.*\\=.*?\\{(.*)\\},", "\\1",
                      bib.all[bibNumber == bib & isTitleLine])
    # If there is no title, warn and use ""
    if (length(thisTitle) == 0) {
      thisTitle <- ""
      warning(paste0("Title not found for bib ", bib, "! Context: "))
      print(bib.all[(bibNumber >= bib & bibNumber <= (bib + 20))])
    }
    titleList[bib] <- thisTitle[1]
  }

  # ---- Build the find/replace table ---- #
  findCitation <- tibble(
    "fullAuthor"  = authorList,
    "shortAuthor" = findAuthor1,
    "authorKey"   = authorKey,
    "findAuthor1" = findAuthor1,
    "findAuthor2" = findAuthor2,
    "findAuthor3" = findAuthor3,
    "findAuthor4" = findAuthor4,
    "findAuthor5" = findAuthor5,
    "findAuthor6" = findAuthor6,
    "title"       = titleList,
    "year"        = yearList,
    "bibNumber"   = seq_len(bibTotal) # BEFORE arrange
  ) %>%
    # Sort order: AUTHOR, YEAR, TITLE (as seen in docx formatting)
    arrange(shortAuthor, year, title) %>%
    # Number entries sharing shortAuthor + year to generate the a/b/c suffix
    group_by(shortAuthor, year) %>%
    mutate(
      count = seq_len(n()),
      total = n(),
      tempCode = paste0(letters[count]),
      yearCode = case_when(total == 1 ~ paste0(year),
                           total > 1 & grepl("[0-9]{2,5}", year) ~ paste0(year, tempCode),
                           total > 1 & !grepl("[0-9]{2,5}", year) ~ paste0(year, "-", tempCode) # exception for "multiple years" etc
      ),
      # "Author (year)" variants
      findCitation1 = paste0(findAuthor1, " (", yearCode, ")"),
      findCitation2 = paste0(findAuthor2, " (", yearCode, ")"),
      findCitation3 = paste0(findAuthor3, " (", yearCode, ")"),
      findCitation4 = paste0(findAuthor4, " (", yearCode, ")"),
      findCitation5 = paste0(findAuthor5, " (", yearCode, ")"),
      findCitation6 = paste0(findAuthor6, " (", yearCode, ")"),
      # "(Author, year)" variants according to Cite Them Right 10th Edition, Harvard
      findCitationBrackets1 = paste0("(", findAuthor1, ", ", yearCode, ")"),
      findCitationBrackets2 = paste0("(", findAuthor2, ", ", yearCode, ")"),
      findCitationBrackets3 = paste0("(", findAuthor3, ", ", yearCode, ")"),
      findCitationBrackets4 = paste0("(", findAuthor4, ", ", yearCode, ")"),
      findCitationBrackets5 = paste0("(", findAuthor5, ", ", yearCode, ")"),
      findCitationBrackets6 = paste0("(", findAuthor6, ", ", yearCode, ")"),
      # Cite key: AuthorYearTitle
      shortTitle = substr(title, 1, titleLength) %>%
        gsub("[[:punct:]]", "", .) %>%
        gsub("\\s", "", .) %>%
        tolower(.),
      citeKey = case_when(grepl("[0-9]{2,5}", year) ~ paste0(authorKey, year, shortTitle),
                          !grepl("[0-9]{2,5}", year) ~ paste0(authorKey, shortTitle)), # exception for "multiple years" etc
      # Replacement text
      replaceCitation = paste0("\\textcite[][]{", citeKey, "}")
    ) %>%
    # Back to .bib order so that row i describes entry i
    ungroup() %>%
    arrange(bibNumber)

  # Pull required vectors. The find variants are kept in lists so they can be
  # indexed directly instead of being looked up by name with get().
  citeKey <- findCitation[["citeKey"]]
  replaceWith <- findCitation[["replaceCitation"]]
  findPlain <- lapply(paste0("findCitation", 1:6),
                      function(col) findCitation[[col]])
  findBrackets <- lapply(paste0("findCitationBrackets", 1:6),
                         function(col) findCitation[[col]])

  # ---- Write the new keys into the .bib file ---- #
  for (bib in seq_len(bibTotal)) {
    inBib <- bibNumber == bib
    bib.all[inBib] <- gsub("(\\@[a-zA-Z]*\\{)[0-9]*",
                           paste0("\\1", citeKey[bib], ","),
                           bib.all[inBib])
  }
  write_lines(bib.all, bibName)

  # ---- Replace in-text citations ---- #
  # The file is read once and edited in memory. Both the "Author (year)" and the
  # "(Author, year)" forms are replaced; matching is literal (fixed = TRUE).
  texFile <- readr::read_lines(texName)
  for (bib in seq_len(bibTotal)) {
    thisReplace <- replaceWith[bib]
    message(paste0("Replacing ", findPlain[[1]][bib], " with ", thisReplace))
    message(paste0(" And looking for/replacing a few variations on ", findPlain[[1]][bib]))
    for (num in seq_along(findPlain)) {
      texFile <- gsub(findPlain[[num]][bib], thisReplace, texFile, fixed = TRUE)
      texFile <- gsub(findBrackets[[num]][bib], thisReplace, texFile, fixed = TRUE)
    }
  }

  # ---- ibid functionality ---- #
  # NA

  # ---- Managing textcites; creating footcites ---- #
  # Regex for a \textcite whose pre- and postnote are still empty, followed by
  # an optional comma and space. Group 1 is the cite key.
  emptyCite <- "\\\\textcite\\[\\]\\[\\]\\{([^\\}]*)\\},?\\s?"

  # Remove \emph from textcites in footnotes: if \emph surrounds a \textcite, remove it
  texFile <- gsub("\\\\emph\\{(\\\\textcite\\[\\]\\[\\]\\{[^\\}]*\\})\\}",
                  "\\1",
                  texFile)

  # Add page(s), section and chapter references to \textcite
  ## p. num
  texFile <- gsub(paste0(emptyCite, "\\(?p\\.?\\s?([0-9]{1,6})\\)?"),
                  "\\\\textcite\\[\\]\\[\\2\\]\\{\\1\\}",
                  texFile)
  ## p. letter; more cautious, so "p" must be followed by a dot or whitespace.
  ## [[:space:]] is used because \s is not a whitespace class inside a bracket
  ## expression with the default regex engine.
  texFile <- gsub(paste0(emptyCite, "\\(?p[[:space:].]{1,2}\\s?([a-zA-Z]{1,6})\\)?"),
                  "\\\\textcite\\[\\]\\[\\2\\]\\{\\1\\}",
                  texFile)
  ## pp. num-num
  texFile <- gsub(paste0(emptyCite, "\\(?pp\\.?\\s?([0-9]{1,6})\\-\\-?([0-9]{1,6})\\)?"),
                  "\\\\textcite\\[\\]\\[\\2--\\3\\]\\{\\1\\}",
                  texFile)
  ## pp. letter-letter
  texFile <- gsub(paste0(emptyCite, "\\(?pp\\.\\s?([a-zA-Z]{1,6})\\-\\-?([a-zA-Z]{1,6})\\)?"),
                  "\\\\textcite\\[\\]\\[\\2--\\3\\]\\{\\1\\}",
                  texFile)

  # "chapter" -> "(C|c)hapter": accept either case for the first letter
  anyCaseFirst <- function(word) {
    first <- substr(word, 1, 1)
    paste0("(", toupper(first), "|", first, ")", substring(word, 2))
  }

  ## Single chapter, section, appendix, table, figure, chart
  for (unit in c("chapter", "section", "appendix", "table", "figure", "chart")) {
    texFile <- gsub(paste0(emptyCite, anyCaseFirst(unit), "\\s([0-9\\.]{1,8})"),
                    paste0("\\\\textcite\\[\\]\\[", unit, "~\\3]\\{\\1\\}"),
                    texFile)
  }
  ## Ranges of chapters, sections, ... NOT YET TESTED properly
  for (unit in c("chapters", "sections", "appendices", "tables", "figures", "charts")) {
    texFile <- gsub(paste0(emptyCite, anyCaseFirst(unit),
                           "\\s([0-9\\.]{1,8})\\s?([andto,\\-]{1,5})\\s?([0-9\\.]{1,8})"),
                    paste0("\\\\textcite\\[\\]\\[", unit, "~\\3~\\4~\\5]\\{\\1\\}"),
                    texFile)
  }

  # ----- Collapse \textcites (logical that this comes before \footcite replacement) ---- #
  # The [prenote][postnote]{key} part of one citation
  citeArgs <- "\\[[a-zA-Z0-9\\-\\~\\.]*\\]\\[[a-zA-Z0-9\\-\\~\\.]*\\]\\{[a-zA-Z0-9\\-]*\\}"
  # Characters allowed between consecutive citations: whitespace ; , and
  citeSeparator <- "[\\s\\;\\,and]*"
  requiredCite <- paste0("\\\\textcite(", citeArgs, ")")
  # The separator lives INSIDE the optional group, so it is only consumed when
  # another \textcite actually follows; text after the last cite is untouched.
  optionalCite <- paste0("(?:", citeSeparator, "\\\\textcite(", citeArgs, "))?")
  # TWO required cites followed by up to SEVEN optional ones (groups 1 to 9)
  texFile <- gsub(paste0(requiredCite, citeSeparator, requiredCite,
                         paste(rep(optionalCite, 7), collapse = "")),
                  "\\\\textcites\\1\\2\\3\\4\\5\\6\\7\\8\\9",
                  texFile, perl = TRUE)

  # ----- Replace \footnote{\textcite[][]{...}} with \footcite[][]{...} ---- #
  # Group 3 wraps the whole repetition so every further citation is kept; a
  # repeated capturing group would only retain its last repetition.
  texFile <- gsub(paste0("\\\\footnote\\{\\\\textcite(s?)",
                         "(", citeArgs, ")",
                         "((?:", citeArgs, ")*)",
                         "\\s?\\.?\\s?\\}"),
                  "\\\\footcite\\1\\2\\3",
                  texFile, perl = TRUE)

  # ----- Write tex file with updated bib citations ---- #
  # Written to the same (working-directory relative) location it was read from;
  # when fromWord2grattex is FALSE the working directory is already `path`.
  readr::write_lines(texFile, texName)

  message("End of bib2grattex")
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.