Nothing
#' Validate bibliography according to Grattan style
#' @param path Containing the bib file.
#' @param file The bib file if specified.
#' @param .report_error How errors should be reported.
#' @param rstudio Use the RStudio API to jump to errors.
#' @return \code{NULL} if bibliography validated.
#' @export
#'
#' @details This is a highly fastidious test of the bibliography. Useful for collaboration to ensure consistent style.
#' @examples
#' \dontrun{
#' bib_temp <- tempfile(fileext = ".bib")
#' url_bib <-
#' paste0("https://raw.githubusercontent.com/HughParsonage/",
#' "grattex/e6cab97145d38890e44e83d122e995e3b8936fc6",
#' "/bib/Grattan-Master-Bibliography.bib")
#'
#' download.file(url_bib, destfile = bib_temp)
#' validate_bibliography(file = bib_temp)
#'
#' bib_temp <- tempfile(fileext = ".bib")
#' url_bib <-
#' paste0("https://raw.githubusercontent.com/HughParsonage/",
#' "grattex/8f7f52a28789d12a363ceb30cea3b41f590ae58a",
#' "/bib/Grattan-Master-Bibliography.bib")
#' download.file(url_bib, destfile = bib_temp)
#' validate_bibliography(file = bib_temp)
#' }
#'
validate_bibliography <- function(path = ".", file = NULL, .report_error,
rstudio = FALSE) {
if (missing(.report_error)){
if (rstudio) {
.report_error <- function(...) report2console(..., file = file, rstudio = TRUE)
} else {
.report_error <- function(...) report2console(...)
}
}
if (is.null(file)){
bib_files <- dir(path = path, pattern = "\\.bib$", full.names = TRUE)
stopifnot(length(bib_files) == 1L)
bib_file <- bib_files[[1]]
} else {
bib_file <- file
}
bib <-
read_lines(bib_file) %>%
trimws
# Protect from misshapen bibliography entries
is_key <- grepl("^@", bib, perl = TRUE)
is_field <- grepl("^[a-z]+\\s* = \\{", bib, perl = TRUE)
is_closing = bib == "}"
is_null <- bib == ""
if (!all(is_key | is_field | is_closing | is_null)){
bad_lines <- which(!(is_key | is_field | is_closing | is_null))
first_bad_line <- bad_lines[[1]]
.report_error(line_no = first_bad_line,
context = bib[first_bad_line],
error_message = paste0(bib_file, " contains line which is neither a key, nor field, nor closing.")
,advice = paste0("Ensure every line in bibliography is one:\n\t@<EntryType>,\n\t",
"field = { <- including spaces around equals sign\n\t",
"or is a single closing brace, or a blank line. ",
"If you can, run\n\tlint_bib('", bib_file, "')")
)
stop(bib_file, " contains line which is neither a key, nor field, nor closing.")
}
bib[grep("% Valid", bib, fixed = TRUE)] <- ""
if (any(grepl(".[}]$", bib, perl = TRUE))){
line_no <- grep(".[}]$", bib, perl = TRUE)[[1]]
.report_error(line_no = line_no,
context = bib[line_no],
error_message = "Each field line in .bib must end with a comma (to allow reordering).")
stop("Each field line in .bib must end with a comma (to allow intra-entry reordering).")
}
# Abbreviated names
inst_pattern <-
paste0("^\\s*(author).*",
"(?:",
"(?:(?:Australian )?Bureau of Statistics)",
"|",
"(?:Australian Labor Party)",
"|",
"(?:Australian Institute of Health and Welfare)",
"|",
"(?:Australian Taxation Office)",
"|",
"(?:Productivity Commission)",
"|",
"(?:World Health Organi[sz]ation)",
")")
if (any(grepl(inst_pattern, bib, perl = TRUE))){
first_bad <-
grep(inst_pattern,
bib,
perl = TRUE,
value = TRUE) %>%
.[1]
cat(first_bad)
stop(cat(crayon::bgRed(symbol$cross)), "Institutional authors should be abbreviated.")
}
# Ensure URLs suggesting a newspaper article only
# appear in @Article types:
bib_just_keys_and_urls <-
grep("^(@|(\\s+(url)))",
bib,
perl = TRUE,
value = TRUE)
urls_not_articles <- c("http://images.theage.com.au/file/2014/07/31/5639573/Help_interest_rate_options_report.pdf")
bib_just_keys_and_urls <-
gsub("http://images.theage.com.au/file/2014/07/31/5639573/Help_interest_rate_options_report.pdf",
"",
bib_just_keys_and_urls,
fixed = TRUE)
newspapers_pattern <-
paste0("^(url).*",
"(",
"(((theguardian)|(afr))\\.com)",
"|",
"(((theaustralian)|(theage)|(smh)|(canberratimes)|(greatlakesadvocate))\\.com\\.au)",
"|",
"(theconversation\\.((edu\\.au)|(com)))",
"|",
"(insidestory\\.org\\.au)",
")")
should_be_Articles <-
grep(newspapers_pattern,
bib_just_keys_and_urls,
perl = TRUE) - 1
arent_Articles_but_should_be <-
!grepl("@Article",
bib_just_keys_and_urls[should_be_Articles],
fixed = TRUE)
if (any(arent_Articles_but_should_be)){
bib_just_keys_and_urls %>%
.[should_be_Articles] %>%
.[arent_Articles_but_should_be] %>%
.[1] %>%
cat
stop(cat(crayon::bgRed(symbol$cross)), "URL suggests the article type should be used.")
}
# Once we have verified all are articles, check the right journal has been included.
just_key_journal_urls <-
bib %>%
.[grepl("^(@|(journal)|(url))", ., perl = TRUE) | bib == "}"]
both_url_and_journal <- entry_no <- group_by <- is_article <- is_newspaper <-
journal <- journal_actual <- journal_from_url <- text <- NULL
journal_actual_vs_journal_expected <-
data.table(text = just_key_journal_urls) %>%
.[, entry_no := cumsum(grepl("^@", text))] %>%
.[, is_article := any(grepl("^@Article", text)), by = entry_no] %>%
# Journal + URL occur iff
# @
# TRUE
# TRUE
# }
.[, both_url_and_journal := .N == 4L, by = entry_no] %>%
.[, is_newspaper := any(grepl(newspapers_pattern, text, perl = TRUE)), by = entry_no] %>%
.[is_article & both_url_and_journal &is_newspaper] %>%
.[, .(key = grep("^@Article", text, perl = TRUE, value = TRUE),
url = gsub("^url.*[{](.*)[}],?$",
"\\1",
text[grepl("^url", text, perl = TRUE)],
perl = TRUE),
journal_actual = gsub("^journal.*[{](.*)[}],?$",
"\\1",
text[grepl("^journal", text, perl = TRUE)],
perl = TRUE),
journal_from_url = gsub(paste0("^.*(", newspapers_pattern, ").*$"),
"\\3",
text[grepl("^url", text, perl = TRUE)],
perl = TRUE)),
by = entry_no] %>%
setkey(journal_from_url) %>%
.[newspaper_by_url]
incorrect_journal_entries <-
journal_actual_vs_journal_expected[journal_actual != journal]
if (nrow(incorrect_journal_entries) > 0){
if (getOption("TeXCheckR.messages", TRUE)) {
cat(red(symbol$cross), red("Inconsistent treatment of article journal.\n"))
print(incorrect_journal_entries)
}
.report_error(error_message = "",
context = paste0("In entry", "\n\t",
incorrect_journal_entries[1][["key"]], "\n\n",
"I see:", "\n\t",
"url = {", incorrect_journal_entries[1][["url"]], "}", "\n\n",
"which suggests", "\n\t",
"journal = {", incorrect_journal_entries[1][["journal"]], "} ,", "\n\n",
"but\n\t",
"journal = {", incorrect_journal_entries[1][["journal_actual"]], "} ."))
stop("In entry", "\n\t",
incorrect_journal_entries[1][["key"]], "\n\n",
"I see:", "\n\t",
"url = {", incorrect_journal_entries[1][["url"]], "}", "\n\n",
"which suggests", "\n\t",
"journal = {", incorrect_journal_entries[1][["journal"]], "} ,", "\n\n",
"but\n\t",
"journal = {", incorrect_journal_entries[1][["journal_actual"]], "} .")
}
check_legislation <- function(bb){
is_bill_title <-
and(grepl("^(?:@Misc).*(?:Bill)",
shift(bib, type = "lag"),
perl = TRUE,
ignore.case = TRUE),
grepl("^(?:title).*(?:Bill)",
bib,
perl = TRUE,
ignore.case = TRUE))
if (any(!grepl("\\textup", bb[is_bill_title], fixed = TRUE))){
.report_error(line_no = which(is_bill_title)[1],
context = bb[is_bill_title[1]],
error_message = "Bill title in upright font.")
stop("When citing a Bill of Parliament, the title must be in upright font.", "\n",
"Use\n\ttitle = {\\textup{...}},\n\t\t\t\t\tin the .bib file.")
}
}
check_legislation(bib)
## Grattan Institute
## All TechReports should use the /report/ url
## All TechReports should have a number
check_Grattan_entries <- function(trimmed_bib){
techReport_at <- grepl("^@TechReport", trimmed_bib, perl = TRUE, ignore.case = TRUE)
is_closing <- trimmed_bib == "}"
is_TechReport <- techReport_at
is_TechReport[!or(techReport_at, is_closing)] <- NA
is_TechReport <- zoo::na.locf.default(is_TechReport, na.rm = FALSE)
is_TechReport[is.na(is_TechReport)] <- FALSE
is_GrattanReport_url <-
grepl("^url.*https?[:]//grattan\\.edu\\.au", trimmed_bib, perl = TRUE)
if (any(and(is_GrattanReport_url & is_TechReport,
!grepl("https?[:]//grattan\\.edu\\.au/report/", trimmed_bib, perl = TRUE)))){
line_nos <-
which(and(is_GrattanReport_url & is_TechReport,
!grepl("grattan\\.edu\\.au/report/", trimmed_bib, perl = TRUE)))
if (getOption("TeXCheckR.messages", TRUE)) {
for (x in line_nos)
cat(x, ": ", trimmed_bib[[x]], "\n")
}
stop("URL to Grattan Report does not use https://grattan.edu.au/report/ domain.")
}
if (any(and(is_GrattanReport_url,
grepl(".pdf", trimmed_bib, fixed = TRUE)))){
stop("URLs to Grattan Report points to pdf. The URL should be of the landing page.")
}
}
# check_Grattan_entries(bib)
# Peter Goss or Pete Goss?
if (OR(any(grepl("Pete Goss", bib, perl = TRUE)),
any(grepl("Goss, Pete(?!r)", bib, perl = TRUE)))){
line_no <- which(or(grepl("Pete Goss", bib, perl = TRUE),
grepl("Goss, Pete(?!r)", bib, perl = TRUE)))[[1]]
.report_error(line_no = line_no,
context = bib[line_no],
error_message = "Use 'Peter Goss', not 'Pete Goss', in bibliography.")
stop("Use 'Peter Goss', not 'Pete Goss', in bibliography.")
}
# dois should not include the top-level URL
if (any(grepl("^\\s*(doi).*https?[:][/][/]", bib, perl = TRUE))) {
line_no <- grep("^\\s*(doi).*https?[:][/][/]", bib, perl = TRUE)[1]
.report_error(file = file,
line_no = line_no,
error_message = paste0("DOI entries must be in the form", "\n\t",
"10.1787/9789264229945-en", "\n",
"not", "\n\t",
"http://dx.doi.org/10.1787/9789264229945-en"),
context = bib[line_no],
advice = "Either use this as a URL or delete everything except the DOI.")
}
just_years_and_dates <-
grep("^((@)|(year)|(date)|[}])",
bib,
perl = TRUE,
value = TRUE)
year_date_same_entry <-
and(grepl("^((year)|(date))", just_years_and_dates, perl = TRUE),
grepl("^((year)|(date))", lead(just_years_and_dates), perl = TRUE))
if (any(year_date_same_entry)){
bad_entry <- just_years_and_dates[which(year_date_same_entry)[[1]] + c(-1, 0, 1, 2)]
cat(crayon::bgRed(symbol$cross),
bad_entry[1], "\n\t",
bad_entry[2], "\n\t",
bad_entry[3], "\n\t",
bad_entry[4], "\n")
stop("Date and year should not both appear in bibliography.")
}
url_hypercorrected <- grep("^url.*\\\\", bib, perl = TRUE)
if (not_length0(url_hypercorrected)) {
.report_error(file = bib_file,
line_no = url_hypercorrected[1],
column = 1,
context = bib[url_hypercorrected[1]],
error_message = "URL contains hypercorrected escapes.",
advice = paste("Do not escape characters with special meaning in TeX ('control sequences')",
"when they appear in URLs. Delete the backslashes."))
}
invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.