R/build-database.R

Defines functions get_zotero_id get_zotero_key_list remove_blanks convert_keys_to_list_of_lists retrieve_single_record_from_zotero pull_new_items_from_zotero build_database get_database_file load_database update_zotero_database read_citations remove_yaml create_bibliography

Documented in build_database convert_keys_to_list_of_lists create_bibliography get_database_file get_zotero_key_list load_database pull_new_items_from_zotero read_citations remove_blanks retrieve_single_record_from_zotero update_zotero_database

#' Get Zotero ID
#'
#' Tfunction returns the user id that has been stored as an environmental
#' variable. If not found, the function returns an error.
#' @return An integer
#' @export
get_zotero_id <- function() {
  key <- Sys.getenv('ZOTERO_USER')
  if (key == '')
    stop('Zotero User ID needs to be set as an environmental variable.')

  as.integer(key)
}

#' Get Zotero Key List
#'
#' Retrieves a list of all of the keys associated with a Zotero account. Each key
#' corresponds to a single bibliography item.
#' @return A character vector of item keys
#' @importFrom httr GET add_headers content
#' @importFrom stringr str_split
get_zotero_key_list <- function(since = NULL) {
  id_string <- get_zotero_id()

  my_url <- sprintf('https://api.zotero.org/users/%i/items?format=keys',
                    id_string)

  if (!is.null(since))
    my_url <- sprintf('%s&since=%i', my_url, since)

  httr::GET(url = my_url,
            httr::add_headers('Zotero-API-Key' = get_zotero_key()),
            query = list(v = 3)) %>%
    httr::content() %>%
    stringr::str_split('\n') %>%
    `[[`(1) %>%
    remove_blanks()
}

#' Remove Blanks
#'
#' Removes blank elements from a character vector.
#' @param x A character vector
#' @return A character vector with any blank elements removed
#' @importFrom stringr str_trim
remove_blanks <- function(x) {
  x[stringr::str_trim(x) != '']
}

#' Convert Keys to List of Lists
#'
#' Converts a character vector of keys into a list of character vectors of keys.
#' Since only 50 items can be read from Zotero at a time, this function converts
#' a list of keys into a series of character vectors that are each no more than
#' 50 items long.
#'
#' DEPRECATED:  Pulling more than one record at a time does not seem feasible as
#' many of the keys do not return a BibLatex record and there is no way to tell
#' which keys returned nothing when more than one key is read at a time. So this
#' function will likely be unnecessary.
#' @param kl A character vector of keys of the form generated by `get_zotero_key_list`
#' @return A list of character vectors of keys where no item in the list is more
#' than 50 elements long.
convert_keys_to_list_of_lists <- function(kl = NULL) {
  if (is.null(kl) | !is.character(kl))
    stop('Invalid key supplied to convert_keys_to_list_of_lists.')

  ret_list <- list()
  while(length(kl) > 0) {
    n <- length(kl)
    if (n > 50L) {
      add_to <- kl[c(1:n) <= 50L]
      kl <- kl[c(1:n) > 50L]
    } else {
      add_to <- kl
      kl <- character()
    }
    ret_list <- c(ret_list, list(add_to))
  }

  ret_list
}

#' Retrieve Single Record from Zotero
#'
#' Retrieves a the Biblatex for a single item on Zotero.
#' @param key Character vector scalar giving the key for a single Zotero record.  Equivalent
#' to one element from `get_zotero_key_list`.
#' @return A data.frame with a single record containing, the key, the name of the record,
#' and the BibLatex record text.
#' @importFrom httr GET add_headers content
#' @importFrom stringr str_extract
retrieve_single_record_from_zotero <- function(key) {
  raw_text <- httr::GET(url = sprintf('https://api.zotero.org/users/%i/items', get_zotero_id()),
                        httr::add_headers('Zotero-API-Key' = get_zotero_key()),
                        query = list(v = 3,
                                     itemKey = key,
                                     format = 'biblatex'))

  bibtex_record <- httr::content(raw_text) %>%
    rawToChar()

  this_name <- stringr::str_extract(bibtex_record, '^\n@[:alnum:]+\\{[A-z0-9_]+') %>%
    stringr::str_extract('\\{[A-z0-9_]+') %>%
    substring(2)

  data.frame(key = key,
             name = this_name,
             record = bibtex_record,
             version = raw_text$headers[['last-modified-version']],
             stringsAsFactors = FALSE)
}

#' Pull New Keys from Zotero
#'
#' Pulls the Biblatex records from Zotero from the list of keys supplied to the
#' function.
#' @param key_list Character vector of keys, such as that produced by `get_zotero_key_list`.
#' @return Data.frame of BibLatex records
#' @importFrom purrr map_df
pull_new_items_from_zotero <- function(key_list) {
  purrr::map_df(key_list,
                retrieve_single_record_from_zotero)
}

#' Build Database
#'
#' Constructs a database from scratch by pulling all availabel items from Zotero.
#' @param override Logical variable indicating what to do if a database is already
#' found. If TRUE, the existing database will be overwritten. IF FALSE, an error
#' will be thrown.
#' @return TRUE if the function creates a database
#' @export
build_database <- function(override = FALSE) {
  # Start by checking for the existence of a Zotero database
  db_file <- get_database_file()
  if (file.exists(db_file) & !override)
    stop('Database already created.  Update or set override == TRUE.')

  # Read in all available keys
  my_dt <- get_zotero_key_list() %>%
    pull_new_items_from_zotero()

  # Create data subdirectory if it does not exist
  if (!file.exists(dirname(db_file)))
    dir.create(dirname(db_file))

  saveRDS(my_dt,db_file)

  invisible(TRUE)
}

#' Get Database Name
#'
#' Returns the name of the zotero2r database.
#' @export
get_database_file <- function() {
  file_name <- sprintf('zotero2r-data-%i.rds', get_zotero_id())
  #data_dir <- file.path(find.package('zotero2r'), 'data')
  data_dir <- file.path(Sys.getenv('HOME'), 'zotero2r_data')
  file.path(data_dir, file_name)
}

#' Load Database
#'
#' Loads the zotero2r database.
#' @export
load_database <- function() {
  db_file <- get_database_file()

  if (!file.exists(db_file))
    stop('zotero2r database file not found.')

  readRDS(db_file)
}

#' Update Zotero Database
#'
#' Updates an existing Zotero database.  Reads in the database and reads in
#' all keys that have been changed since the last update.
#' @importFrom dplyr bind_rows arrange group_by ungroup slice
#' @export
update_zotero_database <- function() {
  my_db <- load_database()
  latest_version <- max(as.integer(my_db$version), na.rm = TRUE)

  new_keys <- get_zotero_key_list(since = latest_version)

  if (length(new_keys) > 0) {
    new_dt <- pull_new_items_from_zotero(new_keys)
    db_file <- get_database_file()

    dplyr::bind_rows(my_db, new_dt) %>%
      dplyr::arrange(desc(version)) %>%
      dplyr::group_by(key) %>%
      dplyr::slice(1) %>%
      dplyr::ungroup() %>%
      saveRDS(db_file)
  } else {
    warning('No new items found.  Zotero2r database already up to date.',
            call. = FALSE)
  }

  invisible(TRUE)
}

#' Read Citations from RMD File
#'
#' Reads a R Markdown file and generates a list of the citations that are contained
#' within it. Note that the function only reads a single Rmd file and does not
#' currently address files that might be linked to in the Rmd file or citations
#' that only appear in a table or figure.
#' @param rmd_file Path to the R Markdown file to be read
#' @return A character vector of citations (excluding the starting at symbol)
#' @importFrom readr read_file
#' @importFrom stringr str_extract_all
read_citations <- function(rmd_file) {
  if (!file.exists(rmd_file))
    stop('Rmd file not found in read_citations.')

  file_text <- readr::read_file(rmd_file)

  # Identify the unique citations that appear in the Rmd file
  citation_list <- remove_yaml(file_text) %>%
    stringr::str_extract_all('@[A-z0-9_]+') %>%
    unlist() %>%
    unique() %>%
    substring(2)  # removes the @ symbol
}

remove_yaml <- function(x) {
  stringr::str_remove(x, '^---[^-]+---')
}

#' Create Bibliography
#'
#' Reads an R Markdown file, identifies the citations contained therein, and
#' produces a BibLaTeX file. The bibliography file is stored in the same directory
#' as the R Markdown file.
#'
#' If any citations are found in the Rmd file that are not found in the existing
#' zotero2r database, an warning message will appear.
#' @param rmd_file The path to the R Markdown file for which a bibliography is to
#' be generated
#' @importFrom dplyr filter setdiff
#' @importFrom readr write_file
#' @export
create_bibliography <- function(rmd_file) {
  citation_list <- read_citations(rmd_file)

  my_db <- load_database()

  to_export <- dplyr::filter(my_db, name %in% citation_list)
  exported_cites <- to_export$name

  unexported <- dplyr::setdiff(citation_list, exported_cites)
  if (length(unexported) > 0)
    sprintf('The following citations appear in the Rmd file but were not found in the Zotero database:\n %s',
            paste(unexported, collapse = '\n')) %>%
      warning(call. = FALSE)

  readr::write_file(paste0(to_export$record, collapse = ''),
                    file.path(dirname(rmd_file), 'zotero2rbiblatex.bib'),
                    append = FALSE)

  invisible(TRUE)
}
kbrevoort/zotero2r documentation built on Nov. 4, 2019, 3:34 p.m.