#' Open Scanned Versions of Potentially Citing Works for Classification
#'
#' \code{classify_citing} opens the Internet Archive's scanned versions of each potentially citing work in a dataframe generated by `find_citing`, highlighting the potential citations
#'
#' @param df A dataframe representing downloaded texts (the "citing works") generated by `find_citing`. It must contain the columns `id`, `cited`, `author`, `date`, and `archive_link`.
#' @param save_dir The directory in which to save the csv file that records the user's classifications. This file will have the name "classify_[object specified as df].csv". The directory is created if it does not already exist.
#'
#' @details
#' `classify_citing` facilitates the process of confirming that the potential citing works returned by `find_citing` actually do contain citations to the cited works.
#' Such hand-checking is necessary because the `find_citing` function casts a broad net for citing works, returning all works that include the cited work's author that were published in or after the cited work's year of publication.
#' When `classify_citing` runs, it opens the Internet Archive's online, scanned version of each citing work (in order by date, then author, then cited work) in the default browser and displays all matches for a cited work.
#' The user can then classify whether the potential citation is a false positive or a duplicate work, or rather an actual citation (perhaps with an additional qualification, such as "disagreeing").
#' As indicated by the prompt, after this determination has been made, the user should enter it and press the [return] key to proceed to the next potential citation.
#' Pressing [return] without entering a classification records nothing for that work.
#'
#' Although classifications can be entered as text (e.g., "false positive", "disagreeing"), a coding scheme such as the following can make the process easier:
#'
#' -1 duplicate source or similar later edition
#' 0 false positive
#' 1 agreeing with cited work or treating cited work as authority
#' 2 disagreeing with cited work
#'
#' The entered classifications are returned as a new variable named `classification` and are saved as a csv file named "classify_[object specified as df].csv" in the directory specified in the `save_dir` argument.
#' If that file already exists when `classify_citing` is called, only the works that do not yet have a classification recorded in it are opened, so a classification session can be interrupted and resumed later.
#'
#' @return A dataframe containing the rows of `df` merged (by `id` and `cited`) with the recorded `classification`, `page`, and `notes`; works that have not been classified have `NA` in these variables.
#'
#' @examples
#' \dontrun{
#' cites_rush <- classify_citing(mentions_rush) %>%
#' filter(classification > 0)
#' }
#'
#'
#' @seealso \code{\link{find_citing}}
#'
#' @importFrom dplyr "%>%" filter arrange left_join select
#' @importFrom readr read_csv
#'
#' @export
classify_citing <- function(df, save_dir = ".") {
  # Capture the caller's expression before `df` is reassigned below. A long
  # expression deparses to several strings, so collapse to get one file name.
  df_label <- paste(deparse(substitute(df)), collapse = "")
  file_name <- file.path(save_dir, paste0("classify_", df_label, ".csv"))

  # Drop a classification left over from an earlier call so the final merge
  # yields a single `classification` column rather than `.x`/`.y` variants.
  if ("classification" %in% names(df)) {
    df <- df %>% dplyr::select(-classification)
  }
  df <- df %>%
    arrange(date, author, cited)

  if (!file.exists(file_name)) {
    # First session: start the log with its header row and review every work.
    if (!dir.exists(save_dir)) {
      dir.create(save_dir, recursive = TRUE)
    }
    cat("id, cited, classification, page, notes", file = file_name, sep = "\n")
    examine_citing(df = df, file = file_name)
  } else {
    # Resumed session: review only the works without a recorded classification.
    already_classified <- suppressMessages(read_csv(file_name))
    pending <- left_join(df, already_classified, by = c("id", "cited")) %>%
      filter(is.na(classification)) %>%
      arrange(date, author, cited)
    examine_citing(df = pending, file = file_name)
  }

  # Re-read the log so classifications entered during this session are included.
  all_classified <- suppressMessages(read_csv(file_name))
  merge(df, all_classified, by = c("id", "cited"), all.x = TRUE)
}
#' Prompt for a Classification of Each Potentially Citing Work
#'
#' Opens each `archive_link` in `df` in the browser, asks the user for a
#' classification, and appends one csv row (`id`, `cited`, classification,
#' page, notes) to `file` for every work that receives a non-empty answer.
#'
#' The page and notes prompts are shown when the classification is a number
#' greater than zero, or when it is text (which cannot be compared with zero);
#' either prompt may be left blank. An empty classification writes nothing.
#'
#' @param df A dataframe with the columns `archive_link`, `id`, and `cited`.
#' @param file Path of the csv file to append to.
#'
#' @return `NULL`, invisibly; called for its side effects.
#'
#' @noRd
examine_citing <- function(df, file) {
  # Quote a field only when it contains a comma, double quote, or line break,
  # so free-text values cannot shift the columns of the csv row.
  csv_field <- function(x) {
    x <- as.character(x)
    needs_quote <- grepl("[\",\r\n]", x)
    x[needs_quote] <- paste0(
      "\"", gsub("\"", "\"\"", x[needs_quote], fixed = TRUE), "\""
    )
    x
  }

  for (i in seq_along(df$archive_link)) {
    utils::browseURL(df$archive_link[i])
    code <- trimws(
      readline(prompt = "Enter classification and press [return] to continue ")
    )
    if (code == "") {
      next
    }

    # Text classifications are allowed, so a failed numeric conversion is
    # expected here and must not reach `if` as NA.
    code_num <- suppressWarnings(as.numeric(code))
    if (is.na(code_num) || code_num > 0) {
      page <- readline(prompt = "Enter page number and press [return] to continue ")
      notes <- readline(prompt = "Enter notes and press [return] to continue to next citation ")
    } else {
      page <- ""
      notes <- ""
    }

    # Convert each value separately: combining with c() would turn a factor
    # `id` or `cited` into its integer code.
    fields <- vapply(
      list(df$id[i], df$cited[i], code, page, notes),
      csv_field,
      character(1)
    )
    cat(paste(fields, collapse = ","), file = file, sep = "\n", append = TRUE)
  }
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.