# R/classify_citing.R

# Defines functions examine_citing classify_citing

# Documented in classify_citing

#' Open Scanned Versions of Potentially Citing Works for Classification
#'
#' \code{classify_citing} opens the Internet Archive's scanned versions of each potentially citing work in a dataframe generated by `find_citing`, highlighting the potential citations
#' 
#' @param df A dataframe representing downloaded texts (the "citing works") generated by `find_citing`
#' @param save_dir The directory in which to save the csv file that records the user's classifications.  This file will have the name "classify_[object specified as df].csv".
#'  
#' @details
#' `classify_citing` facilitates the process of confirming that the potential citing works returned by `find_citing` actually do contain citations to the cited works.  
#' Such hand-checking is necessary because the `find_citing` function casts a broad net for citing works, returning all works that include the cited work's author that were published in or after the cited work's year of publication.
#' When `classify_citing` runs, it opens the Internet Archive's online, scanned version of each citing work (in order by date, then author, then cited work) in the default browser and displays all matches for a cited work. 
#' The user can then classify whether the potential citation is a false positive or a duplicate work, or rather an actual citation (perhaps with an additional qualification, such as "disagreeing").
#' As indicated by the prompt, after this determination has been made, the user should enter it and press the [return] key to proceed to the next potential citation.
#' 
#' Although classifications can be entered as text (e.g., "false positive", "disagreeing"), a coding scheme such as the following can make the process easier:
#' 
#'  -1 duplicate source or similar later edition
#'  0  false positive
#'  1  agreeing with cited work or treating cited work as authority
#'  2  disagreeing with cited work 
#' 
#' The entered classifications are returned as a new variable named `classification` and are saved as a csv file named "classify_[object specified as df].csv" in the directory specified in the `save_dir` argument.
#' 
#' @return A dataframe
#'
#' @examples
#' \dontrun{
#' cites_rush <- classify_citing(mentions_rush) %>% 
#'      filter(classification > 0)
#' }
#' 
#' 
#' @seealso \code{\link{find_citing}}
#' 
#' @importFrom dplyr "%>%" filter arrange left_join
#' @importFrom readr read_csv
#'
#' @export

classify_citing <- function(df, save_dir = ".") {
    # The csv is named after the expression passed as `df`. This must be
    # computed before `df` is reassigned below, because substitute() only sees
    # the caller's expression while the argument is still an unevaluated
    # promise. Only the first deparsed line is used so the file name is always
    # a single string, even for a long expression.
    df_label <- deparse(substitute(df), width.cutoff = 500L)[1]
    file_name <- file.path(save_dir, paste0("classify_", df_label, ".csv"))

    # Drop any stale `classification` column up front (in both the new-file and
    # existing-file cases) so the final merge does not produce
    # `classification.x` / `classification.y`. Base subsetting is used because
    # dplyr::select is not among this file's imports.
    if ("classification" %in% names(df)) {
        df <- df[setdiff(names(df), "classification")]
    }
    df <- df %>%
        arrange(date, author, cited)

    if (!file.exists(file_name)) {
        # First run for this object: start the csv with its header row and
        # present every potential citation.
        cat("id, cited, classification, page, notes", file = file_name, sep = "\n")
        pending <- df
    } else {
        # Resuming: only rows without a recorded classification are presented.
        # (`all.x` is an argument of base merge(), not of left_join(), so it is
        # not passed here; a left join already keeps every row of `df`.)
        suppressMessages(df_classified <- read_csv(file_name))
        pending <- left_join(df, df_classified, by = c("id", "cited")) %>%
            filter(is.na(classification)) %>%
            arrange(date, author, cited)
    }
    examine_citing(df = pending, file = file_name)

    # Re-read the csv so classifications entered during this session are
    # included, then attach them to the full (not just the pending) set of rows.
    suppressMessages(df_classified <- read_csv(file_name))
    merge(df, df_classified, by = c("id", "cited"), all.x = TRUE)
}

# Interactively classify each row of `df`, appending the answers to `file`.
#
# Arguments:
#   df    data frame with (at least) the columns `archive_link`, `id`, and
#         `cited`; one row per potential citation.
#   file  path of the csv file to which one line is appended per classified
#         row, in the order id, cited, classification, page, notes.
#
# For each row the link in `archive_link` is opened with utils::browseURL() and
# the user is prompted for a classification. An empty answer skips the row
# (nothing is written). Page and notes are requested unless the classification
# is a number less than or equal to zero; text classifications such as
# "disagreeing" therefore also prompt for page and notes.
#
# Called for its side effects; returns NULL invisibly.
examine_citing <- function(df, file) {
    # Wrap a field in double quotes (doubling embedded quotes) only when it
    # contains a comma, quote, or line break, so free-text notes cannot split
    # a csv row into extra columns. Other fields are written unchanged.
    csv_field <- function(x) {
        x <- as.character(x)
        needs_quotes <- !is.na(x) & grepl("[\",\r\n]", x)
        x[needs_quotes] <- paste0(
            "\"", gsub("\"", "\"\"", x[needs_quotes], fixed = TRUE), "\""
        )
        x
    }

    for (i in seq_along(df$archive_link)) {
        utils::browseURL(df$archive_link[i])
        code <- trimws(readline(prompt="Enter classification and press [return] to continue "))

        # Empty answer: leave this row unclassified and move on.
        if (!nzchar(code)) {
            next
        }

        # as.numeric() yields NA (with a warning) for text classifications;
        # treat NA as "not known to be a non-citation" rather than letting it
        # reach `if`, which would abort the session.
        code_num <- suppressWarnings(as.numeric(code))
        if (is.na(code_num) || code_num > 0) {
            page <- readline(prompt="Enter page number and press [return] to continue ")
            notes <- readline(prompt="Enter notes and press [return] to continue to next citation ")
        } else {
            page <- ""
            notes <- ""
        }

        cat(paste(csv_field(df$id[i]), csv_field(df$cited[i]), csv_field(code),
                  csv_field(page), csv_field(notes), sep = ","),
            file = file, sep = "\n", append = TRUE)
    }
    invisible(NULL)
}
# mariolaespinosa/historicalnetworks documentation built on Feb. 9, 2022, 12:31 p.m.