R/api-hypothesis.R

Defines functions hypothesis_process_annotations hypothesis_get_annotations hypothesis_search_annotations hypothesis_api

Documented in hypothesis_api hypothesis_get_annotations hypothesis_process_annotations hypothesis_search_annotations

#' @title
#' Hypothes.is API Utilities
#'
#' @description
#' [Hypothes.is](https://web.hypothes.is/) is a tool for annotating anything
#' on the web. I use it to highlight online books and other information on
#' the web in a lightweight way. I use the Hypothesis API to
#' export the annotations and then be able to port that into Notion where I
#' save all my reading for archival purposes.
#'
#' @name api-hypothesis
NULL

#' @describeIn api-hypothesis
#' Build the Hypothesis API client: the parsed API definition together with
#' its operations, configured with the authorization and accept headers
#' @param api_key Hypothesis API key (defaults to the `HYPOTHESIS_API_KEY`
#'   environment variable)
#' @export
hypothesis_api <- function(api_key = Sys.getenv("HYPOTHESIS_API_KEY")) {
    # API definition read from the swagger file located via dev_pkg_inst()
    definition <- rapiclient::get_api(dev_pkg_inst("hypothesis/swagger.json"))

    # Headers handed to rapiclient when the operations are generated
    request_headers <- c(
        "Authorization" = glue("Bearer {api_key}"),
        "Accept" = "application/vnd.hypothesis.v1+json"
    )

    client <- list(api = definition)
    client$ops <- rapiclient::get_operations(
        client$api,
        .headers = request_headers
    )

    add_class(client, "hypothesis_api")
}

#' @describeIn api-hypothesis Perform a single search request for annotations
#' @param wildcard_uri URI pattern to match (for example "https://google.com/*")
#' @param username     hypothes.is username
#' @param limit        number of annotations to return per request (max 200L)
#' @param search_after return only rows created after this time
hypothesis_search_annotations <- function(
    wildcard_uri = NULL,
    username = NULL,
    limit = 200L,
    search_after = NULL,
    api_key = Sys.getenv("HYPOTHESIS_API_KEY")
) {
    # Operations of an API client authenticated with the given key
    operations <- hypothesis_api(api_key = api_key)$ops

    # Results are requested sorted by their `created` field
    response <- operations$Search_for_annotations(
        limit = limit,
        wildcard_uri = wildcard_uri,
        user = glue("acct:{username}@hypothes.is"),
        search_after = search_after,
        sort = "created"
    )

    # Parse the HTTP response body and tag the result
    parsed <- httr::content(response)
    add_class(parsed, "hypothesis_api_response")
}

#' @describeIn api-hypothesis Fetch a defined set of annotations, requesting
#'   page after page until either every matching annotation or `rows_limit`
#'   annotations have been gathered
#' @param rows_limit limit for the number of annotations in total (default Inf)
hypothesis_get_annotations <- function(
    wildcard_uri = NULL,
    username     = NULL,
    limit        = 200L,
    rows_limit   = Inf,
    api_key      = Sys.getenv("HYPOTHESIS_API_KEY")
) {
    # Rows Gathered
    rows_gathered <- 0L
    rows_total <- Inf
    rows <- list()

    # Pagination cursor: `created` value of the last row gathered so far.
    # NULL on the first request so that the search starts from the beginning.
    search_after <- NULL

    # Collect annotations up to limit
    while (rows_gathered < rows_total && rows_gathered < rows_limit) {

        response <- hypothesis_search_annotations(
            wildcard_uri = wildcard_uri,
            username = username,
            limit = limit,
            search_after = search_after,
            api_key = api_key
        )

        # Fail with a clear message instead of a cryptic error in the loop
        # condition when the response does not look like a search result
        if (is.null(response$rows) || is.null(response$total)) {
            stop(
                "Hypothesis search response is missing `rows` or `total`",
                call. = FALSE
            )
        }
        log_trace("Gathered {length(response$rows)} rows")

        rows_total <- response$total
        log_trace("Noted {response$total} total rows")

        # An empty page means there is nothing left to fetch; without this
        # guard the loop would never end when `total` overstates the rows
        if (length(response$rows) == 0L) {
            break
        }

        rows <- append(rows, response$rows)
        rows_gathered <- length(rows)
        log_info("In total, gathered {rows_gathered} rows")

        # Advance the cursor so the next request continues after this page
        # rather than returning the same first page again
        next_cursor <- response$rows[[length(response$rows)]]$created
        if (is.null(next_cursor) || identical(next_cursor, search_after)) {
            # Cursor cannot advance: stop instead of gathering duplicates
            break
        }
        search_after <- next_cursor

    }

    # The last page may overshoot the requested total
    if (length(rows) > rows_limit) {
        rows <- rows[seq_len(rows_limit)]
    }

    # Return all rows
    add_class(rows, "hypothesis_annotations")
}

#' @describeIn api-hypothesis Turn raw annotations into a table with one row
#'   per annotation target, including the quoted (selected) text if any
#' @param annotations results of [hypothesis_get_annotations()]
hypothesis_process_annotations <- function(annotations) {
    assert_that(inherits(annotations, "hypothesis_annotations"))

    # One row per annotation, one column per top-level field
    per_annotation <- tidyr::unnest_wider(
        tibble::tibble(row = annotations),
        row
    )

    # Keep (and rename) only the fields of interest
    trimmed <- dplyr::transmute(
        per_annotation,
        id,
        created_at = created,
        updated_at = updated,
        link = purrr::map_chr(links, "incontext"),
        note_text = text,
        target
    )

    # One row per target, with the target's fields spread into columns
    per_target <- tidyr::unnest_wider(tidyr::unnest(trimmed, target), target)
    per_target <- dplyr::select(per_target, -source)

    # Pull the exact quote out of the "TextQuoteSelector" entry of each
    # selector list; targets without a selector end up as NA
    with_quotes <- dplyr::mutate(
        per_target,
        selected_text = purrr::flatten_chr(
            purrr::map_if(
                purrr::map_if(
                    selector,
                    function(sel) !is.null(sel),
                    function(sel) {
                        quote_selectors <- purrr::keep(
                            sel,
                            function(s) s$type == "TextQuoteSelector"
                        )
                        stringr::str_replace_all(
                            quote_selectors[[1]][["exact"]],
                            "\n",
                            " "
                        )
                    }
                ),
                function(quoted) is.null(quoted),
                function(quoted) NA_character_
            )
        )
    )

    add_class(
        dplyr::select(with_quotes, -selector),
        "hypothesis_processed_annotations"
    )
}

#' @describeIn api-hypothesis Fetch annotations and process them into a
#'   data frame in one step
#' @export
hypothesis_annotations <- function(
    wildcard_uri = NULL,
    username     = NULL,
    limit        = 200L,
    rows_limit   = Inf,
    api_key      = Sys.getenv("HYPOTHESIS_API_KEY")
) {
    # Step 1: gather the raw annotations
    log_info("Getting annotations")
    raw_annotations <- hypothesis_get_annotations(
        wildcard_uri = wildcard_uri,
        username     = username,
        limit        = limit,
        rows_limit   = rows_limit,
        api_key      = api_key
    )

    # Step 2: reshape them into the processed table
    log_info("Processing annotations")
    hypothesis_process_annotations(raw_annotations)
}

#' @describeIn api-hypothesis
#' Process annotations into markdown for pasting
#' into Notion. Each annotation becomes a blockquote of the selected text
#' followed by a link and the note; annotations without selected text (`NA`)
#' are rendered without the blockquote. All entries are joined into a single
#' string separated by blank lines
#' @param processed_annotations results of [hypothesis_process_annotations()]
#'   or [hypothesis_annotations()]
#' @export
hypothesis_annotations_markdown <- function(processed_annotations) {
    assert_that(inherits(processed_annotations,
                         "hypothesis_processed_annotations"))
    log_info("Generating markdown")
    processed_annotations %>%
        tibble::as_tibble() %>%
        dplyr::mutate(
            # pmap_chr() yields a character column rather than a list
            markdown = purrr::pmap_chr(
                list(link, note_text, selected_text),
                function(link, note_text, selected_text) {
                    if (is.na(selected_text)) {
                        # No highlighted text: skip the quote instead of
                        # emitting a literal "> NA"
                        entry <- glue("[Link]({link}). {note_text}")
                    } else {
                        entry <- glue("> {selected_text}\n\n",
                                      "[Link]({link}). {note_text}")
                    }
                    as.character(entry)
                }
            )
        ) %>%
        dplyr::pull(markdown) %>%
        paste(collapse = "\n\n")
}
tjpalanca/tjutils documentation built on Jan. 20, 2021, 2:01 p.m.