R/db-commands.R

Defines functions db_update_parsed_content db_find_unparsed_articles db_insert_article_urls db_count_existing_urls db_find_new_urls db_update_download_status

## Update download status
db_update_download_status <- function(conn, id, status, html) {
    sql_query <- "
        UPDATE blogs.entries SET
            download_status = $2,
            html = $3
        WHERE id = $1
        "
    DBI::dbExecute(conn, sql_query, list(id, status, html))
}

## Find articles that have not been downloaded
db_find_new_urls <- function(con, source_id) {
    sql_query <- paste0(
        "SELECT id, url FROM blogs.entries
        WHERE source_id = '", source_id, "'
        AND download_status is null;"
    )
    DBI::dbSendQuery(con, sql_query)
}

## Count how many of the passed urls already exist in store
db_count_existing_urls <- function(con, urls) {
    sql_count_existing_articles <- paste0(
        "SELECT COUNT (*) FROM blogs.entries
         WHERE url = any(array[",
        stringr::str_c("$", seq_len(length(urls)), collapse = ", "),
        "])"
    )
    count <- DBI::dbGetQuery(con, sql_count_existing_articles, urls)
    count[[1]]
}

## Insert new urls
db_insert_article_urls <- function(con, source_id, urls) {
    sql_insert <- "INSERT INTO blogs.entries
         (source_id, url) VALUES ($1, $2) ON CONFLICT DO NOTHING;"

    for (url in urls) {
        insert_values <- list(source_id, url)
        DBI::dbExecute(con, sql_insert, insert_values)
    }
}

## Get table of downloaded, but not yet parsed articles
db_find_unparsed_articles <- function(con, blog) {
    sql_query <- paste0(
        "SELECT
            id,
            source_id,
            url,
            download_status,
            html
        FROM blogs.entries
        WHERE
            download_status = '200'
            AND text is null
            AND source_id ='", blog, "';"
    )
    DBI::dbSendQuery(con, sql_query)
}

## Insert data parsed from html
db_update_parsed_content <- function(con, entry_id, source_id, content) {
    sql_insert <- "
        UPDATE blogs.entries SET
            source_id = $2,
            author = $3,
            title = $4,
            text = $5,
            created_at = $6,
            tags = $7,
            language = $8,
            links = $9,
            media = $10
        WHERE id = $1
        "
    insert_values <- list(
        entry_id,
        source_id,
        content$author,
        content$title,
        content$text,
        content$date,
        content$tags,
        content$language,
        content$links,
        content$media
    )
    DBI::dbExecute(con, sql_insert, insert_values)
}
digital-geopolitics/dgblogs documentation built on March 22, 2022, 6:40 p.m.