library(petro.One)

# Query-string field prefixes with no value attached. Every entry except
# `dummy` is a key followed by "=" (the query entry carries the leading "?").
# `dummy` only occupies the first slot of the list.
form_input <- list(
    dummy             = "dummy",
    query             = "?q=",
    peer_reviewed     = "peer_reviewed=",
    published_between = "published_between=",
    from_year         = "from_year=",
    to_year           = "to_year=",
    start             = "start=",
    rows              = "rows=",
    dc_type           = "dc_type="
)

#' Build a OnePetro search URL
#'
#' Assembles `https://www.onepetro.org/search?q=...` followed by the form
#' fields joined with `&`. The fields `peer_reviewed`, `published_between`,
#' `from_year` and `to_year` are always emitted (possibly empty); `start`,
#' `rows` and `dc_type` are emitted only when supplied. The URL is built from
#' the arguments alone and does not read any global object.
#'
#' @param query Search words (character). Words separated by a single space
#'   are joined with `+`.
#' @param start Numeric offset of the first result. Defaults to 0 when `rows`
#'   is given without `start`. Supplying `start` without `rows` is an error.
#' @param from_year,to_year Numeric four-digit years in 1900-2099. Either one
#'   may be given alone; the missing one is emitted empty. Supplying any year
#'   forces `published_between=on`.
#' @param peer_reviewed `TRUE` emits `peer_reviewed=on`; `FALSE` or `NULL`
#'   emits an empty `peer_reviewed=`.
#' @param published_between Value for the `published_between` field; it is
#'   overridden with `"on"` when a year is supplied.
#' @param rows Numeric number of results per page.
#' @param dc_type Document type; one of "conference-paper", "journal-paper",
#'   "media", "general", "presentation", "chapter", "other", "standard".
#' @param how When `"all"` and the query has more than one word, the joined
#'   query is wrapped in double quotes.
#' @return A character string with the search URL. Every single quote in the
#'   result is replaced by a double quote.
make_search_url_1 <- function(query = NULL, start = NULL, from_year = NULL,
                            peer_reviewed = NULL,
                            published_between = NULL,
                            rows = NULL,
                            to_year = NULL,
                            dc_type = NULL,
                            how = "any") {

    website <- "https://www.onepetro.org"

    # paging: `rows` alone implies start = 0; both must be numeric
    if (!is.null(start) || !is.null(rows)) {
        if (is.null(start)) start <- 0
        stopifnot(is.numeric(start), is.numeric(rows))
    }

    if (is.null(query)) {
        stop("search words not provided")
    }
    split_query <- unlist(strsplit(query, " "))
    if (length(split_query) > 1) {
        query <- paste(split_query, collapse = "+")
        # quoted with ' here; all quotes become " in the final step
        if (isTRUE(how == "all")) query <- paste0("'", query, "'")
    }

    if (!is.null(from_year) || !is.null(to_year)) {
        # anchored so that only exactly four digits, 1900-2099, are accepted
        year_pattern <- "^(?:19|20)[0-9]{2}$"
        # validate only the years actually supplied, so one may be omitted
        for (year in list(from_year, to_year)) {
            if (is.null(year)) next
            stopifnot(is.numeric(year))
            if (length(year) != 1L ||
                !grepl(year_pattern, year, perl = TRUE)) stop("year not valid")
        }
        # a valid year turns on published_between
        published_between <- "on"
        if (is.null(from_year)) from_year <- ""
        if (is.null(to_year)) to_year <- ""
    }

    # peer_reviewed=on if TRUE; blank if unselected or FALSE
    if (is.null(peer_reviewed)) {
        peer_reviewed <- ""
    } else {
        reviewed_flag <- as.logical(peer_reviewed)
        if (length(reviewed_flag) != 1L || is.na(reviewed_flag)) {
            stop("peer_reviewed must be TRUE or FALSE")
        }
        peer_reviewed <- if (reviewed_flag) "on" else ""
    }

    # document type
    if (!is.null(dc_type)) {
        valid_options <- c("conference-paper", "journal-paper",
                           "media", "general", "presentation", "chapter",
                           "other", "standard")
        # stop if it is not exactly one of the options
        if (length(dc_type) != 1L || !dc_type %in% valid_options) {
            msg <- sprintf("Option unknown. It must be one of [ %s ]",
                           paste(valid_options, collapse = ", "))
            stop(msg)
        }
    }

    # avoid scientific notation (e.g. "1e+05") in the paging fields
    plain_number <- function(x) format(x, scientific = FALSE, trim = TRUE)

    # fields that are always present, even when empty
    # (paste0() with a NULL value yields just the "key=" prefix)
    fixed_fields <- c(
        paste0("peer_reviewed=", peer_reviewed),
        paste0("published_between=", published_between),
        paste0("from_year=", from_year),
        paste0("to_year=", to_year)
    )
    # fields that are appended only when a value was supplied
    optional_fields <- c(
        if (!is.null(start)) paste0("start=", plain_number(start)),
        if (!is.null(rows)) paste0("rows=", plain_number(rows)),
        if (!is.null(dc_type)) paste0("dc_type=", dc_type)
    )

    q_url <- paste0(
        website, "/search", "?q=", query, "&",
        paste(c(fixed_fields, optional_fields), collapse = "&")
    )

    # normalise every single quote to a double quote
    gsub("'", '"', q_url, fixed = TRUE)
}

# Demo: build the search URL for a three-word query with how = "all" and show
# it twice: print() displays the R string with its quotes escaped, cat() writes
# the raw characters.
url <- make_search_url_1(query = "flowing gradient survey", how = "all")
print(url)
cat(url)
# onepetro_page_to_dataframe(url)
# Same call and output repeated; the result is identical to the one above.
url <- make_search_url_1(query = "flowing gradient survey", how = "all")
print(url)
cat(url)
# Same query through make_search_url(), then pass the URL to
# onepetro_page_to_dataframe(). Neither function is defined in this file;
# presumably both come from petro.One (loaded at the top) -- TODO confirm.
# NOTE(review): onepetro_page_to_dataframe() presumably fetches the page over
# the network -- confirm before running this offline.
url <- make_search_url(query = "flowing gradient survey", how = "all")
onepetro_page_to_dataframe(url)


# f0nzie/petro.One documentation built on May 29, 2019, 12:05 a.m.