R/import_sigaps_publi.R

Defines functions import_sigaps_html save_sigaps_in_package

Documented in import_sigaps_html save_sigaps_in_package

#' Import paper classification table from the sigaps website
#'
#' Parses an HTML page, takes the table matched by the XPath `.//table[2]`,
#' and reshapes it into a long table with one row per journal and rank
#' column.
#'
#' The first two rows of the table are treated as headers. For every
#' following row, cell 1 is read as `nlm_id`, cell 4 as `iso_title`, and
#' cells 6, 8, 10, 12 and 14 as the five rank values. The labels of the rank
#' columns (the years) are taken from cells 5 to 9 of the first header row.
#'
#' @param path Passed unchanged to [xml2::read_html()].
#'
#' @return A tibble with the character columns `nlm_id`, `iso_title`, `year`
#'   (the header label of the rank column) and `paper_rank`.
#'
#' @import xml2
#' @import tibble
#' @import tidyr
import_sigaps_html <- function(path) {
  html <- xml2::read_html(path)

  # The table with the papers classifications is the second one
  table_publi <- xml2::xml_find_all(html, ".//table[2]")
  if (length(table_publi) == 0L) {
    stop("No classification table found in the HTML document", call. = FALSE)
  }

  # First header row: cells 5 to 9 carry the labels (years) of the rank columns
  first_header <- xml2::xml_text(
    xml2::xml_find_all(table_publi, xpath = "tr[1]/td")
  )
  rank_labels <- first_header[5:9]
  if (anyNA(rank_labels)) {
    stop(
      "Expected at least 9 cells in the first header row, found ",
      length(first_header),
      call. = FALSE
    )
  }

  # The two first rows are the headers
  articles <- xml2::xml_find_all(table_publi, xpath = "tr[position()>2]")

  # Text of the cell at a given position, for every row that has such a cell
  cell_text <- function(position, node = articles) {
    xml2::xml_text(
      xml2::xml_find_all(node, xpath = paste0("td[", position, "]"))
    )
  }

  columns <- list(
    nlm_id = cell_text(1),
    iso_title = cell_text(4),
    rank1 = cell_text(6),
    rank2 = cell_text(8),
    rank3 = cell_text(10),
    rank4 = cell_text(12),
    rank5 = cell_text(14)
  )

  # Each column is extracted independently, so a row with missing cells would
  # shift the values of the following rows; refuse to build a misaligned table
  if (length(unique(lengths(columns))) != 1L) {
    stop(
      "Rows of the classification table do not all have the expected cells",
      call. = FALSE
    )
  }

  publi_rank_large <- tibble::as_tibble(columns)
  names(publi_rank_large)[3:7] <- rank_labels

  # Stack the five rank columns into year / paper_rank pairs
  tidyr::gather(
    publi_rank_large,
    key = "year", value = "paper_rank",
    -nlm_id, -iso_title
  )
}


#' Write imported data in the package
#'
#' Saves `df_sigaps` with [save()] to `sigaps_publi.rda` inside the `data`
#' directory of the installed `rigaps` package. The object is stored under
#' the name `df_sigaps`.
#'
#' @param df_sigaps Object to save.
#'
#' @return The value of [save()], invisibly `NULL`. Raises an error when the
#'   `data` directory cannot be located or is not writable.
save_sigaps_in_package <- function(df_sigaps) {
  # Find the path to the data directory of the package;
  # system.file() returns "" when the package or the directory is missing
  data_directory_path <- system.file("data", package = "rigaps")
  if (!nzchar(data_directory_path)) {
    stop("Data directory of package 'rigaps' not found")
  }

  # file.access() returns 0 when the requested mode (2 = write) is granted
  if (file.access(data_directory_path, 2) != 0) {
    stop("Data directory of package 'rigaps' is not writable")
  }

  save(df_sigaps, file = file.path(data_directory_path, "sigaps_publi.rda"))
}
jomuller/rigaps documentation built on May 29, 2019, 12:39 p.m.