# data-raw/DATASET.R

library(dplyr)
library(rcordis)
library(tidyr)
library(purrr)
library(readr)
library(dbplyr)
library(glue)
library(stringr)
library(janitor)
library(forcats)
library(lubridate)
library(here)
library(usethis)
library(conflicted)
# common conflicts:
conflict_prefer("here", "here")
conflict_prefer("filter", "dplyr")
# read_csv2 reads remote files through a "handleless" curl::curl connection;
# it is unclear whether that connection can bypass proxy settings

# head --------------------------------------------------------------------
#' Download one CORDIS csv export into `data-raw/`
#'
#' Looks up the source location with `get_source(fpn, what)` and saves it as
#' `data-raw/fp{fpn}-{what}.csv`, with the path resolved by `here()`. The
#' transfer is done by `download.file()` with the "wget" method, quietly.
#'
#' @param fpn Framework programme number; forwarded to `get_source()` and
#'   interpolated into the destination file name.
#' @param what Kind of export to fetch, `"projects"` by default; forwarded to
#'   `get_source()` and interpolated into the destination file name.
#'
#' @return Whatever `download.file()` returns.
#' @export
#'
#' @examples
#' \dontrun{
#' download_from_cordis(7, what = "projects")
#' }
download_from_cordis <- function(fpn, what = "projects") {
  # NOTE(review): get_source() is not defined in this script; presumably it
  # comes from rcordis and returns a URL -- confirm
  src <- get_source(fpn, what)
  dest_csv <- here("data-raw", glue("fp{ fpn }-{ what }.csv"))
  download.file(src, dest_csv, method = "wget", quiet = TRUE)
}

# Read a semicolon-separated csv file with every column kept as character,
# then normalise the column names with clean_names().
#
# filename: path of the file to read
# returns: the table produced by read_csv2(), with cleaned column names
read_wrapper <- function(filename) {
  all_character <- cols(.default = "c")
  raw_tbl <- read_csv2(filename, col_types = all_character)
  clean_names(raw_tbl)
}

# Keep the coordinator-level metadata columns of a projects table and tidy
# them up.
#
# proj_tbl: a table holding (at least) the thirteen columns selected below;
#   the conversions expect character input, as produced by read_wrapper()
# returns: the selected columns with
#   - rcn and id converted with strtoi(),
#   - the "FP7-" / "H2020-" marker removed from call,
#   - framework_programme and status lower-cased,
#   - fp7 status codes reconciled and status turned into a factor led by
#     "signed", "closed", "terminated",
#   - start_date and end_date parsed with ymd(),
#   - coordinator in title case,
#   - a new call_year column holding the first four-digit run found in call
get_coord_meta <- function(proj_tbl) {
  # levels that should come first in the status factor
  status_levels <- c("signed", "closed", "terminated")
  # reconciliation of fp7 statuses; applied one after the other
  fp7_status_map <- c(
    "^ong$" = "signed",
    "^clo$" = "closed",
    "^can$" = "terminated")

  coord_cols <- select(
    proj_tbl,
    rcn, id, acronym, status, call,
    framework_programme, funding_scheme,
    start_date, end_date,
    total_cost, ec_max_contribution,
    coordinator, coordinator_country)

  tidied <- mutate(
    coord_cols,
    # identifiers
    rcn = strtoi(rcn),
    id = strtoi(id),
    # dates
    start_date = ymd(start_date),
    end_date = ymd(end_date),
    # text clean-up
    call = str_remove(call, "FP7-|H2020-"),
    framework_programme = str_to_lower(framework_programme),
    coordinator = str_to_title(coordinator),
    # status: lower-case, reconcile, then relevel
    status = str_replace_all(str_to_lower(status), fp7_status_map),
    status = fct_relevel(status, status_levels))

  # the programme marker is stripped from call before this step, so the
  # digits in "H2020" cannot be picked up as the year
  extract(
    tidied,
    call,
    "call_year",
    "(\\d{4})",
    remove = FALSE,
    convert = TRUE)
}

# main --------------------------------------------------------------------

# refresh the raw csv exports:
# projects and organizations for programmes 1 to 8, reports for 7 and 8 only
walk(1:8, download_from_cordis, what = "projects")
walk(1:8, download_from_cordis, what = "organizations")
walk(7:8, download_from_cordis, what = "reports")

# stack the per-programme files into a single table per kind
proj_raw <- map_df(
  here("data-raw", glue("fp{ 1:8 }-projects.csv")),
  read_wrapper)

org_raw <- map_df(
  here("data-raw", glue("fp{ 1:8 }-organizations.csv")),
  read_wrapper)

## sample dataset
# missing statuses become an explicit factor level, and "fp7" / "h2020" are
# moved to the front of the framework_programme levels
cordis <- mutate(
  get_coord_meta(proj_raw),
  status = fct_explicit_na(status),
  framework_programme = fct_relevel(framework_programme, "fp7", "h2020"))

usethis::use_data(cordis, overwrite = TRUE, compress = "xz")
# zambujo/rcordis documentation built on June 14, 2020, 9:41 p.m.