# Set the `width` option (line width used for printed output) to 120 columns.
options(width = 120)
# Build the CRAN landing-page URL of a package.
#   package: package name(s); the URL prefix is pasted in front of each one.
# Returns a character vector of URLs.
cran <- function(package) {
    cran_prefix <- 'https://CRAN.R-project.org/package='
    paste0(cran_prefix, package)
}
# Build the rdocumentation.org URL of a function's help page.
#   package: package name(s).
#   funct:   function name(s) inside that package.
# Returns a character vector of URLs of the form
# <root><package>/functions/<funct>/ .
rdoc <- function(package, funct) {
    rdoc_root <- 'https://www.rdocumentation.org/packages/'
    paste0(rdoc_root, package, '/functions/', funct, '/')
}
# Format a markdown inline link: [name](url).
#   name: link text.
#   url:  link target.
# Returns a character vector of markdown links.
lnk_ <- function(name, url) {
    label <- paste0('[', name, ']')
    target <- paste0('(', url, ')')
    paste0(label, target)
}
# Format a `package::funct` reference as two markdown links joined by '::':
# the package name links to its CRAN page (via cran()), the function name
# links to its rdocumentation.org page (via rdoc()).
#   package: package name(s).
#   funct:   function name(s) inside that package.
# Returns a character vector of markdown text.
lnk <- function(package, funct) {
    package_link <- lnk_(package, cran(package))
    function_link <- lnk_(funct, rdoc(package, funct))
    paste0(package_link, '::', function_link)
}
# URL under the Eurostat dissemination API catalogue ('toc/txt', English via
# 'lang=EN'). NOTE(review): 'toc' presumably stands for the table of contents
# of available datasets -- confirm against the Eurostat API documentation.
estat_toc_link <-
    'https://ec.europa.eu/eurostat/api/dissemination/catalogue/toc/txt?lang=EN'

NEW (May 2023):

The package is fully compatible with Eurostat’s new SDMX 2.1 API and no longer relies on Eurostat’s old Bulk Download Facility.

NEW (January 2023):

Use eurodata_codegen, a point-and-click app for rapid and easy generation of richly commented R code, to import a Eurostat dataset or its subset (based on the eurodata::importData() function).

The core API of the eurodata package contains just 6 functions -- 4 for data or metadata imports and 2 for search:

Import functionality:

Search functionality:

NEW (December 2022):

The filters parameter of importData() allows downloading only the selected values of dimensions, instead of downloading the full dataset. See the example close to the end below.

NEW (August 2022) Extra functionality:

See the usage example at the very end below.

Installation

# Install the package with either one of the two commands below.
install.packages('eurodata') # from CRAN
# or
remotes::install_github('alekrutkowski/eurodata') # package 'remotes' needs to be installed

Functionality demo

library(eurodata)

Imports

# Import a dataset by its code and inspect it
x <- importData('nama_10_a10')  # actual dataset
str(x)
head(x,10)  # first 10 rows
# Import the list of datasets and look up the row describing `x`
y <- importDataList()  # metadata
colnames(y)
str(y[y$Code=='nama_10_a10',])  # metadata on x
# NOTE(review): presumably the labels for the codes of the `geo` dimension --
# confirm against the importLabels() documentation
z <- importLabels('geo')
head(z,10)  # first 10 rows

Search

# A trick to display the side-effects:
library(magrittr)
# Write `x` to the console in a form the GitHub markdown parser can render.
# Keeps `x` from character 457 onwards (i.e. drops the html style preamble,
# which the GitHub md parser does not support), prefixes the remainder with
# '<html>' and prints it with cat(). Called for its side effect.
cleanForGH <- function(x) {
    without_preamble <- substr(x, 457, nchar(x))
    cat(paste('<html>', without_preamble), sep='\n')
}
# README-local wrapper around eurodata::browseDataList(): forwards all
# arguments unchanged, then prints the `html` element of the result through
# cleanForGH().
browseDataList <- function(...) {
    browsed <- eurodata::browseDataList(...)
    cleanForGH(browsed$html)
}
# README-local print method for `EurostatDataList` objects.
#   x:              the object to print.
#   SearchCriteria: passed on to eurodata:::tableToHtml() together with the
#                   current time.
#   ...:            accepted for compatibility with print(); not used.
# The generated table is wrapped in a list under the name `html`, handed to
# eurodata:::print.BrowsedEurostatDatasetList(), and whatever that returns is
# printed through cleanForGH().
print.EurostatDataList <- function(x, SearchCriteria, ...) {
    table_html <- eurodata:::tableToHtml(x, Sys.time(), SearchCriteria)
    browsed <- eurodata:::print.BrowsedEurostatDatasetList(list(html=table_html))
    cleanForGH(browsed)
}
# README-local wrapper around eurodata::find(): forwards all arguments
# unchanged and writes the `report` element of the result to the console,
# one element per line.
find <- function(...) {
    found <- eurodata::find(...)
    cat(found$report, sep='\n')
}
# Free-style text search based on the parts of words in the dataset names
# NOTE(review): a leading `-` appears to exclude a term (compare the
# `!grepl('quarterly', ...)` condition further below) -- confirm in the
# find() documentation
find(gdp,main,international,-quarterly)
# Search based on the parts of the dataset codes
find(bop, its)
find(bop,-ybk,its)
# Browsing with explicit logical conditions on the `Dataset name` column
browseDataList(grepl('GDP',`Dataset name`) &
                   grepl('main',`Dataset name`) &
                   grepl('international',`Dataset name`) &
                   !grepl('quarterly',`Dataset name`))
# Browsing with explicit logical conditions on the `Code` column
browseDataList(grepl('bop',Code) & grepl('its',Code))
# Producing a table of datasets which (1) include a dimension `sizeclas`
# (i.e. firm size class) and (2) some data for firms with fewer than 10 employees
# (`sizeclas` code "LT10") and (3) have sectorial data (i.e. include a
# dimension `nace_r2`).
library(magrittr)  # provides `%>%` and `extract2` used below
metab <- importMetabase()
# Step 1: unique codes of the datasets that have a `nace_r2` dimension
codes_with_nace <- metab %>% 
    subset(Dim_name=='nace_r2') %>%
    extract2('Code') %>%
    unique
# Step 2: among those, unique codes of the datasets whose `sizeclas`
# dimension has the value 'LT10'
final_codes <- metab %>%
    subset(Dim_name=='sizeclas' & Dim_val=='LT10' &
               Code %in% codes_with_nace) %>%
    extract2('Code') %>%
    unique
# Step 3: keep only those codes in the dataset list and print the result
importDataList() %>%
    subset(Code %in% final_codes) %>%
    as.EurostatDataList %>%
    # the `SearchCriteria` argument below is optional
    print(SearchCriteria =
              'those including data on firms with fewer than 10 employees and NACE Rev.2 disaggregation') 

New parameter filters

To reduce the download size and time when the full dataset is not needed, e.g.:

# Import only part of 'bop_its6_det': `filters` is a named list whose names
# are dimension names and whose values are the dimension values to keep.
subset_of__bop_its6_det <-
    importData('bop_its6_det',
               # New -- download only subset of available data
               filters = list(geo=c('AT','BG'), # these two countries
                              TIME_PERIOD=2014:2020, # only that period
                              bop_item='SC')) # only "Services: Transport"
# Inspect the structure of the filtered import
str(subset_of__bop_its6_det)

Extras

# Extra helpers called with dataset codes.
# NOTE(review): what exactly they report is not visible here -- see the
# describe() and compare() documentation.
describe('nama_10_gdp')
# NOTE(review): `wide=TRUE` presumably selects a wide output layout -- confirm
describe('nama_10_gdp', wide=TRUE)
# Two dataset codes passed together
compare('nama_10_gdp', 'nama_10_a64')


alekrutkowski/eurodata documentation built on Nov. 20, 2024, 10:17 p.m.