Eurostat provides several download facilities, we are looking for a bulk download of bilateral trade data.

library(knitr)
opts_knit$set(root.dir="../../..") # file paths are relative to the root of the project directory
library(tradeflows)
library(dplyr)
library(reshape2)

Places to download Eurostat data from

Easy comext

Eurostat interface to the COMEXT bilateral trade data enables the selection of reporting and partner area, product, trade fow and time period. For example Fuel wood Austria

Bulk download

SDMX SOAP and Rest APIs

Eurostat provides a REST API and a SOAP API.

"These SDMX Web Services are a programmatic access to Eurostat data, with the possibility to:

Bulk download of comext data

Yearly data from the 2014S2 folder on eurostat's bulk download facility.

Download and decompress

# Prepare the filename to 
# download all comext trade flows for a given year
year <- 2013
bulkfile <- paste0("nc",year, "52.7z")

Data is downloaded in 7z format. It can be uncompressed in Debian GNU linux with the p7zip program. Use system2 to invoque the decompression utility. Example downloading 2012 data:

download.file(paste0("http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=comext%2F2014S2%2Fdata%2F",bulkfile), 
              destfile = paste0("data-raw/",bulkfile))

# Change to the raw-data directory
curdir <- getwd()  
setwd("data-raw/")

# decompress file and come back to the project root directory
tryCatch(system2("p7zip", args=c("-d", bulkfile)),
    finally = setwd(curdir))

Read .dat file

Comext bulk files are over 400 Mb in size and cannot be read in one slice. One has to extract a slice of interesting product codes first.

# read.table takes more than 10 minutes to run on a laptop
# nc2012 <- read.table("data-raw/nc201252.dat", header = TRUE)
bulkfile <- gsub(".7z", ".dat", bulkfile)
productcode <- 4407

# Change to the raw-data directory
curdir <- getwd()  
setwd("data-raw/")
# grep productcode into a smaller file containing only those characters
system2("grep", args=c(productcode,bulkfile), 
        stdout = paste0(productcode,bulkfile))
# Read the first line of bulkfile to use it as col.names 
header <- readLines(bulkfile, n=1)
header <- strsplit(header,",")[[1]]
setwd(curdir)

# read the data file
nc <- read.table(paste0("data-raw/", productcode, bulkfile),
                     sep=",", col.names = header, as.is=TRUE)
str(nc)
# 'data.frame':    24364 obs. of  9 variables:
#  $ DECLARANT    : chr  "001" "001" "001" "001" ...
#  $ PARTNER      : int  3 3 3 3 3 3 3 3 3 3 ...
#  $ PRODUCT_NC   : chr  "44071015" "44071015" "44071031" "44071038" ...
#  $ FLOW         : int  1 2 1 1 1 2 1 2 1 2 ...
#  $ STAT_REGIME  : int  4 4 4 4 4 4 4 4 4 4 ...
#  $ PERIOD       : int  201252 201252 201252 201252 201252 201252 201252 201252 201252 201252 ...
#  $ VALUE_1000ECU: num  64.8 147.4 126.2 213.8 725.1 ...
#  $ QUANTITY_TON : num  44.2 977.3 179.8 207.6 1142.3 ...
#  $ SUP_QUANTITY : int  91 1042 388 498 3891 506 2 311 4218 192 ...
unique(nc$PRODUCT_NC) 
unique(nc$PRODUCT_NC[grepl("440799", nc$PRODUCT_NC)])

Explore comext bulk data

Stat regime

Definition p 16 There are broadly two approaches, closely linked with customs procedures, used for the compilation of external trade in goods: general trade system and the special trade system .

unique(nc$STAT_REGIME)
nc %>% group_by(STAT_REGIME) %>%
    summarise(n = n())

Period

unique(nc$PERIOD)

Products

Products which do not start with 44 should be removed.

nc %>% group_by(PRODUCT_NC) %>%
    summarise(nblines = n(),
              tradevalue = sum(VALUE_1000ECU),
              weight = sum(QUANTITY_TON),
              quantity = sum(SUP_QUANTITY)) %>%
    mutate(conversion_factor = weight / quantity,
           price = tradevalue / quantity) %>%
    arrange(-nblines) %>% 
    head(10) %>% kable 

Manipulate comtrade data

Rename columns from the bulk download to EFI names

# use the column table 
column_names %>% filter(!is.na(comext)) %>%
    select(comext, efi) %>%kable

nc2 <- renamecolumns(nc,"comext","efi")
str(nc2)

Calculate price and conversion factor

Add information to the Eurostat bulk data

Other tools

Using the SmarterPoland package

Load data from the Eurostat trade database, using the smarterpoland package

The SmarterPoland package is downloading data tables from the bulkdownload listing but these data tables do not contain bilateral trade.

Example use of the function SmarterPoland::getEurostatRCV Download the dataset 'Pupil/Student - teacher ratio and average class size' from eurostat.

tmp <- SmarterPoland::getEurostatRCV(kod = "educ_iste")
head(tmp)

Download a sample dataset containing Sawnwood trade by species (for_swspec). This is downloading from the [Eurostat bulkdownload listing]

swd <- SmarterPoland::getEurostatRCV(kod = "for_swspec")
head(swd)
#   treespec prod_wd indic_fo    unit  geo  time      value
# 1    CONIF    SAWN      EXP THS_EUR   AT 2013  1013150.51
# 2    CONIF    SAWN      EXP THS_EUR   BE 2013   229409.43
# 3    CONIF    SAWN      EXP THS_EUR   BG 2013    41869.04
# 4    CONIF    SAWN      EXP THS_EUR BLEU 2013          NA
# 5    CONIF    SAWN      EXP THS_EUR   BR 2013          NA
# 6    CONIF    SAWN      EXP THS_EUR   CA 2013          NA


unique(swd$geo)
#  [1] AT   BE   BG   BLEU BR   CA   CH   CN   CY   CZ   DE   DK   EE   EL   ES   FI  
# [17] FR   HR   HU   ID   IE   IN   IS   IT   LT   LU   LV   ME   MK   MT   NL   NO  
# [33] PL   PT   RO   RU   SE   SI   SK   TR   UK   US   LI  
# 43 Levels: AT BE BG BLEU BR CA CH CN CY CZ DE DK EE EL ES FI FR HR HU ID IE IN ... US


unique(swd[c("treespec", "indic_fo", "unit")]) %>%
    dcast( unit + indic_fo  ~ treespec, value.var="treespec")
#      unit indic_fo C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
# 1 THS_EUR      EXP C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
# 2 THS_EUR      IMP C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
# 3  THS_M3      EXP C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
# 4  THS_M3      IMP C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
# 5 THS_NAC      EXP C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
# 6 THS_NAC      IMP C_FIR CONIF C_PIN NC_ASH NC_BEE NC_BIR NC_CHE NC_MAP NC_OAK NCONIF
#   NC_POP TOTAL
# 1 NC_POP TOTAL
# 2 NC_POP TOTAL
# 3 NC_POP TOTAL
# 4 NC_POP TOTAL
# 5 NC_POP TOTAL
# 6 NC_POP TOTAL

Try to download EU trade as written on this database http://epp.eurostat.ec.europa.eu/portal/page/portal/international_trade/data/database

trade <- SmarterPoland::getEurostatRCV(kod = "DS-016890")
head(trade)

Source code of the SmarterPoland package

SmarterPoland::getEurostatRCV
# function (kod = "educ_iste") 
# {
#     require(reshape)
#     dat <- getEurostatRaw(kod)
#     dat2 <- t(as.data.frame(strsplit(as.character(dat[, 1]), 
#         split = ",")))
#     cnames <- strsplit(colnames(dat)[1], split = "[,\\\\]")[[1]]
#     colnames(dat2) <- cnames[-length(cnames)]
#     rownames(dat2) <- dat[, 1]
#     rownames(dat) <- dat[, 1]
#     dat3 <- data.frame(dat2, dat[, -1])
#     colnames(dat3) <- c(colnames(dat2), colnames(dat)[-1])
#     dat4 <- melt(dat3, id = cnames[-length(cnames)])
#     colnames(dat4)[ncol(dat4) - 1] = cnames[length(cnames)]
#     dat4
# }
# <environment: namespace:SmarterPoland>

SmarterPoland::getEurostatRaw
# function (kod = "educ_iste") 
# {
#     adres <- paste("http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2F", 
#         kod, ".tsv.gz", sep = "")
#     tfile <- tempfile()
#     download.file(adres, tfile)
#     dat <- read.table(gzfile(tfile), sep = "\t", na.strings = ": ", 
#         header = F, stringsAsFactors = F)
#     unlink(tfile)
#     colnames(dat) <- as.character(dat[1, ])
#     dat <- dat[-1, ]
#     for (i in 2:ncol(dat)) {
#         tmp <- sapply(strsplit(as.character(dat[, i]), split = " "), 
#             `[`, 1)
#         tmp[tmp == ":"] = NA
#         dat[, i] <- as.numeric(tmp)
#     }
#     dat
# }
# <environment: namespace:SmarterPoland>


paul4forest/tradeflows documentation built on Oct. 8, 2019, 10:35 a.m.