knitr::opts_chunk$set(echo = FALSE)
knitr::opts_knit$set(root.dir="../..") # file paths are relative to the root of the project directory
library(eutradeflows)
library(dplyr)
library(tidyr)
library(ggplot2)
con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows")

Introduction

The purpose of this document is to update the cache in the form of rds files so the data can be analysed on machines which do not have a database installation.

Trade flows for Finland.

Download from Comext and transfer to DB

See tradeharvester/notebooks/harvest.Rmd, the section on 2020 update on the laptop. involves the use of transfer7zfolder2db, harvestcomextmetadata and eutradeflows::cleanallcomextcodes

From the DB to the cache

Load from database and update metadata cache

Note metadata is loaded first here, since EU country codes are used later in the data query.

# Load partner country codes and names
partner_names <- tbl(con, 'vld_comext_partner') %>% collect()
# Fix character encoding issue in Laos name (see Latex error in bio_imports_main_partner_countries.Rmd)
partner_names$partner[grepl("Lao", partner_names$partner)] <- "Lao"

# Load reporter country codes and names
reporter_names <- tbl(con, 'vld_comext_reporter') %>% collect()
# Load product codes and names
product_names <- tbl(con, 'vld_comext_product') %>% collect()
# Fix character encoding issue in some product names
product_names$productdescription <- gsub("�"," ", product_names$productdescription)
# The issue is not only with this "├" special character but also with those that follow 
# Truncate those to 40 characters
product_names$productdescription <- ifelse(grepl("├",product_names$productdescription),
       substr(product_names$productdescription,1,40),
       product_names$productdescription)


# Save the metadata in rds cache files
saveRDS(partner_names, "data_raw/biosam/partner_names.rds")
saveRDS(reporter_names, "data_raw/biosam/reporter_names.rds")
saveRDS(product_names, "data_raw/biosam/product_names.rds")

# Save extra metadata in csv files
# Product names at the 2 digit level only
product_names_HS2 <- product_names %>% 
    mutate(productcode2d = substr(productcode,1,2)) %>% 
    filter(productcode == productcode2d)
write.csv(product_names_HS2,"data_raw/biosam/product_names_CN2.csv")


# List of EU country codes used to select extra-eu trade only below
eu_codes <- reporter_names$reportercode

Load from database and update data cache

FInland only

Forest products with furniture

yearly_tbl <- tbl(con, 'raw_comext_yearly') %>% 
    # Only Finland
    filter(reportercode %in% c(32L)) %>% 
    select(c("reportercode", "reporteriso", "partnercode", "partneriso", 
             "productcode", "flowcode", "statregime", "unitcode", 
             "period", "tradevalue", "weight", "quantity"))
# Load the yearly table from the database to the memory
system.time(
    finland_fp <- yearly_tbl %>% collect()
)
# user  system elapsed 
# 0.742   0.038   1.319 
# Backup the output of the query
saveRDS(finland_fp, "data_raw/biosam/trade_finland_forest_products.rds")

Products until CN96

yearly_tbl <- tbl(con, 'raw_comext_yearly_env_impact') %>% 
    # Only Finland
    filter(reportercode %in% c(32L)) %>% 
    select(c("reportercode", "reporteriso", "partnercode", "partneriso", 
             "productcode", "flowcode", "statregime", "unitcode", 
             "period", "tradevalue", "weight", "quantity"))
# Load the yearly table from the database to the memory
system.time(
    yearly <- yearly_tbl %>% collect()
)
#  user  system elapsed 
# 4.595   0.400  20.186 
# Backup the output of the query
saveRDS(yearly, "data_raw/biosam/trade_finland_to_CN63.rds")

All countries

yearly_tbl <- tbl(con, 'raw_comext_yearly') %>% 
    select(c("reportercode", "reporteriso", "partnercode", "partneriso", 
             "productcode", "flowcode", "statregime", "unitcode", 
             "period", "tradevalue", "weight", "quantity"))
# Load the yearly table from the database to the memory
system.time(
    yearly <- yearly_tbl %>% collect()
)

# Backup the output of the query
saveRDS(yearly, "data_raw/biosam/forest_trade_all_countries.rds")

Load data from cache

partner_names <- readRDS("data_raw/biosam/partner_names.rds")
reporter_names <- readRDS("data_raw/biosam/reporter_names.rds")
product_names <- readRDS("data_raw/biosam/product_names.rds")
yearly_finland <- readRDS("data_raw/biosam/forest_trade_finland.rds")

Disconnect from the DB

RMariaDB::dbDisconnect(con)


stix-global/eutradeflows documentation built on Nov. 13, 2020, 9:23 p.m.