# Setup: attach the packages used in this document and configure knitr.
library(kableExtra) # will load necessary LaTeX packages such as booktabs
library(knitr)
# Do not echo the source code of the chunks in the rendered document
knitr::opts_chunk$set(echo = FALSE)
knitr::opts_knit$set(root.dir="../..") # file paths are relative to the root of the project directory
library(eutradeflows)
library(dplyr) # data manipulation verbs and the %>% pipe used below
library(tidyr) # pivot_wider() and pivot_longer()
library(ggplot2) # time series plots

Introduction

The purpose of this document is to select the main biocommodities imported by EU countries at the CN 2 digit level.

Load data

Load data from rds files (Serialization Interface for R Objects). See the notebook bio_imports_update_cache.Rmd to update the cached data.

# Read the cached metadata tables (partners, reporters and products)
partner_names <- readRDS("data_raw/env_impact/partner_names.rds")
reporter_names <- readRDS("data_raw/env_impact/reporter_names.rds")
product_names <- readRDS("data_raw/env_impact/product_names.rds")

# Keep only the product names at the 2 digit level, i.e. the rows whose
# product code is equal to its own first two characters.
product_names_HS2 <- mutate(product_names,
                            productcode2d = substr(productcode, 1, 2))
product_names_HS2 <- filter(product_names_HS2,
                            productcode == productcode2d)

# Read the cached trade flows and report how long the read takes
system.time(
    yearly <- readRDS("data_raw/env_impact/extra_eu_bio_imports.rds")
)
# 15 seconds for 63 countries
#   user  system elapsed 
# 14.999   0.382  15.389 

# Derive the year from the period (integer division by 100) and the
# 2 digit product code from the first two characters of the product code.
yearly <- mutate(yearly,
                 year = period %/% 100,
                 productcode2d = substr(productcode, 1, 2))

Aggregate

FEATURE: calculate aggregates in the cache document, so this document generates faster? Is this needed?

Extra-EU partners were already selected in the query (see bio_imports_update_cache.Rmd). Here we aggregate trade value and weight by year at the 2-digit product level.

# Yearly trade value and weight for each 2 digit product code.
yearly_agg <- yearly %>%
    group_by(productcode2d, year) %>%
    # Use sum(as.numeric(.)) to avoid integer overflow error
    summarise(tradevalue = sum(as.numeric(tradevalue), na.rm = TRUE),
              weight = sum(as.numeric(weight), na.rm = TRUE),
              # State the resulting grouping explicitly: the result stays
              # grouped by productcode2d, exactly as summarise() did
              # implicitly. Code further down in this document computes
              # max(year) on yearly_agg and gets a per product value because
              # of this grouping.
              .groups = "drop_last")

# Aggregate quantities might be relevant later, but they have to be
# aggregated separately for each unit.
# FEATURE the quantity might be pivoted to wide format along the unit and
# joined with the yearly_agg data frame.
# The productcode2d column is already part of `yearly`, there is no need to
# compute it again here.
yearly_agg_quantity <- yearly %>%
    # Add the unit code, relevant for quantity calculations
    group_by(productcode2d, unitcode, year) %>%
    # Use sum(as.numeric(.)) to avoid integer overflow error
    summarise(quantity = sum(as.numeric(quantity), na.rm = TRUE),
              # Same grouping as the implicit default: the result stays
              # grouped by productcode2d and unitcode.
              .groups = "drop_last")

Overview last 5 years

Trade value in billion euros

Total trade values over the last 5 years, by product chapter

# Table of trade value in billion euros, one row per 2 digit product code
# and one column per year, for the 5 most recent years of each product.
yearly_agg %>%
    group_by(productcode2d) %>%
    # Last 5 years of data
    filter(year > max(year) - 5) %>%
    mutate(tradevalue_b = round(tradevalue / 1e9, 2)) %>%
    left_join(product_names_HS2, by = "productcode2d") %>%
    pivot_wider(id_cols = c("productcode2d", "productdescription"),
                names_from = year,
                values_from = tradevalue_b) %>%
    # Shorten the product description to its first 51 characters
    mutate(productdescription = substr(productdescription, 1, 51)) %>%
    # Note booktabs=TRUE doesn't float with error:
    # LaTeX Warning: Float too large for page by 866.38799pt on input line 265. 
    # knitr::kable(format = 'latex', caption = 'Trade value in billion euros', booktabs=TRUE) %>%
    knitr::kable(format = "markdown",
                 caption = "Trade value in billion euros") %>%
    kableExtra::column_spec(2, width = "30em")

Mass in billion kg

Total mass (weight) over the last 5 years, by product chapter

# Table of weight in billion kg, one row per 2 digit product code and one
# column per year, for the 5 most recent years of each product.
yearly_agg %>% 
    group_by(productcode2d) %>% 
    left_join(product_names_HS2, by="productcode2d") %>% 
    mutate(weight_b = round(weight/1e9,2)) %>% 
    # Last 5 years of data
    filter(year > max(year) - 5) %>% 
    pivot_wider(id_cols = c("productcode2d", "productdescription"), names_from = year, values_from = weight_b) %>% 
    # Shorten the product description to its first 41 characters
    mutate(productdescription = substr(productdescription,1,41)) %>% 
    # Note booktabs=TRUE doesn't float with error:
    # LaTeX Warning: Float too large for page by 866.38799pt on input line 265. 
    # knitr::kable(format = 'latex', caption = 'Mass in billion kg', booktabs=TRUE) %>%
    # The caption describes the weight shown in this table (it used to be a
    # copy of the trade value caption).
    knitr::kable(format = 'markdown', caption = 'Mass in billion kg') %>%
    kableExtra::column_spec(2, width = "30em")

Plot time series

# Long format version of the yearly aggregates, with values in billions.
yearly_agg_long <- yearly_agg %>%
    # Convert both measures to billions before reshaping
    mutate(tradevalue = tradevalue / 1e9,
           weight = weight / 1e9) %>%
    # One row per variable, so that tradevalue and weight can be used as
    # facet variables
    pivot_longer(cols = c(tradevalue, weight),
                 names_to = "variable",
                 values_to = "value")

# Loop over the 2 digit product codes. For each product: write a sub-section
# title, plot the time series of trade value and weight, and display a
# summary table of the last 5 years.
# NOTE(review): the cat() title and the printed kable presumably require the
# chunk option results='asis' to render as markdown. The chunk options are not
# visible here, confirm.
for (this_product2d in unique(yearly_agg_long$productcode2d)){
    # if(this_product2d == "03") break # shorten loop for debug
    df <-  yearly_agg_long %>% 
        filter(productcode2d == this_product2d) 
    # Look up the product description. match() gives the position of the
    # first matching code, or NA when the code is absent from the metadata.
    # This guarantees exactly one description: a missing code used to give a
    # zero length vector, for which cat(sprintf()) silently printed no title,
    # and a duplicated code printed the title several times.
    description_row <- match(this_product2d, product_names_HS2$productcode2d)
    product_description <- if (is.na(description_row)) {
        ""
    } else {
        product_names_HS2$productdescription[[description_row]]
    }
    # Make a sub-section title
    # As explained https://stackoverflow.com/a/36808845/2641825
    cat(sprintf('\n\n## %s %s \n\n',
                this_product2d, substr(product_description,1,63)))

    # Plot time series of trade value and weight
    p <- df %>% 
        ggplot(aes(year, value, color=productcode2d)) +
        geom_point() +
        theme(legend.position = "none") +
        facet_wrap(~variable, scales = 'free', ncol=2) +
        ylab("Tradevalues in billion EUROS and weight in billion kg") +
        ggtitle(paste(this_product2d, product_description))
    print(p)

    # Display summary table of value in billion euros and weight in billion kg
    yearly_agg %>% 
        # Select the product first and the years second, so that max(year)
        # is the last year of this product whether or not yearly_agg is
        # still grouped by productcode2d.
        filter(productcode2d == this_product2d) %>% 
        filter(year > max(year) - 5) %>% 
        mutate(tradevalue_b = round(tradevalue/1e9,3),
               weight_b = round(weight/1e9,3)) %>% 
        select(-tradevalue, -weight) %>% 
        # kable(format.args = list(big.mark = ',')) %>% 
        kable() %>% 
        print()
}
# All on one plot: one panel for each combination of product code and
# variable, laid out in two columns with free scales.
hs2_overview_plot <- yearly_agg_long %>% 
    ggplot(aes(year, value, color=productcode2d)) +
    geom_point() +
    theme(legend.position = "none") +
    facet_wrap(productcode2d~variable, scales = 'free', ncol=2) +
    ylab("Tradevalues in billion EUROS and weight in billion kg")
print(hs2_overview_plot)
# Pass the plot explicitly so that the saved file does not depend on
# whichever plot happens to be the last one displayed.
ggsave("~/downloads/HS2.pdf", plot = hs2_overview_plot, width = 12, height = 20)


stix-global/eutradeflows documentation built on Nov. 13, 2020, 9:23 p.m.