# I tried placing the yaml block after the chunks, but then the pdf is not always generated and an html file is produced instead.
# The pdf is generated when the yaml block is at the top.

# Setup: attach the packages used by this report and set the knitr options.
library(knitr)
opts_knit$set(root.dir="../../..") # file paths are relative to the root of the project directory
# Do not echo the R source of the chunks in the rendered report (default for all chunks)
opts_chunk$set(echo=FALSE)
library(tradeflows)
library(dplyr)
library(ggplot2)
library(reshape2)
# ---------------------------------------------------------------------------------
# ! This chunk is used for development only, keep eval=FALSE.
# ---------------------------------------------------------------------------------
# Test countries: Cameroon, Ghana, Indonesia
# Remind the developer which template location the call below reads from
message("Using the development template in ",
        "inputpath = 'inst/templates/'")

# Instruction to generate the report before building the package
# NOTE(review): this call uses "China", which is not one of the test countries
# listed above (Cameroon, Ghana, Indonesia) — confirm which reporter is intended.
creatediscrepancyreport(440799, "China", inputpath = "inst/templates")

# When a report is generated, tfdata is passed to this template by the report
# generating function; product and reporter information can be extracted back
# from tfdata (the reporter being the country appearing most in the data frame).
# The product and reporter are set by hand here and the data is loaded from
# the database.
# Test countries: Cameroon, Ghana, Indonesia
productcodeinreport <- 440799
reporterinreport <- "Indonesia"
# Number of flows kept when selecting the largest flows of the reporter
numberoflargecountries <- 27
# Yearly raw flows of the product in which the country appears either as
# reporter or as partner
tfdata <- readdbtbl("raw_flow_yearly") %>%
    filter(productcode == productcodeinreport,
           reporter == reporterinreport | partner == reporterinreport) %>%
    collect()
# Extract the metadata (product code, reporter, unit) from tfdata and make sure
# it agrees with productcodeinreport and reporterinreport
metadata <- extractmetadata(tfdata)
stopifnot(metadata$productcode == productcodeinreport,
          metadata$reporter == reporterinreport)
# Shorten the unit label used in the text and in the axis titles
if (metadata$unit == "Volume in cubic meters") {
    metadata$unit <- "cubic meters"
}

# Add the flow reported by the partner to each row, then calculate the
# discrepancies between the two reports
tfdata <- calculatediscrepancies(addpartnerflow(tfdata))

# Select the last year available for which the reporter has quantity data in
# more than half of its trade flows (share of missing quantities below 0.5).
# The completeness filter is applied BEFORE taking the latest year: testing
# `naratio < 0.5 & year == max(year)` in a single filter only ever looks at the
# most recent year and returns nothing when that year is mostly missing.
completeyears <- tfdata %>%
    filter(reporter == reporterinreport) %>%
    group_by(year) %>%
    summarise(naratio = mean(is.na(quantity))) %>%
    filter(naratio < 0.5)
if (nrow(completeyears) == 0) {
    # Fail with an explicit message instead of a cryptic
    # "argument is of length zero" further down
    stop("No year has quantity data for at least half of the trade flows ",
         "reported by ", reporterinreport, call. = FALSE)
}
lastyearreporter <- max(completeyears$year)

yearinreport <- lastyearreporter

# Display number of years available only if other countries reported
# data for more years
if (lastyearreporter != max(tfdata$year)) {
    messageyears <- paste("Data is available until",
                          max(tfdata$year), "for other countries.")
} else {
    messageyears <- ""
}

Abstract

This document presents an overview of validated trade flow data reported for product number `r as.character(productcodeinreport)` by `r reporterinreport` and its trade partners. `r reporterinreport` reported importing from `r length(unique(tfdata$partner[tfdata$reporter==reporterinreport & tfdata$flow=="Import"]))` countries and exporting to `r length(unique(tfdata$partner[tfdata$reporter==reporterinreport & tfdata$flow=="Export"]))` countries between `r min(tfdata$year)` and `r lastyearreporter`. `r messageyears` Trade values are reported in US dollars and quantities in `r metadata$unit`. Discrepancy is equal to the difference between the quantity reported by the trade partners and the quantity reported by `r reporterinreport`. A positive discrepancy means that a trade partner reported a higher quantity; a negative discrepancy means that a trade partner reported a lower quantity in the given year.

# Select the largest import or export flows reported by the reporter in the
# year of the report: sort by decreasing quantity and keep the first
# numberoflargecountries rows.
largestreporter <- tfdata %>%
    filterworldeu28() %>%
    filter(year == yearinreport & reporter == reporterinreport) %>%
    arrange(desc(quantity)) %>%
    head(numberoflargecountries)

# Display latest trade flows as a table
# (the quantity discrepancy column discrq is renamed for display; the header
# was previously misspelled "dicrepancy_quantity")
largestreporter %>%
    select(year, reporter, partner, flow, flag, tradevalue, quantity,
           discrepancy_quantity = discrq) %>%
    kable(caption = paste(numberoflargecountries,
                          "largest import or export flows reported by",
                          reporterinreport, "in", yearinreport))

\newpage

Comparison between trade flows reported by `r reporterinreport` and its main partners

# Mark each row as reported by the reporter ("") or by one of its partners,
# and switch import and export for the partners so that both sides describe
# the flow from the reporter's point of view.
# NOTE(review): assumes flow is a character column — confirm.
tfsum <- tfdata %>%
    mutate(isreporter = ifelse(reporter == reporterinreport,
                               "",
                               "partners"),
           # The swap goes through the placeholder "aaaaaaa" so that the
           # second substitution does not undo the first one
           flow = ifelse(isreporter == "partners",
                         gsub("aaaaaaa", "Export",
                              gsub("Export", "Import",
                                   gsub("Import", "aaaaaaa", flow))),
                         flow))

# One row per year and flow, with the number of flows, total trade value and
# total quantity of the reporter next to those of its partners.
# id.vars and value.var are spelled out instead of relying on partial argument
# matching and on dcast guessing the value column.
tfsum %>%
    group_by(year, flow, isreporter) %>%
    summarise(nrow = n(),
              value = sum(tradevalue, na.rm = TRUE),
              quantity = sum(quantity, na.rm = TRUE)) %>%
    melt(id.vars = c("year", "flow", "isreporter")) %>%
    dcast(year + flow ~ isreporter + variable, value.var = "value") %>%
    # paste0: the literals already carry their own spaces, paste() doubled them
    kable(caption = paste0("Number of trade flows (nrow), total trade value and total quantity reported by ", reporterinreport, " (columns 3,4,5) and its trade partners (columns 6,7,8) for the years available in the database."))

\newpage

# Display these trade flows as a plot
# Build the plot data: all years of the flow / partner combinations selected
# in largestreporter, reshaped to long format with one row per year, partner,
# flow and quantity variable (quantityreporter or quantitypartner).
largestreporter <- tfdata %>%
    filterworldeu28() %>%
    # Keep the same combination of flow / country
    filter(paste0(flow, partner) %in%
               paste0(largestreporter$flow,
                      largestreporter$partner)) %>%
    # reshape table for use in the plot
    select(year, partner, flow, quantityreporter, quantitypartner) %>%
    arrange(desc(quantityreporter)) %>%
    # Limits of the error bars. pmin / pmax are vectorised over the two
    # columns, so no rowwise() evaluation is needed.
    mutate(qmin = pmin(quantityreporter, quantitypartner),
           qmax = pmax(quantityreporter, quantitypartner)) %>%
    # (discrq is not selected above, so no discrepancy column is carried over)
    melt(id.vars = c("year", "partner", "flow", "qmin", "qmax")) %>%
    mutate(flowpartner = paste(flow, partner))


# Sum the melted quantities per trade direction and partner over the whole
# period and sort by decreasing total; this gives the order of the facets
largestreporter_arranged <- largestreporter %>%
    group_by(flow, partner) %>%
    summarise(q = sum(value, na.rm = TRUE)) %>%
    mutate(flowpartner = paste(flow, partner)) %>%
    arrange(desc(q))

######################
message("Separation of import export is not working because import and export plots would have different number and facet and it would be a pain to manage plot size create a new column called flowpartner")
# Turn flowpartner into an ordered factor whose levels follow the order
# computed above
largestreporter$flowpartner <- factor(largestreporter$flowpartner,
                                      levels = largestreporter_arranged$flowpartner,
                                      ordered = TRUE)

# Dodged bars of the quantity per year, filled by quantity variable, with one
# facet per flow / partner; the error bars run from qmin to qmax
ggplot(largestreporter, aes(x = year, y = value, fill = variable)) +
    geom_bar(stat = "identity", position = "dodge") +
    geom_errorbar(aes(ymin = qmin, ymax = qmax,
                      width = 0.2, colour = "discrepancy")) +
    facet_wrap(~flowpartner, ncol = 3, scales = "free_y") +
    scale_fill_manual(values = c("chocolate3", "black")) +
    scale_colour_manual(name = "error bars", values = "red") +
    labs(y = metadata$unit,
         title = paste("Major trade flows reported by", reporterinreport,
                       "and its trade partners")) +
    theme_bw() +
    theme(legend.position = "bottom",
          legend.box = "horizontal",
          axis.text.x = element_text(angle = 90, hjust = 1))

\newpage

Relative discrepancy

Relative discrepancy is a ratio of the size of the discrepancy to the total trade volume reported by both partners. It is equal to

$$\frac{(quantitypartner - quantity)}{(quantity + quantitypartner)}$$

When the relative discrepancy is equal to 0, both reporter and partner reported the same value. When the relative discrepancy is equal to 1, the reporter did not report any trade. When the relative discrepancy is equal to -1, the partner did not report any trade. Values in between represent the relative size of the discrepancy. Looking for outliers in the plot means looking for points with a relative discrepancy close to 1 or -1 on the y axis and with a high trade value or high trade quantity on the x axis.

## Trade flows as a point cloud
# NOTE(review): addregion() presumably supplies the regionpartner column used
# to colour the points below — confirm.
tfdata <- addregion(filterworldeu28(tfdata))

# Relative discrepancy of the quantity against the quantity reported by the
# reporter, one panel per flow
tfdata %>%
    filter(reporter == reporterinreport) %>%
    ggplot(aes(x = quantity, y = reldiscrq, color = regionpartner)) +
    geom_point() +
    facet_wrap(~flow, ncol = 2, scales = "free_x") +
    labs(x = paste("Quantity in", metadata$unit),
         y = "Relative discrepancy of the quantity")

cat("\n\n")
# Same plot for the trade value
tfdata %>%
    filter(reporter == reporterinreport) %>%
    ggplot(aes(x = tradevalue, y = reldiscrv, color = regionpartner)) +
    geom_point() +
    facet_wrap(~flow, ncol = 2, scales = "free_x") +
    labs(x = "Trade value in US $",
         y = "Relative discrepancy of the trade value")


EuropeanForestInstitute/tradeflows documentation built on Oct. 7, 2019, 10:57 a.m.