message("you can now develop directly in the inst/tempalte dir, see instruction to compile the pdf in the file there")
library(dplyr)
library(tradeflows)
library(knitr)
library(ggplot2)
options(digits = 4) #  Number of digits in knitr tables
opts_chunk$set(echo=FALSE, message=FALSE)
######### #
# ! This chunk is used for development only, remove it or change eval=FALSE.
# ! When this file is used as a template, place two  `r` in the title.
######### #
# The data frame tfdata will be passed by the report generating function to the template
# Reporter information can be extracted back from tfdata 
# in the template this would have to be the country appearing most in the data frame
reporterinreport <- "China" 
# Test countries: Cameroon, Ghana, Indonesia

message("Trade values are the same in raw flow and validated flow")
message("The table read will have to be changed to validated_flow to access quantities")

# If I use the readdbtbl function to join the 2 tables, the following error will be returned
# Error: x and y don't share the same src. Set copy = TRUE to copy y into x's source (this may be time consuming).
# Therefore I create the conenction object here so that it can be shared between the two tbl() objects.
setdatabaseconfig(silent=TRUE)
db <- getOption("tradeflowsDB")
DBread <- src_mysql(user=db["user"], host=db["host"],
                    password=db["password"], dbname=db["dbname"])
tfdata <- tbl(DBread, "raw_flow_yearly")

# product names itto -------------------------------------------------------- 
message("Discuss with Simo and Janne to change column names in the product work table, rename them as such: (product = name_short, productcode = code) ")
productitto <- tbl(DBread, "product_work") %>%
    filter(nomenclature == "HS12") %>%
    select(product = name_short, productcode = code) 

# Join itto product names --------------------------------------------------
tfdata <- tfdata %>% 
    filter(reporter == reporterinreport) %>% 
    select(year, reporter, partner, partnercode, flow, flag, productcode, tradevalue, quantity) %>%
    left_join(productitto) %>%  # joining late in the pipe, after filter is faster
    collect()

tfsummarised <- tfdata %>%
    group_by(year, reporter, partner, partnercode, flow, product) %>%
    summarise(tradevalue = sum(tradevalue)) %>%
    collect()

# Collect itto product codes
productitto <- tbl(DBread, "product_work") %>%
    filter(nomenclature == "HS12") %>%
    rename(product = name_short, productcode = code) %>%
    collect()
# 100 largest trade flows
largetf <- tfdata %>% filterworldeu28 %>% data.frame() %>% # remove grouping
    arrange(desc(tradevalue)) %>% head(100)

# 10 largest partners for the dataframe currently in use in the report
#' @param dtf data frame of trade flow data
#' @param n number of parnters to select
#' @param product_ character vector of product names, when NULL, query all products
#' @examples
#'\dontrun{
#' largepartners()
#' largepartners(c("LOGS","SAWNWOOD"))
#' }
largepartners <- function(product_=NULL, dtf = tfsummarised, n = 10){
    if (!is.null(product_)){
        dtf <- filter(dtf, product %in% product_) 
    }
    dtf <- dtf %>%
        filterworldeu28() %>% 
        group_by(flow, partner, partnercode) %>% 
        summarise(tradevalue = sum(tradevalue)) %>%
        data.frame() %>% # remove grouping
        arrange(desc(tradevalue)) %>% 
        select(partner, partnercode) %>% unique %>% head(n) # Could add flow here
    return(dtf)
}
#' Return the index of the last common character in a character vector
#' example use 
# blabla <- c("I like this", "I like that")
# lastcommoncharacter(blabla) 
# substring(blabla[1], 1, lastcommoncharacter(blabla) )
# substring(blabla, lastcommoncharacter(blabla) )
#' 
lastcommoncharacter <- function(x){
    xsplit <- strsplit(x, NULL)
    # logsplit[[1]][1] 
    a <- NULL
    n <- 1
    while(sum(a != a[1]) == 0){
            a <- sapply(xsplit,function(x) paste(x[1:n], collapse=""))
            n <- n +1
    }
    return(n-2)
}


#' Product description
#' Use:
#' description(c(440799, 440795))
description <- function(productcodes){
    descr <- productitto %>% 
        filter(productcode %in% productcodes) %>% 
        select(productcode, description) %>%
        mutate(description = stringr::str_trim(description))
        unique
    cat("Common part of the product ")
    n <- lastcommoncharacter(descr$description)
    cat(substring(descr$description[1], 1, n))
    descr$description <- substring(descr$description, n)
    # Individual codes and description
    for(code in descr$productcode){
        cat("\n\n__",code,":__ ", sep="")
        cat(descr$description[descr$productcode == code])
    }
}


message("Try to skip common substring from the description")

if(FALSE){
    descr <- productitto %>% filter(product== "LOGS" ) %>%
        select(productcode, description)
    n <- lastcommoncharacter(descr$description)
    substring(descr$description[1], 1, n)
    substring(descr$description[1], n)
    descrsplit <- strsplit(descr$description,NULL)
    # logsplit[[1]][1] 
    a <- NULL
    n <- 1
    while(sum(a != a[1]) == 0){
            a <- sapply(descrsplit,function(x) paste(x[1:n], collapse=""))
            n <- n +1
    }
    paste(descrsplit[[1]][1:(n-1)], collapse="")
    descr$desctiption <- sapply(descrsplit,function(x) paste(x[(n-1):length(x)], collapse=""))
}

\newpage

Logs

#' @param product_ the itto name of a product
productplot <- function(product_){
    # Prepare this object first otherwise query is very slow
    partnercodes <- largepartners(product_)$partnercode    
    # Data
    dtf <- filter(tfdata, product == product_ & 
                      partnercode %in% partnercodes) %>%
        mutate(partner = stringr::str_wrap(partner, width = 14) )
    p <- ggplot(dtf,
                aes(x = as.numeric(year),
                    y = tradevalue,
                    fill = as.factor(productcode))) +
        geom_bar(stat="identity") +
        ylab("Trade value in  US dollars") +
        theme(legend.position= "bottom") +
        scale_x_continuous(breaks = c(2010,2012)) +
        ggtitle(paste("Largest ", product_, " flows reported by", reporterinreport)) +
        facet_grid(flow + reporter ~ partner, scales="free_y") + 
        guides(fill=guide_legend(nrow=2,byrow=TRUE))
    try(print(p))
    try(description(unique(dtf$productcode)))
}

productplot("LOGS")

\newpage

Sawnwood

productplot("SAWNWOOD")

\newpage

Furniture

productplot("WOODEN FURNITURE")

## Other products groups that could be added for separate plots
## See report on Indonesia's industry
## Furniture
## Wood panels
## Pulp and paper

All products

partnercodes <- largepartners()$partnercode
# filter(tfdata, partner %in% largetf$partner)
ggplot(filter(tfsummarised, partnercode %in% partnercodes),
       aes(x = as.numeric(year), y = tradevalue/1e6,
                       fill = as.factor(product))) +
    geom_bar(stat="identity") +
    ylab("Trade value in million US dollars") +
    theme(legend.position= "bottom") +
    scale_x_continuous(breaks = c(2010,2012)) +
    ggtitle(paste("Largest wood products flows reported by", reporterinreport)) +
    facet_grid(flow + reporter ~ partner) + 
    guides(fill=guide_legend(ncol=1,byrow=TRUE))

message("place country names over 2 lines if necessary")
# http://stackoverflow.com/questions/9052650/ggplot2-splitting-facet-strip-text-into-two-lines
# Using stringr::str_wrap
# stringr::str_wrap("lkj lkj lkjlkj lkj lkj ", width = 20) 
# Or with base functions, without the stringr package
# paste(strwrap("lkj lkj lkjlkj lkj lkj ", width = 20), collapse="\n")

# Optionally 
# Make the same plot with flows reported from those countries
### Trade between `r reporterinreport` and the EU
# EU is a reporter in the database, but not a partner.
ggplot(filter(tfdata, partner == "EU28"),
       aes(x = as.numeric(year), y = tradevalue/1e6,
                       fill = as.factor(product))) +
    geom_bar(stat="identity") +
    scale_x_continuous(breaks = c(2010,2012)) +
    ggtitle("100 Largest wood products import flows reported by the EU with China") +
    facet_grid(flow + reporter ~ partner) + 
    ylab("Trade value in million US dollars") +
    theme(legend.position= "bottom") +
    guides(fill=guide_legend(nrow=4,byrow=TRUE))


EuropeanForestInstitute/tradeflows documentation built on Oct. 7, 2019, 10:57 a.m.