# This vignette should be edited in the vignettes/ folder.
# Documentation on how to build vignettes: http://r-pkgs.had.co.nz/vignettes.html
# To build this vignette, run the command below from the R console. It is
# kept as a comment so that running this setup code does not itself
# trigger a vignette build:
#   devtools::build_vignettes()
# Then rebuild the package (CTRL+SHIFT+B) so that the vignette is
# integrated into the package documentation.
library(knitr)
# Do not evaluate the code chunks below, only display their code
opts_chunk$set(eval = FALSE)
library(dplyr)

Create the database structure

Recreating the empty database structure:

# Create the empty database structure for the raw codes
eutradeflows::createdbstructure(
  sqlfile = "raw_comext.sql",
  dbname = "tradeflows"
)
# Create the empty database structure for the validated codes
eutradeflows::createdbstructure(
  sqlfile = "vld_comext.sql",
  dbname = "tradeflows"
)

Connect to the database

# Open a MariaDB connection to the "tradeflows" database
con <- RMariaDB::dbConnect(
  RMariaDB::MariaDB(),
  dbname = "tradeflows"
)

Load raw comext data and text files into the database

# Transfer the raw code text files from a folder under the home directory
tradeharvester::transfertxtcodesfolder2db(
  con,
  rawdatacomextfolder = "~/R/tradeharvester/data_raw/comext/201707/text/english/"
)

# Transfer the raw code text files from the data folder on the server
tradeharvester::transfertxtcodesfolder2db(
  con,
  rawdatacomextfolder = "/mnt/sdb/data_raw/comext/201710/text/english/"
)

Clean raw data

Prepare product, unit, reporter, partner codes

# Prepare the product, unit, reporter and partner codes
cleanallcomextcodes(con)

# Check the content of the codes:
# display the first 6 lines of every validated `vld` table,
# i.e. every table whose name contains "vld"
vldtables <- Filter(
  function(tablename) grepl("vld", tablename),
  RMariaDB::dbListTables(con)
)
lapply(vldtables, function(tablename) {
  firstrows <- collect(head(tbl(con, tablename)))
  kable(firstrows, caption = tablename)
})

Clean trade flows data

Cleaning is implemented in the tradeflows package; see the help of the function tradeflows::cleancomext() for details.

# Clean the trade flows stored in the "tradeflows" database. The database
# name is passed explicitly, matching the call made by the nightly cron job.
tradeflows::cleancomext("tradeflows")

The cleaning procedure is run each night by a cron job:

# Fields: minute hour day-of-month month day-of-week user command
# Runs every day at 05:00. The "paul" user field presumably means this is a
# system crontab entry (/etc/crontab or /etc/cron.d) — TODO confirm.
# Standard output and errors are appended to a log file named after the
# current date; each % is escaped because cron treats a bare % specially.
0 5 * * * paul Rscript -e "library(tradeflows); cleancomext('tradeflows')" >> ~/log/clean$(date +"\%Y\%m\%d").log 2>&1

Disconnect from the database

# Close the database connection opened earlier in this vignette
RMariaDB::dbDisconnect(conn = con)


stix-global/eutradeflows documentation built on Nov. 13, 2020, 9:23 p.m.