# This vignette should be edited in the /vignettes folder
# Documentation on how to build vignettes in http://r-pkgs.had.co.nz/vignettes.html
# To build this vignette run 
devtools::build_vignettes()
# Then rebuild the package CTRL+SHIFT+B so that the vignette is 
# integrated to the package documentation
library(knitr)
# Do not evaluate code chunks below, only display code
opts_chunk$set(eval=FALSE) 
library(dplyr)

Load data into MariaDB

Install a MariaDB database server and client

Install mariadb server and client on Debian

sudo apt installmariadb-server
sudo apt install libmariadbclient-dev

Make sure the tradeflows package is also installed. It contains the database configuration in inst/config/databaseconfig.R Note: this should probably be changed to environment variables.

Create a database

In bash enter the following commands:

sudo mariadb

Then at the MariaDB prompt:

CREATE USER 'R'@'localhost' IDENTIFIED BY 'localhost';
CREATE USER paul@localhost;
create database tradeflows;
connect tradeflows;
GRANT ALL PRIVILEGES ON tradeflows. * TO R@localhost;
GRANT ALL PRIVILEGES ON tradeflows. * TO paul@localhost;
quit

Create a database structure

Recreating the empty database structure:

# create emtpty database structure for raw codes
eutradeflows::createdbstructure(sqlfile = 'raw_comext.sql', dbname = 'tradeflows')
# create empty database structure for validated codes
eutradeflows::createdbstructure(sqlfile = 'vld_comext.sql', dbname = 'tradeflows')

Load table dumps into the database structure

if(FALSE){
    path <- '/tmp/comext_sql_dump/'
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_monthly.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_monthly_template.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_partner.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_product.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_reporter.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_unit_description.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_unit.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_yearly_template.sql.7z'))
    eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_flow_yearly.sql.7z'))
}

harvest metadata

The issue was that the metadata was empty for the countries. There was metadata for products however.

if(FALSE){
    data_raw_path <- "/tmp/data_raw"
    # Connect to the database
    con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows")
    eutradeflows::createdbstructure(sqlfile = 'raw_comext.sql', dbname = 'tradeflows') 
    tradeharveseter::harvestcomextmetadata(con, data_raw_path)

    # Issues with partner code lead me to load them separately not needed if everything works well
    # Write partner codes to the database
    # tradeharvester::writetxtcodes2db(con, "raw_comext_partner",
    #                                  file.path(data_raw_path, 
    #                                            "/comext/COMEXT_METADATA/CLASSIFICATIONS_AND_RELATIONS/ENGLISH/PARTNERS.txt"),
    #                                  columnnames = c("partnercode", "datestart", "dateend",
    #                                                  "partner", "datestart2", "dateend2"))
    # Disconnect from the database
    RMariaDB::dbDisconnect(con)
}

Clean comext codes

if(FALSE){
    # Conect to the DB
    con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows")
    # Extract unique match between each code and their most recent description
    # Note: the raw data contains repeated codes because the description changes over time. 
    eutradeflows::cleanallcomextcodes(con)
    # Check for the presence of codes 
    reporter <- tbl(con, "vld_comext_reporter") %>% collect()    
    partner <- tbl(con, 'vld_comext_partner') %>% collect()
    product <- tbl(con, 'vld_comext_product') %>% collect()
    unit <- tbl(con, 'vld_comext_unit') %>% collect()
    unit_description <- tbl(con, 'vld_comext_unit_description') %>% collect()
    # Disconnect from the DB 
    RMariaDB::dbDisconnect(con)
}

Clean data

See the other vignette on the cleaning procedure.

Load data into MySQL (outdated)

These methods are outdated and only kept here for historical purposes

Load a dump

The following commands can be used to load mysql dump files: ```{bash eval=FALSE} 7zr e raw_comext_monthly_201708.sql.7z cat raw_comext_monthly_201708.sql | mysql tradeflows

## Create a dump
[how to compress a mysqldump with 7z via a pipe?](https://unix.stackexchange.com/questions/232006/how-to-compress-a-mysql-dump-using-7z-via-a-pipe)
First version needed to make sure the archive doesn't exist already 
other wise it would contain the SQL dump twice.
See Paul's question on stackexchange 
[How to overwrite content in a 7z archive created from a pipe?](https://unix.stackexchange.com/questions/386913/how-to-overwrite-content-in-a-7z-archive-created-from-a-pipe)
The answer is to supply a name for the file generated from standard input, 
within the `-si` option, such as `-sifilename`.

```{bash eval=FALSE}
# prefer 7zr to 7z as this is the only one available on the server
cd _server_path_/sqldump

# Recent data
mysqldump tradeflows raw_comext_monthly_201708 | 7zr a -siraw_comext_monthly_201708.sql raw_comext_monthly_201708.sql.7z
mysqldump tradeflows raw_comext_monthly_201709 | 7zr a -siraw_comext_monthly_201709.sql raw_comext_monthly_201709.sql.7z

# Monthly archive
mysqldump tradeflows raw_comext_monthly_2016S1 | 7zr a -si raw_comext_monthly_2016S1.sql.7z

# Dump validated codes
mysqldump tradeflows vld_comext_product | 7zr a -si vld_comext_product.sql.7z
mysqldump tradeflows vld_comext_reporter | 7zr a -si vld_comext_reporter.sql.7z
mysqldump tradeflows vld_comext_partner | 7zr a -si vld_comext_partner.sql.7z

Create an R function that dumps a database table to a given folder location. If the archive exists already, do not recreate it, unless force=TRUE. If force=TRUE, delete the archive. If the archive doesn't exist, compress the output of mysqldump and store it in the given folder.

Create an R function that dumps all tables in the trade flows database, each table in a separate .sql.7z file.

Create an R function that dumps all tables containing a given pattern in their names, such as "monthly" or "yearly".

Load data into PostGres

Create a PostGres database

See my blog post on postgresql to see how to install PostgreSQL, create a user called "rdb" and a database called "tradeflows" for use with R.

Connect to the database

Migrate from MySQL to PostgreSQL

Install pgloader on Debian:

sudo apt install pgloader

Login as the postgres user and create a database

sudo su postgres
createdb -O paul tradeflows_migrated

Transfer data from the mysql database to postgresql

pgloader mysql://paul@localhost/tradeflows postgresql:///tradeflows_migrated

Pgloader log

$ pgloader mysql://paul@localhost/tradeflows postgresql:///tradeflows_migrated
2020-10-12T12:02:13.017000Z LOG pgloader version "3.6.1"
2020-10-12T12:02:13.053000Z LOG Migrating from #<MYSQL-CONNECTION mysql://paul@localhost:3306/tradeflows {1005D87DD3}>
2020-10-12T12:02:13.054000Z LOG Migrating into #<PGSQL-CONNECTION pgsql://paul@UNIX:5432/tradeflows_migrated {1005ED33C3}>
2020-10-12T12:48:20.720000Z LOG report summary reset
                             table name     errors       rows      bytes      total time
---------------------------------------  ---------  ---------  ---------  --------------
                        fetch meta data          0         62                     0.085s
                         Create Schemas          0          0                     0.002s
                       Create SQL Types          0          0                     0.009s
                          Create tables          0         34                     0.062s
                         Set Table OIDs          0         17                     0.012s
---------------------------------------  ---------  ---------  ---------  --------------
          tradeflows.raw_comext_monthly          0   34549076     2.5 GB      12m43.856s
 tradeflows.raw_comext_monthly_template          0          0                     0.014s
          tradeflows.raw_comext_partner          0        338    20.5 kB          0.028s
         tradeflows.raw_comext_reporter          0         29     1.6 kB          0.033s
 tradeflows.raw_comext_unit_description          0         34     2.6 kB          0.054s
tradeflows.raw_comext_yearly_env_impact          0  122689986     8.8 GB      36m23.685s
          tradeflows.raw_comext_product          0      31215     5.6 MB          2.251s
             tradeflows.raw_comext_unit          0       5585   179.8 kB          1.148s
           tradeflows.raw_comext_yearly          0    8027695   589.5 MB        5m2.414s
  tradeflows.raw_comext_yearly_template          0          0                     0.038s
          tradeflows.vld_comext_partner          0        295     5.2 kB          0.043s
          tradeflows.vld_comext_product          0      29390     4.0 MB          1.185s
             tradeflows.vld_comext_unit          0       5585   136.2 kB          0.268s
 tradeflows.vld_comext_monthly_template          0          0                     0.002s
  tradeflows.vld_comext_priceconversion          0          0                     0.007s
         tradeflows.vld_comext_reporter          0         29     0.3 kB          0.004s
 tradeflows.vld_comext_unit_description          0         32     1.1 kB          0.006s
---------------------------------------  ---------  ---------  ---------  --------------
                COPY Threads Completion          0          4                 36m23.734s
                         Create Indexes          0         45                1h2m22.680s
                 Index Build Completion          0         45                  9m43.159s
                        Reset Sequences          0          0                     0.061s
                           Primary Keys          0          0                     0.000s
                    Create Foreign Keys          0          0                     0.000s
                        Create Triggers          0          0                     0.000s
                        Set Search Path          0          1                     0.002s
                       Install Comments          0          0                     0.000s
---------------------------------------  ---------  ---------  ---------  --------------
                      Total import time165339289    11.8 GB    1h48m29.636s

Dump pg schema

Dump the PostgreSQL schema to a sql file:

pg_dump --schema-only tradeflows_migrated > /tmp/tradeflows_migrated.sql

I edited the file and saved it to two different files for reuse purposes:

raw_comext_psql.sql
vld_comext_psql.sql

Now I need to modify R/database_create.R so that it can use the psql files. Or simply make it to a bash file.

Create tables in PostgreSQL

Load the raw_comext_psql.sql database creation file, while connected as user rdb:

psql -d tradeflows -h localhost -U rdb -f ~/rp/eutradeflows/inst/config/raw_comext_psql.sql

Log into the DB as user rdb and check if the schema and databases were created:

psql -d tradeflows -h localhost -U rdb
\dt raw_comext.*;

Check that the tables are empty

select * from raw_comext.product;
select * from raw_comext.partner ;
select * from raw_comext.product ;
select * from raw_comext.reporter;
select * from raw_comext.unit;
select * from raw_comext.unit_description;
select * from raw_comext.yearly;
select * from raw_comext.yearly_template ;

Harvest metadata from Comext

Install the RPostgres package so R can communicate with PostGres.

install.packages("RPostgres")

The following is a decompoistion of the function call that harvests metadata:

harvestcomextmetadata(con, data_raw_path)

Which contains calls to the functions:

  1. downloadcomextmetadata
  2. transfertxtcodesfolder2db

We decompose transfertxtcodesfolder2db into its components.

if(FALSE){

    library(eutradeflows)
    dir.create("/tmp/data_raw")
    rawdatafolder <- "/tmp/data_raw"
    pause = 0
    # Download recent comext .txt codes files
    comextfilestxt <- downloadcomextmetadata(rawdatafolder,
                                             comextfolderpath = getOption("comext")["metadatafolder"],
                                             extension = ".txt",
                                             recentyears = recentyears,
                                             pause = pause)
    comextfoldertxt <- unique(comextfilestxt$folder)

    # Connect to the database
    # TODO replace this with a PostgreSQL connector
    con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows")
    # Create the database structure
    # TODO replace this with the PostgreSQL structure
    createdbstructure(sqlfile = 'raw_comext.sql', dbname = 'tradeflows') 

    # Decompose the call to transfertxtcodesfolder2db
    rawdatacomextfolder = file.path(rawdatafolder,comextfoldertxt)
    rawdatacomextfolder <- normalizePath(rawdatacomextfolder)

    # Write product codes to the database
    # For information, writetxtcodes2db will only perform the transfer if the table is empty
    # TODO replace this with a PostgreSQL connector
    writetxtcodes2db(RMariaDBcon, "raw_comext_product", file.path(rawdatacomextfolder,"CN.txt"),
                     columnnames =  c("productcode", "datestart", "dateend", "liben_to_delete",
                                      "productdescription", "datestart2", "dateend2"))



}

Replace the R database package

Replace RMySQL with RMariaDB

The only function that has a different name is RMySQL::mysqlHasDefault(). It was renamed to RMariaDB::mariadbHasDefault(). All other functions have the same name. The package name used as a function prefix RMySQL:: can be replaced by RMariaDB::.

Bash command to replace all RMySQL:: function calls to RMariaDB::.

# Replace the special function
git grep -lz 'RMySQL::mysqlHasDefault'| xargs -0 perl -i'' -pE "s/RMySQL::mysqlHasDefault/RMariaDB::mariadbHasDefault/g"

# Replace all other functions
git grep -lz 'RMySQL::'| xargs -0 perl -i'' -pE "s/RMySQL::/RMariaDB::/g"

# Replace the mistaken dbconnect statement
# R> con <- RMySQL::dbConnect(RMySQL::MySQL(), dbname = "test")
# has now become
# R> con <- RMariaDB::dbConnect(RMariaDB::MySQL(), dbname = "test")
# The driver should be RMariaDB::MariaDB()
git grep -lz '::MySQL'| xargs -0 perl -i'' -pE "s/::MySQL/::MariaDB/g"

# Replace the connection parameter
git grep -lz 'RMySQLcon'| xargs -0 perl -i'' -pE "s/RMySQLcon/RMariaDBcon/g"


stix-global/eutradeflows documentation built on Nov. 13, 2020, 9:23 p.m.