# This vignette should be edited in the /vignettes folder # Documentation on how to build vignettes in http://r-pkgs.had.co.nz/vignettes.html # To build this vignette run devtools::build_vignettes() # Then rebuild the package CTRL+SHIFT+B so that the vignette is # integrated to the package documentation
library(knitr) # Do not evaluate code chunks below, only display code opts_chunk$set(eval=FALSE) library(dplyr)
Install mariadb server and client on Debian
sudo apt installmariadb-server sudo apt install libmariadbclient-dev
Make sure the tradeflows package is also installed. It contains the database configuration in inst/config/databaseconfig.R Note: this should probably be changed to environment variables.
In bash enter the following commands:
sudo mariadb
Then at the MariaDB prompt:
CREATE USER 'R'@'localhost' IDENTIFIED BY 'localhost'; CREATE USER paul@localhost; create database tradeflows; connect tradeflows; GRANT ALL PRIVILEGES ON tradeflows. * TO R@localhost; GRANT ALL PRIVILEGES ON tradeflows. * TO paul@localhost; quit
Recreating the empty database structure:
# create emtpty database structure for raw codes eutradeflows::createdbstructure(sqlfile = 'raw_comext.sql', dbname = 'tradeflows') # create empty database structure for validated codes eutradeflows::createdbstructure(sqlfile = 'vld_comext.sql', dbname = 'tradeflows')
if(FALSE){ path <- '/tmp/comext_sql_dump/' eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_monthly.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_monthly_template.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_partner.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_product.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_reporter.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_unit_description.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_unit.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_comext_yearly_template.sql.7z')) eutradeflows::loadtabledump('tradeflows', file.path(path,'raw_flow_yearly.sql.7z')) }
The issue was that the metadata was empty for the countries. There was metadata for products however.
if(FALSE){ data_raw_path <- "/tmp/data_raw" # Connect to the database con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows") eutradeflows::createdbstructure(sqlfile = 'raw_comext.sql', dbname = 'tradeflows') tradeharveseter::harvestcomextmetadata(con, data_raw_path) # Issues with partner code lead me to load them separately not needed if everything works well # Write partner codes to the database # tradeharvester::writetxtcodes2db(con, "raw_comext_partner", # file.path(data_raw_path, # "/comext/COMEXT_METADATA/CLASSIFICATIONS_AND_RELATIONS/ENGLISH/PARTNERS.txt"), # columnnames = c("partnercode", "datestart", "dateend", # "partner", "datestart2", "dateend2")) # Disconnect from the database RMariaDB::dbDisconnect(con) }
if(FALSE){ # Conect to the DB con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows") # Extract unique match between each code and their most recent description # Note: the raw data contains repeated codes because the description changes over time. eutradeflows::cleanallcomextcodes(con) # Check for the presence of codes reporter <- tbl(con, "vld_comext_reporter") %>% collect() partner <- tbl(con, 'vld_comext_partner') %>% collect() product <- tbl(con, 'vld_comext_product') %>% collect() unit <- tbl(con, 'vld_comext_unit') %>% collect() unit_description <- tbl(con, 'vld_comext_unit_description') %>% collect() # Disconnect from the DB RMariaDB::dbDisconnect(con) }
See the other vignette on the cleaning procedure.
These methods are outdated and only kept here for historical purposes
The following commands can be used to load mysql dump files: ```{bash eval=FALSE} 7zr e raw_comext_monthly_201708.sql.7z cat raw_comext_monthly_201708.sql | mysql tradeflows
## Create a dump [how to compress a mysqldump with 7z via a pipe?](https://unix.stackexchange.com/questions/232006/how-to-compress-a-mysql-dump-using-7z-via-a-pipe) First version needed to make sure the archive doesn't exist already other wise it would contain the SQL dump twice. See Paul's question on stackexchange [How to overwrite content in a 7z archive created from a pipe?](https://unix.stackexchange.com/questions/386913/how-to-overwrite-content-in-a-7z-archive-created-from-a-pipe) The answer is to supply a name for the file generated from standard input, within the `-si` option, such as `-sifilename`. ```{bash eval=FALSE} # prefer 7zr to 7z as this is the only one available on the server cd _server_path_/sqldump # Recent data mysqldump tradeflows raw_comext_monthly_201708 | 7zr a -siraw_comext_monthly_201708.sql raw_comext_monthly_201708.sql.7z mysqldump tradeflows raw_comext_monthly_201709 | 7zr a -siraw_comext_monthly_201709.sql raw_comext_monthly_201709.sql.7z # Monthly archive mysqldump tradeflows raw_comext_monthly_2016S1 | 7zr a -si raw_comext_monthly_2016S1.sql.7z # Dump validated codes mysqldump tradeflows vld_comext_product | 7zr a -si vld_comext_product.sql.7z mysqldump tradeflows vld_comext_reporter | 7zr a -si vld_comext_reporter.sql.7z mysqldump tradeflows vld_comext_partner | 7zr a -si vld_comext_partner.sql.7z
Create an R function that dumps a database table to a given folder location. If the archive exists already, do not recreate it, unless force=TRUE. If force=TRUE, delete the archive. If the archive doesn't exist, compress the output of mysqldump and store it in the given folder.
Create an R function that dumps all tables in the trade flows database, each table in a separate .sql.7z file.
Create an R function that dumps all tables containing a given pattern in their names, such as "monthly" or "yearly".
See my blog post on postgresql to see how to install PostgreSQL, create a user called "rdb" and a database called "tradeflows" for use with R.
Connect to the database
Install pgloader on Debian:
sudo apt install pgloader
Login as the postgres user and create a database
sudo su postgres createdb -O paul tradeflows_migrated
Transfer data from the mysql database to postgresql
pgloader mysql://paul@localhost/tradeflows postgresql:///tradeflows_migrated
$ pgloader mysql://paul@localhost/tradeflows postgresql:///tradeflows_migrated 2020-10-12T12:02:13.017000Z LOG pgloader version "3.6.1" 2020-10-12T12:02:13.053000Z LOG Migrating from #<MYSQL-CONNECTION mysql://paul@localhost:3306/tradeflows {1005D87DD3}> 2020-10-12T12:02:13.054000Z LOG Migrating into #<PGSQL-CONNECTION pgsql://paul@UNIX:5432/tradeflows_migrated {1005ED33C3}> 2020-10-12T12:48:20.720000Z LOG report summary reset table name errors rows bytes total time --------------------------------------- --------- --------- --------- -------------- fetch meta data 0 62 0.085s Create Schemas 0 0 0.002s Create SQL Types 0 0 0.009s Create tables 0 34 0.062s Set Table OIDs 0 17 0.012s --------------------------------------- --------- --------- --------- -------------- tradeflows.raw_comext_monthly 0 34549076 2.5 GB 12m43.856s tradeflows.raw_comext_monthly_template 0 0 0.014s tradeflows.raw_comext_partner 0 338 20.5 kB 0.028s tradeflows.raw_comext_reporter 0 29 1.6 kB 0.033s tradeflows.raw_comext_unit_description 0 34 2.6 kB 0.054s tradeflows.raw_comext_yearly_env_impact 0 122689986 8.8 GB 36m23.685s tradeflows.raw_comext_product 0 31215 5.6 MB 2.251s tradeflows.raw_comext_unit 0 5585 179.8 kB 1.148s tradeflows.raw_comext_yearly 0 8027695 589.5 MB 5m2.414s tradeflows.raw_comext_yearly_template 0 0 0.038s tradeflows.vld_comext_partner 0 295 5.2 kB 0.043s tradeflows.vld_comext_product 0 29390 4.0 MB 1.185s tradeflows.vld_comext_unit 0 5585 136.2 kB 0.268s tradeflows.vld_comext_monthly_template 0 0 0.002s tradeflows.vld_comext_priceconversion 0 0 0.007s tradeflows.vld_comext_reporter 0 29 0.3 kB 0.004s tradeflows.vld_comext_unit_description 0 32 1.1 kB 0.006s --------------------------------------- --------- --------- --------- -------------- COPY Threads Completion 0 4 36m23.734s Create Indexes 0 45 1h2m22.680s Index Build Completion 0 45 9m43.159s Reset Sequences 0 0 0.061s Primary Keys 0 0 0.000s Create Foreign Keys 0 0 0.000s Create Triggers 0 0 0.000s Set Search Path 0 1 0.002s Install Comments 0 0 0.000s --------------------------------------- --------- --------- --------- -------------- Total import time ✓ 165339289 11.8 GB 1h48m29.636s
Dump the PostgreSQL schema to a sql file:
pg_dump --schema-only tradeflows_migrated > /tmp/tradeflows_migrated.sql
I edited the file and saved it to two different files for reuse purposes:
raw_comext_psql.sql vld_comext_psql.sql
Now I need to modify R/database_create.R so that it can use the psql files. Or simply make it to a bash file.
Load the raw_comext_psql.sql
database creation file, while connected as user
rdb:
psql -d tradeflows -h localhost -U rdb -f ~/rp/eutradeflows/inst/config/raw_comext_psql.sql
Log into the DB as user rdb and check if the schema and databases were created:
psql -d tradeflows -h localhost -U rdb \dt raw_comext.*;
Check that the tables are empty
select * from raw_comext.product; select * from raw_comext.partner ; select * from raw_comext.product ; select * from raw_comext.reporter; select * from raw_comext.unit; select * from raw_comext.unit_description; select * from raw_comext.yearly; select * from raw_comext.yearly_template ;
Install the RPostgres package so R can communicate with PostGres.
install.packages("RPostgres")
The following is a decompoistion of the function call that harvests metadata:
harvestcomextmetadata(con, data_raw_path)
Which contains calls to the functions:
We decompose transfertxtcodesfolder2db into its components.
if(FALSE){ library(eutradeflows) dir.create("/tmp/data_raw") rawdatafolder <- "/tmp/data_raw" pause = 0 # Download recent comext .txt codes files comextfilestxt <- downloadcomextmetadata(rawdatafolder, comextfolderpath = getOption("comext")["metadatafolder"], extension = ".txt", recentyears = recentyears, pause = pause) comextfoldertxt <- unique(comextfilestxt$folder) # Connect to the database # TODO replace this with a PostgreSQL connector con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows") # Create the database structure # TODO replace this with the PostgreSQL structure createdbstructure(sqlfile = 'raw_comext.sql', dbname = 'tradeflows') # Decompose the call to transfertxtcodesfolder2db rawdatacomextfolder = file.path(rawdatafolder,comextfoldertxt) rawdatacomextfolder <- normalizePath(rawdatacomextfolder) # Write product codes to the database # For information, writetxtcodes2db will only perform the transfer if the table is empty # TODO replace this with a PostgreSQL connector writetxtcodes2db(RMariaDBcon, "raw_comext_product", file.path(rawdatacomextfolder,"CN.txt"), columnnames = c("productcode", "datestart", "dateend", "liben_to_delete", "productdescription", "datestart2", "dateend2")) }
The only function that has a different name is RMySQL::mysqlHasDefault().
It was renamed to RMariaDB::mariadbHasDefault().
All other functions have the same name.
The package name used as a function prefix RMySQL::
can be replaced by RMariaDB::
.
Bash command to replace all RMySQL::
function calls to RMariaDB::
.
# Replace the special function git grep -lz 'RMySQL::mysqlHasDefault'| xargs -0 perl -i'' -pE "s/RMySQL::mysqlHasDefault/RMariaDB::mariadbHasDefault/g" # Replace all other functions git grep -lz 'RMySQL::'| xargs -0 perl -i'' -pE "s/RMySQL::/RMariaDB::/g" # Replace the mistaken dbconnect statement # R> con <- RMySQL::dbConnect(RMySQL::MySQL(), dbname = "test") # has now become # R> con <- RMariaDB::dbConnect(RMariaDB::MySQL(), dbname = "test") # The driver should be RMariaDB::MariaDB() git grep -lz '::MySQL'| xargs -0 perl -i'' -pE "s/::MySQL/::MariaDB/g" # Replace the connection parameter git grep -lz 'RMySQLcon'| xargs -0 perl -i'' -pE "s/RMySQLcon/RMariaDBcon/g"
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.