library(tradeflows) library(knitr) opts_knit$set(root.dir="../..") # file paths are relative to the root of the project directory library(dplyr) library(ggplot2) library(tidyr) con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows")
Since the update in comext data structure, unitcode are now part of the main table
"raw_comext_monthly". It is thus not necessary anymore to add unit codes to the main table.
The cleancomext()
function needs to be updated to reflect that.
In 2018 there was an update to the Comext data structure.
The update requires a modification of the database structure and of
the way units are dealt with in the cleaning functions.
The issue lies in the loadcomext1product()
function,
itself calling the eutradeflows::addunit2tbl()
function.
The table already has a unit code, we only need to add the unit description.
# Output of an erroneous run before the change cleancomext("tradeflows") # # Starting to clean on 2018.06.29 at 09:52 # # Loading table definitions from: # /home/debian/R/x86_64-pc-linux-gnu-library/3.4/eutradeflows/config/vld_comext.sql # into the `tradeflows` database. # ERROR 1060 (42S21) at line 104: Duplicate column name 'unitcode'
The lack of data inthe raw codes tables has to be addressed in the harvester package. See ~/R/tradeharvester/docs/harvest.Rmd for details.
See also backup of the validated data below
# Dump codes tables eutradeflows::dumptable("tradeflows", "raw_comext_partner", "/mnt/sdb/public/sqldump/") eutradeflows::dumptable("tradeflows", "raw_comext_product", "/mnt/sdb/public/sqldump/") eutradeflows::dumptable("tradeflows", "raw_comext_reporter", "/mnt/sdb/public/sqldump/") eutradeflows::dumptable("tradeflows", "raw_comext_unit", "/mnt/sdb/public/sqldump/") eutradeflows::dumptable("tradeflows", "raw_comext_unit_description", "/mnt/sdb/public/sqldump/") # Dump main table eutradeflows::dumptable("tradeflows", "raw_comext_monthly", "/mnt/sdb/public/sqldump/") # The main table takes a long time try to load data for one product instead # load data for one product only # Dump by queru SO question # export from the Mysql CLI # $ mysql -e "select * from raw_comext_monthly where productcode = 44071091" tradeflows > swd44071091.sql # The result is not an SQL file # Export using the mysqldump tool # mysqldump --databases tradeflows --tables raw_comext_monthly --where="productcode = 44071091" > swd44071091.sql
library(eutradeflows) # Codes tables loadtabledump("tradeflows", "~/downloads/raw_comext_partner.sql.7z") loadtabledump("tradeflows", "~/downloads/raw_comext_product.sql.7z") loadtabledump("tradeflows", "~/downloads/raw_comext_reporter.sql.7z") loadtabledump("tradeflows", "~/downloads/raw_comext_unit.sql.7z") loadtabledump("tradeflows", "~/downloads/raw_comext_unit_description.sql.7z") # Main table loadtabledump("tradeflows", "~/downloads/raw_comext_monthly.sql.7z")
Now the raw data is in my local database and I can reproduct the error.
cleancomext("tradeflows") # Recreate the error with a function further down the nest dbname = "tradeflows" rawtabletemplate = "raw_comext_monthly_template" vldtabletemplate = "vld_comext_monthly_template" tablewrite = "vld_comext_monthly" tablepriceconversion = "vld_comext_priceconversion" templatecharacters = "template" logfile = paste0('~/public_html/log/validate',format(Sys.Date(), '%Y'),'.txt') # Now replace all RMySQL connection objects with RMariaDB cleancomextmonthly1product(RMariaDBcon = RMariaDBcon, productanalysed = productcode, tablearchive = tablearchive, tablerecent = tablerecent, tablewrite = tablewrite, tablepriceconversion = tablepriceconversion)
sqldumpfolder <- "/mnt/sdb/public/sqldump/" eutradeflows::dumptable("tradeflows", "vld_comext_partner", sqldumpfolder) eutradeflows::dumptable("tradeflows", "vld_comext_product", sqldumpfolder) eutradeflows::dumptable("tradeflows", "vld_comext_reporter", sqldumpfolder) eutradeflows::dumptable("tradeflows", "vld_comext_unit", sqldumpfolder) eutradeflows::dumptable("tradeflows", "vld_comext_unit_description", sqldumpfolder) # Dump main table eutradeflows::dumptable("tradeflows", "vld_comext_monthly", sqldumpfolder)
# Connect to the database con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows") # Load data for product 44079910 # The issue is within that code: dtf <- loadcomext1product(con, productanalysed = "44071091", table = "raw_comext_monthly") # which itself calls `eutradeflows::addunit2tbl` # The problem is that the unit code is already there. head(dtf) unique(dtf$year)
productanalysed <- "44071091" dtfr <- tbl(con, "raw_comext_monthly") %>% filter(productcode == productanalysed) %>% collect()
Units seem to be missing from the database. Trying to reload them directly on the server with the R harvest function below
# I copied the following R commants to the server directly con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows") tradeharvester::harvestcomextmetadata(RMariaDBcon=con, rawdatafolder='/mnt/sdb/public', pause=0)
It worked, then dump the corresponding metadata tables with:
dumptable("tradeflows", "raw_comext_partner", "/mnt/sdb/public/sqldump/") dumptable("tradeflows", "raw_comext_product", "/mnt/sdb/public/sqldump/") dumptable("tradeflows", "raw_comext_reporter", "/mnt/sdb/public/sqldump/") dumptable("tradeflows", "raw_comext_unit", "/mnt/sdb/public/sqldump/") dumptable("tradeflows", "raw_comext_unit_description", "/mnt/sdb/public/sqldump/")
RMariaDB::dbDisconnect(con)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.