library(knitr) opts_knit$set(root.dir="..") # file paths are relative to the root of the project directory library(tradeflows) library(eutradeflows) library(dplyr) library(tidyr) library(ggplot2) # Connect to the database con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows")
Monthly data from the last 3 years are needed, people don’t like to work with flat files.
Data format: Declarant, partner, product codes, flow, with time columns
In the long term there should be a simple interface to search the SQL database.
tbl(con, "vld_comext_reporter") %>% collect() %>% filter(grepl("Kingdom", reporter)) # UK reportercode is 6 tbl(con, "vld_comext_partner") %>% collect() %>% filter(grepl("Indonesia",partner)) # Indonesia partnercode is 700 # recent data wpukind_r <- tbl(con, "raw_comext_monthly_201709") %>% filter(reportercode == 6 & partnercode == 700) %>% addproreppar2tbl(con, .) %>% # show_query() %>% collect() # archive data wpukind_a <- tbl(con, "raw_comext_monthly_2016S1") %>% filter(reportercode == 6 & partnercode == 700) %>% addproreppar2tbl(con, .) %>% collect() # bind archive and recent data, keep only imports wpukind <- rbind(wpukind_a, wpukind_r) %>% filter(flowcode == 1) # keep only product 44 wpukind44 <- wpukind %>% filter(productcode == 44) wpukind44182080 <- wpukind %>% filter(productcode == 44182080) # Keep only furniture product 94 wpukind94 <- wpukind %>% filter(productcode == 94) wpukind94036090 <- wpukind %>% filter(productcode == 94036090) write.csv(wpukind44, "/tmp/comext/ukindonesia44.csv") write.csv(wpukind94, "/tmp/comext/ukindonesia94.csv") # 2 highest traded products at the 8 digit level write.csv(wpukind44182080, "/tmp/comext/ukindonesia44182080.csv") write.csv(wpukind94036090, "/tmp/comext/ukindonesia94036090.csv") # keep only product with highest import in value over the period # Calculate highest import value wpukind %>% group_by(productcode) %>% summarise(tradevalue = sum(tradevalue)) %>% arrange(desc(tradevalue)) # Wood products ggplot(wpukind44, aes(x = as.factor(period), y = tradevalue)) + geom_point() + ylab("Trade value in 1000€") lubridate::ymd(c("201001"))
In short: statregime should be included, otherwise there are duplications of the trade flows. Monthly data from the last 3 years are needed, Data format: Declarant, partner, product codes, flow, with time columns
productanalysed <- "44071091" # Load recent montly raw data for one product dtfr <- tbl(con, "raw_comext_monthly_201709") %>% filter(productcode == productanalysed) %>% # Add quantity units eutradeflows::addunit2tbl(con, maintbl = ., tableunit = "vld_comext_unit") %>% # Add productdescription, reporter and partner addproreppar2tbl(con,.) %>% collect() dtfr %>% select(productcode, reportercode, partnercode, flowcode, unitcode, period, quantity, productdescription, reporter, partner, statregime) %>% group_by(period) %>% # Create a unique id (optional) # unite(id, reportercode, partnercode, productcode, flowcode, statregime, # remove = FALSE) %>% spread(period, quantity) %>% # Move description to the back select(-productdescription, productdescription) %>% write.csv("/tmp/comext/raw.csv", row.names = FALSE)
productanalysed <- "44071190" #"44071091" periodstart <- (as.numeric(format(Sys.time(), "%Y")) - 3) * 100 tableread <- "vld_comext_monthly" # Load recent montly raw data for one product dtf <- tbl(con, tableread) %>% filter(productcode == productanalysed & period > periodstart) %>% # Add productdescription, reporter and partner addproreppar2tbl(con,.) %>% collect() %>% # Add flow information, import export, # cannot be done in addproreppar2tbl because the little data frame # is not present in the database left_join(data_frame(flow = c("import", "export"), flowcode = 1:2), by = "flowcode") %>% mutate(quantity = round(quantity), # Cut long partner description partner = substr(partner, 0, 40), # Replace empty partner by the partner code partner = ifelse(is.na(partner), partnercode, partner), # Rename Germany so that it's consistent with partner name reporter = if_else(grepl("Fr Germany",reporter), "Germany", reporter)) csvfile <- "/tmp/comext/vld.csv" # Spread quantity data along the period and write to a csv file dtf %>% # Keep only these columns select(productcode, flow, statregime, flag, reporter, partner, reportercode, partnercode, flowcode, unitcode, period, quantity, productdescription) %>% # Collapse flags present in the given line together group_by(productcode, flowcode, reportercode, partnercode, statregime) %>% mutate(flag = paste(unique(flag), collapse = ", ")) %>% spread(period, quantity) %>% # Move description and code to the last columns select(-productdescription, -reportercode, -partnercode, -flowcode, -unitcode, productdescription, reportercode, partnercode, flowcode, unitcode) %>% arrange(productcode, flowcode, reporter, partner) %>% write.csv(csvfile, row.names = FALSE, na="")
swdpine <- loadflows(con, productanalysed = "44071190", periodstart = 201700, tableread = "vld_comext_monthly") %>% formatflows() # Write the wide format write2csv_spread(swdpine, "/tmp/comext/swdpine_spread.csv") readLines("/tmp/comext/swdpine_spread.csv", 5) # Write the long format write2csv_long(swdpine, "/tmp/comext/swdpine_long.csv") readLines("/tmp/comext/swdpine_long.csv", 5)
tableread <- "vld_comext_monthly" vld <- tbl(con, tableread) %>% distinct(productcode) %>% collect() lapply(vld$productcode, function(p) loadflows(con, p) %>% formatflows() %>% write2csv_spread(paste0("/tmp/",p,".csv"))) message("If there is no quantity, do not export the file") message("Add cleanallcomextcodes(con) to the cleancomext function") # On the server (make this into a function) if(FALSE){ # con <- RMariaDB::dbConnect(RMariaDB::MariaDB(), dbname = "tradeflows") tableread <- "vld_comext_monthly" vld <- tbl(con, tableread) %>% distinct(productcode) %>% collect() lapply(vld$productcode, function(p) loadflows(con, p) %>% formatflows() %>% write2csv_spread(paste0("/mnt/sdb/data_raw/csv/8d/",p,".csv"))) }
productanalysed <- 440712 periodstart <- (as.numeric(format(Sys.time(), "%Y")) - 3) * 100 tableread <- "vld_comext_monthly" # Edit the pattern productpattern <- paste0(productanalysed, "%") dtf <- tbl(con, tableread) %>% filter(productcode %like% productpattern & period > periodstart) %>% addproreppar2tbl(con,.) %>% collect()
Use the function
productanalysed <- 440712 periodstart <- 201600 tableread <- "vld_comext_monthly" # Edit the pattern productpattern <- paste0(productanalysed, "%") dtf <- tbl(con, tableread) %>% filter(productcode %like% productpattern & period > periodstart) %>% group_by(period, reportercode, partnercode, flowcode, unit, statregime) %>% summarise_at(vars(tradevalue, weight, starts_with("quantity")), sum) %>% # Cannot use addproreppar2tbl() because of the error below # addproreppar2tbl(con, .) %>% # Error: `by` can't contain join column `productcode` which is missing from LHS left_join(tbl(con, "vld_comext_reporter"), by = "reportercode") %>% left_join(tbl(con, "vld_comext_partner"), by = "partnercode") %>% collect() %>% mutate(productcode = as.character(productanalysed)) %>% left_join(collect(tbl(con, "vld_comext_product")), by = "productcode")
Write aggregates to a csv file
Use the function
# 4407
# 44
productanalysed <- "44071091" dtfr <- tbl(con, "raw_comext_monthly_201709") %>% filter(productcode == productanalysed) %>% collect() # Spread the data as-is (droping the statregime column) returns an error if(FALSE){ dtfr %>% select(ends_with("code"), period, quantity) %>% spread(period, quantity) } # Error: Duplicate identifiers for rows (907, 908), (933, 934), (723, 724), (744, 745), # There are indeed duplicated rows, except for the statregime which is different dtfr[907:908,] # Creating an additional index as explained in StackOverflow answers # https://stackoverflow.com/a/44511254/2641825 # https://stackoverflow.com/a/43259735 dtfr %>% select(ends_with("code"), period, quantity) %>% group_by(period) %>% mutate(id = 1:n()) %>% spread(period, quantity) # Leads to a lot of row duplication and a lot of empty column values # In fact I needed to keep the statregime column as a variable before the spread
RMariaDB::dbDisconnect(con)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.