# This script defines report creation functions
# Each report has specific data requirements,
# Therefore functions fullfillthe data requirements of each report.
#
# createreport() and createreportfromdb() are generic function with no predefined template.
# createcompletenessreport(), creatediscrepancyreport() and createoverviewreport()
# are specific functions with a default template.
# The table below illustrates the number countries the number of products to be expected
# in each report type.
# |Report name |Number of countries | Number of products | function |
# |------------:|:-------------------|--------------------|--------------------------|
# |overview | one | many | createoverviewreport |
# |completeness | many | one | createcompletenessreport |
# |discrepancy | one | one | creatediscrepancyreport |
#' Extract report metadata from trade flow table
#'
#' This can be used to check that the given trade flow data has only
#' one reporter and product or the empty string as expected by some reports.
#' @param tfdata a table of trade flows data
#' @return a list of metadata containing the productcode, reporter and unit elements
#' @export
extractmetadata <- function(tfdata){
# Extract product code
productcodeindata <- unique(tfdata$productcode)
# In cases where there are many products replace this variable by ""
if(length(productcodeindata)>1) productcodeindata <- ""
# Extract reporter
nbyears <- length(unique(tfdata$year))
reporterindata <- tfdata %>% group_by(reporter) %>%
summarise(nrowperyear = n()/nbyears ) %>%
# The one which has far more than one row per year
filter(nrowperyear > 4)
reporterindata <- reporterindata$reporter
# In cases where there are many countries replace this variable by ""
if(length(reporterindata)>1) reporterindata <- ""
# Extract quantity unit
unitindata <- tfdata %>% group_by(unit) %>%
summarise(nrowperyear = n()/nbyears) %>%
# Find the unit appearing most in the data
filter(nrowperyear > 0.2 * max(nrowperyear))
unitindata <- unitindata$unit
return(list(productcode = productcodeindata,
reporter = reporterindata,
unit = unitindata))
}
#' Create data completeness report for comtrade data
#'
#' This function will generate reports for any template that is
#' product specific. Giving information about all world trade
#' flows for one product.
#' @param tfdata a dataframe containing trade flows data that
#' will be passed to R code run by the template
#' @param inputpath path of the template, defaults to package internal path
#' @param template name of the template file, including .Rmd extension
#' @param fileprefix character string at the begining of the generated pdf file name
#' @param productcodeinreport numeric or character string leave this variable empty if there are many products
#' @param reporterinreport character string leave this variable empty if there are many reporters
#' @param outputdir name of the output directory relative to getwd()
#' @param encoding, encoding of the template file. See also iconv
#' The names of encodings and which ones are available are
#' platform-dependent. All R platforms support ""
#' (for the encoding of the current locale), "latin1" and "UTF-8".
#' @examples
#' \dontrun{
#' createreport(NULL, template="allproducts.Rmd", reporterinreport ="China")
#' }
#' @export
createreport <- function(tfdata,
template,
fileprefix = NULL,
filesuffix = NULL,
productcodeinreport = NULL,
reporterinreport = NULL,
inputpath = system.file("templates",
package="tradeflows"),
outputdir = "reports",
encoding = "UTF-8",
toc = TRUE,
keep_tex = FALSE){
# load optional packages
require(ggplot2)
require(reshape2)
require(knitr)
# # Legacy check for colmuns, might be placed in the template
# if (!"productcode" %in% names(tfdata)){
# stop("Rename raw data columns to EFI convention before running this function.")
# }
# Don't show R code in output
knitr::opts_chunk$set(echo=FALSE, warning=FALSE)
# If the path is relative, make it an absolute path
# So that rmarkdown render works properly
if(substr(outputdir, 1,1) != "/"){
outputdir <- file.path(getwd(), outputdir)
}
# legacy check for a unitque product, report should be also generated for many products
# if (length(unique(tfdata$productcode)) != 1){
# message("There should be only one product in the trade flows data frame")
# return(FALSE)
# }
# Create the report file name
filename <- paste0(fileprefix, productcodeinreport,
gsub(" ", "", reporterinreport),
filesuffix, ".pdf")
tryCatch(rmarkdown::render(input = file.path(inputpath, template),
output_format = rmarkdown::pdf_document(keep_tex = keep_tex,
toc = toc),
output_dir = outputdir,
output_file = filename,
encoding = encoding),
finally = print("Finally"))
}
#' Create report from the database
#'
#' For a given vector of product codes, create reports using a given template.
#' @description Create a report from the database
#' @rdname createrproducteport
#' @param template name of the template file
#' @param products a vector of product codes
#' @param ... arguments passed to \code{\link{createreport}()}
#' from \code{createcompletenessreport}()
#' @export
createreportfromdb <- function(tableread,
products = "all",
...){
if (products=="all"){
products <- productsindb(table = tableread)
}
# Loop on products
for (productcodeinreport in products){
message("creating report for product: ", productcodeinreport)
dtf <- readdbproduct(productcodeinreport, tableread)
createreport(tfdata = dtf, ...)
}
}
#' Create reports for all products in the given reporter country
#'
#' The location of the report can be changed by changing the outputdir parameter.
#' See arguments of \code{\link{createreport}()}
#' to determine in which folder the template is located.
#'
#' jfsqlevel determines the product group levels,
#' there are approximatley a dozen of product groups under jfsq level 1
#' and sixty product groups under level 2.
#' Depending on the level choosen, there will be a dozen or sixty titles in the overview report.
#' @param reporter_ character name of a country reporting data
#' @param productcode_ vector of product code, NULL by default to generate the report for all products
#' @param template name of the template file.
#' @param outputdir path where report will be saved, relative to the working directory
#' or an absolute path.
#' @param jfsqlevel integer 1 or 2 the level of jfsq product names to be used, see the internal table classificationitto
#' @param ... further arguments passed to \code{\link{createreport}()}
#' @examples
#'\dontrun{
#' createoverviewreport("China", beginyear = 2010, endyear = 2013, outputdir = "/tmp")
#' createoverviewreport("China", beginyear = 2010, endyear = 2013, inputpath = "inst/templates")
#' }
#' @export
createoverviewreport <- function(reporter_,
productcode_ = NULL,
beginyear = 0, endyear = 9999,
template = "overviewtradevalue.Rmd",
outputdir = "reports/overview",
tableread = "validated_flow_yearly",
jfsqlevel = 2,
fileprefix = gsub("\\..*","",template),
dataonly = FALSE, ...){
message("Trade values are the same in raw flow and validated flow")
message("The table read will have to be changed to validated_flow to access quantities")
# If I use the readdbtbl function to join the 2 tables, the following error will be returned
# Error: x and y don't share the same src. Set copy = TRUE to copy y into x's source (this may be time consuming).
# Therefore I create the connection object here so that it can be shared between the two tbl() objects.
setdatabaseconfig(silent=TRUE)
db <- getOption("tradeflowsDB")
DBread <- src_mysql(user=db["user"], host=db["host"],
password=db["password"], dbname=db["dbname"])
# Load trade flow data --------------------------------------------------------
tfdata <- tbl(DBread, tableread) %>%
filter(reporter == reporter_&
year >= beginyear & year <= endyear) %>%
select(year, period, reporter, reportercode, partner, partnercode,
flow, flag, unit, productcode, tradevalue, quantity) %>%
collect() %>%
# Remove 4 digit level products
filter(nchar(as.character(productcode)) == 6)
# Load itto product names --------------------------------------------------------
message("Load local table from the package")
productitto <- tbl(DBread, "product_work") %>%
select(product = name_short, productcode = code) %>%
distinct()
jfsqproductgroups <- classificationitto %>%
select(productcode = productcodecomtrade,
product = jfsq2name) %>%
distinct()
tfdata <- tfdata %>%
left_join(jfsqproductgroups, by="productcode") %>%
# Cosmetic, shorten a few long itto product name
mutate(product = gsub("OTHER ARTICLES OF PAPER AND PAPERBOARD, READY FOR USE of which: ",
"", product))
# Replace product by jfsq1name if required by the jfsqlevel parameter
if (jfsqlevel == 1){
nrowtfdata <- nrow(tfdata)
jfsqproductgroups <- classificationitto %>%
select(productcode = productcodecomtrade, jfsq1name) %>%
distinct()
tfdata <- tfdata %>%
left_join(jfsqproductgroups, by="productcode")
tfdata <- tfdata %>%
# This replacement of the product column was done
# To avoid painfull refactoring which might not be needed
select(-product) %>%
rename(product = jfsq1name)
if(FALSE){#debug find out that there is a duplicate product code
message("there is an issue with SECONDARY PAPER PRODUCTS and WOOD-BASED PANELS")
bla <- tfdata %>% group_by(productcode) %>%
summarise(n = n())
#bla0 <- bla
bla <- bla %>% mutate(count = n - bla0$n) %>%
arrange(desc(count))
jfsqproductgroups %>% filter(productcode == 441129)
classificationitto %>% filter(productcodecomtrade == 441129)
}
stopifnot(nrowtfdata == nrow(tfdata))
}
# Change productcode to a factor
# do it here, after the merge with jfsqproductgroups
tfdata <- tfdata %>%
mutate(productcode = as.factor(productcode),
year = as.numeric(year))
# Select only productcode
if(!is.null(productcode_)){
tfdata <- tfdata %>% filter(productcode %in% productcode_)
}
# Return the data mostly for development purposes
if(dataonly){
return(tfdata)
}
# Change beginyear and endyear for use in the file name
if(beginyear == 0) beginyear <- ""
if(endyear == 9999) endyear <- ""
# Create the report
createreport(tfdata,
template = template,
outputdir = outputdir,
reporterinreport = reporter_,
fileprefix = fileprefix,
filesuffix = paste0(beginyear, endyear,"jfsq",jfsqlevel),
...)
}
#' Create the completeness report
#'
#' @param productcode_ vector of product codes
#' @param ... arguments passed to \code{\link{createreport}()}
#' @examples
#' \dontrun{
#' createcompletenessreport(440799, beginyear = 2010, endyear = 2011)
#' }
#' @export
createcompletenessreport <- function(productcode_,
beginyear = 0, endyear = 9999,
template = "completeness.Rmd",
outputdir = "reports/completeness",
tableread = "raw_flow_yearly",
toc = TRUE, ...){
#### Load data ####
rawtbl <- readdbtbl(tableread) %>%
# dplyr verbs executed on tbl objects are translated to SQL statements
# have to use this underscore trick because of the non standard evaluation
# see vignette("nse") for more information on this
filter(productcode == productcode_ &
year >= beginyear & year <= endyear)
dtf <- rawtbl %>% collect %>%
# Mysql datatype year is loaded as a character string
# convert the year column to an integer
mutate(year = as.integer(year))
# delete mysqlconnection object, to avoid message of the type:
# "Auto-disconnecting mysql connection (0, 3)"
rm(rawtbl)
# Change beginyear and endyear for use in the file name
if(beginyear == 0) beginyear <- ""
if(endyear == 9999) endyear <- ""
#### Create the report ####
createreport(tfdata = dtf,
template = template,
productcode = productcode_,
outputdir = outputdir,
toc = toc,
fileprefix = "completeness",
filesuffix = paste0(beginyear, endyear),
...)
}
#' Create a discrepancy report
#'
#' @param productcode vector of product codes
#' @param reporter_ one single country name
#' @param ... arguments passed to \code{\link{createreport}()}
#' @examples
#'\dontrun{
#' creatediscrepancyreport(440799, "Cameroon", outputdir = "reports/discrepancies")
#' creatediscrepancyreport(440710, "Thailand", beginyear = 2010, endyear = 2012)
#' }
#' @export
creatediscrepancyreport <- function(productcode_, reporter_,
beginyear = 0, endyear = 9999,
template = "discrepancy.Rmd",
outputdir = "reports/discrepancies",
tableread = "raw_flow_yearly",
toc = FALSE, ...){
dtf <- readdbtbl(tableread) %>%
filter(productcode == productcode_ &
year >= beginyear & year <= endyear &
(reporter == reporter_ | partner == reporter_)) %>%
collect
# Change beginyear and endyear for use in the file name
if(beginyear == 0) beginyear <- ""
if(endyear == 9999) endyear <- ""
createreport(tfdata = dtf,
template = template,
productcode = productcode_,
reporter = reporter_,
outputdir = outputdir,
toc = toc,
fileprefix = "discrepancies",
filesuffix = paste0(beginyear, endyear),
...)
}
#' Product description
#'
#' To be used in a markdown formated document.
#' Use:
#' description(c(440799, 440795))
#' @param productcodes numeric vector of product codes
#' @export
description <- function(productcodes){
descr <- classificationcomtrade$HS %>%
filter(productcode %in% productcodes)
# Individual codes and description remove product code if its
# at the begining of the description
if (sum(as.character(descr$productcode) !=
substring(descr$description, 1, 6))==0){
descr$description <- substring(descr$description,7)
}
for(code in descr$productcode){
cat("\n\n__",code,":__ ", sep="")
cat(descr$description[descr$productcode == code])
}
}
#' Fault tolerant report creation for the server
#'
#' Can be used with lapply to generate reports for a list of countries.
#' @param reporter character containing one reporter name
#' @examples
#' \dontrun{
#' countries <- c("Finland","France")
#' lapply(countries, trytocreateoverviewreports)
#' }
#' @export
trytocreateoverviewreports <- function(reporter,
tableread = "validated_flow_yearly",
outputdir = "reports/overview"){
try(createoverviewreport(reporter, jfsqlevel = 1, template = "overviewquantity.Rmd",
tableread = tableread, outputdir = outputdir))
try(createoverviewreport(reporter, jfsqlevel = 1, template = "overviewtradevalue.Rmd",
tableread = tableread, outputdir = outputdir))
}
#' Generate a list of reports
#' @param inputpath path of the template, defaults to package internal path
#' @param template name of the template file, including .Rmd extension
#' @param outputdir name of the output directory relative to getwd()
#' @param filename name of the output file
#' @param encoding, encoding of the template file
#' @examples
#' \dontrun{
#' countries <- data_frame(reporter = c("Finland", "France"))
#' createcountryindex(countries)
#' }
#' @export
createcountryindex <- function(countries,
template = "countryindex.Rmd",
inputpath = system.file("templates",
package="tradeflows"),
outputdir = "reports/overview",
filename = "countryindex.html",
encoding = "UTF-8"){
# This doesn't check if files realy exist
rmarkdown::render(input = file.path(inputpath, template),
output_format = rmarkdown::html_document(),
output_dir = outputdir,
output_file = filename,
encoding = encoding)
}
#' Create overview reports for all countries available
#'
#' @param tableread character name of a database table
#' @param path where reports will be located
#' @examples
#' \dontrun{
#' createalloverviewreports("validated_flow_yearly", "/tmp")
#' }
#' @export
createalloverviewreports <- function(tableread = "validated_flow_yearly",
outputdir = "reports/overview"){
flowavailable <- readdbtbl(tableread) %>%
select(reporter) %>%
distinct() %>%
collect()
lapply(flowavailable$reporter, trytocreateoverviewreports,
tableread = tableread, outputdir = outputdir)
createcountryindex(flowavailable, outputdir = outputdir)
}
if (FALSE){
library(tradeflows)
###################### #
# Overview reports #
###################### #
# Use the default template built within the package
createoverviewreport(reporter_ = "Italy")
# Use the template in development for quick itteration without package building
###################### #
# Completeness reports #
###################### #
# Use the default template built within the package
# You need to rebuild the package for template updates to take effect
createcompletenessreport(productcode_ = 440799)
createcompletenessreport(productcode_ = 440710)
# Use the template in development before package is build and select some years
createcompletenessreport(440799, beginyear = 2010, endyear = 2011, inputpath = "inst/templates")
# Use the createreportfromdb function with the template that will be exported with the package
createreportfromdb("raw_flow_yearly", 440799, template = "completeness.Rmd",
outputdir = "reports/completeness/")
# pandoc: Cannot decode byte '\xfc': Data.Text.Encoding.Fusion.streamUtf8: Invalid UTF-8 stream
# Error: pandoc document conversion failed with error 1
createcompletenessreport(tradeflows::sawnwoodexample, outputdir = directory)
# report for the "black hole" dataset
load("data-raw/comtrade/440799.RData")
swd99 <- renamecolumns(dtf, "comtrade", "efi")
createcompletenessreport(swd99, outputdir = directory)
# another dataset
load("data-raw/sawnwood_all.RData")
swdall <- renamecolumns(swdall)
createcompletenessreport(swdall, "4407", outputdir = directory)
# Complete dataset from server
load("data-raw/4409.RData")
dtf <- renamecolumns(dtf)
createcompletenessreport(dtf, outputdir = directory)
####################### #
# Discrepancies reports #
####################### #
creatediscrepancyreport(productcode_ = 440799, reporter_ = "Germany")
# Old way using only the createreport() functio
load("data-raw/comtrade/440799.RData")
swd99 <- renamecolumns(dtf, "comtrade", "efi")
createreport(swd99, outputdir = directory, template = "discrepancies.Rmd")
############################### #
# Network visualisation reports #
############################### #
directory <- "docs/development/networkvisualisation/"
load("data-raw/comtrade/440799.RData")
swd99 <- renamecolumns(dtf, "comtrade", "efi")
createreport(swd99, outputdir = directory, template="asdfdsa.Rmd")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.