# Setup chunk: load packages and set the knitr options used by the report.
#
# Note from the original author: I tried placing the yaml block after the
# chunks but for some reason it doesn't always generate the pdf and generates
# a html file instead. The pdf is generated when the yaml block is at the top.
library(knitr)

# File paths are relative to the root of the project directory
knitr::opts_knit$set(root.dir = "../../..")
# Do not echo the chunk source code in the rendered report
knitr::opts_chunk$set(echo = FALSE)

# Packages are attached in the same order as before so that function
# masking between them is unchanged.
library(tradeflows)
library(dplyr)
library(ggplot2)
library(reshape2)
# ---------------------------------------------------------------------------
# ! Development-only chunk: keep eval=FALSE.
# ---------------------------------------------------------------------------
# Test countries: Cameroon, Ghana, Indonesia
message("Using the development template in ", "inputpath = 'inst/templates/'")

# Instruction to generate the report before building the package
creatediscrepancyreport(440799, "China", inputpath = "inst/templates")

# The data frame tfdata will be passed by the report generating function to
# the template. Product and reporter information can be extracted back from
# tfdata; in the template this would have to be the country appearing most in
# the data frame.
productcodeinreport <- 440799
reporterinreport <- "Indonesia" # Test countries: Cameroon, Ghana, Indonesia

# Load every yearly flow of the product in which the reporter appears,
# either as reporter or as partner.
tfdata <- readdbtbl("raw_flow_yearly") %>%
  filter(
    productcode == productcodeinreport,
    reporter == reporterinreport | partner == reporterinreport
  ) %>%
  collect()
# Number of largest trade flows kept for the table of latest flows
numberoflargecountries <- 27

# Extract unit and check that function arguments reporterinreport and
# productcodeinreport are consistent with the data frame
metadata <- extractmetadata(tfdata)
stopifnot(
  metadata$productcode == productcodeinreport,
  metadata$reporter == reporterinreport
)
# Use a shorter unit label in the text and on the plot axes
if (metadata$unit == "Volume in cubic meters") {
  metadata$unit <- "cubic meters"
}

# Calculate discrepancies
tfdata <- tfdata %>%
  addpartnerflow() %>%
  calculatediscrepancies()

# Select the last year available for which the share of missing quantities
# in the flows reported by the reporter is below one half.
# The ratio filter is applied BEFORE taking the maximum year: combining both
# conditions in a single filter() compares against the maximum over all
# years, which returns nothing whenever the most recent year is mostly NA.
yearswithquantity <- tfdata %>%
  filter(reporter == reporterinreport) %>%
  group_by(year) %>%
  summarise(naratio = mean(is.na(quantity))) %>%
  filter(naratio < 0.5)
if (nrow(yearswithquantity) == 0) {
  stop("No year in which less than half of the quantities reported by ",
       reporterinreport, " are missing.", call. = FALSE)
}
lastyearreporter <- max(yearswithquantity$year)
yearinreport <- lastyearreporter

# Display number of years available only if other countries reported
# data for more years
messageyears <- if (lastyearreporter != max(tfdata$year)) {
  paste("Data is available until", max(tfdata$year), "for other countries.")
} else {
  ""
}
This document presents an overview of validated trade flow data reported for product number `r as.character(productcodeinreport)`
by `r reporterinreport`
and its trade partners. `r reporterinreport`
reported importing from
`r length(unique(tfdata$partner[tfdata$reporter==reporterinreport & tfdata$flow=="Import"]))`
countries and exporting to
`r length(unique(tfdata$partner[tfdata$reporter==reporterinreport & tfdata$flow=="Export"]))`
countries
between `r min(tfdata$year)`
and `r lastyearreporter`.
`r messageyears`
Trade values are reported in US dollars and quantities in `r metadata$unit`.
Discrepancy is equal to the difference between the quantity reported by the trade partners and the quantity reported by `r reporterinreport`.
A positive discrepancy means that a trade partner reported a higher quantity, a negative discrepancy means that a trade partner reported a lower quantity in the given year.
# Select the largest import or export flows reported by the reporter
# in the report year, ordered by decreasing quantity.
# NOTE(review): filterworldeu28() presumably drops the World / EU28
# aggregates - confirm against the tradeflows package.
largestreporter <- tfdata %>%
  filterworldeu28() %>%
  filter(year == yearinreport, reporter == reporterinreport) %>%
  arrange(desc(quantity)) %>%
  head(numberoflargecountries)

# Display latest trade flows as a table.
# The caption uses the number of rows actually displayed, which is lower
# than numberoflargecountries when the reporter has fewer flows that year.
largestreporter %>%
  select(year, reporter, partner, flow, flag, tradevalue, quantity,
         discrepancy_quantity = discrq) %>%
  kable(caption = paste(nrow(largestreporter),
                        "largest import or export flows reported by",
                        reporterinreport, "in", yearinreport))
\newpage
`r reporterinreport`
and its main partners

# Mark each flow as reported by the reporter ("") or by its partners
tfsum <- mutate(
  tfdata,
  isreporter = ifelse(reporter == reporterinreport, "", "partners")
)

# Switch import and export for the partners, so that a partner's export is
# lined up with the reporter's import (and vice versa). The temporary
# placeholder keeps the first substitution from being undone by the second.
partnerrows <- tfsum$isreporter == "partners"
tfsum[partnerrows, ] <- tfsum[partnerrows, ] %>%
  mutate(
    flow = gsub(
      "aaaaaaa", "Export",
      gsub("Export", "Import", gsub("Import", "aaaaaaa", flow))
    )
  )

# Number of flows, total value and total quantity per year, flow direction
# and reporting side
flowtotals <- tfsum %>%
  group_by(year, flow, isreporter) %>%
  summarise(
    nrow = n(),
    value = sum(tradevalue, na.rm = TRUE),
    quantity = sum(quantity, na.rm = TRUE)
  )

# One row per year and flow, reporter columns followed by partner columns
flowtotals %>%
  melt(id = c("year", "flow", "isreporter")) %>%
  dcast(year + flow ~ isreporter + variable) %>%
  kable(caption = paste("Number of trade flows (nrow), total trade value and total quantity reported by ",
                        reporterinreport,
                        " (columns 3,4,5) and its trade partners (columns 6,7,8) for the years available in the database."))
\newpage
# Display these trade flows as a plot: for each of the largest flows,
# quantity reported by the reporter and by the partner for every year,
# with an error bar spanning the two reported quantities.
largestreporter <- tfdata %>%
  filterworldeu28() %>%
  # Keep the same combination of flow / country
  filter(paste0(flow, partner) %in%
           paste0(largestreporter$flow, largestreporter$partner)) %>%
  # reshape table for use in the plot
  select(year, partner, flow, quantityreporter, quantitypartner) %>%
  arrange(desc(quantityreporter)) %>%
  # Lower and upper end of the error bar. pmin() / pmax() are the
  # vectorised element-wise equivalents of a row-by-row min() / max().
  mutate(qmin = pmin(quantityreporter, quantitypartner),
         qmax = pmax(quantityreporter, quantitypartner)) %>%
  # rename(discrepancy = discrq) %>%
  melt(id = c("year", "partner", "flow", "qmin", "qmax")) %>%
  mutate(flowpartner = paste(flow, partner))

# Summarise quantity by country and trade direction over the period
largestreporter_arranged <- largestreporter %>%
  group_by(flow, partner) %>%
  summarise(q = sum(value, na.rm = TRUE)) %>%
  mutate(flowpartner = paste(flow, partner)) %>%
  arrange(desc(q))

######################
message("Separation of import export is not working because import and export plots would have different number and facet and it would be a pain to manage plot size create a new column called flowpartner")

# Order the facets following the order of largestreporter_arranged
largestreporter <- largestreporter %>%
  mutate(flowpartner = ordered(flowpartner,
                               levels = largestreporter_arranged$flowpartner))

ggplot(largestreporter, aes(x = year, y = value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_manual(values = c("chocolate3", "black")) +
  # width is a constant, so it is set as a parameter rather than mapped
  # inside aes(); colour stays in aes() so that it gets a legend entry.
  geom_errorbar(aes(ymin = qmin, ymax = qmax, colour = "discrepancy"),
                width = 0.2) +
  scale_colour_manual(name = "error bars", values = "red") +
  ylab(metadata$unit) +
  ggtitle(paste("Major trade flows reported by", reporterinreport,
                "and its trade partners")) +
  facet_wrap(~flowpartner, scales = "free_y", ncol = 3) +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "horizontal") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
\newpage
Relative discrepancy is a ratio of the size of the discrepancy to the total trade volume reported by both partners. It is equal to
$$\frac{(quantitypartner - quantity)}{(quantity + quantitypartner)}$$
When the relative discrepancy is equal to 0, both reporter and partner reported the same value. When the relative discrepancy is equal to 1, the reporter did not report any trade. When the relative discrepancy is equal to -1, the partner did not report any trade. Values in between represent the relative size of the discrepancy. Looking for outliers in the plot means looking for points with a relative discrepancy close to 1 or -1 on the y axis and with a high trade value or high trade quantity on the x axis.
## Trade flows as a point cloud
tfdata <- addregion(filterworldeu28(tfdata))

# Both plots show only the flows reported by the reporter itself
reporterflows <- filter(tfdata, reporter == reporterinreport)

# Relative discrepancy of the quantity against the reported quantity,
# one panel per flow direction, coloured by partner region
ggplot(
  reporterflows,
  aes(x = quantity, y = reldiscrq, color = regionpartner)
) +
  geom_point() +
  facet_wrap(~flow, scales = "free_x", ncol = 2) +
  ylab("Relative discrepancy of the quantity") +
  xlab(paste("Quantity in", metadata$unit))

# Blank lines between the two figures in the output document
cat("\n\n")

# Same plot for the trade value
ggplot(
  reporterflows,
  aes(x = tradevalue, y = reldiscrv, color = regionpartner)
) +
  geom_point() +
  facet_wrap(~flow, scales = "free_x", ncol = 2) +
  ylab("Relative discrepancy of the trade value") +
  xlab("Trade value in US $")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.