#' @author Gerald C. Nelson, \email{nelson.gerald.c@gmail.com}
#' @keywords utilities, FAOSTAT, data management
#' @description
#This script reads in the FAO Food Balance Sheet information and does some manipulations of the data
#' @source \url{http://faostat3.fao.org/download/FB/FBS/E}
source("R/dataPrep.setup.R")
# Intro -------------------------------------------------------------------
#This script reads in the FAO Food Balance Sheet information from a zip file, does
# some manipulations of the data,
#and writes out results to an rds file
#Copyright (C) 2015 Gerald C. Nelson, except where noted
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details at http://www.gnu.org/licenses/.
# Read in the FBS data from a zip file
# FAO changed the structure of the zip file some time in 2015.
# Code to read in the old structure remains below but is commented out
# dt.FBSrawData <- as.data.table(read.csv(unzip(FBSdataZip, file = FBScsv),
# stringsAsFactors = FALSE,
# colClasses = list((character=1:7))))
# temp <- unz(FBSdataZip,FBScsv)
# data <- read.table(unz(FBSdataZip, FBScsv), nrows=10, header=T, quote="\"", sep=",")
colCharacter = c("Country Code", "Country","Item Code","Item","Element Code","Element", "Unit" )
# temp <- read_file(FBSdataZip,col_numeric(colNumeric),col_character("Country.Code"))
temp <- unzip(FBSdataZip, file = FBScsv)
dt.FBSrawData <- fread(temp, header = TRUE)
file.remove(temp)
#temp <- read.csv("data-raw/FBSData/FoodBalanceSheets_E_All_Data.csv",nrows = 10)
# old column names
# colnames(FBSrawData) <- c("FAOSTAT_country_code", "country_name", "item_code", "item",
# "element_group", "variable_code", "element","year", "unit", "value", "flag")
colnames(dt.FBSrawData) <- c("FAOSTAT_country_code", "country_name", "item_code", "item",
"variable_code", "element","unit",
"X1961","X1961F","X1962","X1962F","X1963","X1963F","X1964","X1964F","X1965","X1965F",
"X1966","X1966F","X1967","X1967F","X1968","X1968F","X1969","X1969F","X1970","X1970F",
"X1971","X1971F","X1972","X1972F","X1973","X1973F","X1974","X1974F","X1975","X1975F",
"X1976","X1976F","X1977","X1977F","X1978","X1978F","X1979","X1979F","X1980","X1980F",
"X1981","X1981F","X1982","X1982F","X1983","X1983F","X1984","X1984F","X1985","X1985F",
"X1986","X1986F","X1987","X1987F","X1988","X1988F","X1989","X1989F","X1990","X1990F",
"X1991","X1991F","X1992","X1992F","X1993","X1993F","X1994","X1994F","X1995","X1995F",
"X1996","X1996F","X1997","X1997F","X1998","X1998F","X1999","X1999F","X2000","X2000F",
"X2001","X2001F","X2002","X2002F","X2003","X2003F","X2004","X2004F","X2005","X2005F",
"X2006","X2006F","X2007","X2007F","X2008","X2008F","X2009","X2009F","X2010","X2010F",
"X2011","X2011F","X2012","X2012F","X2013","X2013F","X2014","X2014F","X2015","X2015F")
#this drops all the ...F columns which provide information for each year on the data in that year
colKeepList <- c("FAOSTAT_country_code", "country_name", "item_code", "item",
"variable_code", "element","unit",
"X1961","X1962","X1963","X1964","X1965",
"X1966","X1967","X1968","X1969","X1970",
"X1971","X1972","X1973","X1974","X1975",
"X1976","X1977","X1978","X1979","X1980",
"X1981","X1982","X1983","X1984","X1985",
"X1986","X1987","X1988","X1989","X1990",
"X1991","X1992","X1993","X1994","X1995",
"X1996","X1997","X1998","X1999","X2000",
"X2001","X2002","X2003","X2004","X2005",
"X2006","X2007","X2008","X2009","X2010",
"X2011","X2012","X2013","X2014","X2015")
dt.FBSraw <- dt.FBSrawData[,colKeepList, with = FALSE]
# improve on some element names
dt.FBSraw[element=="Food", element := "foodMT"]
dt.FBSraw[element=="Food supply quantity (kg/capita/yr)", element := "perCapKg"]
dt.FBSraw[element=="Food supply (kcal/capita/day)",element := "perCapKcal"]
dt.FBSraw[element=="Protein supply quantity (g/capita/day)", element := "perCapProt"]
dt.FBSraw[element=="Fat supply quantity (g/capita/day)", element := "perCapFat"]
# change 'element' to 'variable' to make it consistent with other data sources
setnames(dt.FBSraw,old=c("element","variable_code"),new = c("variable","variable_code"))
#how to drop years with the previous version of the FBS data
# remove years before 2010. The latest year is 2011 currently.
# setkey(dt.FBSraw,year)
# dt.FBSraw <- dt.FBSraw[year > 2009,]
#keep just the years in keepYearList.FBS
colKeepListYears <- c("FAOSTAT_country_code", "country_name", "item_code", "item",
"variable_code", "variable","unit",
keepYearList.FBS)
dt.FBSraw <- dt.FBSraw[,colKeepListYears, with = FALSE]
#old version of convert selected columns to character class
#charConvertList <- c("FAOSTAT_country_code","variable_code","item_code","unit","year")
#old version of convert selected columns to numeric class
charConvertList <- c("FAOSTAT_country_code","variable_code","item_code","unit")
dt.FBSraw[,(keepYearList.FBS) := lapply(.SD, as.numeric), .SDcols=keepYearList.FBS]
#add X to beginning of the year (X2009 instead of 2009), old version of the FBS data
#dt.FBSraw[, year := paste("X", dt.FBSraw$year, sep = "")]
# Read in a worksheet with the list of FBS food items by code, name, definition, and IMPACT commodity code
dt.FBScommodLookup <- as.data.table(read.xlsx(FBSCommodityInfo,
sheet = 1,
startRow = 1,
cols = 1:7,
colNames = TRUE))
charConvertList <- c("item_code")
for (col in charConvertList) set(dt.FBScommodLookup, j=col, value=as.character(dt.FBScommodLookup[[col]]))
# remove the item called "Miscellaneous"
dt.FBScommodLookup <- dt.FBScommodLookup[!item_name == "Miscellaneous",]
# Read in the region lookup table, created in dataPrep.regions.R
regions.all <- getNewestVersion("regions.all")
# Read in the worksheet that has the FAO country code-ISO country name lookup
dt.FBSNameLookup <- as.data.table(read.xlsx(FAOCountryNameCodeLookup,
sheet = 1,
startRow = 1,
colNames = TRUE))
#convert to character and leave just ISO code and FAOSTAT code
charConvertList <- c("ISO3","FAOSTAT")
dt.FBSNameLookup <- dt.FBSNameLookup[, lapply(.SD, as.character), .SDcols=charConvertList]
setnames(dt.FBSNameLookup,c("ISO3","FAOSTAT"),c("ISO_code","FAOSTAT_country_code"))
setkey(dt.FBSraw,FAOSTAT_country_code)
setkey(dt.FBSNameLookup,FAOSTAT_country_code)
dt.FBS <- dt.FBSraw[dt.FBSNameLookup]
# regions.ISO <- read.xlsx(ISOCodes)
# colnames(regions.ISO) <- c("ISO_code", "country_name")
# # Check for aggregations of countries; this should have no content
# FBSDat.countryAggs <- subset(dt.FBS,!(ISO_code %in% regions.ISO$ISO_code))
#
# #get rid of rows that are aggregations of countries
# dt.FBS <- subset(dt.FBS,Country %in% regions.ISO$country_name)
# Create separate data frame for aggregations of commodities
aggregates <- c("Animal fats + (Total)",
"Cereals - Excluding Beer + (Total)",
"Meat + (Total)",
"Milk - Excluding Butter + (Total)",
"Offals + (Total)",
"Oilcrops + (Total)",
"Pulses + (Total)",
"Stimulants + (Total)",
"Sugar & Sweeteners + (Total)",
"Vegetable Oils + (Total)",
"Spices + (Total)",
"Starchy Roots + (Total)",
"Sugar Crops + (Total)",
"Treenuts + (Total)",
"Vegetables + (Total)",
"Vegetal Products + (Total)",
"Alcoholic Beverages + (Total)",
"Animal Products + (Total)",
"Aquatic Products, Other + (Total)",
"Eggs + (Total)",
"Fish, Seafood + (Total)",
"Fruits - Excluding Wine + (Total)",
"Grand Total + (Total)",
"Miscellaneous + (Total)")
#dt.FBS.aggs <- subset(dt.FBS,(item %in% aggregates))
dt.FBS.commods <- dt.FBS[!item %in% aggregates]
#remove rows where FBS category is 'Miscellaneous' because we don't have an IMPACT equivalent
dt.FBS.commods <- dt.FBS.commods[!item == "Miscellaneous"]
# ARE (United Arab Emirates) doesn't have entries for kcals from alcoholic beverages, even zero
#include IMPACT code and nonIMPACT code assignment in the commodities data frame
setkey(dt.FBS.commods,item_code)
setkey(dt.FBScommodLookup,item_code)
temp <- dt.FBS.commods[dt.FBScommodLookup]
deleteColList <- c("FAOSTAT_country_code","IMPACT_missing_code","fish","alcohol")
temp[,(deleteColList) := NULL]
idVars <- c( "country_name","item_code","item",
"variable_code","variable","unit","ISO_code","item_name","definition",
"IMPACT_code")
dt.FBS.commods.melt <- melt(temp,
id.vars = idVars,
variable.name = "year",
measure.vars = keepYearList.FBS,
variable.factor = FALSE)
# need to sum individual FBS commodities to the IMPACT commodity they are in
dt.FBS.commods.melt[,value.sum := sum(value), by = list(ISO_code,variable, IMPACT_code,year)]
# keep "value" around in case we need to compare it
#now get rid of info that is not needed
keepListCol <- c("country_name", "variable_code", "variable", "unit", "ISO_code",
"IMPACT_code", "year", "value.sum")
dt.temp <- dt.FBS.commods.melt[,keepListCol, with=FALSE]
dt.FBS.commods.final <-
unique(dt.temp[, c(key(dt.temp), keepListCol), with = FALSE])
setnames(dt.FBS.commods.final,old=c("value.sum"),new = c("value"))
dt.FBS.commods.final[order(ISO_code)]
removeOldVersions("FBS")
saveRDS(dt.FBS.commods.final, file = paste(mData,"/FBS.",Sys.Date(),".rds",sep=""))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.