#UPLOAD USDA stock data into STOCK Domain after eliminating USDA stocks for years where data on production for FAO and USDA is not in line. Eliminate also USDA stocks for years in which USDA data on trade is not in line with FAO data on trade
## load the library
rm(list=ls())
setwd("C:/Users/VALDIVIACR/Documents/faoswsStock/modules/Pull Stock USDA")
library(faosws)
library(data.table)
library(faoswsUtil)
library(ggplot2)
library(faoswsBalancing)
library(faoswsModules)
library(sendmailR)
library(dplyr)
library(readxl)
# library(xlsx)
# library(writexl)
if (CheckDebug()) {
R_SWS_SHARE_PATH <- "//hqlprsws1.hq.un.fao.org/sws_r_share"
mydir <- "C:/Users/VALDIVIACR/Documents/faoswsStock/modules/Pull Stock USDA"
SETTINGS <- faoswsModules::ReadSettings(file.path(mydir, "sws.yml"))
SetClientFiles(SETTINGS[["certdir"]])
GetTestEnvironment(baseUrl = SETTINGS[["server"]], token = SETTINGS[["token"]])
}
startYear = as.numeric(2010)
endYear = as.numeric(2019)
stopifnot(startYear <= endYear)
yearVals = startYear:endYear
##' Get data configuration and session
sessionKey = swsContext.datasets[[1]]
sessionCountries =
getQueryKey("geographicAreaM49", sessionKey)
geoKeys = GetCodeList(domain = "agriculture", dataset = "aproduction",
dimension = "geographicAreaM49")[type == "country", code]
################################################
##### Pull Prduction Data #####
################################################
message("Pulling data from Agriculture Production")
## if the
geoDim = Dimension(name = "geographicAreaM49", keys = geoKeys)
eleDim <- Dimension(name = "measuredElement", keys = c("5510"))
itemKeys = GetCodeList(domain = "agriculture", dataset = "aproduction",
dimension = "measuredItemCPC")[, code]
itemDim = Dimension(name = "measuredItemCPC", keys = itemKeys)
timeDim = Dimension(name = "timePointYears", keys = as.character(yearVals))
ProdKey = DatasetKey(domain = "agriculture", dataset = "aproduction",
dimensions = list(
geographicAreaM49 = geoDim,
measuredElement = eleDim,
measuredItemCPC = itemDim,
timePointYears = timeDim)
)
ProdFAOData = GetData(ProdKey)
setnames(ProdFAOData, c("measuredElement", "measuredItemCPC"),
c("measuredElementSuaFbs", "measuredItemSuaFbs"))
################################################
##### Pull Imports and Exports Data #####
################################################
message("Pulling data from Trade")
eleTradeDim = Dimension(name = "measuredElementTrade",
keys = c("5610"))
eleTradeDim2 = Dimension(name = "measuredElementTrade",
keys = c("5910"))
tradeKey = DatasetKey(
domain = "trade", dataset = "total_trade_cpc_m49",
dimensions = list(geographicAreaM49 = geoDim,
measuredElementTrade = eleTradeDim,
measuredItemCPC = itemDim,
timePointYears = timeDim)
)
tradeKey2 = DatasetKey(
domain = "trade", dataset = "total_trade_cpc_m49",
dimensions = list(geographicAreaM49 = geoDim,
measuredElementTrade = eleTradeDim2,
measuredItemCPC = itemDim,
timePointYears = timeDim)
)
ImportData = GetData(tradeKey)
setnames(ImportData, c("measuredElementTrade", "measuredItemCPC"),
c("measuredElementSuaFbs", "measuredItemSuaFbs"))
ExportData = GetData(tradeKey2)
setnames(ExportData, c("measuredElementTrade", "measuredItemCPC"),
c("measuredElementSuaFbs", "measuredItemSuaFbs"))
################################################
##### Pull from SUA BAL #####
################################################
message("Pulling data from SUA Balanced")
geoDim = Dimension(name = "geographicAreaM49", keys = geoKeys)
eleDim <- Dimension(name = "measuredElementSuaFbs", keys = c("5510"))
itemKeys = GetCodeList(domain = "suafbs", dataset = "sua_unbalanced",
dimension = "measuredItemFbsSua")[, code]
itemDim = Dimension(name = "measuredItemFbsSua", keys = itemKeys)
timeDim = Dimension(name = "timePointYears", keys = as.character(yearVals))
SUAKey = DatasetKey(domain = "suafbs", dataset = "sua_unbalanced",
dimensions = list(
geographicAreaM49 = geoDim,
measuredElement = eleDim,
measuredItemCPC = itemDim,
timePointYears = timeDim)
)
SUABALData = GetData(SUAKey)
setnames(SUABALData, "measuredItemFbsSua", "measuredItemSuaFbs")
##read downloaded data from USDA website-->data to be downloaded locally
StocksUSDA<-read.csv("C:/Users/VALDIVIACR/Documents/faoswsStock/modules/Pull Stock USDA/psd_alldata.csv")
#convert quantities from USDA unit measurement
StocksUSDA$unit <- NA
StocksUSDA$unit[StocksUSDA$Unit_ID==2]<- 60
StocksUSDA$unit[StocksUSDA$Unit_ID==7]<- 1000
StocksUSDA$unit[StocksUSDA$Unit_ID==8]<- 1000
StocksUSDA$unit[StocksUSDA$Unit_ID==21]<- 1
StocksUSDA$unit[StocksUSDA$Unit_ID==27]<- (217.72434*1000)/1000
StocksUSDA<- StocksUSDA%>% dplyr::mutate(ValueConv=Value*unit)
StocksUSDA<- StocksUSDA %>% group_by(Country_Code, Commodity_Code, Market_Year) %>% dplyr::mutate(Beg_stock=sum(ValueConv[Attribute_ID==20]))
StocksUSDA<- StocksUSDA %>% group_by(Country_Code, Commodity_Code, Market_Year) %>% dplyr::mutate(End_stock=sum(ValueConv[Attribute_ID==176]))
StocksUSDA<- StocksUSDA %>% group_by(Country_Code, Commodity_Code, Market_Year) %>% dplyr::mutate(Prod_USDA=sum(ValueConv[Attribute_ID==28]))
StocksUSDA<- StocksUSDA %>% group_by(Country_Code, Commodity_Code, Market_Year) %>% dplyr::mutate(Imports_USDA=sum(ValueConv[Attribute_ID==57]))
StocksUSDA<- StocksUSDA %>% group_by(Country_Code, Commodity_Code, Market_Year) %>% dplyr::mutate(Exports_USDA=sum(ValueConv[Attribute_ID==88]))
#eliminate series that do not have stocks
StocksUSDA$BEGSTOCKyes[StocksUSDA$Attribute_ID==20]<- 1
StocksUSDA$BEGSTOCKyes[is.na(StocksUSDA$BEGSTOCKyes)]<-0
StocksUSDA<- StocksUSDA %>% group_by(Country_Code, Commodity_Code) %>% dplyr::mutate(BEGSTOCKyes2=max(BEGSTOCKyes))
StocksUSDA<-StocksUSDA%>%dplyr:: filter(BEGSTOCKyes2==1)
#create delta stocks and supply USDA
StocksUSDA<- StocksUSDA%>% dplyr::mutate(Stock_Delta=End_stock-Beg_stock)
StocksUSDA<- StocksUSDA%>% dplyr::mutate(Supply_USDA=Prod_USDA+Imports_USDA-Exports_USDA)
#create variable for USDA supply percentage change by year
USDA_supplychange<-StocksUSDA%>%dplyr:: filter(Attribute_ID==28)
USDA_supplychange <- USDA_supplychange%>%
dplyr::arrange(Market_Year) %>%
mutate(USDA_supplychange = (Supply_USDA - lag(Supply_USDA))/lag(Supply_USDA))
USDA_supplychange <- subset(USDA_supplychange, select = c("Commodity_Code", "Country_Code", "Market_Year", "USDA_supplychange"))
StocksUSDA <- merge(StocksUSDA,USDA_supplychange, by = c("Commodity_Code", "Country_Code", "Market_Year"), all.x = TRUE, all.y =TRUE)
#clean data, create cumulative stocks for USDA data
StocksUSDA <- subset(StocksUSDA, select = c("Commodity_Code", "Commodity_Description", "Country_Code", "Country_Name", "Market_Year", "Beg_stock", "End_stock", "Stock_Delta", "Prod_USDA", "Imports_USDA", "Exports_USDA", "Supply_USDA", "USDA_supplychange"))
StocksUSDA<-unique(StocksUSDA)
StocksUSDA<-data.table(StocksUSDA)
StocksUSDA[, CumUSDAadj := `End_stock`- `Beg_stock`[Market_Year==1960], by = list(`Country_Code`, `Commodity_Code`)]
##MATCHING CODE USDA AND FAO FOR COMMODITIES AND COUNTRIES. CREATING COLUMNS FOR PRODCTION USDA YEAR T+1, T-1 TO MATCH USDA PRODUCTION TO FAO PRODUCTION
USDAconv_codes=ReadDatatable("usda_mapping_codes")
USDAconv_codes$usda_code<- as.character(USDAconv_codes$usda_code)
USDAconv_codes_ITEM<-USDAconv_codes%>%dplyr::filter(element=="ITEM")
USDAconv_codes_country<-USDAconv_codes%>%dplyr::filter(element=="COUNTRY")
names(StocksUSDA)[names(StocksUSDA) == "Country_Code"] <- "usda_code"
StocksUSDA <- merge(StocksUSDA,USDAconv_codes_country, by = c("usda_code"), all.x = TRUE, all.y =TRUE)
StocksUSDA<-StocksUSDA%>%dplyr:: filter(sws_code!="NA")
StocksUSDA<-StocksUSDA%>%dplyr:: filter(Country_Name!="NA")
StocksUSDA <- subset(StocksUSDA, select = c("Commodity_Code", "Commodity_Description", "Market_Year", "Beg_stock", "CumUSDAadj", "End_stock", "Stock_Delta", "Prod_USDA", "Imports_USDA", "Exports_USDA", "Supply_USDA", "USDA_supplychange", "sws_code"))
names(StocksUSDA)[names(StocksUSDA) == "sws_code"] <- "geographicAreaM49"
names(StocksUSDA)[names(StocksUSDA) == "Commodity_Code"] <- "usda_codes"
StocksUSDA$usda_codes<- as.numeric(StocksUSDA$usda_codes)
USDAconv_codes_ITEM$usda_code<- as.numeric(USDAconv_codes_ITEM$usda_code)
names(USDAconv_codes_ITEM)[names(USDAconv_codes_ITEM) == "usda_code"] <- "usda_codes"
StocksUSDA <- merge(StocksUSDA,USDAconv_codes_ITEM, by = c("usda_codes"), all.x = TRUE, all.y =TRUE)
StocksUSDA<-StocksUSDA%>%dplyr:: filter(sws_code!="NA")
StocksUSDA <- subset(StocksUSDA, select = c("geographicAreaM49", "sws_code", "Market_Year", "Beg_stock", "CumUSDAadj", "End_stock", "Stock_Delta", "Prod_USDA", "Imports_USDA", "Exports_USDA", "Supply_USDA", "USDA_supplychange"))
setnames(StocksUSDA, "sws_code", "measuredItemSuaFbs")
setnames(StocksUSDA, "Market_Year", "timePointYears")
StocksUSDA$geographicAreaM49<- as.character(StocksUSDA$geographicAreaM49)
StocksUSDA$geographicAreaM49<- as.numeric(StocksUSDA$geographicAreaM49)
StocksUSDA$geographicAreaM49<- as.integer(StocksUSDA$geographicAreaM49)
StocksUSDA$geographicAreaM49<- as.character(StocksUSDA$geographicAreaM49)
StocksUSDA$timePointYears<- as.integer(StocksUSDA$timePointYears)
StocksUSDA$measuredItemSuaFbs<- as.character(StocksUSDA$measuredItemSuaFbs)
setnames(SUABALData, "Value", "ProdFAOSUA")
setnames(ProdFAOData, "Value", "ProdFAOPROD")
ProdFAOfin<-merge(SUABALData,ProdFAOData, by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears"), all.x = T, all.y=T)
ProdFAOfin<-ProdFAOfin%>%dplyr::mutate(Value=ifelse(ProdFAOPROD==0|is.na(ProdFAOPROD), ProdFAOSUA, ProdFAOPROD))
ProdFAOfin <- subset(ProdFAOfin, select = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "Value"))
setnames(ProdFAOfin, "Value", "ProdFAO")
ProdFAOfin$timePointYears<- as.character(ProdFAOfin$timePointYears)
StocksUSDA<-merge(StocksUSDA,ProdFAOfin, by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears"), all.x = T, all.y=T)
setnames(ImportData, "Value", "ImportFAO")
setnames(ExportData, "Value", "ExportFAO")
ImportData$timePointYears<-as.integer(ImportData$timePointYears)
ExportData$timePointYears<-as.integer(ExportData$timePointYears)
StocksUSDA<-merge(StocksUSDA,ImportData, by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears"), all.x = T)
StocksUSDA<-merge(StocksUSDA,ExportData, by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears"), all.x = T)
StocksUSDA$ProdFAO[is.na(StocksUSDA$ProdFAO)]<-0
StocksUSDA$ImportFAO[is.na(StocksUSDA$ImportFAO)]<-0
StocksUSDA$ExportFAO[is.na(StocksUSDA$ExportFAO)]<-0
StocksUSDA<-StocksUSDA%>%dplyr:: filter(timePointYears>=2010)
StocksUSDA<- StocksUSDA%>% dplyr::mutate(SupplyFAO=ProdFAO+ImportFAO-ExportFAO)
#create variable for FAO supply percentage change by year
StocksUSDA$timePointYears<-as.integer(StocksUSDA$timePointYears)
StocksUSDA<-arrange(StocksUSDA, geographicAreaM49, measuredItemSuaFbs, timePointYears)
StocksUSDA <-StocksUSDA%>%mutate(FAO_supplychange = (SupplyFAO - lag(SupplyFAO))/lag(SupplyFAO))
StocksUSDA<-as.data.table(StocksUSDA)
setnames(StocksUSDA, "Prod_USDA", "ProdUSDA_T")
setnames(StocksUSDA, "Beg_stock", "OPENING_STOCKS_USDA")
StocksUSDA[, "ProdUSDA_T+1":= as.integer(shift(ProdUSDA_T, type= "lead"), by= list ("geographicAreaM49", "measuredItemSuaFbs"))]
StocksUSDA[, "ProdUSDA_T-1":= as.integer(shift(ProdUSDA_T, type= "lag"), by= list ("geographicAreaM49", "measuredItemSuaFbs"))]
###creating a table with closing stocks FAO by year
#StocksFAOclosing <- read_excel("StocksUSDA_FAOfinal2.xlsx")
#StocksFAOclosing<-StocksFAOclosing[ , c(1,3,5,8)]
#setnames(StocksFAOclosing, "COUNTRY", "geographicaream49")
#setnames(StocksFAOclosing, "ITEM CPC_code", "measureditemsuafbs")
#setnames(StocksFAOclosing, "YEAR", "timepointyears")
#setnames(StocksFAOclosing, "CUMULATIVE STOCKS FAO", "cumulative_stocks_fao")
#StocksFAOclosing<-StocksFAOclosing%>%dplyr:: filter(!is.na(cumulative_stocks_fao))
#StocksFAOclosing$geographicaream49<- as.integer(StocksFAOclosing$geographicaream49)
#StocksFAOclosing$measureditemsuafbs<- as.character(StocksFAOclosing$measureditemsuafbs)
#StocksFAOclosing$timepointyears<- as.integer(StocksFAOclosing$timepointyears)
#StocksFAOclosing$cumulative_stocks_fao<- as.numeric(StocksFAOclosing$cumulative_stocks_fao)
#write.csv(StocksFAOclosing, "stocks_fao_closing.csv", row.names = F)
StocksFAOclosing=ReadDatatable("stocks_fao_closing")
setnames(StocksFAOclosing, "geographicaream49", "geographicAreaM49")
setnames(StocksFAOclosing, "measureditemsuafbs", "measuredItemSuaFbs")
setnames(StocksFAOclosing, "timepointyears", "timePointYears")
setnames(StocksFAOclosing, "cumulative_stocks_fao", "CUMULATIVE STOCKS FAO")
StocksFAOclosing$geographicAreaM49<- as.character(StocksFAOclosing$geographicAreaM49)
StocksUSDA$timePointYears<- as.numeric(StocksUSDA$timePointYears)
StocksUSDA<-merge(StocksUSDA,StocksFAOclosing, by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears"), all.x = T)
#DIVIDE DATA USDA FROM DATA FAO BEFORE MATCHING
StocksFAO<- subset(StocksUSDA, select = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "ProdFAO", "ImportFAO", "ExportFAO", "SupplyFAO", "FAO_supplychange", "CUMULATIVE STOCKS FAO"))
StocksUSDA<- subset(StocksUSDA, select = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "OPENING_STOCKS_USDA", "CumUSDAadj", "End_stock", "Stock_Delta", "ProdUSDA_T", "Imports_USDA", "Exports_USDA", "Supply_USDA", "USDA_supplychange", "ProdFAO", "ProdUSDA_T+1", "ProdUSDA_T-1"))
##MATCH PRODUCTION BY USING MINIMUM DIFFERENCE
StocksUSDA[, "DiffT":= abs(`ProdFAO`-`ProdUSDA_T`)]
StocksUSDA[, "DiffTplus1":= abs(`ProdFAO`-`ProdUSDA_T+1`)]
StocksUSDA[, "DiffTmin1":= abs(`ProdFAO`-`ProdUSDA_T-1`)]
StocksUSDA <- transform(StocksUSDA, min = pmin(`DiffT`, `DiffTplus1`,`DiffTmin1`))
StocksUSDA[, Match := ifelse((min==DiffT), 0,
ifelse((min==DiffTplus1), 1,
ifelse((min==DiffTmin1), -1, NA
)))]
mode_fun <- function(x) {
mode0 <- names(which.max(table(x)))
if(is.numeric(x)) return(as.numeric(mode0))
mode0
}
StocksUSDA[
,Yearmatch := mode_fun(Match[timePointYears >= 2014]),
by = c("geographicAreaM49", "measuredItemSuaFbs")
]
StocksUSDA$timePointYears<- as.integer(StocksUSDA$timePointYears)
StocksUSDA[, "Year":= (`timePointYears`-`Yearmatch`)]
StocksUSDA[,timePointYears:=NULL]
setnames(StocksUSDA, "Year", "timePointYears")
StocksUSDA<-StocksUSDA[StocksUSDA$timePointYears>=2010]
StocksUSDA$timePointYears<-as.character(StocksUSDA$timePointYears)
StocksUSDA[,ProdFAO:=NULL]
StocksFAO$timePointYears<-as.character(StocksFAO$timePointYears)
StocksUSDA<-merge(StocksUSDA,StocksFAO, by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears"), all.x = T, all.y=T)
StocksUSDA$ProdFAO[is.na(StocksUSDA$ProdFAO)]<-0
##ONCE YEAR IS MATCHED REJECT USDA DATA WITH TOO LARGE DIFFERENCE WITH FAO DATA ON PRODUCTION
Stocks_TO_USE<-StocksUSDA[, MatchT := ifelse(ProdFAO<=1000 & ProdUSDA_T<=1000, 1,
ifelse (ProdFAO>1000 & ProdFAO <=5000 &
ProdFAO<=(ProdUSDA_T+0.4*ProdUSDA_T)& ProdFAO>=(ProdUSDA_T-0.4*ProdUSDA_T),1,
ifelse (ProdFAO>5000 & ProdFAO <=10000 & ProdFAO<=(ProdUSDA_T+0.3*ProdUSDA_T)&
ProdFAO>=(ProdUSDA_T-0.3*ProdUSDA_T),1,
ifelse (ProdFAO>10000 & ProdFAO<=(ProdUSDA_T+0.25*ProdUSDA_T) &
ProdFAO>=(ProdUSDA_T-0.25*ProdUSDA_T),1,NA))))]
#consider blank 0 as matching
Stocks_TO_USE[(ProdFAO==0 | is.na(ProdFAO))&(ProdUSDA_T==0 | is.na(ProdUSDA_T) ), MatchT := 0]
Stocks_TO_USE$timePointYears<-as.integer(Stocks_TO_USE$timePointYears)
Stocks_TO_USE<- Stocks_TO_USE %>% group_by(geographicAreaM49, measuredItemSuaFbs) %>% dplyr::mutate(MatchT2=min(MatchT[timePointYears==2014]))
Stocks_TO_USE<-as.data.table(Stocks_TO_USE)
Stocks_TO_USE<-Stocks_TO_USE[MatchT2!=Inf & timePointYears>=2013]
##FOR PRIMARY PRODUCTS REJECT USDA DATA WITH TOO LARGE DIFFERENCE BETWEEN USDA CUMULATIVE STOCKS AND FAO CUMULATIVE STOCKS
##16.06.19 increase more percentages for cumulative stock comparison (100%)
Stocks_TO_USE[, OpeningComparisonAdj3 := ifelse(`CUMULATIVE STOCKS FAO`<=(CumUSDAadj+1*CumUSDAadj) &
`CUMULATIVE STOCKS FAO`>=(CumUSDAadj-1*CumUSDAadj),"yes", "no")]
#give yes to NA and zero
Stocks_TO_USE[(`CUMULATIVE STOCKS FAO`==0 | is.na(`CUMULATIVE STOCKS FAO`) &
CumUSDAadj==0 | is.na(CumUSDAadj)), OpeningComparisonAdj3 := "yes"]
## Filter cases with cumulative match in 2013
List <- Stocks_TO_USE[(OpeningComparisonAdj3=="yes"& timePointYears=="2013")]
# table(List$OpeningComparisonAdj3)
# table(List$`COUNTRY DESCRIPTION`)
ListYES<-List[ ,c(1,2), with=FALSE]
StockYES <- merge(ListYES, Stocks_TO_USE, by = c("geographicAreaM49", "measuredItemSuaFbs"), all.x = )
##SElect data to be uploaded, choose years 2014-2019
Stockupload<- as.data.table(StockYES[ timePointYears >=2014 & timePointYears <=2019])
Stockupload2 <- Stockupload[ , c(1,2,3,4), with=FALSE]
colnames(Stockupload2) <- c("geographicAreaM49", "measuredItemCPC", "timePointYears", "Value")
Stockupload2$measuredElement <-5113
Stockupload2$flagObservationStatus <-"T"
Stockupload2$flagMethod <-"h"
###test
Stocktest<-Stockupload
Stocktest <- Stocktest[ , c(1,2,3,4,7,8,9,10,11,12,21,22,23,24,25,26)]
Stocktest<- Stocktest %>% dplyr::mutate(net_trade_usda=(Imports_USDA-Exports_USDA))
Stocktest$ProdFAO[is.na(Stocktest$ProdFAO)]<-0
Stocktest$ImportFAO[is.na(Stocktest$ImportFAO)]<-0
Stocktest$ExportFAO[is.na(Stocktest$ExportFAO)]<-0
Stocktest<- Stocktest %>% dplyr::mutate(net_trade_fao=(ImportFAO-ExportFAO))
Stocktest<- Stocktest %>% dplyr::mutate(diff_trade=(net_trade_fao-net_trade_usda))
Stocktest<- Stocktest %>% dplyr::mutate(ratio_supp=(diff_trade/SupplyFAO))
Stocktest<- Stocktest %>% dplyr::mutate(ratio_delta=(Stock_Delta/SupplyFAO))
Stocktest<- Stocktest %>% dplyr::mutate(ratio_trade=(diff_trade/net_trade_fao))
Stocktest<- Stocktest %>% dplyr::mutate(trade_delta=(diff_trade/Stock_Delta))
Stocktest<- Stocktest %>% dplyr::mutate(supp_stock=(SupplyFAO-Stock_Delta))
#Stocktest<-Stocktest%>%dplyr::mutate(exclude1=ifelse(timePointYears<=2018 & ((FAO_supplychange>=0 & USDA_supplychange<=0 & ratio_delta<(-0.08)) | (FAO_supplychange<=0 & USDA_supplychange>=0 & ratio_delta>0.08)) & !(FAO_supplychange==0 & USDA_supplychange==0) & Stock_Delta!=0 & (abs(diff_trade))>40000, 1, 0))
Stocktest<-Stocktest%>%dplyr::mutate(exclude2=ifelse(timePointYears<=2018 & supp_stock<0, 1, 0))
Stocktest<-Stocktest%>%dplyr::mutate(exclude3=ifelse(timePointYears<=2018 & (abs(ratio_supp))>10 & diff_trade!=0, 1, 0))
Stocktest<-Stocktest%>%dplyr::mutate(exclude4=ifelse(timePointYears<=2018 & (abs(ratio_supp))>1 & ((ratio_supp<0 & ratio_delta>0)|(ratio_supp>0 & ratio_delta<0)) & Stock_Delta!=0 & diff_trade!=0 & (abs(ratio_delta))>0.1, 1, 0))
Stocktest<-Stocktest%>%dplyr::mutate(excludeFIN=ifelse((exclude2==1|exclude3==1|exclude4==1), 1, 0))
Stocktest<-Stocktest%>%dplyr::mutate(yearexclude=ifelse(excludeFIN==1, timePointYears, NA))
Stocktest <- Stocktest %>%group_by(geographicAreaM49, measuredItemSuaFbs) %>% dplyr:: mutate(minyear=min(yearexclude, na.rm = T))
Stocktest<-Stocktest%>%dplyr::mutate(excludeFIN2=ifelse(timePointYears>=minyear, 1, 0))
Stocktest$excludeFIN2[is.na(Stocktest$excludeFIN2)]<-0
Stocktest<-Stocktest%>%dplyr:: filter(excludeFIN2==0)
Stockupload <- Stocktest[ , c(1,2,3,4)]
colnames(Stockupload) <- c("geographicAreaM49", "measuredItemCPC", "timePointYears", "Value")
Stockupload$measuredElement<-"5113"
Stockupload$flagObservationStatus<-"T"
Stockupload$flagMethod<-"h"
Stockupload<-Stockupload[,c("geographicAreaM49", "measuredItemCPC", "measuredElement", "timePointYears", "Value", "flagObservationStatus", "flagMethod")]
write.csv(Stockupload, "Stocks_uploadedSept2020.csv", row.names = F)
##Pull data from stock and change Flag for USDA data
if (CheckDebug()) {
R_SWS_SHARE_PATH <- "//hqlprsws1.hq.un.fao.org/sws_r_share"
mydir <- "D:/FAO office folder/R/version faostandardization october 2018/faoswsStandardization/modules/FullstandardizationAndBalancing"
SETTINGS <- faoswsModules::ReadSettings(file.path(mydir, "sws.yml"))
SetClientFiles(SETTINGS[["certdir"]])
GetTestEnvironment(baseUrl = SETTINGS[["server"]], token = SETTINGS[["token"]])
}
stockEleDim = Dimension(name = "measuredElement",
keys = c("5071", "5113"))
itemKeys = GetCodeList(domain = "Stock", dataset = "stocksdata", "measuredItemCPC")
itemKeys = itemKeys[, code]
itemDim <- Dimension(name = "measuredItemCPC", keys = itemKeys)
geoKeys = GetCodeList(domain = "Stock", dataset = "stocksdata", "geographicAreaM49")
geoKeys = geoKeys[, code]
geoDim <- Dimension(name = "geographicAreaM49", keys = geoKeys)
startYear = as.character(2014)
endYear = as.character(2019)
yearVals = as.character(startYear:endYear)
timeDim <- Dimension(name = "timePointYears", keys = as.character(yearVals))
stockKey = DatasetKey(domain = "Stock", dataset = "stocksdata",
dimensions = list(
geographicAreaM49 = geoDim,
measuredElement = stockEleDim,
measuredItemCPC = itemDim,
timePointYears = timeDim)
)
stockDataSWS = GetData(stockKey)
stockDataSWS2<-stockDataSWS%>%dplyr:: filter(flagObservationStatus=="T" & flagMethod=="h")
stockDataSWS2$Value <-""
stockDataSWS2$flagObservationStatus <-""
stockDataSWS2$flagMethod <-""
stockDataSWS2<-as.data.table(stockDataSWS2)
SaveData(domain = "Stock", dataset = "stocksdata", data= stockDataSWS2)
protected_stock<- stockDataSWS %>%
dplyr::mutate(official=ifelse(flagObservationStatus %in% c("", "T")&flagMethod=="p",TRUE,FALSE)) %>%
dplyr::filter(official==TRUE) %>%
dplyr::select(geographicAreaM49,measuredElement,measuredItemCPC,
timePointYears,Value,flagObservationStatus,flagMethod)
##check to perform before saving. all should be character except value
setDT(Stockupload)[, ("timePointYears") := lapply(.SD, as.character), .SDcols = "timePointYears"]
setDT(Stockupload)[, ("measuredElement") := lapply(.SD, as.character), .SDcols = "measuredElement"]
setDT(Stockupload)[, ("measuredItemCPC") := lapply(.SD, as.character), .SDcols = "measuredItemCPC"]
setDT(Stockupload)[, ("geographicAreaM49") := lapply(.SD, as.character), .SDcols = "geographicAreaM49"]
setDT(Stockupload)[, ("Value") := lapply(.SD, as.numeric), .SDcols = "Value"]
##Exclude protected values from the upload
Stockupload<-Stockupload %>% dplyr::anti_join(protected_stock,
by=c("geographicAreaM49","measuredElement",
"measuredItemCPC","timePointYears"))
Stockupload<-Stockupload%>%dplyr:: filter(!is.na(Value))
Stockupload<- as.data.table(Stockupload)
SaveData(domain = "Stock", dataset = "stocksdata", data= Stockupload)
####Recalculate all stock variations based on official opening stocks
##Pull all Opening stock data from stock domain
message("Pulling data from Stock domain")
stockEleDim = Dimension(name = "measuredElement",
keys = "5113")
itemKeys = GetCodeList(domain = "Stock", dataset = "stocksdata", "measuredItemCPC")
itemKeys = itemKeys[, code]
itemDim <- Dimension(name = "measuredItemCPC", keys = itemKeys)
geoKeys = GetCodeList(domain = "Stock", dataset = "stocksdata", "geographicAreaM49")
geoKeys = geoKeys[, code]
geoDim <- Dimension(name = "geographicAreaM49", keys = geoKeys)
startYear = as.character(2014)
endYear = as.character(2019)
yearVals = as.character(startYear:endYear)
timeDim <- Dimension(name = "timePointYears", keys = as.character(yearVals))
stockKey = DatasetKey(domain = "Stock", dataset = "stocksdata",
dimensions = list(
geographicAreaM49 = geoDim,
measuredElement = stockEleDim,
measuredItemCPC = itemDim,
timePointYears = timeDim)
)
stockDataSWS = GetData(stockKey)
##Recalculate stock variations for T-h data
stockDataSWS<-stockDataSWS[flagObservationStatus=="T"&flagMethod=="h"]
stockDataSWS[,
delta := shift(Value, type = "lag") - Value,
by = c("geographicAreaM49", "measuredItemCPC")
]
stockDataSWS[,
delta :=
ifelse(
timePointYears == max(timePointYears) & is.na(delta),
0,
delta
),
by = c("geographicAreaM49", "measuredItemCPC")
]
stockDataSWS[flagObservationStatus == "T", `:=`(delta_flag_obs = "T", delta_flag_method = "h")]
Variationtoupload <-
stockDataSWS[
timePointYears %in% 2014:2019,
.(
geographicAreaM49,
measuredItemCPC,
timePointYears,
measuredElement = "5071",
Value = delta,
flagObservationStatus = delta_flag_obs,
flagMethod = delta_flag_method
)
]
##check to perform before saving. all should be character but value
setDT(Variationtoupload)[, ("timePointYears") := lapply(.SD, as.character), .SDcols = "timePointYears"]
setDT(Variationtoupload)[, ("measuredElement") := lapply(.SD, as.character), .SDcols = "measuredElement"]
setDT(Variationtoupload)[, ("measuredItemCPC") := lapply(.SD, as.character), .SDcols = "measuredItemCPC"]
setDT(Variationtoupload)[, ("geographicAreaM49") := lapply(.SD, as.character), .SDcols = "geographicAreaM49"]
setDT(Variationtoupload)[, ("Value") := lapply(.SD, as.numeric), .SDcols = "Value"]
###save recalculated variations
Variationtoupload<- as.data.table(Variationtoupload)
SaveData(domain = "Stock", dataset = "stocksdata", data= Variationtoupload)
##Isolation forest test
options('repos' = c(CRAN = "https://cran.rstudio.com/"))
library(isotree)
train=Stocktest
train<-as.data.table(train)
train<-train[!is.na(train$OPENING_STOCKS_USDA)]
cols <- c("measuredItemSuaFbs","geographicAreaM49","timePointYears","ProdUSDA_T", "Imports_USDA","Exports_USDA","ProdFAO", "ImportFAO","ExportFAO", "FAO_supplychange", "USDA_supplychange" )
iso <- isolation.forest(train[,cols,with=FALSE], ntrees = 100, nthreads = 1)
### Check which row has the highest outlier score
pred <- predict(iso, train[,cols,with=FALSE])
train$iso_score <- pred
train[order(iso_score)]
treshold <- 0.33
final_set <- train[train$iso_score>treshold]
final_set<-final_set[final_set$timePointYears<=2018]
setnames(final_set, "measuredItemSuaFbs", "measuredItemFbsSua")
final_set <- nameData(domain = "suafbs", dataset = "sua_unbalanced", final_set)
final_set <- final_set[ , c(1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,36)]
final_set<-arrange(final_set, geographicAreaM49, measuredItemFbsSua, timePointYears)
write.csv(final_set, "Stocks_anomalous_USDA.csv", row.names = F)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.