modules/SUA_bal_compilation_round2021/main.R

library(faosws)
library(faoswsUtil)
library(faoswsBalancing)
library(faoswsProcessing) 
library(faoswsStandardization)
library(dplyr)
library(data.table)
library(tidyr)
library(openxlsx)

# Prints a debug message; the only parameter is the string to print.
# COUNTRY is taken from the global environment (set below from the session).
dbg_print <- function(x) {
  message(paste0("NEWBAL (", COUNTRY, "): ", x))
}

start_time <- Sys.time()

R_SWS_SHARE_PATH <- Sys.getenv("R_SWS_SHARE_PATH")

if (CheckDebug()) {
  R_SWS_SHARE_PATH <- "//hqlprsws1.hq.un.fao.org/sws_r_share"
  
  mydir <- "modules/SUA_bal_compilation_round2021"
  
  SETTINGS <- faoswsModules::ReadSettings(file.path(mydir, "sws.yml"))
  
  SetClientFiles(SETTINGS[["certdir"]])
  
  GetTestEnvironment(baseUrl = SETTINGS[["server"]], token = SETTINGS[["token"]])
}

startYear <- as.numeric(swsContext.computationParams$startYear)
endYear <- as.numeric(swsContext.computationParams$endYear)
stopifnot(startYear <= endYear)
yearVals <- startYear:endYear


COUNTRY <- as.character(swsContext.datasets[[1]]@dimensions$geographicAreaM49@keys)

COUNTRY_suaunbal <- as.character(swsContext.datasets[[2]]@dimensions$geographicAreaM49@keys)

COUNTRY_shareup <- as.character(swsContext.datasets[[3]]@dimensions$geographicAreaM49@keys)

COUNTRY_sharedown <- as.character(swsContext.datasets[[4]]@dimensions$geographicAreaM49@keys)

if (!all(c(COUNTRY_suaunbal, COUNTRY_shareup, COUNTRY_sharedown) == COUNTRY)) {
  dbg_print("Session countries are not identical")
  stop("Session countries are not identical")
}

COUNTRY_NAME <-
  nameData(
    "suafbs", "sua_unbalanced",
    data.table(geographicAreaM49 = COUNTRY))$geographicAreaM49_description

dbg_print("parameters")

USER <- regmatches(
  swsContext.username,
  regexpr("(?<=/).+$", swsContext.username, perl = TRUE)
)

# if (!file.exists(file.path(R_SWS_SHARE_PATH, USER))) {
#   dir.create(file.path(R_SWS_SHARE_PATH, USER))
# }


STOP_AFTER_DERIVED <- as.logical(swsContext.computationParams$stop_after_derived)

THRESHOLD_METHOD <- 'share'

FIX_OUTLIERS <- TRUE

FILL_EXTRACTION_RATES <- TRUE

YEARS <- as.character(2000:endYear)

TMP_DIR <- file.path(tempdir(), USER)
if (!file.exists(TMP_DIR)) dir.create(TMP_DIR, recursive = TRUE)

tmp_file_imb         <- file.path(TMP_DIR, paste0("IMBALANCE_", COUNTRY, ".xlsx"))
tmp_file_shares      <- file.path(TMP_DIR, paste0("SHARES_", COUNTRY, ".xlsx"))
tmp_file_losses_to_check <- file.path(TMP_DIR, paste0("LOSSES_TO_CHECK_", COUNTRY, ".xlsx"))
tmp_file_industrial_to_check <- file.path(TMP_DIR, paste0("INDUSTRIAL_TO_CHECK_", COUNTRY, ".xlsx"))
#tmp_file_negative    <- file.path(TMP_DIR, paste0("NEGATIVE_AVAILAB_", COUNTRY, ".csv"))
#tmp_file_non_exist   <- file.path(TMP_DIR, paste0("NONEXISTENT_", COUNTRY, ".csv"))
#tmp_file_fix_shares  <- file.path(TMP_DIR, paste0("FIXED_PROC_SHARES_", COUNTRY, ".csv"))
#tmp_file_NegNetTrade <- file.path(TMP_DIR, paste0("NEG_NET_TRADE_", COUNTRY, ".csv"))
tmp_file_Stock_correction <- file.path(TMP_DIR, paste0("STOCK_CORRECTION_", COUNTRY, ".csv"))

# Always source files in R/ (useful for local runs).
# The working directory should be the root of faoswsStandardization/
sapply(dir("R", full.names = TRUE), source)


p <- defaultStandardizationParameters()

p$itemVar <- "measuredItemSuaFbs"
p$mergeKey[p$mergeKey == "measuredItemCPC"] <- "measuredItemSuaFbs"
p$elementVar <- "measuredElementSuaFbs"
p$childVar <- "measuredItemChildCPC"
p$parentVar <- "measuredItemParentCPC"
p$createIntermetiateFile <- "TRUE"
p$protected <- "Protected"
p$official <- "Official"





shareDownUp_file <-
  file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_", COUNTRY, ".csv"))


tourist_cons_table <- ReadDatatable("keep_tourist_consumption")

stopifnot(nrow(tourist_cons_table) > 0)

TourismNoIndustrial <- tourist_cons_table[small == "X"]$tourist
NonSmallIslands <- tourist_cons_table[small != "X"]$tourist

dbg_print("define functions")

######### FUNCTIONS: at some point, they will be moved out of this file. ####

# Replacement for merge(x, y, by = VARS, all.x = TRUE) that does not set keys.
# By default it behaves like dplyr::left_join(). If nomatch = 0, non-matching
# rows are not returned.
dt_left_join <- function(x, y, by = NA, allow.cartesian = FALSE,
                         nomatch = NA) {
  if (anyNA(by)) {
    stop("'by' is required")
  }
  
  if (any(!is.data.table(x), !is.data.table(y))) {
    stop("'x' and 'y' should be data.tables")
  }
  
  res <- y[x, on = by, allow.cartesian = allow.cartesian, nomatch = nomatch]
  
  setcolorder(res, c(names(x), setdiff(names(y), names(x))))
  
  res
}
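
# Illustrative usage (not run), assuming two small data.tables:
#   x <- data.table(id = c("a", "b"), v1 = 1:2)
#   y <- data.table(id = c("a", "c"), v2 = 3:4)
#   dt_left_join(x, y, by = "id")
#   # Keeps both rows of `x`; v2 is NA for id == "b".
#   dt_left_join(x, y, by = "id", nomatch = 0)
#   # Drops id == "b", behaving like an inner join.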

dt_full_join <- function(x, y, by = NA) {
  if (anyNA(by)) {
    stop("'by' is required")
  }
  
  if (any(!is.data.table(x), !is.data.table(y))) {
    stop("'x' and 'y' should be data.tables")
  }
  
  res <- merge(x, y, by = by, all = TRUE)
  
  # merge sets the key to `by`
  setkey(res, NULL)
  
  res
}
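
# Illustrative usage (not run): with the same `x` and `y` as above,
#   dt_full_join(x, y, by = "id")
#   # returns rows for "a", "b" and "c", with NAs where a side is missing,
#   # and no key set on the result.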

message("Line 140")
# Function used for countries which had stock data in the past (before 2014):
# linearization taking into account the supply before the start year.
coeffs_stocks_mod <- function(x) {
  tmp <- lm(data = x[timePointYears < startYear], supply_inc ~ supply_exc + trend)
  
  as.list(tmp$coefficients)
}
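
# Illustrative usage (not run; `dt` and the grouping variables are hypothetical):
#   dt[, coeffs_stocks_mod(.SD), by = .(geographicAreaM49, measuredItemSuaFbs)]
#   # returns, per group, the intercept and the supply_exc / trend coefficients
#   # of the pre-startYear regression.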

# This function will recalculate opening stocks from the first
# observation. Always.
update_opening_stocks <- function(x) {
  x <- x[order(geographicAreaM49, measuredItemSuaFbs, timePointYears)]
  
  groups <- unique(x[, c("geographicAreaM49", "measuredItemSuaFbs"), with = FALSE])
  
  res <- list()
  
  for (i in seq_len(nrow(groups))) {
    z <- x[groups[i], on = c("geographicAreaM49", "measuredItemSuaFbs")]
    if (nrow(z) > 1) {
      for (j in seq_len(nrow(z))[-1]) {
        # negative delta cannot be more than opening
        if (z$delta[j-1] < 0 & abs(z$delta[j-1]) > z$new_opening[j-1]) {
          z$delta[j-1] <- - z$new_opening[j-1]
        }
        z$new_opening[j] <- z$new_opening[j-1] + z$delta[j-1]
      }
      # negative delta cannot be more than opening
      if (z$delta[j] < 0 & abs(z$delta[j]) > z$new_opening[j]) {
        z$delta[j] <- - z$new_opening[j]
      }
    }
    res[[i]] <- z
  }
  rbindlist(res)
}
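
# Worked example (illustrative): with new_opening = 100 in the first year and
# deltas c(20, -150, 0), the recursion opening[t] = opening[t-1] + delta[t-1]
# with the clamping rule gives:
#   year 1: opening = 100, delta =   20
#   year 2: opening = 120, delta = -150 -> clamped to -120 (cannot exceed opening)
#   year 3: opening =   0, delta =    0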


# Fill NAs by LOCF/NOCB/interpolation if at least two
# non-missing observations are available, otherwise just
# replicate the only non-missing observation
na.fill_ <- function(x) {
  if(sum(!is.na(x)) > 1) {
    zoo::na.fill(x, "extend")
  } else {
    rep(x[!is.na(x)], length(x))
  }
}
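
# Illustrative behaviour (not run):
#   na.fill_(c(NA, 2, NA, 4, NA))  # -> 2 2 3 4 4 (interpolation + LOCF/NOCB)
#   na.fill_(c(NA, 7, NA))         # -> 7 7 7 (single value replicated)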



`%!in%` <- Negate(`%in%`)


# RemainingToProcessedParent() and RemainingProdChildToAssign() will
# be used in the derivation of shareDownUp

RemainingToProcessedParent <- function(data) {
  data[, 
       parent_already_processed :=
         ifelse(
           is.na(parent_qty_processed),
           parent_qty_processed,
           sum(processed_to_child / extractionRate, na.rm = TRUE)
         ),
       by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
  ]
  
  data[, remaining_processed_parent := round(parent_qty_processed - parent_already_processed)]
  
  data[remaining_processed_parent < 0, remaining_processed_parent := 0]
  
  data[,
       only_child_left :=
         sum(is.na(processed_to_child)) == 1 &
         is.na(processed_to_child) &
         !is.na(production_of_child) &
         !is.na(parent_qty_processed) &
         production_of_child > 0,
       by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
  ]
  
  data[
    only_child_left == TRUE,
    processed_to_child := remaining_processed_parent * extractionRate
  ]
  
  data[,
       parent_already_processed :=
         ifelse(
           is.na(parent_qty_processed),
           parent_qty_processed,
           sum(processed_to_child / extractionRate, na.rm = TRUE)
         ),
       by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
  ]
  
  data[, remaining_processed_parent := round(parent_qty_processed - parent_already_processed)]
  
  data[remaining_processed_parent < 0, remaining_processed_parent := 0]
  
  return(data)
}
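
# Worked example (illustrative): a parent with parent_qty_processed = 100 and
# two children, one already assigned processed_to_child = 60 with
# extractionRate = 0.8 (i.e., 75 units of parent used), the other still NA
# with extractionRate = 0.5 and production_of_child > 0. Being the only child
# left, it gets processed_to_child = (100 - 75) * 0.5 = 12.5, after which
# remaining_processed_parent drops to 0.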


RemainingProdChildToAssign <- function(data) {
  
  data[,
       available_processed_child := sum(processed_to_child, na.rm = TRUE),
       by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
  ]
  
  data[, remaining_to_process_child := round(production_of_child - available_processed_child)]
  
  data[remaining_to_process_child < 0, remaining_to_process_child := 0]
  
  data[,
       only_parent_left :=
         sum(is.na(processed_to_child)) == 1 &
         is.na(processed_to_child) &
         !is.na(parent_qty_processed) &
         parent_qty_processed >= 0
  ]
  
  data[only_parent_left == TRUE, processed_to_child := remaining_to_process_child]
  
  data[,
       available_processed_child := sum(processed_to_child, na.rm = TRUE),
       by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
  ]
  
  data[, remaining_to_process_child := round(production_of_child - available_processed_child)]
  
  data[remaining_to_process_child < 0, remaining_to_process_child := 0]
  
  return(data)
}


# The fmax function is used when fixing the processingShare of coproducts.
# If `plusor` is TRUE, it means that "+" or "or" cases are involved.
fmax <- function(child, main, share, plusor = FALSE) {
  main <- unique(main)
  
  if (plusor) {
    found <- sum(sapply(child, function(x) grepl(x, main), USE.NAMES = FALSE))
    
    if (found == 0) {
      return(max(share, na.rm = TRUE))
    } else if (found == 1) {
      return(
        share[(1:length(child))[sapply(child, function(x) grepl(x, main), USE.NAMES = FALSE)]]
      )
    } else { # should be 2
      return(max(share[(1:length(child))[sapply(child, function(x) grepl(x, main), USE.NAMES = FALSE)]], na.rm = TRUE))
    }
  } else {
    if (sum(grepl(main, child)) > 0) {
      share[child == main]
    } else {
      max(share, na.rm = TRUE)
    }
  }
}
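
# Illustrative behaviour (not run; toy strings stand in for CPC codes):
#   fmax(child = c("flour", "bran"), main = "flour", share = c(0.8, 0.2))
#   # -> 0.8 (the share matching the main coproduct)
#   fmax(child = c("flour", "bran"), main = "germ", share = c(0.8, 0.2))
#   # -> 0.8 (main not found among the children, so the maximum share is used)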

# Function that calculates imbalance as supply - utilizations (both calculated
# inside the function, dropped if keep_(supply|utilizations) set to FALSE).
calculateImbalance <- function(data,
                               supply_add = c("production", "imports"),
                               supply_subtract = c("exports", "stockChange"),
                               supply_all = union(supply_add, supply_subtract),
                               item_name = "measuredItemSuaFbs",
                               bygroup = c("geographicAreaM49", "timePointYears", item_name),
                               keep_supply = TRUE,
                               keep_utilizations = TRUE) {
  
  stopifnot(is.data.table(data))
  
  data[,
       `:=`(
         supply =
           sum(Value[measuredElementSuaFbs %chin% supply_add],
               - Value[measuredElementSuaFbs %chin% supply_subtract],
               na.rm = TRUE),
         # All elements that are NOT supply elements
         utilizations =
           sum(Value[!(measuredElementSuaFbs %chin% supply_all)],
               na.rm = TRUE)
       ),
       by = bygroup
  ][,
    imbalance := supply - utilizations
  ]
  
  if (keep_supply == FALSE) {
    data[, supply := NULL]
  }
  
  if (keep_utilizations == FALSE) {
    data[, utilizations := NULL]
  }
  
}
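
# Worked example (illustrative; the function updates `data` by reference):
# for one item-year with production = 100, imports = 20, exports = 30,
# stockChange = 10, food = 60 and feed = 10:
#   supply       = (100 + 20) - (30 + 10) = 80
#   utilizations = 60 + 10                = 70
#   imbalance    = 80 - 70                = 10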

outside <- function(x, lower = NA, upper = NA) {
  x < lower | x > upper
}


send_mail <- function(from = NA, to = NA, subject = NA,
                      body = NA, remove = FALSE) {
  
  if (missing(from)) from <- 'no-reply@fao.org'
  
  if (missing(to)) {
    if (exists('swsContext.userEmail')) {
      to <- swsContext.userEmail
    }
  }
  
  if (is.null(to) || all(is.na(to))) {
    stop('No valid email in `to` parameter.')
  }
  
  if (missing(subject)) stop('Missing `subject`.')
  
  if (missing(body)) stop('Missing `body`.')
  
  if (length(body) > 1) {
    body <-
      sapply(
        body,
        function(x) {
          if (file.exists(x)) {
            # https://en.wikipedia.org/wiki/Media_type 
            file_type <-
              switch(
                tolower(sub('.*\\.([^.]+)$', '\\1', basename(x))),
                txt  = 'text/plain',
                csv  = 'text/csv',
                png  = 'image/png',
                jpeg = 'image/jpeg',
                jpg  = 'image/jpeg',
                gif  = 'image/gif',
                xls  = 'application/vnd.ms-excel',
                xlsx = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                doc  = 'application/msword',
                docx = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                pdf  = 'application/pdf',
                zip  = 'application/zip',
                # https://stackoverflow.com/questions/24725593/mime-type-for-serialized-r-objects
                rds  = 'application/octet-stream'
              )
            
            if (is.null(file_type)) {
              stop(paste(tolower(sub('.*\\.([^.]+)$', '\\1', basename(x))),
                         'is not a supported file type.'))
            } else {
              res <- sendmailR:::.file_attachment(x, basename(x), type = file_type)
              
              if (remove == TRUE) {
                unlink(x)
              }
              
              return(res)
            }
          } else {
            return(x)
          }
        }
      )
  } else if (!is.character(body)) {
    stop('`body` should be either a string or a list.')
  }
  
  sendmailR::sendmail(from, to, subject, as.list(body))
}
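
# Illustrative usage (not run; the recipient address is hypothetical):
#   send_mail(
#     to      = "user@fao.org",
#     subject = "SUA balancing output",
#     body    = c("Imbalances attached.", tmp_file_imb)
#   )
#   # Elements of `body` that are existing file paths are attached with the
#   # appropriate MIME type; plain strings become the message text.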

balance_proportional <- function(data) {
  
  x <- copy(data)
  
  x <-
    x[
      can_balance == TRUE,
      c("measuredElementSuaFbs", "Value",
        "mov_share", "imbalance", "min_threshold",
        "max_threshold", "can_balance"),
      with = FALSE
    ]
  
  mov_share_sum <- sum(x$mov_share, na.rm = TRUE)
  
  if (mov_share_sum > 0) {
    # Recalculate mov_share, as we are excluding protected values
    x[, mov_share := mov_share / mov_share_sum]
  } else {
    # It means that there are no back shares, so use current values
    x[!is.na(Value) & can_balance == TRUE, mov_share := Value / sum(Value, na.rm = TRUE)]
  }
  
  x[is.na(Value), Value := 0]
  
  x[, adjusted_value := 0]
  
  x[Value + mov_share * imbalance >= 0, adjusted_value := Value + mov_share * imbalance]
  
  x[adjusted_value > Value & adjusted_value > max_threshold, adjusted_value := max_threshold]
  
  x[adjusted_value < Value & adjusted_value < min_threshold, adjusted_value := min_threshold]
  
  x <-  x[, c("measuredElementSuaFbs", "adjusted_value"), with = FALSE][data, on = "measuredElementSuaFbs"]
  
  return(as.numeric(x$adjusted_value))
}
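
# Worked example (illustrative): with an imbalance of 10 and two balanceable
# utilizations holding mov_share 0.75 and 0.25, the adjustments are +7.5 and
# +2.5 respectively, each then clamped to its [min_threshold, max_threshold]
# interval; rows with can_balance == FALSE (e.g. protected values) are left
# untouched by the caller.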


# rollavg() is a rolling average function that uses computed averages
# to generate new values if there are missing values (and NOCB/LOCF).
# I.e.:
# myvec <- c(NA, 2, 3, 2.5, 4, 3, NA, NA, NA)
#
#> RcppRoll::roll_mean(myvec, 3, fill = 'extend', align = 'right')
#[1]       NA       NA       NA 2.500000 3.166667 3.166667       NA       NA       NA 
#
#> rollavg(myvec)
#[1] 2.000000 2.000000 3.000000 2.500000 4.000000 3.000000 3.166667 3.388889 3.185185

rollavg <- function(x, order = 3) {
  # order should be > 2
  stopifnot(order >= 3)
  
  non_missing <- sum(!is.na(x))
  
  # For cases that have just two non-missing observations
  order <- ifelse(order > 2 & non_missing == 2, 2, order)
  
  if (non_missing == 1) {
    x[is.na(x)] <- na.omit(x)[1]
  } else if (non_missing >= order) {
    n <- 1
    while(any(is.na(x)) & n <= 10) { # 10 is max tries
      movav <- suppressWarnings(RcppRoll::roll_mean(x, order, fill = 'extend', align = 'right'))
      movav <- data.table::shift(movav)
      x[is.na(x)] <- movav[is.na(x)]
      n <- n + 1
    }
    
    x <- zoo::na.fill(x, 'extend')
  }
  
  return(x)
}


newBalancing <- function(data, Utilization_Table) {
  
  # Contains a variable that indicates whether stocks changed
  data[, change_stocks := NA_integer_]
  
  # XXX define as "food residual" those items for which the only utilization
  # is food. Food processing can also be another possible utilization and,
  # if present, it does not change the item's food-residualness; this is why
  # the check is done before assigning food processing.
  # NW: I have commented the conditions out.
  # Now it only checks that food is the only utilization.
  # We noticed that some of the food items were missing from the utilization
  # table. This is still different from the previous approach of assigning all
  # of the imbalance to food when "none of the other utilizations are activable".
  data[,
       food_resid :=
         # It's a food item & ...
         (measuredItemSuaFbs %chin% Utilization_Table[food_item == 'X', cpc_code] |
            # food exists & ...
            # !is.na(Value[measuredElementSuaFbs == 'food']) &
            Food_Median > 0 & !is.na(Food_Median)) &
         # ... is the only utilization
         all(is.na(Value[!(measuredElementSuaFbs %chin%
                             c('loss', 'food', 'production', 'imports',
                               'exports', 'stockChange','foodManufacturing', 'tourist'))])),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  # 0143	Cottonseed
  # 01442	Mustard seed
  # 01443	Rapeseed or colza seed
  # 01446	Safflower seed
  # 01449.90	Other oil seeds, n.e.
  # 01491.02	Palm kernels
  # 01801	Sugar beet
  # 01802	Sugar cane
  # 01809	Other sugar crops n.e.
  
  
  special_list <- c("0143", "01442", "01443", "01446", "01449.90", "01491.02", "01801", "01802", "01809")
  
  data[measuredItemSuaFbs %chin% special_list,
       food_resid :=
         # It's a food item & ...
         (measuredItemSuaFbs %chin% Utilization_Table[food_item == 'X', cpc_code] &
            # food exists & ...
            # !is.na(Value[measuredElementSuaFbs == 'food']) &
            Food_Median > 0 & !is.na(Food_Median)) &
         # ... is the only utilization
         all(is.na(Value[!(measuredElementSuaFbs %chin%
                             c('loss', 'food', 'production', 'imports',
                               'exports', 'stockChange','foodManufacturing', 'tourist'))])),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  # Check whether the commodity has a past value before assigning the residual
  # imbalance at the end of the balancing process
  data[,
       `:=`(
         feed_resid =
           # It's a feed item or have past value & ...
           #(measuredItemSuaFbs %in% Utilization_Table[feed == 'X', cpc_code] |
           (Feed_Median > 0 & !is.na(Feed_Median)) &
           #feed is the only utilization....
           all(is.na(Value[!(measuredElementSuaFbs %chin%
                               c('feed', 'production', 'imports', 'exports',
                                 'stockChange','foodManufacturing'))])),
         # It's a industrial item or have past value & ...
         industrial_resid = Industrial_Median > 0 & !is.na(Industrial_Median)),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ] 
  
  data[,
       supply :=
         sum(
           Value[measuredElementSuaFbs %chin% c('production', 'imports')],
           - Value[measuredElementSuaFbs %chin% c('exports')],
           na.rm = TRUE
         ),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  # Blocking non-industrial items from FBS cycle 2021, all countries - livia
  not_industrial_table <- ReadDatatable("not_industrial_use_table")
  
  not_industrial <- not_industrial_table[not_industrial %in% "X",]$cpc_code
   
  data[
    measuredItemSuaFbs %in% not_industrial & measuredElementSuaFbs %in% "industrial" &
      Value > 0 & !is.na(Value),
    `:=`(
      industrial_resid = FALSE,
      Value = 0,
      flagObservationStatus = "E",
      flagMethod = "f",
      Protected = TRUE
    )
  ]
  
  
  calculateImbalance(data)
  
  # When production needs to be created
  # data[
  #   Protected == FALSE &
  #     # Only primary
  #     measuredItemSuaFbs %chin% Utilization_Table[primary_item == "X"]$cpc_code &
  #     measuredElementSuaFbs == 'production' &
  #     supply < 0 &
  #     stockable == FALSE &
  #     !is.na(Value) & Value != 0,
  #   `:=`(
  #     Value = Value - imbalance,
  #     flagObservationStatus = "E",
  #     flagMethod = "c"
  #   )]
  
  # calculateImbalance(data)
  
  # If we have new production or new imports, we assign the imbalance to food
  # (if it is a food item) or to feed (if it is a feed item)
   
  data[imbalance > 0 & (is.na(Production_Median) | Production_Median == 0) &
         (is.na(Import_Median) | Import_Median == 0) &
         Protected == FALSE & measuredElementSuaFbs == "food" &
         measuredItemSuaFbs %chin% Utilization_Table[food_item == "X"]$cpc_code, 
       `:=`(
         Value =
           ifelse(
             is.na(Value) & imbalance > 0,
             imbalance,
             ifelse(Value + imbalance >= 0, Value + imbalance, 0)
           ),
         flagObservationStatus = "E",
         flagMethod = "h"
       )
  ]
  
  # recalculate imbalance
  
  calculateImbalance(data)
  
  data[imbalance > 0 & (is.na(Production_Median) | Production_Median == 0) &
         (is.na(Import_Median) | Import_Median == 0) &
         Protected == FALSE & measuredElementSuaFbs == "feed" &
         measuredItemSuaFbs %chin% Utilization_Table[feed == "X"]$cpc_code, 
       `:=`(
         Value =
           ifelse(
             is.na(Value) & imbalance > 0,
             imbalance,
             ifelse(Value + imbalance >= 0, Value + imbalance, 0)
           ),
         flagObservationStatus = "E",
         flagMethod = "h"
       )
  ]
  
  
  # recalculate imbalance
  calculateImbalance(data)
  
  # Try to assign the maximum of imbalance to stocks
  # NOTE: in the conditions below, 2 was 0.2, indicating that no more than
  # 20% should go to stocks. Now, the condition was relaxed a lot (200%)
  data <-
    dt_left_join(
      data,
      all_opening_stocks[,
                         .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
                           timePointYears, opening_stocks = Value)
      ],
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    )
  
  data[,
       Value_0 := ifelse(is.na(Value), 0, Value)
  ][
    Protected == FALSE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "stockChange" &
      stockable == TRUE,
    change_stocks :=
      # The numbers indicate the case. Assignment (value and flags) will be done below
      case_when(
        # case 1: we don't want stocks to change sign.
        sign(Value_0) * sign(Value_0 + imbalance) == -1                                                  ~ 1L,
        # case 2: if value + imbalance takes LESS than opening stock, take all from stocks
        Value_0 <= 0 & (Value_0 + imbalance <= 0) & abs(Value_0 + imbalance) <= opening_stocks           ~ 2L,
        # case 3: if value + imbalance takes MORE than opening stock, take max opening stocks
        Value_0 <= 0 & (Value_0 + imbalance <= 0) & abs(Value_0 + imbalance) > opening_stocks            ~ 3L,
        # case 4: if value + imbalance send LESS than 200% of supply, send all
        Value_0 >= 0 & (Value_0 + imbalance >= 0) & (Value_0 + imbalance + opening_stocks <= supply * 2) ~ 4L,
        # case 5: if value + imbalance send MORE than 200% of supply, send 200% of supply
        Value_0 >= 0 & (Value_0 + imbalance >= 0) & (Value_0 + imbalance + opening_stocks > supply * 2)  ~ 5L
      )
  ]
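  
  # Worked example (illustrative): with opening_stocks = 50, supply = 100,
  # Value_0 = 20 and imbalance = 40, Value_0 + imbalance = 60 >= 0 and
  # 60 + 50 <= 200 (twice supply), so case 4 applies and all of the
  # imbalance is sent to stocks; had 60 + 50 exceeded 200, case 5 would
  # cap the variation at supply * 2 - opening_stocks.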
  
  data[change_stocks == 1L, Value := 0]
  data[change_stocks == 2L, Value := Value_0 + imbalance]
  data[change_stocks == 3L, Value := - opening_stocks]
  data[change_stocks == 4L, Value := Value_0 + imbalance]
  # Only case for which grouping is required
  data[
    change_stocks == 5L,
    Value := max(supply * 2 - opening_stocks, 0),
    by = c("geographicAreaM49", "measuredItemSuaFbs")
  ]
  
  data[
    change_stocks %in% 1L:5L,
    `:=`(flagObservationStatus = "E", flagMethod = "s")
  ]
  
  data[, Value_0 := NULL]
  
  data[, opening_stocks := NULL]
  
  # Recalculate imbalance
  calculateImbalance(data)
  
  # Assign imbalance to food if food "only" (not "residual") item
  data[
    Protected == FALSE &
      food_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "food",
    `:=`(
      Value = ifelse(is.na(Value) & imbalance > 0, imbalance, ifelse(Value + imbalance >= 0, Value + imbalance, 0)),
      flagObservationStatus = "E",
      flagMethod = "h"
    )
  ]
  
  if (COUNTRY %in% TourismNoIndustrial) {
    data[measuredElementSuaFbs == "tourist" & flagMethod != "f", Protected := FALSE]
    
    adj_tour_ind <-
      data.table::dcast(
        data[
          measuredItemSuaFbs %chin% Utilization_Table[food_item == "X"]$cpc_code &
            measuredElementSuaFbs %chin% c("industrial", "tourist")
        ],
        geographicAreaM49 + timePointYears + measuredItemSuaFbs ~ measuredElementSuaFbs,
        value.var = "Value"
      )
    
    adj_tour_ind <- adj_tour_ind[industrial > 0]
    
    adj_tour_ind[, new_tourist := industrial]
    
    adj_tour_ind[, c("industrial", "tourist") := NULL]
    
    by_vars <- c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    
    data <- dt_left_join(data, adj_tour_ind, by = by_vars)
    
    data[
      measuredElementSuaFbs %in% c("industrial", "tourist"),
      any_protected := any(Protected == TRUE),
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    ]
    
    data[
      !is.na(new_tourist) & measuredElementSuaFbs == "tourist" & any_protected == FALSE,
      `:=`(
        Value = new_tourist,
        flagObservationStatus = "E",
        flagMethod = "e"
      )
    ]
    
    data[
      !is.na(new_tourist) & measuredElementSuaFbs == "industrial" & any_protected == FALSE,
      `:=`(
        Value = 0,
        flagObservationStatus = "E",
        flagMethod = "e"
      )
    ]
    
    data[, c("any_protected", "new_tourist") := NULL]
    data[measuredElementSuaFbs == "tourist", Protected := TRUE]
    
  }
  
  for (j in 1:10) {
    
    # Recalculate imbalance
    calculateImbalance(data)
    
    data[, can_balance := FALSE]
    
    data[
      !is.na(Value) &
        Protected == FALSE &
        !(data.table::between(Value, min_threshold, max_threshold, incbounds = FALSE) %in% FALSE) &
        !(measuredElementSuaFbs %chin%
            c("production", "imports", "exports", "stockChange", "foodManufacturing", "seed")),
      can_balance := TRUE
    ]
    
    data[,
         elements_balance := any(can_balance),
         by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
    ]
    
    if (nrow(data[dplyr::near(imbalance, 0) == FALSE & elements_balance == TRUE]) == 0) {
      break
    } else {
      data[
        dplyr::near(imbalance, 0) == FALSE &
          elements_balance == TRUE,
        adjusted_value := balance_proportional(.SD),
        by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
      ]
      
      data[
        !is.na(adjusted_value) & !dplyr::near(adjusted_value, Value),
        `:=`(
          Value = adjusted_value,
          flagObservationStatus = "E",
          flagMethod = "-"
        )
      ]
      
      data[, adjusted_value := NULL]
    }
    
  }
  
  # At this point the imbalance (in the best case scenario) should be zero,
  # the following re-calculation is useful only for debugging
  
  calculateImbalance(data)
  
  # Assign imbalance to food if food "only" (not "residual") item
  data[
    Protected == FALSE &
      food_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "food",
    `:=`(
      Value =
        ifelse(
          is.na(Value) & imbalance > 0,
          imbalance,
          ifelse(Value + imbalance >= 0, Value + imbalance, 0)
        ),
      flagObservationStatus = "E",
      flagMethod = "h"
    )
  ]
  
  calculateImbalance(data)
  
  # Assign the residual imbalance to industrial if the conditions are met
  data[geographicAreaM49 %in% TourismNoIndustrial & measuredItemSuaFbs %in% Utilization_Table[food_item == 'X', cpc_code],
       industrial_resid := FALSE
  ]
  
  data[
    Protected == FALSE &
      industrial_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "industrial",
    `:=`(
      Value =
        ifelse(
          is.na(Value) & imbalance > 0,
          imbalance,
          ifelse(Value + imbalance >= 0, Value + imbalance, Value)
        ),
      flagObservationStatus = "E",
      flagMethod = "b"
    )
  ]
  
  
  calculateImbalance(data)
  
  # Assign the residual imbalance to feed if the conditions are met
  data[
    Protected == FALSE &
      feed_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "feed",
    `:=`(
      # XXX: this creates a warning when no assignment is done:
      # Coerced 'logical' RHS to 'double'
      Value =
        ifelse(
          is.na(Value) & imbalance > 0,
          imbalance,
          ifelse(Value + imbalance >= 0, Value + imbalance, Value)
        ),
      flagObservationStatus = "E",
      flagMethod = "b"
    )
  ]
  
  calculateImbalance(data)
  
  data[, c("supply", "utilizations", "imbalance", "mov_share_rebased") := NULL]
  
  return(data)
}




############################## / FUNCTIONS ##################################


dbg_print("end functions")



#####################################  TREE #################################
message("Line 816")

dbg_print("download tree")

tree <- getCommodityTreeNewMethod(COUNTRY, YEARS)

stopifnot(nrow(tree) > 0)

tree <- tree[geographicAreaM49 %chin% COUNTRY]


# The `tree_exceptions` will not be checked by validateTree()

# Exception: high share confirmed by official data
tree_exceptions <- tree[geographicAreaM49 == "392" & measuredItemParentCPC == "0141" & measuredItemChildCPC == "23995.01"]

if (nrow(tree_exceptions) > 0) {
  tree <- tree[!(geographicAreaM49 == "392" & measuredItemParentCPC == "0141" & measuredItemChildCPC == "23995.01")]
}

validateTree(tree)

if (nrow(tree_exceptions) > 0) {
  tree <- rbind(tree, tree_exceptions)
  rm(tree_exceptions)
}

## NA extraction rates are recorded in the SWS dataset as 0.
## For the standardization we need them to be treated as NA,
## therefore here we are changing them back.

# TODO: Keep all protected 0 ERs
tree[Value == 0 & !(flagObservationStatus == "E" & flagMethod == "f"), Value := NA]

#proc_level_exceptions <- ReadDatatable("processing_level_exceptions")
#
#if (nrow(proc_level_exceptions) > 0) {
#  setnames(proc_level_exceptions, c("m49_code", "parent", "child"),
#           c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC"))
#
#  tree <-
#    tree[!proc_level_exceptions[is.na(level)],
#         on = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")]
#
#  proc_level_exceptions <- proc_level_exceptions[!is.na(level)]
#}

tree_to_send <- tree[is.na(Value) & measuredElementSuaFbs=="extractionRate"]
message("Line 864")

if (FILL_EXTRACTION_RATES == TRUE) {
  
  expanded_tree <-
    merge(
      data.table(
        geographicAreaM49 = unique(tree$geographicAreaM49),
        timePointYears = as.character(sort(2000:endYear))
      ),
      unique(
        tree[,
             c("geographicAreaM49", "measuredElementSuaFbs",
               "measuredItemParentCPC", "measuredItemChildCPC"),
             with = FALSE
        ]
      ),
      by = "geographicAreaM49",
      all = TRUE,
      allow.cartesian = TRUE
    )
  
  tree <- tree[expanded_tree, on = colnames(expanded_tree)]
  
  # flags for carry forward/backward
  tree[is.na(Value), c("flagObservationStatus", "flagMethod") := list("E", "t")]
  
  tree <-
    tree[!is.na(Value)][
      tree,
      on = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemParentCPC", "measuredItemChildCPC",
             "timePointYears"),
      roll = -Inf
    ]
  
  tree <-
    tree[!is.na(Value)][
      tree,
      on = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemParentCPC", "measuredItemChildCPC",
             "timePointYears"),
      roll = Inf
    ]
  
  # keep orig flags
  tree[, flagObservationStatus := i.i.flagObservationStatus]
  tree[, flagMethod := i.i.flagMethod]
  
  tree[, names(tree)[grep("^i\\.", names(tree))] := NULL]
}
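
# Illustrative sketch of the roll mechanics used above (not run): for a tree
# row with extraction rates NA, NA, 0.8, NA across 2000-2003, the roll = -Inf
# join carries 0.8 backwards (2000-2001) and the roll = Inf join carries it
# forwards (2003), so the series becomes 0.8 for all years, flagged (E, t).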


# XXX: connections removed here that should not exist in
# the commodity tree (so, they should be fixed there)
tree[
  #timePointYears >= 2014 &
  ((measuredItemParentCPC == "02211" & measuredItemChildCPC == "22212") |
     #cheese from whole cow milk cannot come from skim milk of cow
     (measuredItemParentCPC == "22110.02" & measuredItemChildCPC == "22251.01") |
     (measuredItemParentCPC == "02211" & measuredItemChildCPC == "22251.02")
  ),
  `:=`(
    Value = NA,
    flagObservationStatus = "M",
    flagMethod = "n"
  )
]

# Correction of milk tree for Czechia. TODO: generalize for the next round

tree[
  timePointYears >= startYear & geographicAreaM49=="203" &
    
    # "whole milk powder" (from whole cow milk) cannot come from skim milk of cow
    ((measuredItemParentCPC == "22110.02" & measuredItemChildCPC == "22211") |
       # "whole milk condensed" (from whole cow milk) cannot come from skim milk of cow
       (measuredItemParentCPC == "22110.02" & measuredItemChildCPC == "22222.01")),
  `:=`(
    Value = NA,
    flagObservationStatus = "M",
    flagMethod = "n"
  )
]


# saveRDS(
#   tree[
#     !is.na(Value) & measuredElementSuaFbs == "extractionRate",
#     -grepl("measuredElementSuaFbs", names(tree)),
#     with = FALSE
#   ],
#   file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "tree.rds")
# )

tree_to_send <-
  tree_to_send %>% 
  dplyr::anti_join(
    tree[is.na(Value) & measuredElementSuaFbs == "extractionRate"],
    by = c("geographicAreaM49", "measuredElementSuaFbs",
           "measuredItemParentCPC", "measuredItemChildCPC", "timePointYears",
           "Value", "flagObservationStatus", "flagMethod")
  ) %>%
  dplyr::select(-Value) %>%
  dplyr::left_join(
    tree,
    by = c("geographicAreaM49", "measuredElementSuaFbs",
           "measuredItemParentCPC", "measuredItemChildCPC",
           "timePointYears", "flagObservationStatus", "flagMethod")
  ) %>%
  setDT()

tree_to_send <-
  tree_to_send[,
               c("geographicAreaM49", "measuredElementSuaFbs", "measuredItemParentCPC",
                 "measuredItemChildCPC", "timePointYears", "Value",
                 "flagObservationStatus", "flagMethod"),
               with = FALSE
  ]

setnames(
  tree_to_send,
  c("measuredItemParentCPC", "measuredItemChildCPC"),
  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree")
)

tree_to_send <-
  nameData("suafbs", "ess_fbs_commodity_tree2", tree_to_send,
           except = c('measuredElementSuaFbs', 'timePointYears'))

tree_to_send[,
             `:=`(
               measuredItemParentCPC_tree = paste0("'", measuredItemParentCPC_tree),
               measuredItemChildCPC_tree = paste0("'", measuredItemChildCPC_tree))
]

# tmp_file_extr <- file.path(TMP_DIR, paste0("FILLED_ER_", COUNTRY, ".csv"))
# 
# write.csv(tree_to_send, tmp_file_extr)

# XXX remove NAs
tree <- tree[!is.na(Value)]

uniqueLevels <- unique(tree[, c("geographicAreaM49", "timePointYears"), with = FALSE])

levels <- list()

treeLevels <- list()

for (i in seq_len(nrow(uniqueLevels))) {
  filter <- uniqueLevels[i, ]
  
  treeCurrent <- tree[filter, on = c("geographicAreaM49", "timePointYears")]
  
  levels <- findProcessingLevel(treeCurrent, "measuredItemParentCPC", "measuredItemChildCPC")
  
  setnames(levels, "temp", "measuredItemParentCPC")
  
  treeLevels[[i]] <- dt_left_join(treeCurrent, levels, by = "measuredItemParentCPC")
}

tree <- rbindlist(treeLevels)

tree[,
     processingLevel := max(processingLevel, na.rm = TRUE),
     by = c("geographicAreaM49", "timePointYears",
            "measuredElementSuaFbs", "measuredItemChildCPC")
]

# XXX Check if this one is still good or it can be obtained within the dataset
processed_item_datatable <- ReadDatatable("processed_item")
processedCPC <- processed_item_datatable[, measured_item_cpc]


# XXX Get codes to exclude animals from imbalances file at the end 
itemMap <- GetCodeList(domain = "agriculture", dataset = "aproduction", "measuredItemCPC")
itemMap <- itemMap[, .(measuredItemSuaFbs = code, type)]

##################################### / TREE ################################


############################ POPULATION #####################################

key <-
  DatasetKey(
    domain = "population",
    dataset = "population_unpd",
    dimensions =
      list(
        geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
        measuredElementSuaFbs = Dimension(name = "measuredElement", keys = "511"), # 511 = Total population
        timePointYears = Dimension(name = "timePointYears", keys = as.character(2000:endYear))
      )
  )

dbg_print("download population")

popSWS <- GetData(key)

stopifnot(nrow(popSWS) > 0)

popSWS[geographicAreaM49 == "156", geographicAreaM49 := "1248"]

# Fix for missing regional official data in the country total
# Source: DEMOGRAPHIC SURVEY, Kurdistan Region of Iraq, July 2018, IOM UN Migration
# ("the KRI population at 5,122,747 individuals and the overall Iraqi
# population at 36,004,552 individuals", pag.14; it implies 14.22805%)
# https://iraq.unfpa.org/sites/default/files/pub-pdf/KRSO%20IOM%20UNFPA%20Demographic%20Survey%20Kurdistan%20Region%20of%20Iraq_0.pdf
popSWS[geographicAreaM49 == "368" & timePointYears %in% startYear:endYear, Value := Value * 0.8577195]


############################ / POPULATION ##################################

############################################################### DATA #######


# 5510 Production[t]
# 5610 Import Quantity [t]
# 5071 Stock Variation [t]
# 5023 Export Quantity [t]
# 5910 Loss [t]
# 5016 Industrial uses [t]
# 5165 Feed [t]
# 5520 Seed [t]
# 5525 Tourist Consumption [t]
# 5164 Residual other uses [t]
# 5141 Food [t]
# 664 Food Supply (/capita/day) [Kcal]

elemKeys <- c("5510", "5610", "5071", "5113", "5023", "5910", "5016",
              "5165", "5520", "5525", "5164", "5166", "5141")

itemKeys <- GetCodeList(domain = "suafbs", dataset = "sua_unbalanced", "measuredItemFbsSua")
itemKeys <- itemKeys$code


# NOTE: the definition of "food_resid" changed (see inside newBalancing)
# (so, the tables below are not used anymore)

#food_classification_country_specific <-
#  ReadDatatable("food_classification_country_specific",
#                where = paste0("geographic_area_m49 IN ('", COUNTRY, "')"))
#
#food_classification_country_specific <-
#  food_classification_country_specific[geographic_area_m49 == COUNTRY]
#
#food_only_items <- food_classification_country_specific[food_classification == 'Food Residual', measured_item_cpc]


Utilization_Table <- ReadDatatable("utilization_table_2018")

stockable_items <- Utilization_Table[stock == 'X', cpc_code]

zeroWeight <- ReadDatatable("zero_weight")[, item_code]

flagValidTable <- ReadDatatable("valid_flags")


# We decided to unprotect (E, e) values (replaced outliers). This is done after
# they are replaced (i.e., if an (E, e) value exists initially, it should
# remain); they need to be unprotected for balancing.
flagValidTable[flagObservationStatus == 'E' & flagMethod == 'e', Protected := FALSE]


# XXX: we need to unprotect I,c because it was being assigned
# to imputation of production of derived which is NOT protected.
# This will need to change in the next exercise.
flagValidTable[flagObservationStatus == 'I' & flagMethod == 'c', Protected := FALSE]


# Nutrients are:
# 1001 Calories
# 1003 Proteins
# 1005 Fats
nutrientCodes <- c("1001", "1003", "1005")

nutrientData <-
  getNutritiveFactors(
    measuredElement = nutrientCodes, 
    timePointYears = as.character(startYear:endYear),
    geographicAreaM49 = COUNTRY
  )

######### CREAM, SWEDEN

nutrientData[geographicAreaM49 == "752" & measuredItemCPC == "22120" & measuredElement == "1001", Value := 195]
nutrientData[geographicAreaM49 == "752" & measuredItemCPC == "22120" & measuredElement == "1003", Value := 3]
nutrientData[geographicAreaM49 == "752" & measuredItemCPC == "22120" & measuredElement == "1005", Value := 19]

### MILK PRODUCTS, SELECTED COUNTRIES
nutrientData[geographicAreaM49 %in% c("756", "300", "250", "372", "276") & measuredItemCPC == "22251.01" & measuredElement == "1001", Value := 387]
nutrientData[geographicAreaM49 %in% c("756", "300", "250", "372", "276") & measuredItemCPC == "22251.01" & measuredElement == "1003", Value := 26]
nutrientData[geographicAreaM49 %in% c("756", "300", "250", "372", "276") & measuredItemCPC == "22251.01" & measuredElement == "1005", Value := 30]

nutrientData[geographicAreaM49 == "300" & measuredItemCPC == "22253" & measuredElement == "1001", Value := 310]
nutrientData[geographicAreaM49 == "300" & measuredItemCPC == "22253" & measuredElement == "1003", Value := 23]
nutrientData[geographicAreaM49 == "300" & measuredItemCPC == "22253" & measuredElement == "1005", Value := 23]


# Recode nutrient elements to per capita food supply elements:
# 1001 (Calories) -> 664, 1003 (Proteins) -> 674, 1005 (Fats) -> 684
nutrientData[measuredElement == "1001", measuredElement := "664"]
nutrientData[measuredElement == "1003", measuredElement := "674"]
nutrientData[measuredElement == "1005", measuredElement := "684"]

if (CheckDebug()) {
  key <-
    DatasetKey(
      domain = "suafbs",
      dataset = "sua_unbalanced",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElementSuaFbs", keys = elemKeys),
          measuredItemFbsSua = Dimension(name = "measuredItemFbsSua", keys = itemKeys),
          timePointYears = Dimension(name = "timePointYears", keys = YEARS)
        )
    )
  
  ## This key is used later to extract stocks from the balanced session:
  ## For the new round where the starting year was no longer 2014 a discrepancy came out in the bal session between opening stocks of the 
  ## first year of compilation. It was the decided to start the stock computation from the data present in the balanced session instead of the
  ## unbalanced ( and keep the stocks discrepancy in the unbalanced session instead). Liva and Cristina
  
  key_stocks_balanced <-
    DatasetKey(
      domain = "suafbs",
      dataset = "sua_balanced",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElementSuaFbs", keys = c("5113","5071")),
          measuredItemFbsSua = Dimension(name = "measuredItemFbsSua", keys = itemKeys),
          timePointYears = Dimension(name = "timePointYears", keys = YEARS)
        )
    )
  
} else {
  key <- swsContext.datasets[[2]]
  
  key@dimensions$timePointYears@keys <- YEARS
  key@dimensions$measuredItemFbsSua@keys <- itemKeys
  key@dimensions$measuredElementSuaFbs@keys <- elemKeys
  key@dimensions$geographicAreaM49@keys <- COUNTRY
  
  
  ##key for the stocks
  # 
  key_stocks_balanced <- swsContext.datasets[[1]]
  key_stocks_balanced@dimensions$timePointYears@keys <- YEARS
  key_stocks_balanced@dimensions$measuredItemFbsSua@keys <- itemKeys
  key_stocks_balanced@dimensions$measuredElementSuaFbs@keys <- c("5113","5071")
  key_stocks_balanced@dimensions$geographicAreaM49@keys <- COUNTRY
}

dbg_print("download data")


# LOAD
data <- GetData(key)

# Remove item that is not part of agriculture domain
data <- data[measuredItemFbsSua != "F1223"]

#LOAD stocks data from balanced session
data_stock_balanced <- GetData(key_stocks_balanced)


dbg_print("convert sugar")

##############################################################
######### SUGAR RAW CODES TO BE CONVERTED IN 2351F ###########
##############################################################
data <- convertSugarCodes_new(data)

#################### FODDER CROPS ##########################################

# TODO: create SWS datatable for this.
# Some of these items may be missing in reference files,
# thus we carry forward the last observation.
fodder_crops_items <-
  tibble::tribble(
    ~description, ~code,
    "Maize for forage", "01911",
    "Alfalfa for forage", "01912",
    "Sorghum for forage", "01919.01",
    "Rye grass for forage", "01919.02",
    "Other grasses for forage", "01919.91",
    "Clover for forage", "01919.03",
    "Other oilseeds for forage", "01919.94",
    "Other legumes for  forage", "01919.92",
    "Cabbage for fodder", "01919.04",
    "Mixed grass and legumes for forage", "01919.93",
    "Turnips for fodder", "01919.05",
    "Beets for fodder", "01919.06",
    "Carrots for fodder", "01919.07",
    "Swedes for fodder", "01919.08",
    "Other forage products, nes", "01919.96",
    "Other forage crops, nes", "01919.95",
    "Hay for forage, from legumes", "01919.10",
    "Hay for forage, from grasses", "01919.09",
    "Hay for forage, from other crops nes", "01919.11"
  )


fodder_crops_availab <-
  data[
    measuredItemFbsSua %chin% fodder_crops_items$code &
      measuredElementSuaFbs == "5510"
  ]

if (nrow(fodder_crops_availab) > 0) {
  
  fodder_crops_complete <-
    CJ(
      geographicAreaM49 = unique(fodder_crops_availab$geographicAreaM49),
      measuredElementSuaFbs = "5510",
      timePointYears = unique(data$timePointYears),
      measuredItemFbsSua = unique(fodder_crops_availab$measuredItemFbsSua)
    )
  
  fodder_crops_complete <-
    fodder_crops_complete[order(geographicAreaM49, measuredItemFbsSua, timePointYears)]
  
  fodder_crops <-
    dt_left_join(
      fodder_crops_complete,
      fodder_crops_availab[, .(geographicAreaM49, measuredElementSuaFbs,
                               measuredItemFbsSua, timePointYears, Value)],
      by = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemFbsSua", "timePointYears")
    )
  
  fodder_crops[,
               Value := zoo::na.locf(Value),
               by = c("geographicAreaM49", "measuredItemFbsSua")
  ]
  
  fodder_crops_new <-
    fodder_crops[
      !fodder_crops_availab,
      on = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemFbsSua", "timePointYears")
    ]
  
  
  if (nrow(fodder_crops_new) > 0) {
    
    fodder_crops_new[, `:=`(flagObservationStatus = "E", flagMethod = "t")]
    
    data <- rbind(data, fodder_crops_new)
    
  }
}

#################### / FODDER CROPS ########################################


# opening_stocks_2014 <-
#   ReadDatatable(
#     "opening_stocks_2014",
#     where = paste0("m49_code IN (",
#                    paste(shQuote(COUNTRY, type = "sh"), collapse = ", "), ")")
#   )

# The procedure works even if no previous opening stocks exist, but in that
# case there is no way to know whether data is missing because there was an
# issue downloading it or because it actually does not exist. Luckily there
# are many calls to SWS, which should fail if there are issues: it is unlikely
# that only this one would fail.
#stopifnot(nrow(opening_stocks_2014) > 0)
message("Line 1286")

non_null_prev_deltas <-
  unique(
    data[
      measuredElementSuaFbs == "5071" & timePointYears %in% (startYear-5):(startYear-1)
    ][,
      .SD[sum(!dplyr::near(Value, 0)) > 0],
      by = c("geographicAreaM49", "measuredItemFbsSua")
    ][,
      .(geographicAreaM49, measuredItemFbsSua)
    ]
  )

# 
# # Keep only those for which recent variations are available
# opening_stocks_2014 <-
#   opening_stocks_2014[
#     non_null_prev_deltas,
#     on = c("geographicAreaM49", "measuredItemFbsSua"),
#     nomatch = 0
#   ]


# The opening stock in the first year is computed as opening stock (t-1) + stock variation (t-1) (giulia)

original_opening_stocks <- data_stock_balanced[measuredElementSuaFbs %in% c("5113", "5071") ]

#flagValidTable_stocks <- copy(flagValidTable)

flagValidTable[is.na(flagObservationStatus), flagObservationStatus := ""]

original_opening_stocks <-
  flagValidTable[
    original_opening_stocks,
    on = c("flagObservationStatus", "flagMethod")
  ][,
    Valid := NULL
  ]


unbal_protected_opening_stocks <- data[measuredElementSuaFbs %in% c("5113", "5071") ]

unbal_protected_opening_stocks <-
  flagValidTable[
    unbal_protected_opening_stocks,
    on = c("flagObservationStatus", "flagMethod")
    ][,
      Valid := NULL
      ]

unbal_protected_opening_stocks <- unbal_protected_opening_stocks[Protected == TRUE,]


original_opening_stocks<- rbind(original_opening_stocks[! unbal_protected_opening_stocks , on = c("measuredElementSuaFbs","geographicAreaM49",
                                                    "timePointYears", "measuredItemFbsSua")], unbal_protected_opening_stocks)
## The commented computation below does not work if the opening of the
## previous year is NA:
# StartingOpening <- original_opening_stocks[, Op1Y := Value[measuredElementSuaFbs == "5113"
#                                            & timePointYears == (startYear-1)] + Value[measuredElementSuaFbs == "5071" & timePointYears == (startYear-1)],
#                                            by = c("measuredItemFbsSua", "geographicAreaM49")]
## It has been decided to protect, in general, the opening stocks in 2014
original_opening_stocks[timePointYears %in% "2014" & measuredElementSuaFbs %in% "5113", Protected := TRUE]

StartingOpening <-
  original_opening_stocks[,
    Op1Y := sum(
      Value[measuredElementSuaFbs == "5113" & timePointYears == (startYear - 1)],
      Value[measuredElementSuaFbs == "5071" & timePointYears == (startYear - 1)],
      na.rm = TRUE
    ),
    by = c("measuredItemFbsSua", "geographicAreaM49")
  ]

StartingOpening <- StartingOpening[measuredElementSuaFbs == "5113"]

StartingOpening <- StartingOpening[!is.na(Op1Y)]
StartingOpening[, c("timePointYears", "Value", "flagObservationStatus", "flagMethod", "Protected", "measuredElementSuaFbs") := NULL]

StartingOpening <- unique(StartingOpening)

# negative and positive limit
StartingOpening[Op1Y < 0, Op1Y := 0]

positivelimit <-
  data[
    timePointYears == (startYear-1) &
     measuredItemFbsSua %in% unique(StartingOpening$measuredItemFbsSua)
  ][ ,opening_lim :=
        sum(
          Value[measuredElementSuaFbs %chin% c("5510", "5610")],
          - Value[measuredElementSuaFbs == "5910"],
          na.rm = TRUE
        ) * 2,
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

positivelimit[ , c("measuredElementSuaFbs", "Value", "flagObservationStatus", "flagMethod", "timePointYears") := NULL]
positivelimit<- as.data.table(unique(positivelimit))

StartingOpening<-merge(StartingOpening,positivelimit, by= c("measuredItemFbsSua", "geographicAreaM49") )

# StartingOpening[opening_lim<0, opening_lim:=0]

StartingOpening$opening_lim <- as.integer(StartingOpening$opening_lim)



# StartingOpening[opening_lim > 0 & Op1Y > opening_lim, Op1Y := opening_lim]

StartingOpening[ , opening_lim:= NULL]


original_opening_stocks<- original_opening_stocks[ measuredElementSuaFbs=="5113"]
original_opening_stocks[ , Op1Y:=NULL]



StartingOpening[ , timePointYears:= as.character(startYear)]

# remove the opening that are protected in all opening stocks
StartingOpening <-
    StartingOpening[
      !original_opening_stocks[
        timePointYears == startYear & Protected == TRUE,
        .(geographicAreaM49, measuredItemFbsSua)
      ],
      on = c("geographicAreaM49", "measuredItemFbsSua")
    ]


all_opening_stocks <-
    merge(
      original_opening_stocks,
      StartingOpening,
      by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
      all = TRUE
    )

all_opening_stocks[
    !Protected %in% TRUE & !is.na(Op1Y),
    `:=`(
      Value = Op1Y,
      flagObservationStatus = "I",
      flagMethod = "-",
      # We protect these, in any case, because they should not
      # be overwritten, even if not (semi) official or expert
      Protected = TRUE,
      measuredElementSuaFbs = "5113",
      timePointYears = as.character(startYear)
    )
  ][,
    Op1Y := NULL
  ]


# # Remove protected in 2014 from cumulated
# opening_stocks_2014 <-
#   opening_stocks_2014[
#     !original_opening_stocks[
#       timePointYears == "2014" & Protected == TRUE,
#       .(geographicAreaM49, measuredItemFbsSua)
#     ],
#     on = c("geographicAreaM49", "measuredItemFbsSua")
#   ]

#
# all_opening_stocks <-
#   merge(
#     original_opening_stocks,
#     opening_stocks_2014,
#     by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
#     all = TRUE
#   )


# all_opening_stocks[
#   !Protected %in% TRUE & is.na(Value) & !is.na(Value_cumulated),
#   `:=`(
#     Value = Value_cumulated,
#     flagObservationStatus = "I",
#     flagMethod = "-",
#     # We protect these, in any case, because they should not
#     # be overwritten, even if not (semi) official or expert
#     Protected = TRUE,
#     measuredElementSuaFbs = "5113",
#     timePointYears = "2014"
#   )
# ][,
#   Value_cumulated := NULL
# ]

## remaining_opening_stocks has been commented out since in the new round we no
## longer create opening stocks as 20% of the previous year's supply for
## stockable items; otherwise the risk is to create stocks where stocks did not
## exist before. livia and cristina
# (Previously: for all remaining stockable items, we created opening
# stocks in 2014 as 20% of supply in 2013; they should be 0 now, giulia)

# remaining_opening_stocks <-
#   data[
#     timePointYears == as.character(startYear-1) &
#       measuredItemFbsSua %chin%
#       setdiff(
#         stockable_items,
#         all_opening_stocks$measuredItemFbsSua
#       )
#   ][,
#     .(
#       opening_20 =
#         sum(
#           Value[measuredElementSuaFbs %chin% c("5510", "5610")],
#           - Value[measuredElementSuaFbs == "5910"],
#           na.rm = TRUE
#         ) * 0.2,
#       timePointYears = as.character(startYear)
#     ),
#     by = c("geographicAreaM49", "measuredItemFbsSua")
#   ]
# 
# remaining_opening_stocks[opening_20 < 0, opening_20 := 0]
# 
# all_opening_stocks <-
#   merge(
#     all_opening_stocks,
#     remaining_opening_stocks,
#     by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
#     all = TRUE
#   )
# 
# all_opening_stocks <- all_opening_stocks[!is.na(timePointYears)]
# 
# all_opening_stocks[
#   !Protected %in% TRUE & is.na(Value) & !is.na(opening_20),
#   `:=`(
#     Value = opening_20,
#     flagObservationStatus = "I",
#     flagMethod = "i",
#     # We protect these, in any case, because they should not
#     # be overwritten, even if not (semi) official or expert
#     Protected = TRUE
#   )
# ][,
#   opening_20 := NULL
# ]

# TODO: automatically delete opening stocks for non-stockable commodities, even if they have a time series. giulia

complete_all_opening <-
  CJ(
    geographicAreaM49 = unique(all_opening_stocks$geographicAreaM49),
    timePointYears = as.character(min(all_opening_stocks$timePointYears):endYear),
    measuredItemFbsSua = unique(all_opening_stocks$measuredItemFbsSua)
  )


all_opening_stocks <-
  merge(
    complete_all_opening,
    all_opening_stocks,
    by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
    all = TRUE
  )

all_opening_stocks[, orig_val := Value]

all_opening_stocks <-
  all_opening_stocks[!is.na(Value)][
    all_opening_stocks,
    on = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
    roll = Inf]

all_opening_stocks[
  is.na(orig_val) & !is.na(Value),
  `:=`(
    Protected = FALSE,
    flagObservationStatus = "E",
    flagMethod = "t"
  )
]

all_opening_stocks[, measuredElementSuaFbs := "5113"]

all_opening_stocks[, orig_val := NULL]

all_opening_stocks[, names(all_opening_stocks)[grep("^i\\.", names(all_opening_stocks))] := NULL]

all_opening_stocks <- all_opening_stocks[!is.na(timePointYears)]



# Generate stocks variations for items for which opening
# exists for ALL years and variations don't exist

opening_avail_all_years <-
  all_opening_stocks[
    !is.na(Value) & timePointYears >= startYear,
    .SD[.N == (endYear - startYear + 1)],
    measuredItemFbsSua
  ]

delta_avail_for_open_all_years <-
  data[
    measuredElementSuaFbs == "5071" &
      timePointYears >= startYear &
      measuredItemFbsSua %chin% opening_avail_all_years$measuredItemFbsSua,
    .SD[.N == (endYear - startYear + 1)],
    measuredItemFbsSua
  ]

to_generate_by_ident <-
  setdiff(
    opening_avail_all_years$measuredItemFbsSua,
    delta_avail_for_open_all_years$measuredItemFbsSua
  )

data_rm_ident <- data[measuredElementSuaFbs == "5071" & measuredItemFbsSua %chin% to_generate_by_ident]

if (length(to_generate_by_ident) > 0) {
  
  opening_avail_all_years <-
    opening_avail_all_years[measuredItemFbsSua %chin% to_generate_by_ident]
  
  opening_avail_all_years[, Protected := NULL]
  
  next_opening_key <-
    DatasetKey(
      domain = "Stock",
      dataset = "stocksdata",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElement", keys = "5113"),
          measuredItemFbsSua = Dimension(name = "measuredItemCPC", keys = unique(opening_avail_all_years$measuredItemFbsSua)),
          timePointYears = Dimension(name = "timePointYears", keys = as.character(as.numeric(max(opening_avail_all_years$timePointYears)) + 1))
        )
    )
  
  next_opening <- GetData(next_opening_key)
  
  setnames(
    next_opening,
    c("measuredItemCPC", "measuredElement"),
    c("measuredItemFbsSua", "measuredElementSuaFbs")
  )
  
  opening_avail_all_years <-
    rbind(opening_avail_all_years, next_opening)
  
  opening_avail_all_years <-
    opening_avail_all_years[
      order(geographicAreaM49, measuredItemFbsSua, timePointYears)
    ]
  
  opening_avail_all_years[,
                          delta := shift(Value, type = "lead") - Value,
                          by = c("geographicAreaM49", "measuredItemFbsSua")
  ]
  
  opening_avail_all_years[,
                          delta :=
                            ifelse(
                              timePointYears == max(timePointYears) & is.na(delta),
                              0,
                              delta
                            ),
                          by = c("geographicAreaM49", "measuredItemFbsSua")
  ]
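  
  # The delta computed above follows the stock identity
  #   opening[t+1] = opening[t] + delta[t],  i.e.  delta[t] = opening[t+1] - opening[t].
  # For example (hypothetical figures): openings of 100, 120, 90 in three
  # consecutive years give deltas of +20 and -30, and 0 for the last year
  # (no next opening is available, so the last delta is set to 0 above).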
  
  opening_avail_all_years <-
    dt_left_join(
      opening_avail_all_years,
      flagValidTable,
      by = c("flagObservationStatus", "flagMethod")
    )
  
  opening_avail_all_years[shift(Protected, type = "lead") == TRUE & Protected == TRUE, `:=`(delta_flag_obs = "T", delta_flag_method = "p")]
  opening_avail_all_years[!(shift(Protected, type = "lead") == TRUE & Protected == TRUE), `:=`(delta_flag_obs = "I", delta_flag_method = "i")]
  
  delta_identity <-
    opening_avail_all_years[
      timePointYears %in% yearVals,
      .(
        geographicAreaM49,
        measuredItemFbsSua,
        timePointYears,
        measuredElementSuaFbs = "5071",
        Value = delta,
        flagObservationStatus = delta_flag_obs,
        flagMethod = delta_flag_method
      )
    ]
  
  delta_identity <-
    delta_identity[!data_rm_ident,
                   on = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")]
  
  data <- rbind(data, delta_identity)
  
  data <-
    data[order(geographicAreaM49, measuredItemFbsSua,
               timePointYears, measuredElementSuaFbs)]
  
}

# / Generate stock variations for items for which opening stocks
# / exist for ALL years but variations don't exist



# Recalculate opening stocks

data_for_opening <-
  dt_left_join(
    all_opening_stocks[,
                       .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
                         timePointYears, new_opening = Value)
    ],
    data[
      measuredElementSuaFbs == '5071' &
        timePointYears %in% yearVals &
        !is.na(Value),
      .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
        timePointYears, delta = Value)
    ],
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  )

data_for_opening[is.na(delta), delta := 0]

data_for_opening <- data_for_opening[timePointYears >= startYear]

data_for_opening <- update_opening_stocks(data_for_opening)
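
# update_opening_stocks() is sourced from R/ (not shown here). Based on how it
# is used, it appears to propagate openings forward with the recursion
# new_opening[t+1] = new_opening[t] + delta[t]. A minimal sketch of that idea,
# under that assumption (illustration only, not the actual helper):
if (FALSE) {
  ex <- data.table(id = "x", year = 2014:2016,
                   new_opening = c(50, NA, NA), delta = c(10, -5, 0))
  ex[, new_opening := new_opening[1] + cumsum(shift(delta, fill = 0)), by = id]
  # new_opening is now c(50, 60, 55)
}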

all_opening_stocks <-
  dt_left_join(
    all_opening_stocks,
    data_for_opening[,
                     .(
                       geographicAreaM49,
                       measuredItemFbsSua = measuredItemSuaFbs,
                       timePointYears,
                       new_opening
                     )
    ],
    by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
  )

all_opening_stocks[
  !is.na(new_opening) & (round(new_opening) != round(Value) | is.na(Value)),
  `:=`(
    Value = new_opening,
    flagObservationStatus = "E",
    flagMethod = "u",
    Protected = FALSE
  )
]

all_opening_stocks[, new_opening := NULL]

# / Recalculate opening stocks



data <-
  dt_left_join(data, flagValidTable,
               by = c("flagObservationStatus", "flagMethod"))

data[flagObservationStatus %chin% c("", "T"), `:=`(Official = TRUE, Protected = TRUE)]
data[is.na(Official), Official := FALSE]
data[is.na(Protected), Protected := FALSE]


# We remove "5113" (opening stocks) as it will be stored in a separate table.
data <- data[measuredElementSuaFbs != "5113"]

dbg_print("elementToCodeNames")

# XXX FIXME: the elementCodesToNames below is
# not working properly, see issue #38
codes <- as.data.table(tibble::tribble(
  ~measuredElementSuaFbs,  ~name,
  "5910", "exports",
  "5520", "feed",
  "5141", "food",
  "5023", "foodManufacturing",
  "5610", "imports",
  "5165", "industrial",
  "5016", "loss",
  "5510", "production",
  "5525", "seed",
  "5164", "tourist",
  "5071", "stockChange"
))

data <- dt_left_join(data, codes, by = "measuredElementSuaFbs")

data[, measuredElementSuaFbs := name]

data[, name := NULL]

# NOTE: if this should be used again (#38), camelCase the element names (#45)
#data <-
#  elementCodesToNames(
#    data,
#    itemCol = "measuredItemFbsSua",
#    elementCol = "measuredElementSuaFbs"
#  )


# XXX: there are some NAs here, but probably there shouldn't be
data <- data[!is.na(measuredElementSuaFbs)]

setnames(data, "measuredItemFbsSua", "measuredItemSuaFbs")
message("Line 1686")


########## Remove feed if new element and negative imbalance is huge

# Feed requires country-specific reference files, which are being updated.
# Until these get reviewed, the feed module will generate feed items in
# some countries where they are not required/needed/appropriate. Below, we
# remove a NEW feed item if the NEGATIVE imbalance obtained by including
# it exceeds 30% of supply (e.g., -72%); items between -30% and -5% are
# only flagged as dubious.

new_feed <-
  data[
    measuredElementSuaFbs == "feed",
    .(
      pre = sum(!is.na(Value[timePointYears < startYear])),
      post = sum(!is.na(Value[timePointYears >= startYear]))
    ),
    by = c("geographicAreaM49", "measuredItemSuaFbs")
  ][
    pre == 0 & post > 0
  ][,
    c("pre", "post") := NULL
  ]

msg_new_feed_remove <- "No NEW feed removed"
msg_new_feed_dubious <- "No other NEW items should be removed"

if (nrow(new_feed) > 0) {
  
  # prev_data_avg <-
  #   data[
  #     new_feed, on = c("geographicAreaM49", "measuredItemSuaFbs")
  #   ][
  #     timePointYears >= 2010 & timePointYears < startYear
  #   ][
  #     order(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, timePointYears),
  #     Value := na.fill_(Value),
  #     by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
  #   ][,
  #     .(Value = sum(Value) / sum(!is.na(Value)), timePointYears = 0),
  #     by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
  #   ]
  # 
  # calculateImbalance(prev_data_avg, supply_subtract = c("exports", "stockChange"))
  
  # prev_processed_avg <-
  #   prev_data_avg[
  #     supply > 0 &
  #       utilizations > 0 &
  #       measuredElementSuaFbs == "foodManufacturing" &
  #       !is.na(Value),
  #     .(geographicAreaM49, measuredItemSuaFbs, proc_ratio = Value / supply)
  #   ]
  
  
  new_data_avg <-
    data[
      new_feed,
      on = c("geographicAreaM49", "measuredItemSuaFbs")
    ][
      # measuredElementSuaFbs != "foodManufacturing" & timePointYears >= 2014
    ][
      order(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, timePointYears),
      Value := na.fill_(Value),
      by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
    ][,
      .(Value = sum(Value) / sum(!is.na(Value) & !dplyr::near(Value, 0)), timePointYears = 1),
      by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
    ][
      !is.nan(Value)
    ]
  
  calculateImbalance(
    new_data_avg,
    keep_utilizations = FALSE,
    supply_subtract = c("exports", "stockChange")
  )
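  
  # calculateImbalance() comes from the sourced R/ helpers. Given the call
  # above, it computes, by country/item/year, something like:
  #   supply    = production + imports - exports - stockChange
  #   imbalance = supply - sum(other utilizations)
  # E.g. (hypothetical figures) supply = 100 + 20 - 10 - 5 = 105 and
  # utilizations of 90 leave an imbalance of +15; a large NEGATIVE
  # imbalance / supply ratio is what triggers the feed removal below.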
  
  # new_data_supply <-
  #   unique(
  #     new_data_avg[,
  #       c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "supply"),
  #       with = FALSE
  #     ]
  #   )
  
  # new_data_proc <-
  #   dt_left_join(
  #     new_data_supply,
  #     prev_processed_avg,
  #     by = c("geographicAreaM49", "measuredItemSuaFbs"),
  #     nomatch = 0
  #   )
  # 
  # new_data_proc <-
  #   new_data_proc[
  #     supply > 0,
  #     .(geographicAreaM49, measuredItemSuaFbs,
  #       measuredElementSuaFbs = "foodManufacturing",
  #       Value = supply * proc_ratio, timePointYears = 1)
  #   ]
  # 
  # new_data_avg[, c("supply", "imbalance") := NULL]
  
  # new_data_avg <-
  #   rbind(
  #     new_data_avg,
  #     new_data_proc
  #   )
  # 
  # calculateImbalance(new_data_avg, supply_subtract = c("exports", "stockChange"))
  
  
  new_feed_to_remove <-
    unique(
      new_data_avg[
        imbalance / supply <= -0.3,
        c("geographicAreaM49", "measuredItemSuaFbs"),
        with = FALSE
      ]
    )
  
  new_feed_to_remove[, measuredElementSuaFbs := "feed"]
  new_feed_to_remove[, remove_feed := TRUE]
  
  new_feed_dubious <-
    unique(
      new_data_avg[
        imbalance / supply > -0.3 & imbalance / supply <= -0.05,
        .(geographicAreaM49, measuredItemSuaFbs, x = imbalance / supply)
      ][order(x)][, x := NULL]
    )
  
  data <-
    merge(
      data,
      new_feed_to_remove,
      by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs"),
      all.x = TRUE
    )
  
  feed_to_remove <-
    data[
      remove_feed == TRUE & Protected == FALSE
    ][,
      .(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, timePointYears)
    ]
  
  data[, remove_feed := NULL]
  
  data <- data[!feed_to_remove, on = names(feed_to_remove)]
  
  if (nrow(feed_to_remove) > 0) {
    msg_new_feed_remove <- paste(new_feed_to_remove$measuredItemSuaFbs, collapse = ", ")
  }
  
  if (nrow(new_feed_dubious) > 0) {
    msg_new_feed_dubious <- paste(new_feed_dubious$measuredItemSuaFbs, collapse = ", ")
  }
}


########## / Remove feed if new element and negative imbalance is huge


treeRestricted <-
  tree[,
       c("measuredItemParentCPC", "measuredItemChildCPC", "processingLevel"),
       with = FALSE
  ]

treeRestricted <- unique(treeRestricted[order(measuredItemChildCPC)])

primaryInvolved <- faoswsProduction::getPrimary(processedCPC, treeRestricted, p)

dbg_print("primary involved descendents")

# XXX: check here, 0111 is in results
primaryInvolvedDescendents <-
  getChildren(
    commodityTree = treeRestricted,
    parentColname = "measuredItemParentCPC",
    childColname = "measuredItemChildCPC",
    topNodes = primaryInvolved
  )


# stocks need to be generated for those items for
# which "opening stocks" are available

items_to_generate_stocks <-
  unique(all_opening_stocks$measuredItemFbsSua)

stock <-
  CJ(
    measuredItemSuaFbs    = items_to_generate_stocks,
    measuredElementSuaFbs = 'stockChange',
    geographicAreaM49     = unique(data$geographicAreaM49),
    timePointYears        = unique(data$timePointYears)
  )

# rbind with anti_join
data <-
  rbind(
    data,
    stock[!data, on = c('measuredItemSuaFbs', 'measuredElementSuaFbs',
                        'geographicAreaM49', 'timePointYears')],
    fill = TRUE
  )

# XXX what is primaryInvolvedDescendents ?????????
#deriv <- CJ(measuredItemSuaFbs = primaryInvolvedDescendents, measuredElementSuaFbs = 'production', geographicAreaM49 = unique(data$geographicAreaM49), timePointYears = unique(data$timePointYears))

deriv <-
  CJ(
    measuredItemSuaFbs    = unique(tree$measuredItemChildCPC),
    measuredElementSuaFbs = 'production',
    geographicAreaM49     = unique(data$geographicAreaM49),
    timePointYears        = unique(data$timePointYears)
  )

# rbind with anti_join
data <-
  rbind(
    data,
    deriv[!data, on = c('measuredItemSuaFbs', 'measuredElementSuaFbs',
                        'geographicAreaM49', 'timePointYears')],
    fill = TRUE
  )



data[is.na(Official), Official := FALSE]
data[is.na(Protected), Protected := FALSE]



data[, stockable := measuredItemSuaFbs %chin% stockable_items]


# XXX: Remove items for which no production, imports or exports exist from startYear onwards.
non_existing_for_imputation <-
  setdiff(
    data[
      measuredElementSuaFbs %chin% c('production', 'imports', 'exports') &
        timePointYears < startYear,
      unique(measuredItemSuaFbs)
    ],
    data[
      measuredElementSuaFbs %chin% c('production', 'imports', 'exports') &
        timePointYears >= startYear,
      unique(measuredItemSuaFbs)
    ]
  )

data <- data[!(measuredItemSuaFbs %chin% non_existing_for_imputation)]

message("Line 1943")

############################################### Create derived production ###################################

dbg_print("derivation of shareDownUp")

############# ShareDownUp ----------------------------------------

# this table will be used to assign to zeroweight commodities
# the processed quantities of their coproducts
coproduct_table <- ReadDatatable('zeroweight_coproducts')

stopifnot(nrow(coproduct_table) > 0)

# Can't do anything if this information is missing, so remove these cases
coproduct_table <- coproduct_table[!is.na(branch)]

### XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
### XXX removing these cases as '22242.01' appears as zero-weight XXXX
### XXX for main product = '22110.04' Buffalo milk                XXXX
### XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
coproduct_table <- coproduct_table[branch != '22242.01 + 22110.04']

coproduct_table <- coproduct_table[, .(measured_item_child_cpc, branch)]

coproduct_for_sharedownup <- copy(coproduct_table)

coproduct_for_sharedownup_easy <- coproduct_for_sharedownup[!grepl('\\+|or', branch)]

coproduct_for_sharedownup_plus <- coproduct_for_sharedownup[grepl('\\+', branch)]

coproduct_for_sharedownup_plus <-
  rbind(
    tidyr::separate(
      coproduct_for_sharedownup_plus,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *\\+ *')[, .(measured_item_child_cpc, branch= main1)],
    tidyr::separate(
      coproduct_for_sharedownup_plus,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *\\+ *')[, .(measured_item_child_cpc,branch = main2)]
  )

coproduct_for_sharedownup_plus <- unique(coproduct_for_sharedownup_plus)


coproduct_for_sharedownup_or <- coproduct_for_sharedownup[grepl('or', branch)]

coproduct_for_sharedownup_or <-
  rbind(
    #coproduct_table_or,
    tidyr::separate(
      coproduct_for_sharedownup_or,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *or *')[, .(measured_item_child_cpc, branch= main1)],
    tidyr::separate(
      coproduct_for_sharedownup_or,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *or *')[, .(measured_item_child_cpc,branch = main2)]
  )

coproduct_for_sharedownup_or <- unique(coproduct_for_sharedownup_or)

coproduct_for_sharedownup <-
  rbind(
    coproduct_for_sharedownup_easy,
    coproduct_for_sharedownup_plus,
    coproduct_for_sharedownup_or
  )
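
# The 'branch' column may hold a single CPC code, a composite like
# '22242.01 + 22110.04', or an alternative like 'A or B'; the blocks above
# split composites so each zeroweight maps to every possible main product.
# A compact equivalent with hypothetical toy codes (illustration only):
if (FALSE) {
  ex <- data.table(child = "z1", branch = "111 + 222")
  ex_long <- ex[, .(branch = unlist(strsplit(branch, " *\\+ *| *or *"))), by = child]
  # ex_long has two rows: child z1 -> 111 and child z1 -> 222
}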

## Fix a wrong connection: bagasse (39140.02) should come from raw cane and beet sugar (2351f), not from raw centrifugal sugar (23511.01)
coproduct_for_sharedownup[measured_item_child_cpc == "39140.02" & branch == "23511.01", branch := "2351f"]


# Use the whole tree, not level by level
ExtrRate <-
  tree[
    !is.na(Value) &
      measuredElementSuaFbs == 'extractionRate'
  ][,
    .(
      measuredItemParentCPC,
      geographicAreaM49,
      measuredItemChildCPC,
      timePointYears,
      extractionRate = Value,
      processingLevel
    )
  ]

# We include utilizations to identify whether processed is the only utilization
data_tree <-
  data[
    measuredElementSuaFbs %chin%
      c('production', 'imports', 'exports',
        'stockChange', 'foodManufacturing', 'loss',
        'food', 'industrial', 'feed', 'seed')
  ]


# subset the tree according to parents and children present in the SUA data

ExtrRate <-
  ExtrRate[
    measuredItemChildCPC %chin% data_tree$measuredItemSuaFbs &
      measuredItemParentCPC %chin% data_tree$measuredItemSuaFbs
  ]


setnames(data_tree, "measuredItemSuaFbs", "measuredItemParentCPC")

dataProcessingShare<-copy(data_tree)

data_tree <-
  merge(
    data_tree,
    ExtrRate,
    by = c(p$parentVar, p$geoVar, p$yearVar),
    allow.cartesian = TRUE,
    all.y = TRUE
  )

data_tree <- as.data.table(data_tree)

# The availability of a parent that has one child and only processed as a
# utilization will be entirely assigned to processed for its unique child, even from 2014 onwards
data_tree[,
          availability :=
            sum(
              Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
              - Value[get(p$elementVar) %in% c(p$exportCode, "stockChange")],
              na.rm = TRUE
            ),
          by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
]

data_tree[availability < 0, availability := 0]
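
# In other words, for each parent/child pair:
#   availability = production + imports - exports - stockChange,
# floored at zero by the line above. E.g. (hypothetical figures) production 80,
# imports 30, exports 20 and a stock change of +10 give 80 + 30 - 20 - 10 = 80.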

# used to check if a parent has processed as a utilization
data_tree[,
          proc_Median :=
            median(
              Value[measuredElementSuaFbs == "foodManufacturing" & timePointYears %in% 2000:endYear],
              na.rm=TRUE
            ),
          by = c(p$parentVar, p$geoVar)
]

# Boolean variable that is TRUE if the parent has only processed as a utilization
data_tree[,
          unique_proc :=
            proc_Median > 0 &
            !is.na(proc_Median) &
            # ... is the only utilization
            all(is.na(Value[!(measuredElementSuaFbs %chin%
                                c('production', 'imports', 'exports',
                                  'stockChange','foodManufacturing'))])),
          by = c(p$parentVar, p$geoVar, p$yearVar)
]

sel_vars <-
  c("measuredItemParentCPC", "geographicAreaM49", "timePointYears",
    "measuredElementSuaFbs", "flagObservationStatus", "flagMethod",
    "Value", "Official", "measuredItemChildCPC", "extractionRate",
    "processingLevel")

data_tree<-
  unique(
    data_tree[, c(sel_vars, "availability", "unique_proc"), with = FALSE],
    by = sel_vars
  )

message("Line 2118")

# dataset to calculate the number of parents of each child and the number of
# children of each parent, including zeroweight commodities
sel_vars <- c("geographicAreaM49", "measuredItemParentCPC",
              "measuredItemChildCPC","timePointYears")
data_count <- unique(data_tree[, sel_vars, with = FALSE], by = sel_vars)

# Calculate the number of parents of each child
data_count[,
           number_of_parent := .N,
           by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
]

# calculate the number of children of each parent;
# we exclude zeroweights to avoid double-counting of children (processing)
data_count[measuredItemChildCPC %!in% zeroWeight,
           number_of_children := uniqueN(measuredItemChildCPC),
           by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
]

data_tree <-
  dt_left_join(
    data_tree,
    data_count,
    by = c(p$parentVar, p$childVar, p$geoVar, p$yearVar),
    allow.cartesian = TRUE
  )

# dataset containing the processed quantity of parents
food_proc <-
  unique(
    data_tree[
      measuredElementSuaFbs == "foodManufacturing",
      c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Value"),
      with = FALSE
    ],
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Value")
  )

setnames(food_proc, "Value", "parent_qty_processed")

food_proc[timePointYears >= startYear, parent_qty_processed := NA_real_]

data_tree <-
  dt_left_join(
    data_tree,
    food_proc,
    by = c(p$parentVar, p$geoVar, p$yearVar),
    allow.cartesian = TRUE
  )

sel_vars <- 
  c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC",
    "timePointYears", "extractionRate", "parent_qty_processed",
    "processingLevel", "number_of_parent", "number_of_children")

data_tree <-
  unique(
    data_tree[, c(sel_vars, "availability", "unique_proc"), with = FALSE],
    by = sel_vars
  )


# dataset containing the production of child commodities
dataprodchild <- data[measuredElementSuaFbs %chin% c('production')]

setnames(dataprodchild, "measuredItemSuaFbs", "measuredItemChildCPC")

dataprodchild <-
  unique(
    dataprodchild[,
                  c("geographicAreaM49", "measuredItemChildCPC",
                    "timePointYears", "Value", "flagObservationStatus",
                    "flagMethod"),
                  with = FALSE
    ]
  )

setnames(dataprodchild, "Value", "production_of_child")

message("Line 2200")

dataprodchild[timePointYears >= startYear, production_of_child := NA_real_]

data_tree <-
  dt_left_join(
    data_tree,
    dataprodchild,
    by = c(p$geoVar, p$childVar, p$yearVar)
  )

# ShareDownUps for zeroweights are calculated separately
# to avoid double counting when aggregating processed quantities of parents

# dataset containing information on zeroweight commodities
data_zeroweight <- data_tree[measuredItemChildCPC %chin% zeroWeight]

# import data for coproduct relation
zw_coproduct <-
  coproduct_for_sharedownup[,
                            .(zeroweight = measured_item_child_cpc, measuredItemChildCPC = branch)
  ]

zw_coproduct <- unique(zw_coproduct, by = c("measuredItemChildCPC", "zeroweight"))

# We subset the zeroweight coproduct reference table by taking only zeroweights
# and their coproducts that are child commodities in the country's tree
zw_coproduct <-
  zw_coproduct[
    measuredItemChildCPC %chin% data_tree$measuredItemChildCPC &
      zeroweight %chin% data_tree$measuredItemChildCPC
  ]


# Computing information for non zeroweight commodities
data_tree <- data_tree[measuredItemChildCPC %!in% zeroWeight]

# this dataset will be used when creating processing share and shareUpdown
dataComplete <- copy(data_tree)

# Quantity of parent destined to the production of the given child (only for children with one parent, for the moment) (to delete?)
data_tree[, processed_to_child := ifelse(number_of_parent == 1, production_of_child, NA_real_)]

# if a parent has one child, all the processed quantity goes to that child
data_tree[
  number_of_children == 1,
  processed_to_child := parent_qty_processed * extractionRate
]

data_tree[production_of_child == 0, processed_to_child := 0]

# assign the entire availability to processed for parents having only processed as a utilization
data_tree[
  number_of_children == 1 & unique_proc == TRUE,
  processed_to_child := availability * extractionRate
]


# mirror assignment for imputing processed quantities of multiple-parent children;
# 5 loops are sufficient to deal with all the cases

for (k in 1:5) {
  data_tree <- RemainingToProcessedParent(data_tree)
  data_tree <- RemainingProdChildToAssign(data_tree)
}
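
# RemainingToProcessedParent() and RemainingProdChildToAssign() are sourced
# from R/. Judging from the columns they feed (remaining_processed_parent,
# remaining_to_process_child), each pass appears to (a) recompute how much of
# each parent's processed quantity is still unallocated and (b) recompute how
# much of each child's production is still unexplained, so alternating the two
# settles the allocation; five passes are enough for the deepest chains of
# multiple-parent children.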

data_tree <- RemainingToProcessedParent(data_tree)

# proportional allocation of the remaining production of multiple-parent children
data_tree[,
          processed_to_child :=
            ifelse(
              number_of_parent > 1 & is.na(processed_to_child),
              (remaining_to_process_child * is.na(processed_to_child) * remaining_processed_parent) / sum((remaining_processed_parent * is.na(processed_to_child)), na.rm = TRUE),
              processed_to_child),
          by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
]

# Update the remaining production to assign (should be zero for 2000:2013)
data_tree[,
          parent_already_processed :=
            ifelse(
              is.na(parent_qty_processed),
              parent_qty_processed,
              sum(processed_to_child / extractionRate,na.rm = TRUE)
            ),
          by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
]

data_tree[, remaining_processed_parent := round(parent_qty_processed - parent_already_processed)]

data_tree[
  remaining_processed_parent < 0,
  remaining_processed_parent := 0
]


# Impute processed quantities for 2014 onwards using a 3-year average
# (this is only to impute shareDownUp)
data_tree <-
  data_tree[
    order(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, timePointYears),
    processed_to_child_avg := rollavg(processed_to_child, order = 3),
    by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")
  ]

setkey(data_tree, NULL)

data_tree[timePointYears >= startYear & is.na(processed_to_child), processed_to_child := processed_to_child_avg]
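
# rollavg() is a sourced helper; as used here (order = 3) it appears to behave
# like a trailing three-year moving average that can extend beyond the last
# observed value. A minimal sketch of that behaviour, under that assumption
# (illustration only, not the actual helper):
if (FALSE) {
  ex <- c(10, 14, 12, NA, NA)
  for (i in which(is.na(ex))) ex[i] <- mean(ex[(i - 3):(i - 1)])
  # ex is now approximately c(10, 14, 12, 12, 12.67)
}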


# Back to zeroweight cases (we assign to zeroweights the processed quantity of their coproducts, already calculated above)

zw_coproduct_bis <-
  merge(
    data_tree,
    zw_coproduct,
    by = "measuredItemChildCPC",
    allow.cartesian = TRUE,
    all.y = TRUE
  )

zw_coproduct_bis[,
                 `:=`(
                   measuredItemChildCPC = zeroweight,
                   processed_to_child = processed_to_child / extractionRate
                 )
]

sel_vars <- 
  c("geographicAreaM49", "measuredItemParentCPC",
    "measuredItemChildCPC", "timePointYears")

zw_coproduct_bis <-
  zw_coproduct_bis[, c(sel_vars, "processed_to_child"), with = FALSE]

# Correction for a milk tree issue (a zeroweight can be associated with 2 main
# products from the same parent), e.g. butter of cow milk
zw_coproduct_bis[,
                 processed_to_child := sum(processed_to_child, na.rm = TRUE),
                 by = sel_vars 
]

zw_coproduct_bis <-
  unique(zw_coproduct_bis, by = colnames(zw_coproduct_bis))

data_zeroweight <-
  dt_left_join(data_zeroweight, zw_coproduct_bis, by = sel_vars)

data_zeroweight[,
                processed_to_child := processed_to_child * extractionRate
]

sel_vars <-
  c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC",
    "timePointYears", "number_of_parent", "parent_qty_processed",
    "production_of_child", "processed_to_child")

data_zeroweight <- data_zeroweight[, sel_vars, with = FALSE]

data_tree <- data_tree[, sel_vars, with = FALSE]

# combine zeroweight and non-zeroweight commodities
data_tree <- rbind(data_tree, data_zeroweight)

#calculate ShareDownUp
data_tree[,
          shareDownUp := processed_to_child / sum(processed_to_child, na.rm = TRUE),
          by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
]
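
# shareDownUp answers: of all the parent quantity processed into a given
# child, which fraction came from each parent? E.g. (hypothetical figures)
# if one parent sends 60 t and another 40 t into the same child in a year,
# their shareDownUp values are 0.6 and 0.4.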

#some corrections...

data_tree[is.na(shareDownUp) & number_of_parent == 1, shareDownUp := 1]

data_tree[
  (production_of_child == 0 | is.na(production_of_child)) &
    measuredItemChildCPC %!in% zeroWeight &
    timePointYears < startYear,
  shareDownUp := 0
]

data_tree[
  (parent_qty_processed == 0 | is.na(parent_qty_processed)) &
    timePointYears < startYear,
  shareDownUp :=0
]

data_tree[is.na(shareDownUp), shareDownUp := 0]

data_tree <-
  unique(
    data_tree[,
              c("geographicAreaM49", "measuredItemParentCPC",
                "measuredItemChildCPC", "timePointYears", "shareDownUp"),
              with = FALSE
    ]
  )

######################## GENERATING ProcessedShare for parents ---------------------------------------
######################## AND ShareUpDown for children ##############################################
####################################################################################################

dataProcessingShare[,
                    availability :=
                      sum(
                        Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
                        - Value[get(p$elementVar) %in% c(p$exportCode, "stockChange")],
                        na.rm = TRUE
                      ),
                    #by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
                    by = c(p$geoVar, p$yearVar, p$parentVar)
]

dataProcessingShare[,
                    SumLoss := sum(
                      Value[measuredElementSuaFbs == "loss" & timePointYears %in% yearVals],
                      na.rm = TRUE
                    ),
                    by = c("geographicAreaM49", "measuredItemParentCPC")
]

dataProcessingShare[,
                    SumProd := sum(
                      Value[measuredElementSuaFbs == "production" & timePointYears %in% yearVals],
                      na.rm = TRUE
                    ),
                    by = c("geographicAreaM49", "measuredItemParentCPC")
]


dataProcessingShare[,
                    parent_qty_processed := Value[measuredElementSuaFbs == "foodManufacturing"],
                    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
]

dataProcessingShare[,
                    Prod := Value[measuredElementSuaFbs == "production"],
                    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
]

dataProcessingShare[, RatioLoss := SumLoss / SumProd]

dataProcessingShare[,
                    loss_Median_before :=
                      median(
                        Value[measuredElementSuaFbs == "loss" & timePointYears %in% 2000:(startYear-1)],
                        na.rm=TRUE
                      ),
                    by = c(p$parentVar, p$geoVar)
]

dataProcessingShare[, NewLoss := ifelse((is.na(loss_Median_before) | loss_Median_before == 0) & !is.na(RatioLoss), TRUE, FALSE)]

# dataProcessingShare[,
#                     NewLoss :=
#        # have  loss for 2014-2018 & ...
#        !is.na(RatioLoss)&
#           # does not have past losses &
#        is.na(loss_Median_before) &
#        # ... loss,process and tourist are the only utilization
#        all(is.na(Value[!(measuredElementSuaFbs %chin%
#                            c('loss', 'production', 'imports',
#                              'exports', 'stockChange','foodManufacturing', 'tourist'))])),
#      by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
#      ]

# deduplicate the parent-level processing dataset
dataProcessingShare <-
  unique(
    dataProcessingShare[,
                        c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Prod",
                          "availability","RatioLoss","NewLoss","parent_qty_processed"),
                        with = FALSE
    ],
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Prod",
           "availability","RatioLoss","NewLoss","parent_qty_processed")
  )
#setnames(dataProcessingShare, "Value", "Prod")

# Processing share and ShareUpDown
dataProcessingShare <-
  unique(
    dataProcessingShare[,
                        c("geographicAreaM49", "timePointYears", "availability",
                          "measuredItemParentCPC", "parent_qty_processed",
                          "RatioLoss","NewLoss","Prod"),
                        with = FALSE
    ],
    by = c("geographicAreaM49", "timePointYears",
           "measuredItemParentCPC", "parent_qty_processed")
  )

dataProcessingShare[timePointYears %in% yearVals, parent_qty_processed := NA_real_]

# correction of Pshare to adjust processed quantities in case of new loss

dataProcessingShare[is.na(RatioLoss), RatioLoss := 0]
dataProcessingShare[is.na(Prod), Prod := 0]


# dataProcessingShare[,
#   Pshare := ifelse(NewLoss==TRUE,(parent_qty_processed-RatioLoss*Prod) / availability,
#                    parent_qty_processed / availability),
#   by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
# ]


dataProcessingShare[,
                    Pshare := parent_qty_processed / availability,
                    by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
]

dataProcessingShare[Pshare > 1, Pshare := 1]
dataProcessingShare[Pshare < 0,Pshare := 0]
dataProcessingShare[is.nan(Pshare), Pshare := NA_real_]
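
# The processing share is the fraction of a parent's availability that goes to
# processing: Pshare = parent_qty_processed / availability, clipped to [0, 1]
# above. E.g. (hypothetical figures) 30 t processed out of an availability of
# 120 t gives Pshare = 0.25.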

dataProcessingShare <-
  dataProcessingShare[
    order(geographicAreaM49, measuredItemParentCPC, timePointYears),
    Pshare_avr := rollavg(Pshare, order = 3),
    by = c("geographicAreaM49", "measuredItemParentCPC")
  ]

setkey(dataProcessingShare, NULL)

# We estimate Pshare for 2014 onwards, even if processed quantities are modified
# manually, because at this stage the availability for 2014 onwards is known (stocks are not generated yet)
dataProcessingShare[timePointYears >= startYear, Pshare := Pshare_avr]
dataProcessingShare[, Pshare_avr := NULL]

data_Pshare <-
  dataProcessingShare[,
                      c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
                        "parent_qty_processed", "Pshare","NewLoss"),
                      with = FALSE
  ]

data_Pshare <- merge(
  data_Pshare,
  data[
    measuredElementSuaFbs == "loss",
    list(geographicAreaM49, timePointYears,
         measuredItemParentCPC = measuredItemSuaFbs, Loss = Value)
  ],
  by = c(p$geoVar, p$parentVar, p$yearVar),
  all.x = TRUE
)

# calculating shareUpDown for each child
data_ShareUpDoawn <-
  dataComplete[,
               c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
                 "availability", "parent_qty_processed", "measuredItemChildCPC",
                 "extractionRate", "production_of_child", "number_of_children"),
               with = FALSE
  ]

data_ShareUpDoawn <-
  unique(
    data_ShareUpDoawn,
    by = colnames(data_ShareUpDoawn)
  )

data_ShareUpDoawn <- merge(
  data_ShareUpDoawn,
  data_tree,
  by = c("geographicAreaM49", "measuredItemParentCPC",
         "measuredItemChildCPC", "timePointYears"),
  all = TRUE
)

data_ShareUpDoawn <- data_ShareUpDoawn[measuredItemChildCPC %!in% zeroWeight]
data_ShareUpDoawn[, shareUpDown := NA_real_]

data_ShareUpDoawn[
  !is.na(parent_qty_processed) & extractionRate>0,
  shareUpDown := (production_of_child / extractionRate) * shareDownUp / sum(production_of_child / extractionRate * shareDownUp, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
]

data_ShareUpDoawn[is.nan(shareUpDown), shareUpDown := NA_real_]
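
# shareUpDown answers the converse question: of the quantity a parent sends to
# processing, which fraction goes to each child? Each child's claim on the
# parent is its production in parent equivalent (production_of_child /
# extractionRate) weighted by its shareDownUp, normalised so the claims sum
# to 1. E.g. (hypothetical figures) two children claiming 75 t and 25 t of
# parent equivalent get shareUpDown 0.75 and 0.25.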

data_ShareUpDoawn <-
  data_ShareUpDoawn[
    order(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, timePointYears),
    shareUpDown_avg := rollavg(shareUpDown, order = 3),
    by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")
  ]

setkey(data_ShareUpDoawn, NULL)

data_ShareUpDoawn[timePointYears >= startYear, shareUpDown := shareUpDown_avg]

data_ShareUpDoawn[, shareUpDown_avg := NULL]

data_ShareUpDoawn[
  timePointYears >= startYear,
  shareUpDown := shareUpDown / sum(shareUpDown, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
]

sel_vars <-
  c("geographicAreaM49", "measuredItemParentCPC",
    "measuredItemChildCPC", "timePointYears", "shareUpDown")

data_ShareUpDoawn_final <- data_ShareUpDoawn[, sel_vars, with = FALSE]

shareUpDown_zeroweight <-
  merge(
    data_ShareUpDoawn_final,
    zw_coproduct,
    by = "measuredItemChildCPC",
    allow.cartesian = TRUE,
    all.y = TRUE
  )

shareUpDown_zeroweight[, measuredItemChildCPC := zeroweight]

shareUpDown_zeroweight <- shareUpDown_zeroweight[, sel_vars, with = FALSE]

# Correction for a milk tree issue (a zeroweight can be associated with 2 main
# products from the same parent), e.g. butter of cow milk
shareUpDown_zeroweight<-
  shareUpDown_zeroweight[,
                         shareUpDown := sum(shareUpDown, na.rm = TRUE),
                         by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC", "timePointYears")
  ]

shareUpDown_zeroweight <-
  unique(
    shareUpDown_zeroweight,
    by = colnames(shareUpDown_zeroweight)
  )

# /correction


data_ShareUpDoawn_final <- rbind(data_ShareUpDoawn_final, shareUpDown_zeroweight)

# some correction
data_ShareUpDoawn_final[is.na(shareUpDown), shareUpDown := 0]
data_ShareUpDoawn_final[!is.na(shareUpDown), flagObservationStatus := "I"]
data_ShareUpDoawn_final[!is.na(shareUpDown), flagMethod := "c"]


# Save data to SWS

shareUpDown_to_save <- copy(data_ShareUpDoawn_final)

shareUpDown_to_save[, measuredElementSuaFbs := "5431"]

setnames(
  shareUpDown_to_save,
  c("measuredItemParentCPC", "measuredItemChildCPC"),
  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree")
)

setnames(shareUpDown_to_save, "shareUpDown", "Value")

#shareUpDown_to_save[, Value := round(Value, 3)]
message("Line 2628")

sessionKey_shareUpDown <- swsContext.datasets[[3]]

CONFIG <- GetDatasetConfig(sessionKey_shareUpDown@domain, sessionKey_shareUpDown@dataset)

data_shareUpDown_sws <- GetData(sessionKey_shareUpDown)

data_shareUpDown_sws[
  #timePointYears >= 2014 &
  ((measuredItemParentCPC_tree == "02211" & measuredItemChildCPC_tree == "22212") |
     # cheese from whole cow milk cannot come from skim milk of cows
     (measuredItemParentCPC_tree == "22110.02" & measuredItemChildCPC_tree == "22251.01")),
  Value := 0
  
]


# Saving ShareUpDown for the first time; all flags are (I, c), like production of derived items
if (nrow(data_shareUpDown_sws) == 0) {
  faosws::SaveData(
    domain = "suafbs",
    dataset = "up_down_share",
    data = shareUpDown_to_save,
    waitTimeout = 20000
  )
} else {
  
  setnames(
    data_shareUpDown_sws,
    c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree", "Value"),
    c("measuredItemParentCPC", "measuredItemChildCPC", "shareUpDown")
  )
  
  data_ShareUpDoawn_to_use <- copy(data_shareUpDown_sws)
  
  data_ShareUpDoawn_to_use <-
    data_ShareUpDoawn_to_use[, colnames(data_ShareUpDoawn_final), with = FALSE]
  
  data_ShareUpDoawn_final <- data_ShareUpDoawn_final[timePointYears %in% as.character(yearVals)]
  
  # If a new parent-child connection is added, the shareUpDown of all the
  # children of that parent are affected, so we overwrite the shares of all
  # the children of the parent in the new connection
  new_connection <-
    data_ShareUpDoawn_final[!data_ShareUpDoawn_to_use,
                            on = c('geographicAreaM49', 'measuredItemParentCPC',
                                   'measuredItemChildCPC', 'timePointYears')
    ]
  
  # rbind with anti_join
  data_ShareUpDoawn_final <-
    rbind(
      data_ShareUpDoawn_to_use[!new_connection,           # exclude parents included in the new connection
                               on = c('geographicAreaM49', 
                                      'measuredItemParentCPC',
                                      'timePointYears')],
      unique(
        data_ShareUpDoawn_final[
          unique(new_connection[,c('geographicAreaM49', 
                                   'measuredItemParentCPC', 
                                   'timePointYears'),with=FALSE]),
          on = c('geographicAreaM49', 
                 'measuredItemParentCPC', 
                 'timePointYears')
        ]
      ),
      fill = TRUE
    )
  
}

# Consistency check: the sum of shareUpDown by parent should equal 1 (or 0).
message("Line 2690")

data_ShareUpDoawn_final_invalid <-
  data_ShareUpDoawn_final[
    measuredItemChildCPC %!in% zeroWeight,
    .(sum_shares = round(sum(shareUpDown, na.rm = TRUE),1)),
    by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
  ][
    !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0)
  ]


if (nrow(data_ShareUpDoawn_final_invalid) > 0) {
  
  write.csv(
    data_ShareUpDoawn_final_invalid,
    file.path(paste0("ShareUpDown_toCorrect_", COUNTRY, ".csv"))
  )
  
  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Some shareUpDown are invalid",
      body = c(paste("There are some invalid shareUpDown (they do not sum to 1). See attachment and fix them in the SWS shareUpDown dataset"),
               file.path(paste0("ShareUpDown_toCorrect_", COUNTRY, ".csv")))
    )
  }
  
  stop("Some shareUpDown can create conflict. Check your email.")
}

#/processingShare---------------------

message("Line 2729")


# / Tables that will be required by the co-product issue (fix processingShare)

sessionKey_shareDownUp <- swsContext.datasets[[4]]

CONFIG <- GetDatasetConfig(sessionKey_shareDownUp@domain, sessionKey_shareDownUp@dataset)

data_shareDownUp_sws <- GetData(sessionKey_shareDownUp)

########### Set parents present in SWS but not present in the estimated data to zero ###########

data_tree_test <-
  merge(
    data.table(
      geographicAreaM49 = unique(data_tree$geographicAreaM49),
      timePointYears = sort(unique(data_tree$timePointYears))
    ),
    unique(
      data_shareDownUp_sws[,
                           list(geographicAreaM49, 
                                measuredItemParentCPC = measuredItemParentCPC_tree, 
                                measuredItemChildCPC = measuredItemChildCPC_tree)
      ]
    ),
    by = "geographicAreaM49",
    all = TRUE,
    allow.cartesian = TRUE
  )

# unresolved issue
#diff_test <- data_tree[!data_tree_test, on = c("geographicAreaM49","measuredItemParentCPC",
#                                              "measuredItemChildCPC","timePointYears")]

missing_connections <- data_tree_test[!data_tree, on = c("geographicAreaM49","measuredItemParentCPC",
                                                         "measuredItemChildCPC","timePointYears")]

missing_connections[, shareDownUp := 0]

missing_connections <-
  missing_connections[, list(geographicAreaM49, measuredItemParentCPC,
                             measuredItemChildCPC, timePointYears, shareDownUp)]


data_tree <- rbind(data_tree, missing_connections)


# final shareDownUp

shareDownUp_previous <- copy(data_shareDownUp_sws)
setnames(shareDownUp_previous,
         c("Value", "measuredItemParentCPC_tree", "measuredItemChildCPC_tree"),
         c("shareDownUp_prev", "measuredItemParentCPC", "measuredItemChildCPC"))

if (nrow(shareDownUp_previous) > 0) {
  data_tree_final <-
    dt_left_join(
      data_tree,
      shareDownUp_previous,
      by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
    )
  
  
  data_tree_final[flagObservationStatus == "E" & flagMethod=="f", shareDownUp := shareDownUp_prev]
  
  data_tree_final[!is.na(shareDownUp) & is.na(flagMethod),`:=`(flagObservationStatus="I", flagMethod="i",
                                                               measuredElementSuaFbs="5432")]
  
  message("Line 2830")
  
  data_tree_final[, shareDownUp_prev := NULL]
  data_tree_final_save <- copy(data_tree_final)
  
  data_tree_final[,`:=`(measuredElementSuaFbs=NULL,flagObservationStatus=NULL,flagMethod=NULL)]
  data_tree_final <- unique(data_tree_final, by = colnames(data_tree_final))
} else {
  data_tree_final <- data_tree
  data_tree_final_save <- copy(data_tree_final)
  data_tree_final_save[,`:=`(flagObservationStatus="I", flagMethod="i",
                             measuredElementSuaFbs="5432")]
}

data_tree_final_save <- data_tree_final_save[timePointYears %in% startYear:endYear]
setnames(data_tree_final_save,
         c("measuredItemParentCPC", "measuredItemChildCPC", "shareDownUp"),
         c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree", "Value"))

faosws::SaveData(
  domain = "suafbs",
  dataset = "down_up_share",
  data = data_tree_final_save,
  waitTimeout = 20000
)

# Check consistency of shareDownUp (computed + manual E,f in SWS)
shareDownUp_invalid <-
  data_tree_final[,
                  .(sum_shares = round(sum(shareDownUp),1)),
                  by = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC")
  ][
    !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0)
  ][
    timePointYears >= startYear
  ]

if (nrow(shareDownUp_invalid) > 0) {
  
  shareDownUp_invalid <-
    data_tree_final[
      shareDownUp_invalid,
      on = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC")
    ]
  
  write.csv(
    shareDownUp_invalid,
    file.path(paste0("shareDownUp_INVALID_", COUNTRY, ".csv"))
  )
  
  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Some shareDownUp are invalid",
      body = c(paste("There are some invalid shareDownUp (they do not sum to 1). See attachment and fix them in the SWS shareDownUp dataset"),
               file.path(paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
    )
  }
  
  unlink(file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
  
  stop("Some shares are invalid. Check your email.")
}

message("Line 2905")

# / END ShareDownUp -------------------------------------------------------



# stockable items for which a historical series of at least
# 5 non-missing/non-null data points exists
historical_avail_stocks <-
  data[
    measuredElementSuaFbs == "stockChange" &
      timePointYears < startYear &
      !is.na(Value) &
      stockable == TRUE,
    .(n = sum(!dplyr::near(Value, 0))),
    by = c("geographicAreaM49", "measuredItemSuaFbs")
  ][
    n >= 5
  ][,
    n := NULL
  ]


# Keep processingShare and shareDownUp
computed_shares <- list()
computed_shares_send <- list()
# Keep negative availability
negative_availability <- list()

# keep shareUpDown
updated_shareUpDOwn<-list()

fixed_proc_shares <- list()

original_stock_variation <-
  data[
    measuredElementSuaFbs == "stockChange" &
      timePointYears >= startYear &
      !is.na(Value),
    c(
      "geographicAreaM49", "measuredItemSuaFbs",
      "timePointYears", "Value", "flagObservationStatus", "flagMethod"
    )
  ]

dbg_print("starting derived production loop")

if (length(primaryInvolvedDescendents) == 0) {
  message("No primary commodity involved in this country")
} else {
  
  # used to avoid duplicates
  data[, Processed_estimed := FALSE]
  
  for (lev in sort(unique(tree$processingLevel))) {
    # for testing purposes:
    # lev <- 0
    dbg_print(paste("derived production loop, level", lev))
    
    treeCurrentLevel <-
      tree[
        !is.na(Value) &
          processingLevel == lev &
          measuredElementSuaFbs == 'extractionRate',
        .(
          measuredItemParentCPC,
          geographicAreaM49,
          measuredItemChildCPC,
          timePointYears,
          extractionRate = Value
        )
      ]
    
    # tree containing the children of all parents of the current level
    tree_parent_Level<-
      tree[
        !is.na(Value) &
          measuredElementSuaFbs == 'extractionRate' &
          measuredItemParentCPC %chin% treeCurrentLevel[[p$parentVar]],
        .(
          measuredItemParentCPC,
          geographicAreaM49,
          measuredItemChildCPC,
          timePointYears,
          extractionRate = Value,
          processingLevel
        )
      ]
    
    ############# Create stocks, if missing ####################
    # Stocks will be generated also for those cases for which we have data
    condition_for_stocks <-
      data$stockable == TRUE &
      data$measuredItemSuaFbs %chin% items_to_generate_stocks &
      #data$measuredItemSuaFbs %chin% treeCurrentLevel$measuredItemParentCPC &
      data$measuredElementSuaFbs %chin% c('production', 'imports', 'exports')
    
    if (any(condition_for_stocks)) {
      dbg_print("generating stocks")
      
      data_histmod_stocks <-
        data[
          measuredItemSuaFbs %chin% historical_avail_stocks$measuredItemSuaFbs
        ]
      
      if (nrow(data_histmod_stocks) > 0) {
        dbg_print("data for histmod available")
        
        data_histmod_stocks <-
          data_histmod_stocks[,
                              .(
                                supply_inc =
                                  sum(
                                    Value[measuredElementSuaFbs %chin% c("production", "imports")],
                                    - Value[measuredElementSuaFbs %chin% c("exports", "stockChange")],
                                    na.rm = TRUE
                                  ),
                                supply_exc =
                                  sum(
                                    Value[measuredElementSuaFbs %chin% c("production", "imports")],
                                    - Value[measuredElementSuaFbs == "exports"],
                                    na.rm = TRUE
                                  )
                              ),
                              keyby = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
          ][,
            trend := seq_len(.N),
            by = c("geographicAreaM49", "measuredItemSuaFbs")
          ]
        
        dbg_print("coeffs_stocks_mod")
        
        data_histmod_stocks[,
                            c("c_int", "c_sup", "c_trend") := coeffs_stocks_mod(.SD),
                            by = c("geographicAreaM49", "measuredItemSuaFbs")
        ][,
          supply_inc_pred := c_int + c_sup * supply_exc + c_trend * trend,
        ][,
          `:=`(
            min_new_supply_inc = min(supply_inc[supply_inc > 0 & timePointYears >= 2009 & timePointYears < startYear], na.rm = TRUE),
            avg_new_supply_inc = mean(supply_inc[supply_inc > 0 & timePointYears >= 2009 & timePointYears < startYear], na.rm = TRUE)
          ),
          by = c("geographicAreaM49", "measuredItemSuaFbs")
        ][
          supply_inc_pred <= 0 & timePointYears >= startYear,
          supply_inc_pred := min_new_supply_inc
        ][,
          delta_pred := supply_exc - supply_inc_pred
        ]
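        
        # coeffs_stocks_mod() is a sourced helper. Given how its output is
        # used (supply_inc_pred = c_int + c_sup * supply_exc + c_trend * trend),
        # it presumably fits, per country/item, a linear model of the
        # stock-inclusive supply on the stock-exclusive supply and a time
        # trend. A hedged sketch under that assumption (illustration only,
        # not the actual helper):
        if (FALSE) {
          ex <- data.frame(supply_inc = c(100, 105, 98, 110),
                           supply_exc = c(102, 107, 101, 112),
                           trend = 1:4)
          coef(lm(supply_inc ~ supply_exc + trend, data = ex))
          # returns the intercept (c_int) and slopes (c_sup, c_trend)
        }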
        
        data_histmod_stocks[
          supply_inc > 100,
          abs_stock_perc := abs(supply_exc - supply_inc) / supply_inc
        ]
        
        data_histmod_stocks[,
                            abs_delta_pred_perc := abs(delta_pred) / avg_new_supply_inc
        ]
        
        # Upper bound from historical years (a variant excluding the actual maximum, to avoid extremes, is kept commented out below)
        data_histmod_stocks[,
                            #upper_abs_stock_perc := max(abs_stock_perc[-(which(abs_stock_perc == max(abs_stock_perc, na.rm = TRUE)))], na.rm = TRUE),
                            upper_abs_stock_perc := max(abs_stock_perc[timePointYears < startYear], na.rm = TRUE),
                            by = c("geographicAreaM49", "measuredItemSuaFbs")
        ]
        
        # upper_abs_stock_perc can be infinite if abs_stock_perc is always missing
        # (note that abs_stock_perc is calculated only for supply > 100)
        data_histmod_stocks[
          abs_delta_pred_perc > upper_abs_stock_perc &
            timePointYears >= startYear &
            !is.infinite(upper_abs_stock_perc),
          delta_pred :=
            upper_abs_stock_perc * avg_new_supply_inc * sign(delta_pred)
        ]
        
        data_for_stocks <-
          data_histmod_stocks[
            timePointYears >= startYear ,
            .(geographicAreaM49, measuredItemSuaFbs,
              timePointYears, delta = delta_pred)
          ]
        
        if (nrow(data_for_stocks) > 0) {
          data_for_stocks <-
            dt_left_join(
              data_for_stocks,
              all_opening_stocks[,
                                 .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
                                   timePointYears, new_opening = Value)
              ],
              by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
            )
          
          stockdata <-
            data[
              Protected == TRUE &
                measuredElementSuaFbs == "stockChange" &
                !is.na(Value) &
                measuredItemSuaFbs %chin% data_for_stocks$measuredItemSuaFbs,
              .(
                geographicAreaM49,
                measuredItemSuaFbs,
                timePointYears,
                stockvar = Value
              )
            ]
          
          data_for_stocks <-
            dt_left_join(
              data_for_stocks,
              stockdata,
              by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
            )
          
          data_for_stocks[!is.na(stockvar), delta := stockvar]
          
          data_for_stocks[, stockvar := NULL]
          data_for_stocks[is.na(new_opening), new_opening:=0]
          
          # Stock withdrawal cannot exceed opening stocks in the first year
          data_for_stocks[
            timePointYears == startYear & delta < 0 & abs(delta) > new_opening,
            delta := - new_opening
          ]
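          
          # E.g. (hypothetical figures): with opening stocks of 100 and a
          # predicted withdrawal of -150 in the first year, delta is clamped
          # to -100 so that closing stocks cannot go negative.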
          
          # NOTE: Data here should be ordered by country/item/year (CHECK)
          data_for_stocks <- update_opening_stocks(data_for_stocks)
          
          data <-
            dt_left_join(
              data,
              data_for_stocks,
              by = c('geographicAreaM49', 'timePointYears', 'measuredItemSuaFbs')
            )
          
          data[
            measuredElementSuaFbs == "stockChange" &
              Protected == FALSE &
              !is.na(delta),
            `:=`(
              Value = delta,
              flagObservationStatus = "E",
              flagMethod = "u",
              Protected = FALSE
            )
          ]
          
          data[, c("delta", "new_opening") := NULL]
          
          all_opening_stocks <-
            dt_full_join(
              all_opening_stocks,
              data_for_stocks[,
                              .(
                                geographicAreaM49,
                                measuredItemFbsSua = measuredItemSuaFbs,
                                timePointYears,
                                new_opening
                              )
              ],
              by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
            )
          
          all_opening_stocks[
            !is.na(new_opening) & (round(new_opening) != round(Value) | is.na(Value)),
            `:=`(
              Value = new_opening,
              flagObservationStatus = "E",
              flagMethod = "u",
              Protected = FALSE
            )
          ]
          
          all_opening_stocks[, new_opening := NULL]
        }
      }
    }
    
    
    ############# END / Create stocks, if missing ##################
    
    
    # Correcting stocks
    
    data_st <- copy(data)
    
    data_st[,
            supply :=
              sum(
                Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
                # supply here excludes stock variation: production + imports - exports
                - Value[get(p$elementVar) %in% c(p$exportCode)],
                na.rm = TRUE
              ),
            by = c(p$geoVar, p$yearVar, p$itemVar)
    ]
    data_st <- data_st[measuredElementSuaFbs == "stockChange", ]
    
    
    data_stocks <-
      merge(
        data_st,
        all_opening_stocks[,list(geographicAreaM49,measuredItemSuaFbs=measuredItemFbsSua,
                                 timePointYears,openingStocks=Value)],
        by = c(p$geoVar, p$itemVar, p$yearVar)
      )
    
    data_stocks[, stockCheck := ifelse(sign(Value) == 1 & Value + openingStocks > 2 * supply & supply > 0, TRUE, FALSE)]
    
    # create max column:
    data_stocks[, maxValue := 2 * supply - openingStocks]
    data_stocks[, stocks_checkVal := ifelse(stockCheck == TRUE & sign(maxValue) == 1, maxValue, 0)]
    
    
    data_stocks[flagObservationStatus == "E" & flagMethod == "f", Protected := FALSE]
    
    for (i in startYear:max(unique(as.numeric(data_stocks$timePointYears)))) {
      
      data_stocks[, Value := ifelse(timePointYears == i & stockCheck == TRUE & Protected == FALSE, stocks_checkVal, Value)]
      
      data_stocks[order(geographicAreaM49, measuredItemSuaFbs, timePointYears),
                  prev_stocks := c(NA, Value[-.N]),
                  by = c(p$geoVar, p$itemVar)]
      
      data_stocks[order(geographicAreaM49, measuredItemSuaFbs, timePointYears),
                  prev_opening := c(NA, openingStocks[-.N]),
                  by = c(p$geoVar, p$itemVar)]
      
      if (i < max(unique(as.numeric(data_stocks$timePointYears)))) {
        
        data_stocks[, openingStocks := ifelse(timePointYears == i + 1, prev_opening + prev_stocks, openingStocks)]
        
      }
      
    }
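    
    # The loop above applies the cap year by year and propagates the effect
    # forward through the openings: openingStocks[t+1] = openingStocks[t] +
    # stock variation[t], so correcting the variation in year t also shifts
    # every later opening. E.g. (hypothetical figures) lowering the 2014
    # variation by 50 also lowers the 2015 and later openings by 50.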
    
    
    data <-
      merge(
        data,
        data_stocks[,list(geographicAreaM49,measuredItemSuaFbs,measuredElementSuaFbs,
                          timePointYears,corr_stocks=Value,stockCheck)],
        by = c(p$geoVar,p$elementVar, p$itemVar, p$yearVar),
        all.x = TRUE
      )
    
    
    data[
      measuredElementSuaFbs == "stockChange" &
        Protected == FALSE &
        !is.na(corr_stocks) &
        stockCheck==TRUE,
      `:=`(
        Value = corr_stocks,
        flagObservationStatus = "E",
        flagMethod = "n",
        Protected = FALSE
      )
    ]
    
    data[
      measuredElementSuaFbs == "stockChange" &
        flagObservationStatus=="E" &
        flagMethod=="f" &
        !is.na(corr_stocks) &
        stockCheck==TRUE,
      `:=`(
        Value = corr_stocks,
        flagObservationStatus = "E",
        flagMethod = "n",
        Protected = FALSE
      )
    ]
    
    
    
    data[,corr_stocks:=NULL]
    data[,stockCheck:=NULL]
    
    
    stock_correction_send <-
      data[measuredElementSuaFbs == "stockChange" &
             flagObservationStatus == "E" &
             flagMethod == "n", ]
    
    write.csv(stock_correction_send, tmp_file_Stock_correction)
    
    all_opening_stocks <-
      merge(
        all_opening_stocks,
        data_stocks[,list(geographicAreaM49,measuredItemFbsSua=measuredItemSuaFbs,
                          timePointYears,corr_op_stocks=openingStocks,stockCheck)],
        by = c(p$geoVar, "measuredItemFbsSua", p$yearVar),
        all.x = TRUE
      )
    
    # NOTE: dplyr::lag() shifts over the whole table (no `by`), so this relies
    # on all_opening_stocks being ordered by country/item/year.
    all_opening_stocks[, stockCheck := dplyr::lag(stockCheck)]
    
    all_opening_stocks[
      measuredElementSuaFbs == "5113" &
        Protected == FALSE &
        !is.na(corr_op_stocks) &
        stockCheck==TRUE,
      `:=`(
        Value = corr_op_stocks,
        flagObservationStatus = "E",
        flagMethod = "u",
        Protected = FALSE
      )
    ]
    
    all_opening_stocks[,corr_op_stocks:=NULL]
    all_opening_stocks[,stockCheck:=NULL]
    ############# END / Correcting stocks ##################
    dbg_print("stock correction done")
    
    dataMergeTree <- data[measuredElementSuaFbs %chin% c('production', 'imports', 'exports', 'stockChange')]
    
    setnames(dataMergeTree, "measuredItemSuaFbs", "measuredItemParentCPC")
    
    dbg_print("dataMerge + treeCurrentLevel")
    
    dataMergeTree <-
      merge(
        dataMergeTree,
        tree_parent_Level,
        by = c(p$parentVar, p$geoVar, p$yearVar),
        allow.cartesian = TRUE
      )
    
    dbg_print("dataMerge + data_tree_final")
    
    dataMergeTree <-
      merge(
        dataMergeTree,
        data_tree_final,
        by = c(p$parentVar, p$childVar, p$geoVar, p$yearVar),
        allow.cartesian = TRUE
      )
    
    setkey(dataMergeTree, NULL)
    
    dbg_print("dataMerge availability")
    
    dataMergeTree[,
                  availability :=
                    sum(
                      Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
                      # XXX "stockChange" is hardcoded here: p$stockCode holds
                      # a different string than the element name used in data
                      - Value[get(p$elementVar) %in% c(p$exportCode, "stockChange")],
                      na.rm = TRUE
                    ),
                  by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
    ]
    
    dbg_print("negative availability")
    
    negative_availability[[as.character(lev)]] <-
      unique(
        dataMergeTree[
          availability < -100,
          .(
            country            = geographicAreaM49,
            year               = timePointYears,
            measuredItemFbsSua = measuredItemParentCPC,
            element            = measuredElementSuaFbs,
            Value,
            flagObservationStatus,
            flagMethod,
            availability
          )
        ]
      )
    
    dbg_print("zero out negative availability")
    
    # XXX: Replace negative availability so we get zero production instead of
    # negative. This, however, should be fixed in advance, somehow.
    dataMergeTree[availability < 0, availability := 0]
    
    dbg_print("dataMergeTree + shareDownUp_previous")
    
    message("Line 3252")
    
    ##########Caculating and updating processed of parent--------------------------
    #amsata
    datamergeNew <- copy(dataMergeTree)
    
    datamergeNew <-
      datamergeNew[,
                   c("geographicAreaM49", "measuredItemParentCPC", "availability",
                     "measuredItemChildCPC", "timePointYears", "extractionRate",
                     "processingLevel", "shareDownUp"),
                   with = FALSE
      ]
    
    datamergeNew <- unique(datamergeNew, by = colnames(datamergeNew))
    
    datamergeNew <-
      merge(
        datamergeNew,
        data[
          measuredElementSuaFbs == 'production',
          list(geographicAreaM49, timePointYears,
               measuredItemChildCPC = measuredItemSuaFbs, production = Value,
               Protected, Official, flagObservationStatus, flagMethod)
        ],
        by = c('geographicAreaM49', 'timePointYears', 'measuredItemChildCPC')
      )
    
    setkey(datamergeNew, NULL)
    
    datamergeNew[, manual := flagObservationStatus == "E" & flagMethod == "f"]
    
    datamergeNew[, c("flagObservationStatus", "flagMethod") := NULL]
    
    datamergeNew_zw <- datamergeNew[measuredItemChildCPC %chin% zeroWeight]
    
    datamergeNew <- datamergeNew[measuredItemChildCPC %!in% zeroWeight]
    
    datamergeNew <-
      dt_left_join(
        datamergeNew,
        data_Pshare,
        by = c(p$parentVar, p$geoVar, p$yearVar),
        allow.cartesian = TRUE
      ) 
    
    datamergeNew <-
      dt_left_join(
        datamergeNew,
        data_ShareUpDoawn_final,
        by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
      ) 
    
    datamergeNew <-
      datamergeNew[,
                   c("geographicAreaM49", "measuredItemParentCPC", "availability", "Pshare","NewLoss","Loss",
                     "measuredItemChildCPC", "timePointYears", "extractionRate",
                     "processingLevel", "shareDownUp", "shareUpDown", "production",
                     "Protected", "flagObservationStatus", "flagMethod", "Official", "manual"),
                   with = FALSE
      ]
    
    datamergeNew <- unique(datamergeNew)
    
    # datamergeNew[,zeroweight:=measuredItemChildCPC %in% zeroWeight]
    
    
    # including processed of parent
    datamergeNew <-
      dt_left_join(
        datamergeNew,
        data[
          measuredElementSuaFbs == 'foodManufacturing',
          list(geographicAreaM49, timePointYears,
               measuredItemParentCPC = measuredItemSuaFbs,
               processedBis = Value, processedProt = Protected)
        ],
        by = c('geographicAreaM49', 'timePointYears', 'measuredItemParentCPC')
      )
    
    # datamergeNew[timePointYears >= 2014 & Protected == FALSE, Value := NA][, Protected := NULL]
    datamergeNew[timePointYears >= startYear & Protected == FALSE, production := NA]
    datamergeNew[timePointYears >= startYear & processedProt == FALSE, processedBis := NA]
    
    # Calculate the number of child with protected production for each parent
    datamergeNew[,
                 Number_childPro := sum(Protected == TRUE & shareUpDown > 0, na.rm = TRUE),
                 by = c(p$parentVar, p$geoVar, p$yearVar)
    ]
    
    
    datamergeNew[,
                 AllProtected := sum(shareUpDown[Protected == TRUE], na.rm = TRUE),
                 by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
    ]
    
    datamergeNew[is.na(shareUpDown),shareUpDown:=0]
    
    # Update of shareUpDown. Giulia: wrong -- if only one down-up share is
    # changed, we end up with an inconsistency.
    datamergeNew[extractionRate > 0,
                 shareUpDown := ifelse(
                   Protected == TRUE & AllProtected != 1,
                   (production * shareDownUp / extractionRate) /
                     sum(production[Protected == TRUE] * shareDownUp[Protected == TRUE] /
                           extractionRate[Protected == TRUE], na.rm = TRUE) *
                     (1 - sum(shareUpDown[Protected == FALSE], na.rm = TRUE)),
                   shareUpDown
                 ),
                 by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
    ]
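    # Illustration (not executed; hypothetical values): protected children get
    # shareUpDown proportional to their parent-equivalent quantity
    # (production * shareDownUp / extractionRate), rescaled so all shares still
    # sum to 1 while the unprotected shares stay fixed.
    if (FALSE) {
      toy <- data.table(
        child          = c("A", "B", "C"),
        Protected      = c(TRUE, TRUE, FALSE),
        production     = c(80, 20, 10),
        shareDownUp    = c(1, 1, 1),
        extractionRate = c(0.8, 0.5, 1),
        shareUpDown    = c(0.5, 0.3, 0.2)
      )
      toy[, parent_eq := production * shareDownUp / extractionRate]
      # parent_eq: A = 100, B = 40; the unprotected share stays at 0.2, so
      # A gets 100/140 * 0.8 ~ 0.571 and B gets 40/140 * 0.8 ~ 0.229.
      toy[Protected == TRUE,
          shareUpDown := parent_eq / sum(parent_eq) *
            (1 - toy[Protected == FALSE, sum(shareUpDown)])]
      print(toy)
    }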
    datamergeNew[, AllProtected := NULL]
    # calculate the number of children of each parent to which processed
    # quantities used to be sent
    datamergeNew[,
                 Number_child := sum(shareUpDown > 0, na.rm = TRUE),
                 by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
    
    # estimate the processed quantity for each parent based on the historical
    # trend of the processed share
    datamergeNew[, Processed := ifelse(NewLoss == TRUE, Pshare * (availability - ifelse(is.na(Loss), 0, Loss)), Pshare * availability)]
    
    # However, we may have cases where some production of child commodities is official
    datamergeNew[sign(Processed) == -1, Processed := 0]
    # Case 1: some child production is official, but not all of it.
    # In this case we update the shareUpDown of the child commodities whose
    # production is official; if the sum of their shareUpDown is greater than 1,
    # we update processed using the official production of those children and
    # then update the shareUpDown of the other child commodities.
    datamergeNew[, c("shareUpDown_NEW", "Processed_new") := NA_real_]
    
    # A child is converted up to the processed quantity of its parent if it has
    # protected production with shareUpDown > 0, shareDownUp > 0 and
    # extractionRate > 0. This avoids issues caused by multiple-parent children
    # with protected production.
    
    datamergeNew[,
                 child_DownUp :=
                   Protected == TRUE &
                   shareUpDown > 0 &
                   extractionRate > 0 &
                   shareDownUp > 0
    ]
    
    # GianLuca suggestion----------------
    
    datamergeNew[Number_childPro > 0, #& extractionRate > 0,
                 #sum_shareUD_high == TRUE,
                 Processed_new :=
                   sum(
                     production[child_DownUp == TRUE] / extractionRate[child_DownUp == TRUE] * shareDownUp[child_DownUp == TRUE],
                     na.rm = TRUE
                   ) /
                   sum(shareUpDown[child_DownUp == TRUE], na.rm = TRUE),
                 by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
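    
    # Illustration (not executed; hypothetical values): Processed_new
    # back-computes the parent's foodManufacturing from protected child
    # production; dividing by the children's total shareUpDown scales partial
    # coverage up to the whole parent.
    if (FALSE) {
      toy <- data.table(
        production     = c(40, 30),   # protected child production
        extractionRate = c(0.8, 0.6),
        shareDownUp    = c(1, 1),
        shareUpDown    = c(0.4, 0.35)
      )
      toy[, sum(production / extractionRate * shareDownUp) / sum(shareUpDown)]
      # (50 + 50) / 0.75 ~ 133.3 units of parent sent to processing
    }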
    
    datamergeNew[,
                 Processed_new := ifelse(is.na(Processed_new), mean(Processed_new, na.rm = TRUE), Processed_new),
                 by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
    
    datamergeNew[,
                 processed_down_up := sum(child_DownUp, na.rm = TRUE) > 0,
                 by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
    
    datamergeNew[is.na(Processed_new) & processed_down_up == TRUE, Processed_new := 0]
    
    datamergeNew[Processed_new == 0 & processed_down_up == FALSE, Processed_new := NA_real_]
    
    datamergeNew[
      Protected == TRUE & manual == FALSE,
      `:=`(flagObservationStatus = "T", flagMethod = "-")
    ]
    
    datamergeNew[
      Protected == TRUE & manual == TRUE,
      `:=`(flagObservationStatus = "E", flagMethod = "f")
    ]
    
    datamergeNew[
      Protected==FALSE,
      `:=`(flagObservationStatus = "I", flagMethod = "c")
    ]
    
    # shareUpDown_NEW is initialized to NA_real_ above and never filled in this
    # version, so the assignment below is currently a no-op.
    datamergeNew[!is.na(shareUpDown_NEW), shareUpDown := shareUpDown_NEW]
    
    # GianLuca------------------------
    
    
    #Update the processed quantities
    
    dataForProc <-
      datamergeNew[,
                   c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
                     "availability","Pshare","Processed", "processedBis", "Processed_new"),
                   with = FALSE
      ]
    
    dataForProc <- unique(dataForProc)
    
    dataForProc[
      timePointYears >= startYear & !is.na(Processed_new),
      processedBis := Processed_new
    ]
    
    # Cancel the automatic update of Pshare for 2014 onward.
    # However, these lines can be kept for future improvement.
    
    # dataForProc[,
    #   Pshare := processedBis / availability,
    #   by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
    # ]
    # dataForProc[is.nan(Pshare), Pshare := NA_real_]
    # dataForProc[Pshare > 1, Pshare := 1]
    # dataForProc[Pshare < 0, Pshare := 0]
    # dataForProc <-
    #   dataForProc[
    #     order(geographicAreaM49, measuredItemParentCPC, timePointYears),
    #     Pshare_avr := rollavg(Pshare, order = 3),
    #     by = c("geographicAreaM49", "measuredItemParentCPC")
    #   ]
    # 
    # setkey(dataForProc, NULL)
    # 
    # dataForProc[timePointYears > 2013 & is.na(Pshare), Pshare := Pshare_avr]
    # 
    # dataForProc[, Pshare_avr := NULL]
    # 
    # dataForProc[, Processed := Pshare * availability]
    
    dataForProc[!is.na(Processed_new), Processed := Processed_new]
    
    datamergeNew <-
      datamergeNew[,
                   c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
                     "measuredItemChildCPC", "extractionRate", "processingLevel",
                     "shareDownUp", "shareUpDown", "production", "Protected",
                     "flagObservationStatus", "flagMethod", "Official", "manual",
                     "processed_down_up"),
                   with = FALSE
      ]
    
    datamergeNew <-
      merge(
        dataForProc,
        datamergeNew,
        by = c(p$geoVar, p$yearVar, p$parentVar),
        allow.cartesian = TRUE
      ) 
    
    setkey(datamergeNew, NULL)
    
    #if the shareUpDown of a zeroweight coproduct is updated, the shareUpDown of
    #the corresponding zeroweight should also be updated
    
    #merge zeroweight with the tree
    
    datamergeNew_zeroweight <-
      merge(
        datamergeNew,
        zw_coproduct ,
        by = "measuredItemChildCPC",
        allow.cartesian = TRUE
      )
    
    setkey(datamergeNew_zeroweight, NULL)
    
    datamergeNew_zeroweight[, measuredItemChildCPC := zeroweight]
    
    datamergeNew_zeroweight <-
      datamergeNew_zeroweight[,
                              c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
                                "measuredItemChildCPC", "Pshare", "shareUpDown", "Processed",
                                "flagObservationStatus", "flagMethod", "processed_down_up"),
                              with = FALSE
      ]
    
    #correction
    datamergeNew_zeroweight<-
      datamergeNew_zeroweight[,
                              shareUpDown := sum(shareUpDown, na.rm = TRUE),
                              by = c("geographicAreaM49", "measuredItemParentCPC",
                                     "measuredItemChildCPC", "timePointYears")
      ]
    
    datamergeNew_zeroweight <- unique(datamergeNew_zeroweight)
    
    #/correction
    
    datamergeNew_zeroweight <-
      merge(
        datamergeNew_zeroweight,
        datamergeNew_zw,
        by = c("geographicAreaM49", "timePointYears",
               "measuredItemParentCPC", "measuredItemChildCPC")
      )
    
    datamergeNew <-
      datamergeNew[, colnames(datamergeNew_zeroweight), with = FALSE]
    
    datamergeNew_final <- rbind(datamergeNew, datamergeNew_zeroweight)
    
    # some correction (assigning NA_real_ to missing shareUpDown only
    # normalizes the NA type; values are unchanged)
    datamergeNew_final[is.na(shareUpDown), shareUpDown := NA_real_]
    
    updated_shareUpDOwn[[as.character(lev)]] <-
      datamergeNew_final[
        processingLevel == lev,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "measuredItemChildCPC", "shareUpDown", "flagObservationStatus",
          "flagMethod"),
        with = FALSE
      ]
    
    #/processed of parent-----------------
    estimated_processed <-
      datamergeNew_final[,
                         list(geographicAreaM49, timePointYears,
                              measuredItemSuaFbs = measuredItemParentCPC,
                              Processed, processed_down_up)
      ]
    
    estimated_processed <- unique(estimated_processed)
    
    complete_food_proc <-
      CJ(
        measuredItemSuaFbs    = unique(estimated_processed$measuredItemSuaFbs),
        measuredElementSuaFbs = 'foodManufacturing',
        geographicAreaM49     = unique(estimated_processed$geographicAreaM49),
        timePointYears        = unique(estimated_processed$timePointYears)
      )
    
    # rbind with anti_join
    data <-
      rbind(
        data,
        complete_food_proc[
          !data,
          on = c('measuredItemSuaFbs', 'measuredElementSuaFbs',
                 'geographicAreaM49', 'timePointYears')
        ],
        fill = TRUE
      )
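    
    # Illustration (not executed): `complete[!data, on = keys]` is data.table's
    # anti-join, i.e. the rows of `complete` with no match in `data`; rbinding
    # them appends the missing combinations without duplicating existing rows.
    if (FALSE) {
      existing <- data.table(id = c(1, 2), Value = c(10, 20))
      complete <- data.table(id = 1:4)
      rbind(existing, complete[!existing, on = "id"], fill = TRUE)
      # rows id = 3 and id = 4 are appended with Value = NA
    }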
    
    data <-
      dt_left_join(
        data,
        estimated_processed[timePointYears >= startYear],
        by = c('geographicAreaM49', 'timePointYears', 'measuredItemSuaFbs'),
        allow.cartesian = TRUE
      )
    
    data[is.na(Protected), Protected := FALSE]
    data[is.na(Official), Official := FALSE]
    data[is.na(Processed_estimed), Processed_estimed := FALSE]
    
    data[
      measuredElementSuaFbs == "foodManufacturing" &
        Protected == FALSE & !is.na(Processed) & Processed_estimed == FALSE,
      `:=`(
        Value = Processed,
        flagObservationStatus = "E",
        flagMethod = ifelse(processed_down_up == TRUE, "-", "i"),
        Processed_estimed = TRUE
      )
    ][,
      c("Processed", "processed_down_up") := NULL
    ]
    
    
    ####### estimating production of derived
    data_production <-
      datamergeNew_final[
        processingLevel == lev,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "availability", "Pshare", "measuredItemChildCPC", "extractionRate",
          "shareDownUp", "shareUpDown", "production", "Protected"),
        with = FALSE
      ]
    
    data_production <- unique(data_production)
    
    data_production <-
      dt_left_join(
        data_production,
        unique(
          data[
            measuredElementSuaFbs == "foodManufacturing" & !is.na(Value),
            .(geographicAreaM49, timePointYears,
              measuredItemParentCPC = measuredItemSuaFbs,
              Processed_parent = Value)
          ]
        ),
        by = c(p$geoVar, p$yearVar, p$parentVar)
      )
    
    data_production[,
                    #new_imputation:=Processed_parent*shareUpDown*extractionRate*shareDownUp
                    new_imputation := Processed_parent * shareUpDown * extractionRate #shareDownUp is not part of the formula
    ]
    
    data_production <-
      data_production[,
                      c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
                        "availability", "Pshare", "measuredItemChildCPC", "production",
                        "new_imputation", "extractionRate", "shareDownUp",
                        "shareUpDown", "Processed_parent"),
                      with = FALSE
      ]
    
    data_production <- unique(data_production)
    
    sel_vars <- 
      c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
        "availability", "Pshare", "measuredItemChildCPC", "extractionRate",
        "shareDownUp", "shareUpDown", "Processed_parent")
    
    computed_shares[[as.character(lev)]] <-
      data_production[, sel_vars, with = FALSE]
    
    # XXX: change this name
    computed_shares_send[[as.character(lev)]] <-
      data_production[, sel_vars, with = FALSE]
    
    dbg_print("sum unique of new_imputation")
    
    data_production <-
      data_production[,
                      .(
                        measuredElementSuaFbs = 'production',
                        # NOTE: the sum(unique()) is just a temporary HACK: check how to do it properly
                        imputed_deriv_value = sum(unique(round(new_imputation, 2)), na.rm = TRUE)
                      ),
                      .(geographicAreaM49, timePointYears, measuredItemSuaFbs = measuredItemChildCPC)
      ]
    
    
    data_production_complete <-
      CJ(
        measuredItemSuaFbs    = unique(data_production$measuredItemSuaFbs),
        measuredElementSuaFbs = 'production',
        geographicAreaM49     = unique(data_production$geographicAreaM49),
        timePointYears        = unique(data_production$timePointYears)
      )
    
    setkey(data_production_complete, NULL)
    
    sel_vars <- 
      c('geographicAreaM49', 'timePointYears',
        'measuredItemSuaFbs', 'measuredElementSuaFbs') 
    
    # rbind with anti_join
    data <-
      rbind(
        data,
        data_production_complete[!data, on = sel_vars],
        fill = TRUE
      )
    
    data <- dt_left_join(data, data_production, by = sel_vars)
    
    dbg_print("z data for shares to send")
    
    z <-
      data[
        measuredElementSuaFbs %chin% c("production", "imports", "exports", "stockChange"),
        c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs",
          "measuredElementSuaFbs", "Value", "Protected", "imputed_deriv_value"),
        with = FALSE
      ]
    
    z[
      measuredElementSuaFbs == 'production' & Protected == FALSE,
      Value := imputed_deriv_value
    ]
    
    z[, imputed_deriv_value := NULL]
    
    # XXX stocks create duplicates
    z <- unique(z)
    
    dbg_print("dcast z data")
    
    z <-
      data.table::dcast(
        z,
        geographicAreaM49 + timePointYears + measuredItemSuaFbs ~ measuredElementSuaFbs,
        value.var = c("Value", "Protected")
      )
    
    # XXX stockChange may change below (and also production, if required)
    
    dbg_print("z data + computed_shares_send")
    
    tmp <-
      z[computed_shares_send[[as.character(lev)]],
        on = c('geographicAreaM49'  = 'geographicAreaM49',
               'timePointYears'     = 'timePointYears',
               'measuredItemSuaFbs' = 'measuredItemChildCPC')]
    
    setkey(tmp, NULL)
    
    # XXX stocks create duplicates
    tmp <- unique(tmp)
    
    computed_shares_send[[as.character(lev)]] <- tmp
    
    dbg_print("assign production of derived to non protected/frozen data")
    
    # Assign only to non-protected, non-frozen data
    # (XXX: here only measuredItemSuaFbs that are children should be assigned)
    data[
      timePointYears >= startYear &
        Protected == FALSE &
        measuredElementSuaFbs == 'production' &
        !is.na(imputed_deriv_value),
      `:=`(
        Value = imputed_deriv_value,
        flagObservationStatus = "I", flagMethod = "c")
    ]
    
    data[, imputed_deriv_value := NULL]
  }
  
  
  ### Processed check: remove processed values that do not link to children
  
  ProcessedCheck = copy(data)
  setnames(ProcessedCheck, c("measuredItemSuaFbs", "Value"), c("measuredItemParentCPC", "foodManufacturing"))
  ProcessedCheck = ProcessedCheck[measuredElementSuaFbs == "foodManufacturing" & flagMethod == "i" , ]
  
  ProductionCheck = copy(data)
  setnames(ProductionCheck, c("measuredItemSuaFbs", "Value"), c("measuredItemChildCPC", "production"))
  ProductionCheck = ProductionCheck[measuredElementSuaFbs == "production" & flagMethod == "c" , ]
  
  CheckProcessedData = merge(
    ProcessedCheck[, .(geographicAreaM49, timePointYears, measuredItemParentCPC, foodManufacturing)],
    tree[measuredElementSuaFbs == "extractionRate", .(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, timePointYears)],
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears"), all.x = TRUE
  )
  
  CheckProcessedData = merge(
    CheckProcessedData,
    ProductionCheck[, .(geographicAreaM49, timePointYears, measuredItemChildCPC, production)],
    by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears"), all.x = TRUE
  )
  
  CheckProcessedData[, 
                     ProcessedCheck := sum(production, na.rm = TRUE), 
                     by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")]
  
  CheckProcessedData[ProcessedCheck == 0, foodManufacturing := 0  ]
  
  CheckProcessedData = subset(CheckProcessedData, ProcessedCheck == 0)
  
  if (nrow(CheckProcessedData) > 0) {
    
    saveProcessedCheck = unique(CheckProcessedData[, list(geographicAreaM49, measuredItemSuaFbs = measuredItemParentCPC,
                                                          measuredElementSuaFbs = "foodManufacturing",
                                                          Value_processedCheck = foodManufacturing,
                                                          timePointYears)])
    
    saveProcessedCheck = saveProcessedCheck[timePointYears %in% as.character(startYear:endYear), ]
  }
  
  # save these back to data
  if(exists("saveProcessedCheck")){
    data = merge(data,
                 saveProcessedCheck,
                 by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs", "measuredElementSuaFbs"), all.x=T) 
    
    data[measuredElementSuaFbs == "foodManufacturing" & !is.na(Value_processedCheck) & Value != 0, Value := Value_processedCheck ]
    data[, Value_processedCheck := NULL]
  }
  
}
# end of derived production loop

data[measuredElementSuaFbs == "production" & dplyr::near(Value,0) & flagObservationStatus == "I" &
       flagMethod == "c", `:=` (Value = NA, flagObservationStatus = NA, flagMethod = NA) ]

setkey(data, NULL)

computed_shares <- rbindlist(computed_shares)

updated_shareUpDOwn <- rbindlist(updated_shareUpDOwn)

updated_shareUpDOwn<- updated_shareUpDOwn[timePointYears %in% startYear:endYear]


shareUpDown_to_save <- copy(updated_shareUpDOwn)
shareUpDown_to_save[, measuredElementSuaFbs := "5431"]

setnames(
  shareUpDown_to_save,
  c("measuredItemParentCPC", "measuredItemChildCPC", "shareUpDown"),
  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree", "Value")
)

# shareUpDown_to_save <- shareUpDown_to_save[!is.na(Value)]
shareUpDown_to_save[is.na(Value),Value:=0]

data_shareUpDown_sws <- GetData(sessionKey_shareUpDown)
setcolorder(
  shareUpDown_to_save, names(data_shareUpDown_sws))

#shareUpDown_to_save[, Value := round(Value, 3)]

faosws::SaveData(
  domain = "suafbs",
  dataset = "up_down_share",
  data = shareUpDown_to_save,
  waitTimeout = 20000
)


remove_connection <- data_shareUpDown_sws[((measuredItemParentCPC_tree == "02211" & measuredItemChildCPC_tree == "22212") |
                                             # cheese from whole cow milk cannot come from skim milk of cow
                                             (measuredItemParentCPC_tree == "22110.02" & measuredItemChildCPC_tree == "22251.01") |
                                             (measuredItemParentCPC_tree == "02211" & measuredItemChildCPC_tree == "22251.02"))]


if (nrow(remove_connection)>0){

  remove_connection[,`:=`(Value = NA, flagObservationStatus = NA, flagMethod = NA)]
  faosws::SaveData(
    domain = "suafbs",
    dataset = "up_down_share",
    data = remove_connection,
    waitTimeout = 20000
  )
}

dbg_print("end of derived production loop")


data_deriv <-
  data[
    measuredElementSuaFbs == 'production' &
      timePointYears %in% yearVals,
    list(
      geographicAreaM49,
      measuredElementSuaFbs = "5510",
      measuredItemFbsSua = measuredItemSuaFbs,
      timePointYears,
      Value,
      flagObservationStatus,
      flagMethod
    )
  ]

# save processed data
data_processed <-
  data[
    measuredElementSuaFbs == 'foodManufacturing' &
      timePointYears %in% yearVals,
    list(
      geographicAreaM49,
      measuredElementSuaFbs = "5023",
      measuredItemFbsSua = measuredItemSuaFbs,
      timePointYears,
      Value,
      flagObservationStatus,
      flagMethod
    )
  ]

# Save stock data in SUA Unbalanced

data_stock_to_save <-
  data[
    measuredElementSuaFbs == 'stockChange' &
      timePointYears %in% yearVals &
      !is.na(Value),
    list(
      geographicAreaM49,
      measuredElementSuaFbs = "5071",
      measuredItemFbsSua = measuredItemSuaFbs,
      timePointYears,
      Value,
      flagObservationStatus,
      flagMethod
    )
  ]

opening_stocks_to_save <- copy(all_opening_stocks)
opening_stocks_to_save[, Protected := NULL]

data_to_save_unbalanced <-
  rbind(
    data_deriv,
    data_processed,
    data_stock_to_save,
    opening_stocks_to_save
  )

data_to_save_unbalanced <- data_to_save_unbalanced[!is.na(Value)]



dbg_print("complete data elements")

data <-
  plyr::ddply(
    data,
    .variables = c('geographicAreaM49', 'timePointYears'),
    .fun = function(x) addMissingElements(as.data.table(x), p)
  )

setDT(data)


######################## OUTLIERS #################################
# re-writing of Cristina's outliers plugin with data.table syntax #




# Download also calories
elemKeys_all <- c(elemKeys, "664")

# Get ALL data from SUA BALANCED, do an anti-join, set to NA whatever was not
# generated here so to clean the session on unbalanced of non-existing cells

if (CheckDebug()) {
  key_all <-
    DatasetKey(
      domain = "suafbs",
      dataset = "sua_balanced",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElementSuaFbs", keys = elemKeys_all),
          measuredItemFbsSua = Dimension(name = "measuredItemFbsSua", keys = itemKeys),
          # Get from 2010, just for the SUA aggregation
          timePointYears = Dimension(name = "timePointYears", keys = as.character(2010:endYear))
        )
    )
} else {
  key_all <- swsContext.datasets[[1]]
  
  key_all@dimensions$timePointYears@keys <- as.character(2010:endYear)
  key_all@dimensions$measuredItemFbsSua@keys <- itemKeys
  key_all@dimensions$measuredElementSuaFbs@keys <- elemKeys_all
  key_all@dimensions$geographicAreaM49@keys <- COUNTRY
}


data_suabal <- GetData(key_all)

dbg_print("start outliers")
if (FIX_OUTLIERS == TRUE) {
  
  sel_vars <- c('geographicAreaM49', 'timePointYears',
                'measuredItemSuaFbs', 'measuredElementSuaFbs')
  
  commDef <- ReadDatatable("fbs_commodity_definitions") # XXX: updated?
  
  stopifnot(nrow(commDef) > 0)
  
  primaryProxyPrimary_items <-
    commDef[proxy_primary == "X" | primary_commodity == "X"]$cpc
  
  food_items <- commDef[food_item == "X"]$cpc
  
  fbsTree <- ReadDatatable("fbs_tree")
  
  dairy_meat_items <- fbsTree[id3 %chin% c("2948", "2943"), item_sua_fbs]
  
  dout <-
    CJ(
      geographicAreaM49 = unique(data$geographicAreaM49),
      timePointYears = unique(data$timePointYears),
      measuredItemSuaFbs = unique(data$measuredItemSuaFbs),
      measuredElementSuaFbs = unique(data$measuredElementSuaFbs)
    )
  
  dout <- data[dout, on = sel_vars]
  
  
  
  # dout <- merge(dout, codes, by.x = "measuredElementSuaFbs", by.y = "name", all.x = TRUE)
  # dout[, measuredElementSuaFbs := name]
  # dout[, measuredElementSuaFbs := NULL]
  
  dout[is.na(Protected), Protected := FALSE]
  
  data_suabalout <- data_suabal
  
  refYear = (startYear - 3):(startYear - 1)
  
  
  
  suabalCJ <-
    CJ(
      geographicAreaM49 = unique(data_suabalout$geographicAreaM49),
      timePointYears = unique(as.character(refYear)),
      measuredItemFbsSua = unique(data_suabalout$measuredItemFbsSua),
      measuredElementSuaFbs = unique(data_suabalout$measuredElementSuaFbs)
    )
  
  outlier_suabal = merge(suabalCJ, data_suabalout, all.x = TRUE,
                         by = c("geographicAreaM49", "timePointYears",
                                "measuredItemFbsSua", "measuredElementSuaFbs"))
  
  # merge in names (will also keep only elements for which we need
  # thresholds, i.e. excluding stock openings)
  
  setnames(outlier_suabal, "measuredItemFbsSua", "measuredItemSuaFbs")
  
  
  
  outlier_suabal[,
                 `:=`(
                   production = Value[measuredElementSuaFbs == "5510"],
                   supply     = sum(Value[measuredElementSuaFbs %in% c("5510", "5610")],
                                    - Value[measuredElementSuaFbs %in% c("5910", "5071")],
                                    na.rm = TRUE),
                   domsupply  = sum(Value[measuredElementSuaFbs %in% c("5510", "5610")],
                                    na.rm = TRUE)
                 ),
                 by = c('geographicAreaM49', 'measuredItemSuaFbs', 'timePointYears')
  ]
  
  outlier_suabal[measuredElementSuaFbs %chin% c("5520", "5165", "5164"), element_supply := supply]
  outlier_suabal[measuredElementSuaFbs == "5525", element_supply := production]
  outlier_suabal[measuredElementSuaFbs == "5016", element_supply := domsupply]
  
  outlier_suabal[element_supply < 0, element_supply := NA_real_]
  
  outlier_suabal[, ratio := Value / element_supply]
  
  
  
  
  
  
  
  # Calculate thresholds from SUA balanced values
  outlier_suabal[,
                 `:=`(
                   mean_ratio = mean(ratio[timePointYears %in% refYear], na.rm = TRUE),
                   Meanold    = mean(Value[timePointYears %in% refYear], na.rm = TRUE)
                 ),
                 by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  ]
  
  outlier_suabal[mean_ratio > 1, mean_ratio := 1]
  outlier_suabal[mean_ratio < 0, mean_ratio := 0]
  outlier_suabal[is.na(mean_ratio) | is.nan(mean_ratio), mean_ratio := 0]
  outlier_suabal[is.na(Meanold) | is.nan(Meanold), Meanold := 0]
  
  outlier_suabal[, abs_diff_threshold := ifelse(measuredElementSuaFbs == "5520", 0.1, 0.05)]
  
  outlier_suabal <- merge(outlier_suabal, codes, by = "measuredElementSuaFbs", all.x = TRUE)
  outlier_suabal <- outlier_suabal[!is.na(name)]
  outlier_suabal[, measuredElementSuaFbs := NULL]
  setnames(outlier_suabal, "name", "measuredElementSuaFbs")
  
  
  
  # Protecting E,e here, as we should not correct already corrected outliers
  # (commented out because otherwise, if you change production, the outlier is
  # not corrected accordingly)
  # dout[flagObservationStatus == "E" & flagMethod == "e", Protected := TRUE]
  
  
  
  
  
  dout[,
       `:=`(
         production = Value[measuredElementSuaFbs == "production"],
         supply     = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
                          - Value[measuredElementSuaFbs %in% c("exports", "stockChange")],
                          na.rm = TRUE),
         domsupply  = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
                          na.rm = TRUE)
       ),
       by = c('geographicAreaM49', 'measuredItemSuaFbs', 'timePointYears')
  ]
  
  dout[measuredElementSuaFbs %chin% c("feed", "industrial", "tourist"), element_supply := supply]
  dout[measuredElementSuaFbs == "seed", element_supply := production]
  dout[measuredElementSuaFbs == "loss", element_supply := domsupply]
  
  dout[element_supply < 0, element_supply := NA_real_]
  
  dout[, ratio := Value / element_supply]
  dout[ratio < 0, ratio := 0]
  
  dout = merge(dout,
               unique(outlier_suabal[, .(measuredElementSuaFbs, geographicAreaM49, measuredItemSuaFbs,
                                         mean_ratio, Meanold, abs_diff_threshold)]),
               by = c("measuredElementSuaFbs", "geographicAreaM49", "measuredItemSuaFbs"), all.x = TRUE)
  
  
  # New series are not deleted even if the historical mean is zero; only new
  # series for meat and dairy product losses are removed.
  dout[Meanold == 0 & mean_ratio == 0 & measuredElementSuaFbs %in% "loss" & measuredItemSuaFbs %in% dairy_meat_items,
       `:=`(impute = 0, Protected = TRUE)]
  
  dout[Meanold == 0 & mean_ratio == 0 & measuredElementSuaFbs %in% c("feed", "seed", "loss", "industrial", "tourist") &
         Value == 0, impute := NA_real_]
  
  # If the element is new, there will be no `mean_ratio` and `Meanold`, thus we
  # use the new values to re-calculate them.
  
  
  
  dout[
    timePointYears >= startYear & (is.na(mean_ratio) | is.nan(mean_ratio) | is.infinite(mean_ratio) | mean_ratio == 0),
    mean_ratio := median(ratio[timePointYears >= startYear], na.rm = TRUE),
    by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  ]
  
  dout[
    timePointYears >= startYear & (is.na(Meanold) | is.nan(Meanold) | is.infinite(Meanold) | Meanold == 0),
    Meanold := median(Value[timePointYears >= startYear], na.rm = TRUE),
    by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  ]
  
  dout[mean_ratio > 1, mean_ratio := 1]
  
  dout[, abs_diff_threshold := ifelse(measuredElementSuaFbs == "feed", 0.1, 0.05)]
  
  
  
  dout[
    Protected == FALSE &
      timePointYears %in% yearVals &
      mean_ratio > 0 &
      abs(element_supply) > 0 &
      measuredElementSuaFbs %chin% c("feed", "seed", "loss", "industrial", "tourist") &
      # the conditions below define the outlier, or cases that were NA
      (is.na(Value) |
         abs(ratio - mean_ratio) > abs_diff_threshold |
         mean_ratio == 1 |
         (mean_ratio != 0 & dplyr::near(ratio, 0)) |
         (outside(Value / Meanold, 0.5, 2) & outside(Value - Meanold, -10000, 10000))),
    impute := element_supply * mean_ratio
  ]
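  
  # Illustration (not executed; hypothetical numbers): a non-protected
  # utilization is flagged when its share of supply drifts from the historical
  # mean ratio by more than the absolute threshold (0.1 for feed, 0.05
  # otherwise), or when the level both halves/doubles versus Meanold AND moves
  # by more than 10000 units. `outside()` is a helper sourced from R/, assumed
  # to test whether a value falls outside an interval.
  if (FALSE) {
    ratio <- 0.40; mean_ratio <- 0.25; abs_diff_threshold <- 0.05
    abs(ratio - mean_ratio) > abs_diff_threshold  # TRUE -> outlier; the value
    # would be re-imputed as element_supply * mean_ratio
  }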
  
  
  
  # get rid of utilizations that are zero in 2014-2019
  # dout[Meanold == 0 & mean_ratio == 0 & measuredElementSuaFbs %in% c("feed", "seed", "loss", "industrial"), impute := 0]
  
  # Remove imputation for loss that is non-food and non-primaryProxyPrimary
  dout[(!is.na(Value) | !is.na(impute)) &
         measuredElementSuaFbs == "loss" &
         !(measuredItemSuaFbs %chin% food_items &
             measuredItemSuaFbs %chin% primaryProxyPrimary_items),
       impute := 0
  ]
  
  
  
  fbsTree <- ReadDatatable("fbs_tree")
  
  # Remove imputation for seed of fruits and vegetables
  dout[(!is.na(Value) | !is.na(impute)) &
         measuredElementSuaFbs == "seed" &
         measuredItemSuaFbs %chin% fbsTree[id3 == "2918" | id3 == "2919"]$item_sua_fbs,
       impute := 0
  ]
  
  
  
  
  
  # Exclude feed the first time it needs to be generated, without saving
  # outliers to sua_unbalanced, as they have the E,e flag (which is
  # protected...). In other words, on a first run feed has not been generated yet.
  if (STOP_AFTER_DERIVED == TRUE) {
    dout <- dout[measuredElementSuaFbs != "feed"]
  }
  
  
  
  dout <-
    dout[
      !is.na(impute) & (is.na(Value) | round(Value, 1) != round(impute, 1)),
      list(
        geographicAreaM49,
        measuredItemSuaFbs,
        measuredElementSuaFbs,
        timePointYears,
        Value_imputed = impute
      )
    ]
  
  dout <- dout[timePointYears %in% yearVals]
  
  
  
  if (nrow(dout) > 0) {
    
    data <- dt_full_join(data, dout, by = sel_vars)
    
    data[
      !is.na(Value_imputed),
      `:=`(
        Value = Value_imputed,
        flagObservationStatus = "E",
        flagMethod = "e",
        Protected = FALSE)
    ]
    
    data_outliers <-
      data[!is.na(Value_imputed) &
             measuredElementSuaFbs %in% c("feed", "seed", "loss", "industrial", "tourist")]
    
    data_outliers[, Value := ifelse(Value == 0, NA_real_, Value)]
    
    data_outliers[, flagObservationStatus := ifelse(is.na(Value), NA_character_, flagObservationStatus)]
    # Bug fix: the original tested `Value == is.na(Value)`; the intended
    # condition is simply a missing Value, as in the line above.
    data_outliers[, flagMethod := ifelse(is.na(Value), NA_character_, flagMethod)]
    
    data[, Value_imputed := NULL]
    
  }
  
}





dbg_print("end outliers")

####################### / OUTLIERS #################################



##################### INDUSTRIAL-TOURISM ###############################

dbg_print("Fix tourism/industrial")

# data[, Protected_orig := Protected]
# data[flagObservationStatus == "E" & flagMethod == "e", Protected := TRUE]

# In the past, industrial contained what is now tourism =>
# remove tourism from industrial.
# XXX: it seems (actually, is) slow.
if (COUNTRY %in% NonSmallIslands) {
  ind_tour_tab <-
    data[
      measuredItemSuaFbs %chin% Utilization_Table[food_item == "X"]$cpc_code &
        timePointYears >= startYear &
        measuredElementSuaFbs %chin% c("industrial", "tourist") &
        !is.na(Value),
      .SD[.N == 2], # both industrial and tourism need to be non-missing
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    ]
  
  ind_tour_tab[,
               any_protected := any(Protected == TRUE),
               by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ]
  
  ind_tour_tab <- ind_tour_tab[any_protected == FALSE]
  
  ind_tour_tab[, any_protected := FALSE]
  
  ind_tour_tab <-
    ind_tour_tab[,
                 .(
                   indNoTour =
                     ifelse(sign(Value[measuredElementSuaFbs == "tourist"]) == 1,
                            Value[measuredElementSuaFbs == "industrial"] -
                              Value[measuredElementSuaFbs == "tourist"],
                            Value[measuredElementSuaFbs == "industrial"])
                 ),
                 by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    ]
  
  ind_tour_tab[indNoTour < 0, indNoTour := 0]
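  
  # Illustration (not executed): `.SD[.N == 2]` above keeps only groups with
  # exactly two rows, i.e. item/year combinations where both the industrial
  # and the tourist element are present, before tourism is netted out.
  if (FALSE) {
    toy <- data.table(
      item    = c("x", "x", "y"),
      element = c("industrial", "tourist", "industrial"),
      Value   = c(100, 30, 50)
    )
    toy[, .SD[.N == 2], by = "item"]  # item "y" is dropped
    # for item "x": indNoTour = 100 - 30 = 70
  }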
  
  data <-
    dt_left_join(
      data,
      ind_tour_tab,
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    )
  
  data[
    measuredElementSuaFbs == "industrial" &
      !is.na(indNoTour) &
      Protected == FALSE,
    `:=`(
      Value = indNoTour,
      # XXX: probably other flags (initially protected, then unprotected)
      flagObservationStatus = "E",
      flagMethod = "e"
    )
  ]
  
  data[, indNoTour := NULL]
}

# data[, Protected := Protected_orig]
# data[, Protected_orig := NULL]

##################### / INDUSTRIAL-TOURISM ###############################

data_ind_tour <-
  data[measuredElementSuaFbs %chin% c("industrial", "tourist")]

if (nrow(data_ind_tour) > 0) {
  
  setnames(data_ind_tour, "measuredItemSuaFbs", "measuredItemFbsSua")
  
  data_ind_tour <-
    data_ind_tour[!is.na(Value), names(data_to_save_unbalanced), with = FALSE]
  
  data_ind_tour[
    measuredElementSuaFbs == "industrial",
    measuredElementSuaFbs := "5165"
  ]
  
  data_ind_tour[
    measuredElementSuaFbs == "tourist",
    measuredElementSuaFbs := "5164"
  ]
  
  data_to_save_unbalanced <- rbind(data_to_save_unbalanced, data_ind_tour)
}

if (exists("data_outliers")) {
  setnames(data_outliers, c("measuredItemSuaFbs", "measuredElementSuaFbs"), c("measuredItemFbsSua", "name"))
  
  data_outliers <- dt_left_join(data_outliers, codes, by = "name")
  
  data_outliers <- data_outliers[, names(data_to_save_unbalanced), with = FALSE]
  
  data_to_save_unbalanced <- rbind(data_to_save_unbalanced, data_outliers)
}

data_to_save_unbalanced <- data_to_save_unbalanced[timePointYears >= startYear]

out <-
  SaveData(
    domain = "suafbs",
    dataset = "sua_unbalanced",
    data = data_to_save_unbalanced,
    waitTimeout = 20000
  )

if (STOP_AFTER_DERIVED == TRUE) {
  dbg_print("stop after production of derived")
  
  if (exists("out")) {
    
    print(paste(out$inserted + out$ignored, "derived products written"))
    
    if (!CheckDebug()) {
      send_mail(
        from = "do-not-reply@fao.org",
        to = swsContext.userEmail,
        subject = "Production of derived items created",
        body = paste("The plugin stopped after production of derived items. A file with shareDownUp is available in", sub("/work/SWS_R_Share/", "", shareDownUp_file))
        
      )
    }
    
  } else {
    print("The SUA_bal_compilation plugin had a problem when saving derived data.")
  }
  
  stop("Plugin stopped after derived, as requested. This is fine.")
}


## CLEAN sua_balanced
message("wipe sua_balanced session")
sessionKey_suabal = swsContext.datasets[[1]]
CONFIG <- GetDatasetConfig(sessionKey_suabal@domain, sessionKey_suabal@dataset)
datatoClean=GetData(sessionKey_suabal)
datatoClean=datatoClean[timePointYears%in%yearVals]
datatoClean[, Value := NA_real_]
datatoClean[, CONFIG$flags := NA_character_]
SaveData(CONFIG$domain, CONFIG$dataset , data = datatoClean, waitTimeout = Inf)


computed_shares_send <- rbindlist(computed_shares_send, fill = TRUE)

# XXX
computed_shares_send <- unique(computed_shares_send)

colnames(computed_shares_send) <-
  sub("Value_", "", colnames(computed_shares_send))

setnames(
  computed_shares_send,
  c("timePointYears", "measuredItemSuaFbs", "measuredItemParentCPC"),
  c("year", "measuredItemChildCPC_tree", "measuredItemParentCPC_tree")
)

computed_shares_send <-
  nameData(
    "suafbs",
    "ess_fbs_commodity_tree2",
    computed_shares_send
  )

setnames(
  computed_shares_send,
  c("geographicAreaM49", "geographicAreaM49_description",
    "measuredItemChildCPC_tree", "measuredItemChildCPC_tree_description",
    "measuredItemParentCPC_tree", "measuredItemParentCPC_tree_description"),
  c("Country", "Country_name", "Child", "Child_name", "Parent", "Parent_name")
)

computed_shares_send[, zero_weigth := Child %chin% zeroWeight]

computed_shares_send[, `:=`(Child = paste0("'", Child), Parent = paste0("'", Parent))]

negative_availability <- rbindlist(negative_availability)

if (nrow(negative_availability) > 0) {
  
  # FIXME: stocks may be generated twice for parents in multiple levels
  # (though the resulting figures are the same). Fix in the prod deriv loop.
  negative_availability <- unique(negative_availability)
  
  negative_availability_var <-
    negative_availability[,
                          .(
                            country,
                            year,
                            measuredItemFbsSua,
                            element = "availability",
                            Value = availability,
                            flagObservationStatus = "I",
                            flagMethod = "i"
                          )
    ]
  
  negative_availability[, availability := NULL]
  
  negative_availability <-
    rbind(
      negative_availability,
      unique(negative_availability_var) #FIXME
    )
  
  negative_availability <-
    data.table::dcast(
      negative_availability,
      country + measuredItemFbsSua + year ~ element,
      value.var = "Value"
    )
  
  negative_availability <-
    nameData("suafbs", "sua_unbalanced", negative_availability)
  
  negative_availability[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]
  
}



message("Line 4233")

fbsTree <- ReadDatatable("fbs_tree")

######################## rm new loss in dairy/meat #################

# dairy_meat_items <- fbsTree[id3 %chin% c("2948", "2943"), item_sua_fbs]
# 
# # TODO: Some of the code below is repeated. It should be rewritten so
# # that there is a single computation of new elements.
# 
# data_complete_loss <-
#   data.table(
#     geographicAreaM49 = unique(data$geographicAreaM49),
#     timePointYears = sort(unique(data$timePointYears)))[
#       unique(
#         data[
#           measuredElementSuaFbs == "loss" & !is.na(Value),
#           c("geographicAreaM49", "measuredItemSuaFbs"),
#           with = FALSE
#         ]
#       ),
#       on = "geographicAreaM49",
#       allow.cartesian = TRUE
#     ]
# 
# data_complete_loss <-
#   dt_left_join(
#     data_complete_loss,
#     data[
#       measuredElementSuaFbs == "loss" & Value > 0,
#       c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "Value"),
#       with = FALSE
#     ],
#     by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
#   )
# 
# data_complete_loss[, y := 1]
# 
# 
# data_complete_loss <-
#   data_complete_loss[,
#                      .(
#                        t_pre   = sum(y[timePointYears < startYear]),
#                        t_post  = sum(y[timePointYears >= startYear]),
#                        na_pre  = sum(is.na(Value[timePointYears < startYear])),
#                        na_post = sum(is.na(Value[timePointYears >= startYear]))
#                      ),
#                      by = c("geographicAreaM49", "measuredItemSuaFbs")
#   ]
# 
# 
# new_elements_loss <-
#   data_complete_loss[
#     na_pre == t_pre & na_post < t_post
#   ][
#     order(geographicAreaM49, measuredItemSuaFbs),
#     c("geographicAreaM49", "measuredItemSuaFbs"),
#     with = FALSE
#   ]
# 
# new_loss_dairy_meat <-
#   new_elements_loss[measuredItemSuaFbs %chin% dairy_meat_items]
# 
# new_loss_dairy_meat[, new_loss_dm := TRUE]
# 
# data <-
#   new_loss_dairy_meat[data, on = c("geographicAreaM49", "measuredItemSuaFbs")]
# 
# data[
#   new_loss_dm == TRUE &
#     timePointYears >= startYear &
#     measuredElementSuaFbs == "loss" &
#     Protected == FALSE,
#   `:=`(
#     Value = NA_real_,
#     flagObservationStatus = NA_character_,
#     flagMethod = NA_character_
#   )
# ]
# 
# data[, new_loss_dm := NULL]


######################## END / rm new loss in dairy/meat #################




# Protect all loss data, to keep it consistent with SDG indicator,
# whatever the flag is.
data[measuredElementSuaFbs == "loss", Protected := TRUE]



data <- dt_left_join(data, itemMap, by = "measuredItemSuaFbs")


## Split data based on the two factors we need to loop over
uniqueLevels <- unique(data[, c("geographicAreaM49", "timePointYears"), with = FALSE])

uniqueLevels <- uniqueLevels[order(geographicAreaM49, timePointYears)]

data[, stockable := measuredItemSuaFbs %chin% stockable_items]


## Remove stocks for non stockable items
#data <- data[!(measuredElementSuaFbs == 'stockChange' &
#               stockable == FALSE &
#               flagObservationStatus != "E" &
#               flagMethod != "f")]

data <- data[measuredElementSuaFbs != 'residual']

# Tourism consumption will be in the data only for selected countries
# and needs to be protected
data[measuredElementSuaFbs == "tourist", Protected := TRUE]


######################### Save UNB for validation #######################

sua_unbalanced <-
  data[,
       c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs",
         "measuredElementSuaFbs", "Value", "flagObservationStatus", "flagMethod"),
       with = FALSE
  ]

sua_unbalanced_aux <-
  sua_unbalanced[,
                 .(
                   supply =
                     sum(Value[measuredElementSuaFbs %chin% c("production", "imports")],
                         - Value[measuredElementSuaFbs %chin% c("exports", "stockChange")],
                         na.rm = TRUE),
                   utilizations =
                     sum(Value[!(measuredElementSuaFbs %chin%
                                   c("production", "imports", "exports", "stockChange"))],
                         na.rm = TRUE)
                 ),
                 by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ][,
    imbalance := supply - utilizations
  ][
    supply > 0,
    imbalance_pct := imbalance / supply * 100
  ]
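
# Illustration (not executed; hypothetical single-item figures): the imbalance
# is supply minus utilizations, with
# supply = production + imports - exports - stockChange.
if (FALSE) {
  supply        <- 100 + 20 - 30 - 5   # = 85
  utilizations  <- 60 + 15             # e.g. food + feed
  imbalance     <- supply - utilizations     # 10
  imbalance_pct <- imbalance / supply * 100  # ~11.8%
}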

sua_unbalanced_aux <-
  melt(
    sua_unbalanced_aux,
    c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs"),
    variable.name = "measuredElementSuaFbs",
    value.name = "Value"
  )

sua_unbalanced_aux[, `:=`(flagObservationStatus = "I", flagMethod = "i")]

sua_unbalanced <- rbind(sua_unbalanced, sua_unbalanced_aux)


# saveRDS(
#   sua_unbalanced,
#   file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "sua_unbalanced.rds")
# )

######################### / Save UNB for validation #######################




data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  movsum_value := RcppRoll::roll_sum(shift(Value), 3, fill = 'extend', align = 'right'),
  by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
]

data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  mov_share := movsum_value / sum(movsum_value, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
]

# Impute share if missing
data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  mov_share := rollavg(mov_share),
  by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
]

# Set sum of shares = 1
data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  mov_share := mov_share / sum(mov_share, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
]
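
# Illustration (not executed): the shares above come from a 3-year trailing
# moving sum of the lagged series (shift() moves values one year back, so the
# current year never enters its own share), later normalized to sum to 1
# across utilizations. Minimal sketch of the rolling part, mirroring the call
# above; edge values (positions 1-3 here) depend on the fill behaviour.
if (FALSE) {
  v <- c(10, 12, 11, 15, 14)
  RcppRoll::roll_sum(data.table::shift(v), 3, fill = 'extend', align = 'right')
  # position 4 uses years 1-3: 10 + 12 + 11 = 33
}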


data[is.na(Official), Official := FALSE]
data[is.na(Protected), Protected := FALSE]


# Filter elements that appear for the first time

sel_vars <-
  c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")

data_complete <-
  data.table(
    geographicAreaM49 = unique(data$geographicAreaM49),
    timePointYears = sort(unique(data$timePointYears))
  )[
    unique(data[, sel_vars, with = FALSE]),
    on = "geographicAreaM49",
    allow.cartesian = TRUE
  ]

sel_vars <-
  c("geographicAreaM49", "measuredItemSuaFbs",
    "measuredElementSuaFbs", "timePointYears")

data_complete <-
  dt_left_join(
    data_complete,
    data[Value > 0, c(sel_vars, "Value"), with = FALSE],
    by = sel_vars
  )

data_complete[, y := 1]

data_complete <-
  data_complete[,
                .(
                  t_pre   = sum(y[timePointYears < startYear]),
                  t_post  = sum(y[timePointYears >= startYear]),
                  na_pre  = sum(is.na(Value[timePointYears < startYear])),
                  na_post = sum(is.na(Value[timePointYears >= startYear]))
                ),
                by = c("geographicAreaM49", "measuredElementSuaFbs", "measuredItemSuaFbs")
  ]

new_elements <-
  data_complete[
    na_pre == t_pre & na_post < t_post
  ][
    order(geographicAreaM49, measuredElementSuaFbs, measuredItemSuaFbs),
    .(geographicAreaM49, measuredElementSuaFbs, measuredItemFbsSua = measuredItemSuaFbs)
  ]

new_loss <-
  new_elements[
    measuredElementSuaFbs == "loss",
    .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua, new_loss = TRUE)
  ]

new_elements <- nameData("suafbs", "sua_unbalanced", new_elements, except = "measuredElementSuaFbs")

new_elements[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

tmp_file_new <- file.path(TMP_DIR, paste0("NEW_ELEMENTS_", COUNTRY, ".csv"))

write.csv(new_elements, tmp_file_new)

# / Filter elements that appear for the first time


dbg_print("set thresholds")


if (THRESHOLD_METHOD == 'share') {
  
  dbg_print("thresholds, share")
  # We base thresholds on SUA balanced data
  suabalMinMax = copy(data_suabal)
  
  suabalMinMax <- merge(suabalMinMax, codes, by = "measuredElementSuaFbs")
  
  suabalMinMax[, measuredElementSuaFbs := name]
  
  suabalMinMax[, name := NULL]
  
  setnames(suabalMinMax, "measuredItemFbsSua", "measuredItemSuaFbs")
  
  
  suabalMinMax[,
               supply :=
                 sum(
                   Value[measuredElementSuaFbs %in% c('production', 'imports')],
                   - Value[measuredElementSuaFbs %in% c('exports', 'stockChange')],
                   na.rm = TRUE
                 ),
               by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
               ]
  
  # NOTE: here we redefine what "supply" is just for seed.
  
  suabalMinMax[,
               supply := ifelse(measuredElementSuaFbs == "seed", Value[measuredElementSuaFbs == "production"], supply),
               by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
               ]
  
  suabalMinMax[supply < 0, supply := 0]
  
  suabalMinMax[
    !(measuredElementSuaFbs %chin% c("production", "imports", "exports", "stockChange")),
    util_share := Value / supply
    ]
  
  suabalMinMax[is.infinite(util_share) | is.nan(util_share), util_share := NA_real_]
  
  # share < 0 shouldn't happen. Also, tourist can be negative.
  suabalMinMax[util_share < 0 & measuredElementSuaFbs != "tourist", util_share := 0]
  
  suabalMinMax[util_share > 1, util_share := 1]
  
  suabalMinMax[,
               `:=`(
                 min_util_share = min(util_share[timePointYears %in% refYear], na.rm = TRUE),
                 max_util_share = max(util_share[timePointYears %in% refYear], na.rm = TRUE)
               ),
               by = c("measuredItemSuaFbs", "measuredElementSuaFbs", "geographicAreaM49")
               ]
  
  suabalMinMax[is.infinite(min_util_share) | is.nan(min_util_share), min_util_share := NA_real_]
  
  suabalMinMax[is.infinite(max_util_share) | is.nan(max_util_share), max_util_share := NA_real_]
  
  suabalMinMax[max_util_share > 1, max_util_share := 1]
  
  data = merge(data, unique(suabalMinMax[,.(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, 
                                            min_util_share, max_util_share)]), 
               by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs"), all.x = T)
  
  
  data[,
       supply :=
         sum(
           Value[measuredElementSuaFbs %in% c('production', 'imports')],
           - Value[measuredElementSuaFbs %in% c('exports', 'stockChange')],
           na.rm = TRUE
         ),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  # NOTE: here we redefine what "supply" is just for seed.
  
  data[,
       supply := ifelse(measuredElementSuaFbs == "seed", Value[measuredElementSuaFbs == "production"], supply),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  data[supply < 0, supply := 0]
  
  # data[
  #   !(measuredElementSuaFbs %chin% c("production", "imports", "exports", "stockChange")),
  #   util_share := Value / supply
  # ]
  # 
  # data[is.infinite(util_share) | is.nan(util_share), util_share := NA_real_]
  # 
  # # share < 0 shouldn't happen. Also, tourist can be negative.
  # data[util_share < 0 & measuredElementSuaFbs != "tourist", util_share := 0]
  # 
  # data[util_share > 1, util_share := 1]
  
  # data[,
  #      `:=`(
  #        min_util_share = min(util_share[timePointYears %in% 2000:(startYear-1)], na.rm = TRUE),
  #        max_util_share = max(util_share[timePointYears %in% 2000:(startYear-1)], na.rm = TRUE)
  #      ),
  #      by = c("measuredItemSuaFbs", "measuredElementSuaFbs", "geographicAreaM49")
  # ]
  
  #data[is.infinite(min_util_share) | is.nan(min_util_share), min_util_share := NA_real_]
  
  #data[is.infinite(max_util_share) | is.nan(max_util_share), max_util_share := NA_real_]
  
  #data[max_util_share > 1, max_util_share := 1]
  
  data[,
       `:=`(
         min_threshold = supply * min_util_share,
         max_threshold = supply * max_util_share
       )
  ]
  
  data[, supply := NULL]
  
} else {
  stop("Invalid method.")
}


data[,
     `:=`(
       min_adj = min_threshold / Value,
       max_adj = max_threshold / Value
     )
]


data[min_adj > 1, min_adj := 0.9]
data[min_adj < 0, min_adj := 0.01]

data[max_adj < 1, max_adj := 1.1]
data[max_adj > 10, max_adj := 10] # XXX Too much?
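# Example (hypothetical figures): Value = 50 with min_threshold = 60 and
# max_threshold = 45 would give min_adj = 1.2 and max_adj = 0.9; the
# clamping above resets them to 0.9 and 1.1, so the allowed interval
# always brackets the current value.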

# Fix for new "loss" element. If loss gets in as a new utilization,
# remove the thresholds for food.

data <-
  dt_left_join(
    data,
    new_loss,
    by = c('geographicAreaM49', 'measuredItemSuaFbs')
  )

data[
  new_loss == TRUE & measuredElementSuaFbs == "food",
  `:=`(min_adj = 0, max_adj = 10)
] # XXX is 10 enough?

data[, new_loss := NULL]

# / Fix for new "loss" element. If loss gets in as a new utilization,
# / remove the thresholds for food.



# Recalculate the levels, given the recalculation of adj factors
data[, min_threshold := Value * min_adj]
data[, max_threshold := Value * max_adj]

# For protected figures, the min and max bounds must be "near" one (but not
# exactly one), so that protected values are effectively left unchanged
data[Protected == TRUE, min_adj := 0.999]
data[Protected == TRUE, max_adj := 1.001]

# TODO: should this fix also be applied in the optim function?
data[
  dplyr::near(min_adj, 1) & dplyr::near(max_adj, 1),
  `:=`(
    min_adj = 0.999,
    max_adj = 1.001
  )
]
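# e.g. a protected Value of 1000 ends up with bounds [999, 1001], so the
# balancing step can move it only by a negligible amount.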

data[,
     `:=`(
       Food_Median       = median(Value[measuredElementSuaFbs == "food" & timePointYears %in% (startYear-4):(startYear-1)], na.rm=TRUE),
       Feed_Median       = median(Value[measuredElementSuaFbs == "feed" & timePointYears %in% (startYear-4):(startYear-1)], na.rm=TRUE),
       Industrial_Median = median(Value[measuredElementSuaFbs == "industrial" & timePointYears %in% (startYear-4):(startYear-1)], na.rm=TRUE),
       Production_Median = median(Value[measuredElementSuaFbs == "production" & timePointYears %in% (startYear-4):(startYear-1)], na.rm=TRUE),
       Import_Median =  median(Value[measuredElementSuaFbs == "imports" & timePointYears %in% (startYear-4):(startYear-1)], na.rm=TRUE)
     ),
     by = c("geographicAreaM49", "measuredItemSuaFbs")
]
message("Line 4630")


z_comp_shares <- copy(computed_shares_send)

z_comp_shares[, Protected_exports := NULL]
z_comp_shares[, Protected_imports := NULL]
z_comp_shares[, Protected_production := NULL]
z_comp_shares[, Protected_stockChange := NULL]

# FIXME: fix above
z_comp_shares[, Parent := sub("'", "", Parent)]
z_comp_shares[, Child := sub("'", "", Child)]


setcolorder(
  z_comp_shares,
  c("Country", "Country_name", "Parent", "Parent_name",
    "Child", "Child_name", "year", "production", "imports", "exports",
    "stockChange", "extractionRate", "shareDownUp", "shareUpDown",
    "availability", "Pshare", "zero_weigth", "Processed_parent")
)

dbg_print("SHARES workbook, create")

wb <- createWorkbook()
addWorksheet(wb, "SHARES")
style_protected <- createStyle(fgFill = "pink")
style_formula <- createStyle(fgFill = "green")
style_text <- createStyle(numFmt = "TEXT")
style_comma <- createStyle(numFmt = "COMMA")
writeData(wb, "SHARES", z_comp_shares)

for (i in c("exports", "imports", "production", "stockChange")) {
  a_col <- grep(paste0("^", i, "$"), names(z_comp_shares))
  a_rows <- which(computed_shares_send[[paste0("Protected_", i)]] == TRUE) + 1
  
  if (all(length(a_col) > 0, length(a_rows) > 0)) {
    addStyle(wb, sheet = "SHARES", style_protected, rows = a_rows, cols = a_col)
  }
}

addStyle(wb, sheet = "SHARES", style_text, rows = 1:nrow(z_comp_shares) + 1, cols = grep("^(Parent|Child)$", names(z_comp_shares)), gridExpand = TRUE, stack = TRUE)

addFilter(wb, "SHARES", row = 1, cols = 1:ncol(z_comp_shares))

# Formulas
# Map each column name to its spreadsheet column letter
# (assumes the sheet has at most 26 columns, i.e. fits within columns a-z)
pos <- lapply(colnames(z_comp_shares), function(x) letters[which(colnames(z_comp_shares) == x)])
names(pos) <- colnames(z_comp_shares)
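# Illustration on a toy table (hypothetical columns; uncomment to try):
# demo <- data.table(Parent = "x", production = 1, shareUpDown = 0.5)
# pos_demo <- setNames(as.list(letters[seq_along(demo)]), names(demo))
# paste0(pos_demo$production, 2)  # "b2", i.e. the cell for sheet row 2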

sheet_rows   <- 1 + seq_len(nrow(z_comp_shares))
frml_start <- which(colnames(z_comp_shares) == "Processed_parent") + 2

z_comp_shares[, frml_prot_no_zw := computed_shares_send$Protected_production == TRUE & zero_weigth == FALSE]
z_comp_shares[, frml_prot_no_zw_zero_updown := computed_shares_send$Protected_production == TRUE & zero_weigth == FALSE & dplyr::near(shareUpDown, 0)]
z_comp_shares[, prot := computed_shares_send$Protected_production]
z_comp_shares[, frml_n_prot_zero_updown := sum(prot == TRUE & dplyr::near(shareUpDown, 0) == TRUE), by = c("Parent", "year")]
z_comp_shares[, frml_n_prot := sum(prot == TRUE), by = c("Parent", "year")]
z_comp_shares[, prot := NULL]
z_comp_shares[, frml_multi_parent := length(unique(Parent)) > 1, by = c("Child", "year")]
z_comp_shares[, frml_any_prot_zero_updown := any(frml_prot_no_zw_zero_updown == TRUE), by = c("Parent", "year")]

z_comp_shares[, frml_process_parent_num := paste0(pos$production, sheet_rows, "*", pos$shareDownUp, sheet_rows, "/", pos$extractionRate, sheet_rows)]
z_comp_shares[, frml_process_parent_den := paste0(pos$shareUpDown, sheet_rows)]

z_comp_shares[, frml_any_prot := any(frml_prot_no_zw == TRUE), by = c("Parent", "year")]

z_comp_shares[, frml_process_parent := paste0(pos$availability, sheet_rows, "*", pos$Pshare, sheet_rows)]

z_comp_shares[,
              frml_process_parent :=
                dplyr::case_when(
                  frml_any_prot_zero_updown == TRUE & !is.na(frml_n_prot) & !is.na(frml_n_prot_zero_updown) & frml_n_prot == frml_n_prot_zero_updown  ~ frml_process_parent,
                  frml_any_prot == TRUE             ~ paste("(", paste(frml_process_parent_num[frml_prot_no_zw == TRUE], collapse = " + "), ") / (", paste(frml_process_parent_den[frml_prot_no_zw == TRUE], collapse = " + "), ")"),
                  TRUE                              ~ frml_process_parent
                ),
              by = c("Parent", "year")
]

#z_comp_shares[, frml_process_parent := paste0(pos$availability, sheet_rows, "*", pos$Pshare, sheet_rows)]
#z_comp_shares[frml_any_prot == TRUE, frml_process_parent := paste("(", paste(frml_process_parent_num[frml_prot_no_zw == TRUE], collapse = " + "), ") / (", paste(frml_process_parent_den[frml_prot_no_zw == TRUE], collapse = " + "), ")"), by = c("Parent", "year")]
## XXX Here we should actually check that it is protected AND zero shareUpDown for just ONE row
#z_comp_shares[frml_any_prot_zero_updown == TRUE, frml_process_parent := paste(frml_process_parent[frml_prot_no_zw == TRUE], collapse = " + "), by = c("Parent", "year")]

z_comp_shares[, frml_prod := paste0(letters[frml_start], sheet_rows, "*", pos$extractionRate, sheet_rows, "*", pos$shareUpDown, sheet_rows)]
z_comp_shares[frml_multi_parent == TRUE, frml_prod := paste(frml_prod, collapse = " + "), by = c("Child", "year")]

z_comp_shares[, frml_updown := paste0(pos$shareUpDown, sheet_rows)]
z_comp_shares[, frml_check_updown := paste(frml_updown[zero_weigth == FALSE], collapse = " + "), by = c("Parent", "year")]

z_comp_shares[, frml_downup := paste0(pos$shareDownUp, sheet_rows)]
z_comp_shares[, frml_check_downup := paste(frml_downup[zero_weigth == FALSE], collapse = " + "), by = c("Child", "year")]

frml_prod           <- z_comp_shares$frml_prod
frml_process_parent <- z_comp_shares$frml_process_parent
frml_check_updown   <- z_comp_shares$frml_check_updown
frml_check_downup   <- z_comp_shares$frml_check_downup

z_comp_shares[, names(z_comp_shares)[grepl("frml_", names(z_comp_shares))] := NULL]

writeFormula(wb, sheet = "SHARES", x = frml_process_parent, startCol = frml_start + 0, startRow = 2)
writeFormula(wb, sheet = "SHARES", x = frml_prod,           startCol = frml_start + 1, startRow = 2)
writeFormula(wb, sheet = "SHARES", x = frml_check_updown,   startCol = frml_start + 2, startRow = 2)
writeFormula(wb, sheet = "SHARES", x = frml_check_downup,   startCol = frml_start + 3, startRow = 2)

writeData(wb, "SHARES", cbind("PROCESSED_PARENT", "PRODUCTION", "SUM UPdown", "SUM DOWNup"), startCol = frml_start, colNames = FALSE)
addStyle(wb, sheet = "SHARES", style_formula, rows = 1, cols = frml_start:(frml_start + 3))

addStyle(wb, sheet = "SHARES", style_comma, rows = 1:nrow(z_comp_shares) + 1, cols = c(grep("^(production|imports|exports|stockChange|availability|Processed_parent)$", names(z_comp_shares)), frml_start:(frml_start + 3)), gridExpand = TRUE, stack = TRUE)

dbg_print(paste("SHARES workbook, save", getwd(), tmp_file_shares))
message("Line 5102")

saveWorkbook(wb, tmp_file_shares, overwrite = TRUE)

# For testing purposes:
#send_mail(
#  from = "do-not-reply@fao.org",
#  to = swsContext.userEmail,
#  subject = "Results from SUA_bal_compilation plugin",
#  body = c("XXXXXXX", tmp_file_shares))
#
##stop("HI!!!!!!!!!!")

## 1 => year = 2014
#i <- 1

dbg_print("starting balancing loop")

# XXX Keep only years from startYear onwards
uniqueLevels <- uniqueLevels[timePointYears >= startYear][order(timePointYears)]

standData <- vector(mode = "list", length = nrow(uniqueLevels))

for (i in seq_len(nrow(uniqueLevels))) {
  
  dbg_print(paste("in balancing loop, start", i))
  
  # For stocks, the first year does not need to look back in time. After the
  # first year has been processed, stocks may have changed, so opening stocks
  # need to be updated in "data".
  
  if (i > 1) {
    dbg_print(paste("check stocks change in balancing", i))
    
    items_stocks_changed <-
      unique(standData[[i-1]][!is.na(change_stocks)]$measuredItemSuaFbs)
    
    if (length(items_stocks_changed) > 0) {
      
      dbg_print(paste("recalculate stocks changed", i))
      
      stocks_modif <-
        rbind(
          # Previous data (balanced)
          rbindlist(standData)[
            !is.na(Value) &
              measuredElementSuaFbs == 'stockChange' &
              measuredItemSuaFbs %chin% items_stocks_changed,
            list(geographicAreaM49, timePointYears, measuredItemSuaFbs, delta = Value)
          ],
          # New data (unbalanced)
          data[
            !is.na(Value) &
              timePointYears > unique(standData[[i-1]]$timePointYears) &
              measuredElementSuaFbs == 'stockChange' &
              measuredItemSuaFbs %chin% items_stocks_changed,
            list(geographicAreaM49, timePointYears, measuredItemSuaFbs, delta = Value)
          ]
        )
      
      data_for_opening <-
        dt_left_join(
          all_opening_stocks[
            timePointYears >= startYear &
              measuredItemFbsSua %chin% items_stocks_changed,
            .(geographicAreaM49,  measuredItemSuaFbs =  measuredItemFbsSua,
              timePointYears, new_opening = Value)
          ],
          stocks_modif,
          by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
        )
      
      data_for_opening[is.na(delta), delta := 0]
      
      data_for_opening <- data_for_opening[order(geographicAreaM49, measuredItemSuaFbs, timePointYears)]
      
      dbg_print(paste("update opening stocks", i))
      
      data_for_opening <- update_opening_stocks(data_for_opening)
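      # A toy sketch of the roll-forward this step relies on (an assumption
      # about update_opening_stocks, which is sourced from R/; uncomment to try):
      # x <- data.table(timePointYears = 2014:2016,
      #                 new_opening = c(100, NA, NA), delta = c(10, -5, 0))
      # x[, new_opening := new_opening[1] + cumsum(shift(delta, fill = 0))]
      # # opening: 100 (2014), 110 (2015), 105 (2016)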
      
      dbg_print(paste("merge data in opening stocks", i))
      
      all_opening_stocks <-
        dt_left_join(
          all_opening_stocks,
          data_for_opening[,
                           .(
                             geographicAreaM49,
                             measuredItemFbsSua = measuredItemSuaFbs,
                             timePointYears,
                             new_opening
                           )
          ],
          by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
        )
      
      dbg_print(paste("new_opening", i))
      
      all_opening_stocks[
        !is.na(new_opening) & (round(new_opening) != round(Value) | is.na(Value)),
        `:=`(
          Value = new_opening,
          flagObservationStatus = "E",
          flagMethod = "u",
          Protected = FALSE
        )
      ]
      
      all_opening_stocks[, new_opening := NULL]
    }
  }
  
  filter <- uniqueLevels[i, ]
  
  sel_vars <- c("geographicAreaM49", "timePointYears")
  
  treeSubset <- tree[filter, on = sel_vars]
  
  treeSubset[, (sel_vars) := NULL]
  
  dataSubset <- data[filter, on = sel_vars]
  
  dbg_print(paste("actual balancing", i))
  
  standData[[i]] <- 
    newBalancing(
      data = dataSubset,
      #nutrientData = subNutrientData,
      #batchnumber = batchnumber,
      Utilization_Table = Utilization_Table
    )
  
  # FIXME: we now assign the "Protected" flag to ALL foodManufacturing values:
  # after the first loop it should have been computed, and that value SHOULD
  # never be touched again.
  standData[[i]][measuredElementSuaFbs == "foodManufacturing", Protected := TRUE]
  
  dbg_print(paste("in balancing loop, end", i))
}

all_opening_stocks[, measuredElementSuaFbs := "5113"]

dbg_print("end of balancing loop")

standData <- rbindlist(standData)

calculateImbalance(standData)

standData[
  supply > 0,
  imbalance_percent := imbalance / supply * 100
]

# If the imbalance is relatively small (less than 5% in absolute value),
# a new allocation is done, this time with no limits.

# Here we need to protect seed, because of its link to production.
standData[measuredElementSuaFbs == "seed", Protected := TRUE]

dbg_print("balancing of imbalances < 5%")

standData[, small_imb := FALSE]

standData[
  data.table::between(imbalance_percent, -5, -0.01) |
    data.table::between(imbalance_percent, 0.01, 5),
  small_imb := TRUE
]

if (nrow(standData[small_imb == TRUE]) > 0) {
  
  standData[, can_balance := FALSE]
  
  # The following two instructions effectively allocate the (small)
  # imbalance with no limits (except for food and seed, among utilizations)
  
  sel_vars <- c("production", "imports", "exports", "stockChange", "food", "seed")
  
  standData[
    measuredElementSuaFbs %!in% sel_vars &
      Protected == FALSE &
      !is.na(min_threshold),
    min_threshold := 0
  ]
  
  standData[
    measuredElementSuaFbs %!in% sel_vars &
      Protected == FALSE &
      !is.na(max_threshold),
    max_threshold := Inf
  ]
  
  # can_balance when the value lies strictly within its thresholds, or when
  # the thresholds are NA (the `%in% FALSE` trick treats NA as "not outside")
  standData[
    !is.na(Value) &
      Protected == FALSE &
      !(data.table::between(Value, min_threshold, max_threshold, incbounds = FALSE) %in% FALSE) &
      !(measuredElementSuaFbs %chin%
          c("production", "imports", "exports", "stockChange", "foodManufacturing", "seed")),
    can_balance := TRUE
  ]
  
  standData[,
            elements_balance := any(can_balance),
            by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  if (nrow(standData[small_imb == TRUE & elements_balance == TRUE]) > 0) {
    
    standData[
      small_imb == TRUE & elements_balance == TRUE,
      adjusted_value := balance_proportional(.SD),
      by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
    ]
    
    standData[
      !is.na(adjusted_value) & adjusted_value != Value,
      `:=`(
        Value = adjusted_value,
        flagObservationStatus = "E",
        flagMethod = "n"
      )
    ]
    
    standData[, adjusted_value := NULL]
  }
}
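# A toy sketch of pro-rata allocation (an assumption about what
# balance_proportional, sourced from R/, does; not its actual code):
# imb <- 10; vals <- c(food = 60, feed = 40)
# vals + imb * vals / sum(vals)  # food 66, feed 44: imbalance split pro rata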

calculateImbalance(standData)

standData[
  supply > 0,
  imbalance_percent := round(imbalance / supply * 100, 2)
]

standData[, `:=` (imbalance = round(imbalance), utilizations = round(utilizations),
                  supply = round(supply))
]


######################### Save BAL for validation #######################

dbg_print("sua_balanced for validation")

sel_vars <-
  c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs",
    "measuredElementSuaFbs", "Value", "flagObservationStatus", "flagMethod")

sua_balanced <-
  rbind(
    data[timePointYears < startYear, sel_vars, with = FALSE],
    standData[, sel_vars, with = FALSE]
  )

sua_balanced_aux <-
  sua_balanced[,
               .(
                 supply =
                   sum(Value[measuredElementSuaFbs %chin% c("production", "imports")],
                       - Value[measuredElementSuaFbs %chin% c("exports", "stockChange")],
                       na.rm = TRUE),
                 utilizations =
                   sum(Value[!(measuredElementSuaFbs %chin%
                                 c("production", "imports", "exports", "stockChange"))],
                       na.rm = TRUE)
               ),
               by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ][,
    imbalance := supply - utilizations
  ][
    supply > 0,
    imbalance_pct := imbalance / supply * 100
  ]

sua_balanced_aux <-
  melt(
    sua_balanced_aux,
    c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs"),
    variable.name = "measuredElementSuaFbs",
    value.name = "Value"
  )

sua_balanced_aux[, `:=`(flagObservationStatus = "I", flagMethod = "i")]

sua_balanced <- rbind(sua_balanced, sua_balanced_aux)

# saveRDS(
#   sua_balanced,
#   file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "sua_balanced.rds")
# )


######################### / Save BAL for validation #######################


# saveRDS(
#   computed_shares_send,
#   file.path(R_SWS_SHARE_PATH, 'mongeau', paste0('computed_shares_send_', COUNTRY, '.rds'))
# )



imbalances <-
  unique(
    standData[,
              list(
                geographicAreaM49,
                measuredElementSuaFbs = "5166",
                measuredItemFbsSua = measuredItemSuaFbs,
                timePointYears,
                Value = imbalance,
                flagObservationStatus = "I",
                flagMethod = "i",
                Protected = FALSE
              )
    ]
  )

imbalances_to_send <-
  standData[
    !is.na(Value) &
      outside(imbalance, -100, 100) &
      timePointYears >= startYear,
    .(country = geographicAreaM49, year = timePointYears, measuredItemSuaFbs,
      element = measuredElementSuaFbs, value = round(Value),
      flag = paste(flagObservationStatus, flagMethod, sep = ","))
  ]

if (nrow(imbalances_to_send) > 0) {
  
  imbalances_to_send <-
    data.table::dcast(
      imbalances_to_send,
      country + measuredItemSuaFbs + year ~ element,
      value.var = c("value", "flag")
    )
  
  names(imbalances_to_send) <- sub("value_", "", names(imbalances_to_send))
  
  imbalances_to_send <-
    dt_left_join(
      imbalances_to_send,
      unique(standData[, .(country = geographicAreaM49, year = timePointYears,
                           measuredItemSuaFbs, supply, utilizations, imbalance,
                           imbalance_percent)]),
      by = c("country", "year", "measuredItemSuaFbs")
    )
  
  d_imbal_info <-
    imbalances_to_send[,
                       .(country, year, measuredItemSuaFbs,
                         supply = round(supply, 2),
                         imbalance = round(imbalance, 2),
                         perc_imb = round(abs(imbalance / supply) * 100, 2))
    ]
  
  imbalances_info <-
    c(
      all_items = nrow(unique(standData[, .(geographicAreaM49, timePointYears, measuredItemSuaFbs)])),
      imb_tot = nrow(d_imbal_info),
      imb_pos_supply = nrow(d_imbal_info[supply > 0]),
      imb_gt_5percent = nrow(d_imbal_info[supply > 0][perc_imb > 5]),
      imb_avg_percent = d_imbal_info[supply > 0, mean(abs(perc_imb))]
    )
  
  setnames(imbalances_to_send, "measuredItemSuaFbs", "measuredItemFbsSua")
  
  imbalances_to_send <-
    nameData('suafbs', 'sua_unbalanced', imbalances_to_send,
             except = c('measuredElementSuaFbs'))
  
  # imbalances_to_send[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]
  
  data_negtrade <-
    imbalances_to_send[
      utilizations == 0 &
        imbalance < 0 &
        round(imbalance, 10) == round(supply, 10)
    ]
  
  data_negtrade[, imbalance_percent := NULL]
} else {
  imbalances_to_send <-
    data.table(
      info = c("Great, no imbalances!",
               "Well, at least not greater than 100 tonnes in absolute value")
    )
  
  data_negtrade <- imbalances_to_send
  
  imbalances_info <-
    c(
      all_items = 0,
      imb_tot = 0,
      imb_pos_supply = 0,
      imb_gt_5percent = 0,
      imb_avg_percent = 0
    )
}


non_existing_for_imputation <-
  data.table(measuredItemFbsSua = non_existing_for_imputation)

non_existing_for_imputation <-
  nameData('suafbs', 'sua_unbalanced', non_existing_for_imputation)

non_existing_for_imputation[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

# exclude Live animals from output files
itemMap <- GetCodeList(domain = "agriculture", dataset = "aproduction", "measuredItemCPC")
itemMap <- itemMap[, .(measuredItemSuaFbs = code, type)]

LivestockToExcludeFromFile = itemMap[type %in% c("LSPR", "POPR"), measuredItemSuaFbs]
if ("measuredItemFbsSua" %in% names(imbalances_to_send)) {
  imbalances_to_send = imbalances_to_send[measuredItemFbsSua %!in% LivestockToExcludeFromFile,]
  
}

# data_negtrade = data_negtrade[measuredItemFbsSua %!in% LivestockToExcludeFromFile,]
# negative_availability = negative_availability[measuredItemFbsSua %!in% LivestockToExcludeFromFile,]

wb <- createWorkbook() 
addWorksheet(wb, "imbalance") 
writeData(wb, "imbalance", imbalances_to_send) 
dbg_print(paste("IMBALANCE workbook, save", getwd(), tmp_file_imb)) 
saveWorkbook(wb, tmp_file_imb, overwrite = TRUE) 

# write.csv(imbalances_to_send,          tmp_file_imb)
#write.csv(data_negtrade,               tmp_file_NegNetTrade)
#write.csv(negative_availability,       tmp_file_negative)
#write.csv(non_existing_for_imputation, tmp_file_non_exist)
#write.csv(fixed_proc_shares,           tmp_file_fix_shares)



# FIXME: see also the one done for elementToCodeNames
codes <- as.data.table(tibble::tribble(
  ~code,  ~name,
  "5910", "exports",
  "5520", "feed",
  "5141", "food",
  "5023", "foodManufacturing",
  "5610", "imports",
  "5165", "industrial",
  "5016", "loss",
  "5510", "production",
  "5525", "seed",
  "5164", "tourist",
  "5071", "stockChange"
))

standData <- standData[codes, on = c('measuredElementSuaFbs' = 'name')]
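# Join semantics on toy data (uncomment to try):
# X <- data.table(measuredElementSuaFbs = c("food", "supply"), Value = 1:2)
# X[codes, on = c("measuredElementSuaFbs" = "name")]
# # "food" picks up code "5141"; "supply" (absent from `codes`) is dropped;
# # codes with no match in X come back with NA Value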


standData <-
  standData[,
            c("geographicAreaM49", "code", "measuredItemSuaFbs", "timePointYears",
              "Value", "flagObservationStatus", "flagMethod", "Protected"),
            with = FALSE
  ]

setnames(
  standData,
  c("code", "measuredItemSuaFbs"),
  c("measuredElementSuaFbs", "measuredItemFbsSua")
)

standData <- standData[!is.na(Value)]

# These cases should not happen (i.e., all flags should already be set),
# but in any case add specific flags so they can be checked.
standData[is.na(flagObservationStatus), flagObservationStatus := "M"]
standData[is.na(flagMethod), flagMethod := "q"]


# Calculate calories

calories_per_capita <-
  dt_left_join(
    # Food
    standData[
      measuredElementSuaFbs == '5141',
      list(
        geographicAreaM49,
        #measuredElementSuaFbs = "664",
        measuredItemFbsSua,
        timePointYears,
        food = Value,
        flagObservationStatus = "T",
        flagMethod = "i"
      )
    ],
    # Calories
    nutrientData[,
                 list(
                   geographicAreaM49,
                   measuredItemFbsSua = measuredItemCPC,
                   measuredElementSuaFbs = measuredElement,
                   timePointYears = timePointYearsSP,
                   nutrient = Value
                 )
    ],
    by = c('geographicAreaM49', 'timePointYears', 'measuredItemFbsSua'),
    allow.cartesian = TRUE
  )

calories_per_capita <-
  dt_left_join(
    calories_per_capita,
    popSWS[, list(geographicAreaM49, timePointYears, population = Value)],
    by = c('geographicAreaM49', 'timePointYears')
  )

# Food in grams per capita per day: tonnes -> grams (x 1e6),
# thousand persons -> persons (x 1000), 365 days per year
data_for_foodGrams <- calories_per_capita[measuredElementSuaFbs == "664"]
data_for_foodGrams[, Value := (food * 1000000) / (365 * population * 1000)]
data_for_foodGrams[, c("food", "nutrient", "population") := NULL]
data_for_foodGrams[, measuredElementSuaFbs := "665"]
data_for_foodGrams[, Protected := FALSE]

# Add population to the SUA as an item

data_pop <- popSWS[, measuredItemFbsSua := "F0001"]
data_pop[, `:=`(Protected = TRUE, flagObservationStatus = "T", flagMethod = "c")]

data_pop <-
  data_pop[
    timePointYears >= startYear,
    list(geographicAreaM49, measuredItemFbsSua, timePointYears,
         flagObservationStatus, flagMethod,
         measuredElementSuaFbs = measuredElement, Value, Protected)
  ]

calories_per_capita_total <- copy(calories_per_capita)

# Per-capita nutrient values per day: food in tonnes, nutrient per 100 g,
# population in thousand persons
calories_per_capita[, Value := food * nutrient / population / 365 * 10]
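# Worked example (hypothetical figures): food = 36500 tonnes/year,
# nutrient = 250 kcal per 100 g, population = 1000 (thousand persons):
# 36500 * 250 / 1000 / 365 * 10 = 250 kcal per capita per day
# (36500 t/year is 100 g per person per day for 1 million people).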

calories_per_capita[, Protected := FALSE]

calories_per_capita[, c("food", "nutrient", "population") := NULL]

calories_per_capita_total[, Value := food * nutrient / 100]

calories_per_capita_total[, Protected := FALSE]

calories_per_capita_total[, c("food", "nutrient", "population") := NULL]

calories_per_capita_total[measuredElementSuaFbs == "664", measuredElementSuaFbs := "261"]

calories_per_capita_total[measuredElementSuaFbs == "674", measuredElementSuaFbs := "271"]

calories_per_capita_total[measuredElementSuaFbs == "684", measuredElementSuaFbs := "281"]

calories_per_capita <-
  rbind(calories_per_capita, calories_per_capita_total, data_for_foodGrams, data_pop)

standData <-
  rbind(
    standData,
    imbalances,
    all_opening_stocks,
    calories_per_capita
  )

standData <- standData[timePointYears >= startYear & !is.na(Value)]

standData[, Protected := NULL]



######### Combine old and new calories and data, and get outliers #############

combined_calories <-
  rbind(
    data_suabal[measuredElementSuaFbs == "664" & timePointYears < startYear],
    calories_per_capita[, names(data_suabal), with = FALSE]
  )

combined_calories <-
  combined_calories[order(geographicAreaM49, measuredItemFbsSua, timePointYears)]

combined_calories <-
  combined_calories[,
                    `:=`(
                      perc.change = (Value / shift(Value) - 1) * 100,
                      Historical_value_2010_2013 = mean(Value[timePointYears < startYear], na.rm = TRUE)
                    ),
                    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]

combined_calories[, outlier := ""]

combined_calories[
  (shift(Value) > 5 | Value > 5) & abs(perc.change) > 10 & timePointYears >= startYear,
  outlier := "OUTLIER",
  by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
]
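# e.g. a jump from 20 to 23 kcal in a compilation year (perc.change = 15%)
# is flagged; a move from 2 to 4 kcal is not, since neither value exceeds 5.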

combined_calories <-
  combined_calories[
    unique(combined_calories[outlier == "OUTLIER", .(geographicAreaM49, measuredItemFbsSua)]),
    on = c("geographicAreaM49", "measuredItemFbsSua")
  ]

old_sua_bal <- data_suabal[timePointYears < startYear]

old_sua_bal <-
  old_sua_bal[,
              supply :=
                sum(
                  Value[measuredElementSuaFbs %chin% c("5510", "5610")],
                  - Value[measuredElementSuaFbs %chin% c("5910", "5071")],
                  na.rm = TRUE
                ),
              by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
  ]

old_sua_bal <-
  old_sua_bal[
    measuredElementSuaFbs %chin%
      c("5510", "5023", "5016", "5165", "5520", "5525", "5164", "5141")
  ]

old_sua_bal <-
  old_sua_bal[,
              .(
                mean_ratio = mean(Value / supply, na.rm = TRUE),
                Meanold = mean(Value, na.rm = TRUE)
              ),
              by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]

new_sua_bal <-
  dt_left_join(
    standData,
    old_sua_bal,
    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  )

new_sua_bal[
  measuredElementSuaFbs %chin%
    c("5510", "5023", "5016", "5165", "5520", "5525", "5164", "5141"),
  outlier := outside(Value / Meanold, 0.5, 2)
]

new_sua_bal[,
            supply :=
              sum(
                Value[measuredElementSuaFbs %chin% c("5510", "5610")],
                - Value[measuredElementSuaFbs %chin% c("5910", "5071")],
                na.rm = TRUE
              ),
            by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
]

new_sua_bal[, outl_on_supp := abs(Value / supply - mean_ratio) >= 0.1]
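# e.g. an element that was historically 20% of supply (mean_ratio = 0.2) but
# now stands at 35% shows a 15-point jump, so outl_on_supp becomes TRUE.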

new_sua_bal <-
  new_sua_bal[
    measuredElementSuaFbs %chin%
      c("5510", "5023", "5016", "5165", "5520", "5525", "5164", "5141")
  ]

new_sua_bal <-
  new_sua_bal[
    abs(Meanold - Value) > 10000 &
      ((measuredElementSuaFbs != "5510" & outlier == TRUE & outl_on_supp == TRUE &
          Value > 1000 & abs(Meanold - Value) > 10000) |
         (outlier == TRUE & measuredElementSuaFbs == "5510"))
  ]

if (nrow(new_sua_bal) > 0) {
  
  new_sua_bal <-
    new_sua_bal[,
                .(geographicAreaM49, measuredItemFbsSua, measuredElementSuaFbs,
                  timePointYears, Historical_value_2010_2013 = Meanold, outlier = "OUTLIER")
    ]
  
  sel_vars <-
    c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  
  out_elems_items <-
    rbind(
      data_suabal[timePointYears < startYear][unique(new_sua_bal[, sel_vars, with = FALSE]), on = sel_vars],
      standData[unique(new_sua_bal[, sel_vars, with = FALSE]), on = sel_vars]
    )
  
  out_elems_items <-
    dt_left_join(
      out_elems_items,
      new_sua_bal,
      by = c("geographicAreaM49", "measuredItemFbsSua",
             "measuredElementSuaFbs", "timePointYears")
    )
  
  out_elems_items[is.na(outlier), outlier := ""]
  
  out_elems_items[,
                  Historical_value_2010_2013 := unique(na.omit(Historical_value_2010_2013)),
                  c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]
  
  out_elems_items <-
    out_elems_items[order(geographicAreaM49, measuredItemFbsSua, measuredElementSuaFbs, timePointYears)]
  
  out_elems_items[,
                  perc.change := (Value / shift(Value) - 1) * 100,
                  by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]
} else {
  out_elems_items <-
    new_sua_bal[0, .(geographicAreaM49, measuredElementSuaFbs,
                     measuredItemFbsSua, timePointYears, Value,
                     flagObservationStatus, flagMethod,
                     perc.change = NA_real_,
                     Historical_value_2010_2013 = NA_real_, outlier)]
}

out_elems_items <- rbind(combined_calories, out_elems_items, fill = TRUE)

out_elems_items[, Value := round(Value, 2)]
out_elems_items[, perc.change := round(perc.change, 2)]
out_elems_items[, Historical_value_2010_2013 := round(Historical_value_2010_2013, 2)]


tmp_file_outliers <- file.path(TMP_DIR, paste0("OUTLIERS_", COUNTRY, ".csv"))

out_elems_items <-
  nameData("suafbs", "sua_unbalanced", out_elems_items, except = "timePointYears")

write.csv(out_elems_items, tmp_file_outliers)

message("Line 5462")


######### / Combine old and new calories and data, and get outliers ###########


if (nrow(data_suabal[timePointYears >= startYear]) > 0) {
  
  sel_vars <- 
    c('geographicAreaM49', 'measuredElementSuaFbs',
      'measuredItemFbsSua', 'timePointYears') 
  
  data_suabal_missing <-
    data_suabal[timePointYears >= startYear][!standData, on = sel_vars]
  
  if (nrow(data_suabal_missing) > 0) {
    
    data_suabal_missing[,
                        `:=`(
                          Value = NA_real_,
                          flagObservationStatus = NA_character_,
                          flagMethod = NA_character_
                        )
    ]
    
    standData <- rbind(standData, data_suabal_missing)
  }
}

######### LOSS RATIO CHECK ######### livia 
if (nrow(standData) > 0 && "5016" %in% unique(standData$measuredElementSuaFbs)) {
  
  losses_processedData <- copy(standData)
  
  losses_processedData <- losses_processedData[measuredElementSuaFbs %in% c("5510", "5610", "5016"),]
  
  setnames(losses_processedData, c("measuredItemFbsSua","measuredElementSuaFbs"), c("measuredItemCPC","measuredElement"))
  
  losses_processedData <-
    denormalise(
      normalisedData = losses_processedData,
      denormaliseKey = "measuredElement",
      fillEmptyRecords = TRUE
    )
  
  losses_processedData[, Value_measuredElement_5126 := Value_measuredElement_5016 / (sum(Value_measuredElement_5510, Value_measuredElement_5610, na.rm = TRUE)),
                       by = c("geographicAreaM49","measuredItemCPC", "timePointYears")]
  
  losses_processedData[, flagObservationStatus_measuredElement_5126 := flagObservationStatus_measuredElement_5016]
  
  losses_processedData[, flagMethod_measuredElement_5126 := flagMethod_measuredElement_5016]
  
  losses_normalisedData <- normalise(losses_processedData)
  
  losses_threshold <- losses_normalisedData[measuredElement %in% "5126" & Value >= 0.3,]
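  # e.g. loss = 40 with production = 100 and imports = 20 gives a ratio of
  # 40 / 120 = 0.33, which exceeds the 0.3 threshold and is reported.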
  
  setnames(losses_threshold,  c("measuredItemCPC","measuredElement"),c("measuredItemFbsSua","measuredElementSuaFbs"))
  
  losses_threshold <- nameData("suafbs", "sua_balanced", losses_threshold, except = c("measuredElementSuaFbs", "timePointYears"))
  
  losses_threshold[, measuredElementSuaFbs := "Loss Ratio %"]
  
  wb <- createWorkbook() 
  addWorksheet(wb, "losses_to_check") 
  writeData(wb, "losses_to_check", losses_threshold) 
  dbg_print(paste("Losses workbook, save", getwd(), tmp_file_losses_to_check)) 
  saveWorkbook(wb, tmp_file_losses_to_check, overwrite = TRUE) 
  
}


#### INDUSTRIAL TO CHECK #####################################
#tmp_file_industrial_to_check <- file.path(TMP_DIR, paste0("INDUSTRIAL_TO_CHECK_", COUNTRY, ".xlsx"))
if (nrow(standData) > 0 && "5165" %in% unique(standData$measuredElementSuaFbs)) {
  
  industrial_processedData <- copy(standData)
  
  industrial_processedData <- industrial_processedData[measuredElementSuaFbs %in% c("5165"),]
  
  not_industrial_table <- ReadDatatable("not_industrial_use_table")
  
  industrial_processedData <- merge(industrial_processedData, not_industrial_table[, c("cpc_code","not_industrial"), with = F], all.x = T,
                                    by.x = "measuredItemFbsSua", by.y = "cpc_code")
  
  industrial_processedData <- industrial_processedData[not_industrial %in% "message",]
  industrial_processedData <- industrial_processedData[Value > 0 & !is.na(Value),]
  industrial_processedData <- unique(industrial_processedData[, c("measuredItemFbsSua", "geographicAreaM49", "measuredElementSuaFbs"), with = F])
  
  industrial_processedData <- nameData("suafbs", "sua_balanced", industrial_processedData)
  
  wb <- createWorkbook() 
  addWorksheet(wb, "industrial_to_check") 
  writeData(wb, "industrial_to_check", industrial_processedData) 
  dbg_print(paste("Industrial workbook, save", getwd(), tmp_file_industrial_to_check)) 
  saveWorkbook(wb, tmp_file_industrial_to_check, overwrite = TRUE) 
  
}

########## DES calculation

sel_vars <- c('geographicAreaM49', 'timePointYears', 'measuredItemFbsSua')

des <- 
  rbind(
    data_suabal[
      measuredElementSuaFbs == '664' & timePointYears %in% 2010:(startYear-1),
      .(Value = sum(Value, na.rm = TRUE)),
      by = sel_vars 
    ],
    standData[
      measuredElementSuaFbs == '664' & timePointYears >= startYear,
      .(Value = sum(Value, na.rm = TRUE)),
      by =  sel_vars
    ]
  )

des <-
  rbind(
    des,
    des[,
        .(measuredItemFbsSua = 'S2901', Value = sum(Value)),
        by = c('geographicAreaM49', 'timePointYears')
    ]
  )

des[, Value := round(Value, 2)]

f_des <- file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "des.rds")

if (file.exists(f_des)) {
  file.copy(f_des, sub("des\\.rds", "des_prev.rds", f_des))
}

# saveRDS(des, f_des)

##### Plot of main DES absolute variations

des_diff <-
  des[
    order(geographicAreaM49, measuredItemFbsSua, timePointYears),
    .(year = timePointYears, diff = Value - shift(Value)),
    .(geographicAreaM49, item = measuredItemFbsSua)
  ][
    year != min(year)
  ]

des_diff <- des_diff[item %chin% des_diff[abs(diff) > 20]$item]

des_diff[item == "S2901", item := "GRAND TOTAL"]

# plot_main_des_diff <-
#   ggplot(des_diff, aes(x = year, diff, group = item, color = item)) +
#   geom_line(size = 1) +
#   geom_point(size = 3) +
#   ggtitle("Absolute variation of DES and main variations of items",
#           subtitle = COUNTRY_NAME)
# 
# tmp_file_plot_main_des_diff <-
#   file.path(TMP_DIR, paste0("PLOT_MAIN_DES_DIFF_", COUNTRY, ".pdf"))
# 
# ggsave(tmp_file_plot_main_des_diff, plot = plot_main_des_diff)

##### / Plot of main DES absolute variations


# ##### Plot of main DES
# 
main_des_items <-
  des[,
      .(geographicAreaM49, year = timePointYears, item = measuredItemFbsSua, Value)
  ][
    item %chin% des[Value > 100]$measuredItemFbsSua
  ][
    order(geographicAreaM49, item, year)
  ]

main_des_items[item == "S2901", item := "GRAND TOTAL"]

# plot_main_des_items <-
#   ggplot(main_des_items, aes(x = year, Value, group = item, color = item)) +
#   geom_line(size = 1) +
#   geom_point(size = 3) +
#   ggtitle("Main DES items (> 100 Calories)", subtitle = COUNTRY_NAME)
# 
# tmp_file_plot_main_des_items <-
#   file.path(TMP_DIR, paste0("PLOT_MAIN_DES_ITEMS_", COUNTRY, ".pdf"))
# 
# ggsave(tmp_file_plot_main_des_items, plot = plot_main_des_items)

# ##### / Plot of main DES
# 
##### Plot of main diff avg DES
des_main_diff_avg <-
  des[
    measuredItemFbsSua != "S2901",
    .(pre = mean(Value[timePointYears < startYear]), post = mean(Value[timePointYears >= startYear])),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

des_main_diff_avg <- des_main_diff_avg[abs(post - pre) > 20]

des_main_diff_avg <- melt(des_main_diff_avg, c("geographicAreaM49", "measuredItemFbsSua"))

des_main_diff_avg <- nameData("suafbs", "sua_unbalanced", des_main_diff_avg, except = "geographicAreaM49")

# plot_des_main_diff_avg <-
#   ggplot(des_main_diff_avg,
#          aes(x = measuredItemFbsSua_description, y = value,
#              group = rev(variable), fill = variable)) +
#   geom_col(position = "dodge") +
#   coord_flip() +
#   ggtitle("Main diff (> 20 Cal) in DES average pre (< startYear) and post (>= startYear)",
#           subtitle = COUNTRY_NAME)
# 
# 
# tmp_file_plot_des_main_diff_avg <-
# file.path(TMP_DIR, paste0("PLOT_DES_MAIN_DIFF_AVG_", COUNTRY, ".pdf"))
# 
# ggsave(tmp_file_plot_des_main_diff_avg, plot = plot_des_main_diff_avg)

# 
# ##### Plot of main diff avg DES
# 
des_cast <-
  data.table::dcast(
    des,
    geographicAreaM49 + measuredItemFbsSua ~ timePointYears,
    fun.aggregate = sum,
    value.var = "Value"
  )

des_cast <- nameData("suafbs", "sua_balanced", des_cast)

setorderv(des_cast, names(des_cast)[ncol(des_cast)], order = -1)


# Main items: those covering (just under) 90% of total DES

des_main_90 <-
  des[
    measuredItemFbsSua != "S2901" & timePointYears >= startYear,
    .(tot = sum(Value)),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ][
    order(-tot)
  ][,
    cumsum := cumsum(tot)
  ]

des_main_90 <-
  des_main_90[
    des[
      measuredItemFbsSua != "S2901" & timePointYears >= startYear,
      .(maintot = sum(Value)),
      by = "geographicAreaM49"
    ],
    on = "geographicAreaM49"
  ][
    cumsum < maintot * 0.9
  ][,
    c("tot", "cumsum", "maintot") := NULL
  ]
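# Worked example (hypothetical totals): items totalling 500, 300, 150 and 50
# (maintot = 1000) have cumsum 500, 800, 950, 1000; only the first two pass
# cumsum < 900, i.e. the smallest set covering (just under) 90% of DES.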

des_main <-
  rbind(
    des_cast[measuredItemFbsSua == "S2901"],
    des_cast[des_main_90, on = c("geographicAreaM49", "measuredItemFbsSua")]
  )

des_main <-
  rbind(
    des_main,
    des_main[,
             lapply(.SD, function(x) round(sum(x[-1]) / x[1] * 100, 2)),
             .SDcols = c(sort(unique(des$timePointYears)))
    ],
    fill = TRUE
  )
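# e.g. for a year column holding (grand total = 2000, items 900 and 700), the
# appended row is (900 + 700) / 2000 * 100 = 80: main items cover 80% of DES.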

des_main[
  is.na(measuredItemFbsSua),
  measuredItemFbsSua_description := "PERCENTAGE OF MAIN OVER TOTAL"
]

########## create XLSX for main DES items

des_main_90_and_tot <-
  rbind(
    data.table(
      geographicAreaM49 = unique(des_main_90$geographicAreaM49),
      measuredItemFbsSua = "S2901"
    ),
    des_main_90
  )

des_level_diff <-
  des[
    order(geographicAreaM49, measuredItemFbsSua, timePointYears),
    .(
      Value = Value - shift(Value),
      timePointYears
    ),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

des_level_diff_cast <-
  data.table::dcast(
    des_level_diff,
    geographicAreaM49 + measuredItemFbsSua ~ timePointYears,
    fun.aggregate = sum,
    value.var = "Value"
  )


des_perc_diff <-
  des[
    order(geographicAreaM49, measuredItemFbsSua, timePointYears),
    .(
      Value = Value / shift(Value) - 1,
      timePointYears = timePointYears
    ),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

des_perc_diff[is.infinite(Value) | is.nan(Value), Value := NA_real_]

des_perc_diff_cast <-
  data.table::dcast(
    des_perc_diff,
    geographicAreaM49 + measuredItemFbsSua ~ timePointYears,
    fun.aggregate = sum,
    value.var = "Value"
  )

sel_vars <-
  c("geographicAreaM49", "geographicAreaM49_description",
    "measuredItemFbsSua", "measuredItemFbsSua_description")

by_vars <- c("geographicAreaM49", "measuredItemFbsSua")

des_level_diff_cast <-
  merge(des_level_diff_cast, des_cast[, sel_vars, with = FALSE ], by = by_vars)

setcolorder(des_level_diff_cast, names(des_cast))

des_perc_diff_cast <-
  merge(des_perc_diff_cast, des_cast[, sel_vars, with = FALSE], by = by_vars)

setcolorder(des_perc_diff_cast, names(des_cast))

des_level_diff_cast <- des_level_diff_cast[des_main_90_and_tot, on = by_vars]

des_perc_diff_cast <- des_perc_diff_cast[des_main_90_and_tot, on = by_vars]


wb <- createWorkbook()

addWorksheet(wb, "DES_MAIN")
addWorksheet(wb, "DES_MAIN_diff")
addWorksheet(wb, "DES_MAIN_diff_perc")

writeData(wb, "DES_MAIN", des_main)
writeData(wb, "DES_MAIN_diff", des_level_diff_cast)
writeData(wb, "DES_MAIN_diff_perc", des_perc_diff_cast)

style_cal_0      <- createStyle(fgFill = "black", fontColour = "white", textDecoration = "bold")
style_cal_gt_10  <- createStyle(fgFill = "lightyellow")
style_cal_gt_20  <- createStyle(fgFill = "yellow")
style_cal_gt_50  <- createStyle(fgFill = "orange")
style_cal_gt_100 <- createStyle(fgFill = "red")
style_percent    <- createStyle(numFmt = "PERCENTAGE")
style_mean_diff  <- createStyle(borderColour = "blue", borderStyle = "double", border = "TopBottomLeftRight")

# All zeros in the compilation years (yearVals) that, on average,
# were not zero in 2010:(startYear-1)
zeros <-
  des_main[, .SD, .SDcols = as.character(yearVals)] == 0 &
  des_main[, rowMeans(.SD), .SDcols = as.character(2010:(startYear-1))] > 0

for (i in which(names(des_level_diff_cast) %in% 2011:endYear)) {
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 10], style = style_cal_gt_10, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 20], style = style_cal_gt_20, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 50], style = style_cal_gt_50, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 100], style = style_cal_gt_100, gridExpand = TRUE)
  
  if (names(des_level_diff_cast)[i] %in% yearVals) {
    j <- names(des_level_diff_cast)[i]
    rows_with_zero <- zeros[, j]
    if (any(rows_with_zero == TRUE)) {
      addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(zeros))[zeros[, j]], style = style_cal_0, gridExpand = TRUE)
    }
  }
  
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 10], style = style_cal_gt_10, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 20], style = style_cal_gt_20, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 50], style = style_cal_gt_50, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 100], style = style_cal_gt_100, gridExpand = TRUE)
}

high_variation_mean <-
  abs(des_main[, rowMeans(.SD), .SDcols = as.character(2010:(startYear-1))] / des_main[, rowMeans(.SD), .SDcols = as.character(yearVals)] - 1) > 0.30
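# e.g. a pre-startYear mean of 150 vs a compilation-year mean of 100 gives
# |150/100 - 1| = 0.5 > 0.3, so the item's year cells get the double border.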

if (any(high_variation_mean == TRUE)) {
  high_variation_mean <- 1 + (1:nrow(des_main))[high_variation_mean]
  addStyle(wb, sheet = "DES_MAIN", style_mean_diff, rows = high_variation_mean, cols = which(names(des_level_diff_cast) %in% yearVals), gridExpand = TRUE, stack = TRUE)
}


addStyle(wb, sheet = "DES_MAIN_diff_perc", style_percent, rows = 1:nrow(des_level_diff_cast) + 1, cols = which(names(des_level_diff_cast) %in% 2011:endYear), gridExpand = TRUE, stack = TRUE)

setColWidths(wb, "DES_MAIN", 4, 40)
setColWidths(wb, "DES_MAIN_diff", 4, 40)
setColWidths(wb, "DES_MAIN_diff_perc", 4, 40)

tmp_file_des_main <- file.path(TMP_DIR, paste0("DES_MAIN_ITEMS_", COUNTRY, ".xlsx"))
message("Line 5807")

saveWorkbook(wb, tmp_file_des_main, overwrite = TRUE)


########## / create XLSX for main DES items


tmp_file_des <- file.path(TMP_DIR, paste0("DES_", COUNTRY, ".csv"))

#des_cast[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

write.csv(des_cast, tmp_file_des)

########## / DES calculation


# Nutrients (except 664) are required only in the last round (when saving)
if (!(TRUE %in% swsContext.computationParams$save_nutrients)) {
  standData <- standData[measuredElementSuaFbs %!in% c('261', '271', '281', '674', '684')]
}

out <- SaveData(domain = "suafbs", dataset = "sua_balanced", data = standData, waitTimeout = 20000)

if (exists("out")) {
  
  body_message <-
    sprintf(
      "Plugin completed in %1.2f minutes.

      ################################################
      # Imbalances (greater than 100 t in abs value) #
      ################################################

      unbalanced items: %s (out of %s)
      unbalanced items with positive supply: %s
      unbalanced items with imbalance > 5%%: %s
      average percent imbalance in absolute value: %1.2f

      ###############################################
      ############       Parameters       ###########
      ###############################################

      country = %s
      threshold method = %s
      fill_extraction_rates = %s

      ###############################################
      ###########       ShareDownUp       ###########
      ###############################################

      shareDownUp can be modified here:

      %s

      ###############################################
      ##############   Removed feed   ###############
      ###############################################

      The following NEW feed items were removed as including them
      would have created a huge negative imbalance:

      %s

      The following NEW feed items are dubious, they might be
      removed, but you will need to do it manually:

      %s

      ###############################################
      ##############       Flags       ##############
      ###############################################

      E,-: Balancing: utilization modified OR Processed created down up using official data
      E,b: Final balancing (industrial, Feed)
      E,c: Balancing: production modified to compensate net trade
      E,e: Outlier replaced
      E,h: Balancing: imbalance to food, given food is the only utilization (Food Residual)
      E,i: Food processed generated, using availability
      E,n: Balancing of small imbalances OR stock variation corrected for over-accumulation (to change)
      E,s: Balancing: stocks modified
      E,u: Stocks variation generated by the model OR updated opening stocks
      I,c: Derived production generated
      I,e: Module imputation
      I,i: Residual item (identity) OR stocks as 20 percent of supply in 2013
      I,-: Opening stocks as cumulated stocks variations 1961-2013
      M,q: Cases for which flags were not set (should never happen)
      T,i: Calories per capita created
      T,p: Oilworld stocks data
      T,h: USDA stocks data

      
    
      ###############################################
      ##############       Files       ##############
      ###############################################

      The following files are attached:


      - DES_MAIN_ITEMS_*.xlsx = as DES_*.csv, but only with items that
          accounted for (just under) 90%% of DES on average over the compilation years

      - DES_*.csv = calculation of DES (total and by items)

      - OUTLIERS_*.csv = outliers in Calories (defined as those that account
          for more than 5 Calories, before or after a change of more than 10%%)

      - SHARES_*.xlsx = Parents/Children shares, availability, etc.

      - IMBALANCE_*.xlsx = imbalances, supply and utilizations

      - LOSSES_TO_CHECK_*.xlsx = items with a loss ratio greater than 30%%

      - NEW_ELEMENTS_*.csv = elements that appear for the first time

      
      - STOCK_CORRECTION_*.csv = Corrected Stocks

      ",
      difftime(Sys.time(), start_time, units = "min"),
      imbalances_info[['imb_tot']],
      prettyNum(imbalances_info[['all_items']], big.mark = ","),
      imbalances_info[['imb_pos_supply']],
      imbalances_info[['imb_gt_5percent']],
      imbalances_info[['imb_avg_percent']],
      COUNTRY,
      THRESHOLD_METHOD,
      FILL_EXTRACTION_RATES,
      sub('/work/SWS_R_Share/', '', shareDownUp_file),
      msg_new_feed_remove,
      msg_new_feed_dubious
    )
  
  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Results from SUA_bal_compilation plugin",
      body = c(body_message,
               # tmp_file_plot_main_des_items,
               # tmp_file_plot_main_des_diff,
               # tmp_file_plot_des_main_diff_avg,
               tmp_file_des_main,
               tmp_file_des,
               tmp_file_outliers,
               tmp_file_shares,
               tmp_file_imb,
               tmp_file_losses_to_check,
               tmp_file_industrial_to_check,
               #tmp_file_extr,
               #tmp_file_negative,
               #tmp_file_non_exist,
               tmp_file_new,
               #tmp_file_fix_shares,
               #tmp_file_NegNetTrade,
               tmp_file_Stock_correction
      )
    )
  }
  
  unlink(TMP_DIR, recursive = TRUE)
  
  out[c("inserted", "appended", "ignored", "discarded")] <-
    lapply(
      out[c("inserted", "appended", "ignored", "discarded")],
      function(x) ifelse(is.na(x[[1]]), 0, x[[1]])
    )
  
  last_msg <- paste(out$inserted + out$appended, "observations written")
  
  if (out$discarded + out$ignored > 0) {
    last_msg <- paste(last_msg, "and issues with", out$discarded + out$ignored)
  }
  
  f <- file.path(R_SWS_SHARE_PATH, "FBSvalidation", "reference", "plugin_success.txt")
  
  if (file.exists(f)) {
    
    f <- read.delim(f, header = FALSE, stringsAsFactors = FALSE)
    
    last_msg <- paste0("<div>", last_msg, "</div><br />",
                       "<div style = \"color: red; font-size: 20px; text-align: center;\">",
                       f[, sample(2:ncol(f), 1)][1],
                       "</div><br /><div style = \"color: red; font-size: 20px; background-size: ",
                       "300px 200px; width: 300; height: 200px; background-image: url('",
                       f[1, 1], "')\"></div>")
  }
  
  print(last_msg)
  
} else {
  
  print("The SUA_bal_compilation plugin had a problem when saving data.")
  
}