modules/BC_SUA_bal_compilation/main.R

library(faosws)
library(faoswsUtil)
library(faoswsBalancing)
library(faoswsStandardization)
library(dplyr)
library(data.table)
library(tidyr)
library(openxlsx)

# The only parameter is the string to print;
# COUNTRY is taken from the global environment (session parameter)
dbg_print <- function(x) {
  message(paste0("NEWBAL (", COUNTRY, "): ", x))
}

start_time <- Sys.time()

R_SWS_SHARE_PATH <- Sys.getenv("R_SWS_SHARE_PATH")

if (CheckDebug()) {
  R_SWS_SHARE_PATH <- "//hqlprsws1.hq.un.fao.org/sws_r_share"

  mydir <- "modules/BC_SUA_bal_compilation"
  
  SETTINGS <- faoswsModules::ReadSettings(file.path(mydir, "sws.yml"))
  
  SetClientFiles(SETTINGS[["certdir"]])
  
  GetTestEnvironment(baseUrl = SETTINGS[["server"]], token = SETTINGS[["token"]])
}


COUNTRY <- as.character(swsContext.datasets[[1]]@dimensions$geographicAreaM49@keys)

COUNTRY_suaunbal <- as.character(swsContext.datasets[[2]]@dimensions$geographicAreaM49@keys)

COUNTRY_shareup <- as.character(swsContext.datasets[[3]]@dimensions$geographicAreaM49@keys)

COUNTRY_sharedown <- as.character(swsContext.datasets[[4]]@dimensions$geographicAreaM49@keys)

if (sum(COUNTRY == c(COUNTRY_suaunbal, COUNTRY_sharedown, COUNTRY_shareup)) < 3) {
  dbg_print("Session countries are not identical")
  stop("Session countries are not identical")
}

COUNTRY_NAME <-
  nameData(
    "suafbs", "sua_unbalanced",
    data.table(geographicAreaM49 = COUNTRY))$geographicAreaM49_description

dbg_print("parameters")

USER <- regmatches(
  swsContext.username,
  regexpr("(?<=/).+$", swsContext.username, perl = TRUE)
)

# Allow only officers to run more than one country
officers <- c("filipczuk", "tayyib", "habimanad")

if (length(COUNTRY) > 1 && !(USER %in% officers)) {
  stop("You currently cannot run the module on multiple countries at once.")
}

# if (!file.exists(file.path(R_SWS_SHARE_PATH, USER))) {
#   dir.create(file.path(R_SWS_SHARE_PATH, USER))
# }


STOP_AFTER_DERIVED <- as.logical(swsContext.computationParams$stop_after_derived)

THRESHOLD_METHOD <- 'share'

FIX_OUTLIERS <- TRUE

FILL_EXTRACTION_RATES <- TRUE

YEARS <- as.character(2000:2019)

# Define query-related timePointYears
startYear <- 2010
endYear <- 2013

refYear <- 2014:2019

TMP_DIR <- file.path(tempdir(), USER)
if (!file.exists(TMP_DIR)) dir.create(TMP_DIR, recursive = TRUE)

tmp_file_imb         <- file.path(TMP_DIR, paste0("IMBALANCE_", COUNTRY, ".xlsx"))
tmp_file_imbCSV      <- file.path(TMP_DIR, paste0("IMBALANCE_", COUNTRY, ".csv"))
tmp_file_shares      <- file.path(TMP_DIR, paste0("SHARES_", COUNTRY, ".xlsx"))
# tmp_file_negative    <- file.path(TMP_DIR, paste0("NEGATIVE_AVAILAB_", COUNTRY, ".csv"))
# tmp_file_non_exist   <- file.path(TMP_DIR, paste0("NONEXISTENT_", COUNTRY, ".csv"))
# tmp_file_fix_shares  <- file.path(TMP_DIR, paste0("FIXED_PROC_SHARES_", COUNTRY, ".csv"))
# tmp_file_NegNetTrade <- file.path(TMP_DIR, paste0("NEG_NET_TRADE_", COUNTRY, ".csv"))
# tmp_file_Stock_correction <- file.path(TMP_DIR, paste0("STOCK_CORRECTION_", COUNTRY, ".csv"))
# (deletefile)

# Always source files in R/ (useful for local runs).
# Your working directory should be faoswsStandardization/
sapply(dir("R", full.names = TRUE), source)


p <- defaultStandardizationParameters()

p$itemVar <- "measuredItemSuaFbs"
p$mergeKey[p$mergeKey == "measuredItemCPC"] <- "measuredItemSuaFbs"
p$elementVar <- "measuredElementSuaFbs"
p$childVar <- "measuredItemChildCPC"
p$parentVar <- "measuredItemParentCPC"
p$createIntermetiateFile <- "TRUE"
p$protected <- "Protected"
p$official <- "Official"





shareDownUp_file <-
  file.path(paste0("shareDownUp_", COUNTRY, ".csv"))


tourist_cons_table <- ReadDatatable("keep_tourist_consumption")

stopifnot(nrow(tourist_cons_table) > 0)

TourismNoIndustrial <- tourist_cons_table[small == "X"]$tourist
NonSmallIslands <- tourist_cons_table[small != "X"]$tourist

dbg_print("define functions")

######### FUNCTIONS: at some point, they will be moved out of this file. ####

# Replacement for merge(x, y, by = VARS, all.x = TRUE) that does not set keys.
# By default it behaves like dplyr::left_join(). If nomatch = 0, non-matching
# rows are not returned.
dt_left_join <- function(x, y, by = NA, allow.cartesian = FALSE,
                         nomatch = NA) {
  if (anyNA(by)) {
    stop("'by' is required")
  }

  if (any(!is.data.table(x), !is.data.table(y))) {
    stop("'x' and 'y' should be data.tables")
  }

  res <- y[x, on = by, allow.cartesian = allow.cartesian, nomatch = nomatch]

  setcolorder(res, c(names(x), setdiff(names(y), names(x))))

  res
}

dt_full_join <- function(x, y, by = NA) {
  if (anyNA(by)) {
    stop("'by' is required")
  }

  if (any(!is.data.table(x), !is.data.table(y))) {
    stop("'x' and 'y' should be data.tables")
  }

  res <- merge(x, y, by = by, all = TRUE)

  # merge sets the key to `by`
  setkey(res, NULL)

  res
}
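
# Illustration only (not executed), with made-up item codes: dt_left_join()
# keeps all rows of `x` (as dplyr::left_join() does), while dt_full_join()
# keeps rows from both sides.
if (FALSE) {
  x <- data.table(item = c("0111", "0112"), supply = c(10, 20))
  y <- data.table(item = c("0111", "0113"), food = c(7, 5))

  dt_left_join(x, y, by = "item")               # rows: 0111, 0112 (food NA)
  dt_left_join(x, y, by = "item", nomatch = 0)  # drops the unmatched 0112
  dt_full_join(x, y, by = "item")               # rows: 0111, 0112, 0113
}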

message("Line 140")
# Function used for countries which had stock data in the past (before 2014):
# a linear model taking into account the supply before startYear.
coeffs_stocks_mod <- function(x) {
  tmp <- lm(data = x[timePointYears < startYear], supply_inc ~ supply_exc + trend)

  as.list(tmp$coefficients)
}
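
# Illustration only (not executed): with startYear = 2010 as set above, the
# model is fitted on the pre-2010 rows only. All numbers below are made up.
if (FALSE) {
  toy <- data.table(
    timePointYears = 2005:2012,
    supply_inc     = c(110, 118, 131, 140, 152, 160, 171, 180),
    supply_exc     = c(100, 108, 120, 128, 139, 147, 157, 165),
    trend          = 1:8
  )
  coeffs_stocks_mod(toy)  # list: "(Intercept)", "supply_exc", "trend"
}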

# This function recalculates opening stocks from the first observation
# (always), walking years in reverse order for back compilation.
update_opening_stocks <- function(x) {
  x <- x[order(geographicAreaM49, measuredItemSuaFbs, -timePointYears)] # reversed order for backward compilation

  groups <- unique(x[, c("geographicAreaM49", "measuredItemSuaFbs"), with = FALSE])

  res <- list()

  for (i in seq_len(nrow(groups))) {
    z <- x[groups[i], on = c("geographicAreaM49", "measuredItemSuaFbs")]
    #z <- z[order(geographicAreaM49, measuredItemSuaFbs, -timePointYears)]
    if (nrow(z) > 1) {
      for (j in seq_len(nrow(z))[-1]) {

        # negative delta cannot be more than opening
        
        # two ifelse commented for bc
        # if (z$delta[j] < 0 & abs(z$delta[j]) > z$new_opening[j]) {
        #   z$delta[j] <- - z$new_opening[j]
        # }

        if (z$new_opening[j - 1] - z$delta[j] > 0) {
          z$new_opening[j] <- z$new_opening[j - 1] - z$delta[j]
        } else {
          z$delta[j] <- z$new_opening[j - 1]
          z$new_opening[j] <- 0
        }
      }

    }
    res[[i]] <- z
  }
  rbindlist(res)
}
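
# Illustration only (not executed): starting from a known 2014 opening and
# yearly deltas, openings are rebuilt backwards (opening[t] = opening[t+1] -
# delta[t]); a delta larger than the remaining opening is truncated so that
# openings never become negative. All numbers are made up.
if (FALSE) {
  toy <- data.table(
    geographicAreaM49  = "999",
    measuredItemSuaFbs = "0111",
    timePointYears     = c("2014", "2013", "2012"),
    delta              = c(0, 30, 90),
    new_opening        = c(100, 0, 0)
  )
  update_opening_stocks(toy)
  # 2013 opening: 100 - 30 = 70; 2012: delta 90 > 70, so delta is truncated
  # to 70 and the 2012 opening is set to 0
}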


# Fill NAs by LOCF/FOCB/interpolation if more than one
# non-missing observation is available, otherwise just
# replicate the only non-missing observation
na.fill_ <- function(x) {
  if(sum(!is.na(x)) > 1) {
    zoo::na.fill(x, "extend")
  } else {
    rep(x[!is.na(x)], length(x))
  }
}
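
# Illustration only (not executed):
if (FALSE) {
  na.fill_(c(NA, 2, NA, 4, NA))  # 2 2 3 4 4 (interpolation plus LOCF/FOCB)
  na.fill_(c(NA, 5, NA))         # 5 5 5 (single observation replicated)
}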



`%!in%` <- Negate(`%in%`)
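# e.g. "0111" %!in% c("0112", "0113")  #> TRUE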


# RemainingToProcessedParent() and RemainingProdChildToAssign() will
# be used in the derivation of shareDownUp

RemainingToProcessedParent <- function(data) {
  data[, 
    parent_already_processed :=
      ifelse(
        is.na(parent_qty_processed),
        parent_qty_processed,
        sum(processed_to_child / extractionRate, na.rm = TRUE)
      ),
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
  ]
  
  data[, remaining_processed_parent := round(parent_qty_processed - parent_already_processed)]
  
  data[remaining_processed_parent < 0, remaining_processed_parent := 0]

  data[,
    only_child_left :=
      sum(is.na(processed_to_child)) == 1 &
        is.na(processed_to_child) &
        !is.na(production_of_child) &
        !is.na(parent_qty_processed) &
        production_of_child > 0,
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
  ]
  
  data[
    only_child_left == TRUE,
    processed_to_child := remaining_processed_parent * extractionRate
  ]
  
  data[,
    parent_already_processed :=
      ifelse(
        is.na(parent_qty_processed),
        parent_qty_processed,
        sum(processed_to_child / extractionRate, na.rm = TRUE)
      ),
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
  ]
  
  data[, remaining_processed_parent := round(parent_qty_processed - parent_already_processed)]

  data[remaining_processed_parent < 0, remaining_processed_parent := 0]
  
  return(data)
}


RemainingProdChildToAssign <- function(data) {
  
  data[,
    available_processed_child := sum(processed_to_child, na.rm = TRUE),
    by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
  ]
  
  data[, remaining_to_process_child := round(production_of_child - available_processed_child)]

  data[remaining_to_process_child < 0, remaining_to_process_child := 0]
  
  data[,
    only_parent_left :=
      sum(is.na(processed_to_child)) == 1 &
        is.na(processed_to_child) &
        !is.na(parent_qty_processed) &
        parent_qty_processed >= 0,
    # NOTE: grouped by child, mirroring only_child_left in
    # RemainingToProcessedParent()
    by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
  ]

  data[only_parent_left == TRUE, processed_to_child := remaining_to_process_child]
  
  data[,
    available_processed_child := sum(processed_to_child, na.rm = TRUE),
    by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
  ]
  
  data[, remaining_to_process_child := round(production_of_child - available_processed_child)]

  data[remaining_to_process_child < 0, remaining_to_process_child := 0]

  return(data)
}


# The fmax function is used when fixing the processingShare of coproducts.
# If `plusor` is TRUE, it means that "+" or "or" cases are involved.
fmax <- function(child, main, share, plusor = FALSE) {
  main <- unique(main)

  if (plusor) {
    found <- sum(sapply(child, function(x) grepl(x, main), USE.NAMES = FALSE))

    if (found == 0) {
      return(max(share, na.rm = TRUE))
    } else if (found == 1) {
      return(
        share[(1:length(child))[sapply(child, function(x) grepl(x, main), USE.NAMES = FALSE)]]
      )
    } else { # should be 2
      return(max(share[(1:length(child))[sapply(child, function(x) grepl(x, main), USE.NAMES = FALSE)]], na.rm = TRUE))
    }
  } else {
    if (sum(grepl(main, child)) > 0) {
      share[child == main]
    } else {
      max(share, na.rm = TRUE)
    }
  }
}
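
# Illustration only (not executed): `share` holds the children's processing
# shares; codes and shares below are made up.
if (FALSE) {
  fmax(child = c("23110", "39120"), main = "23110", share = c(0.8, 0.2))
  # 0.8: the main product's own share
  fmax(child = c("23110", "39120"), main = "99999", share = c(0.8, 0.2))
  # 0.8: no match, fall back to the maximum share
  fmax(child = c("23110", "39120"), main = "23110 or 39120",
       share = c(0.8, 0.2), plusor = TRUE)
  # 0.8: both children appear in `main`, take the maximum of their shares
}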

# Function that calculates the imbalance as supply - utilizations (both
# calculated inside the function, by reference, and dropped if keep_supply /
# keep_utilizations are set to FALSE).
calculateImbalance <- function(data,
                               supply_add = c("production", "imports"),
                               supply_subtract = c("exports", "stockChange"),
                               supply_all = union(supply_add, supply_subtract),
                               item_name = "measuredItemSuaFbs",
                               bygroup = c("geographicAreaM49", "timePointYears", item_name),
                               keep_supply = TRUE,
                               keep_utilizations = TRUE) {
  
  stopifnot(is.data.table(data))
  
  data[,
    `:=`(
      supply =
        sum(Value[measuredElementSuaFbs %chin% supply_add],
            - Value[measuredElementSuaFbs %chin% supply_subtract],
            na.rm = TRUE),
      # All elements that are NOT supply elements
      utilizations =
        sum(Value[!(measuredElementSuaFbs %chin% supply_all)],
            na.rm = TRUE)
    ),
    by = bygroup
  ][,
    imbalance := supply - utilizations
  ]
  
  data[(is.na(supply) & is.na(utilizations)) | (supply == 0 & utilizations == 0), imbalance :=  NA ]
  
  if (keep_supply == FALSE) {
    data[, supply := NULL]
  }
  
  if (keep_utilizations == FALSE) {
    data[, utilizations := NULL]
  }
  
}
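
# Illustration only (not executed), with made-up values for one toy SUA line:
# supply = production + imports - exports - stockChange = 100 + 20 - 10 - 0,
# utilizations = food + feed = 80 + 10, hence imbalance = 110 - 90 = 20.
# Note that the table is modified by reference.
if (FALSE) {
  toy <- data.table(
    geographicAreaM49     = "999",
    timePointYears        = "2012",
    measuredItemSuaFbs    = "0111",
    measuredElementSuaFbs = c("production", "imports", "exports",
                              "stockChange", "food", "feed"),
    Value                 = c(100, 20, 10, 0, 80, 10)
  )
  calculateImbalance(toy)
  unique(toy$imbalance)  # 20
}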

outside <- function(x, lower = NA, upper = NA) {
  x < lower | x > upper
}
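# e.g. outside(c(1, 5, 10), lower = 2, upper = 8)  #> TRUE FALSE TRUE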


send_mail <- function(from = NA, to = NA, subject = NA,
                      body = NA, remove = FALSE) {
  
  if (missing(from)) from <- 'no-reply@fao.org'
  
  if (missing(to)) {
    if (exists('swsContext.userEmail')) {
      to <- swsContext.userEmail
    }
  }
  
  # `to` defaults to NA, so also check for NA (is.null alone would miss it)
  if (is.null(to) || anyNA(to)) {
    stop('No valid email in `to` parameter.')
  }
  
  if (missing(subject)) stop('Missing `subject`.')
  
  if (missing(body)) stop('Missing `body`.')
  
  if (length(body) > 1) {
    body <-
      sapply(
        body,
        function(x) {
          if (file.exists(x)) {
            # https://en.wikipedia.org/wiki/Media_type 
            file_type <-
              switch(
                tolower(sub('.*\\.([^.]+)$', '\\1', basename(x))),
                txt  = 'text/plain',
                csv  = 'text/csv',
                png  = 'image/png',
                jpeg = 'image/jpeg',
                jpg  = 'image/jpeg',
                gif  = 'image/gif',
                xls  = 'application/vnd.ms-excel',
                xlsx = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                doc  = 'application/msword',
                docx = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                pdf  = 'application/pdf',
                zip  = 'application/zip',
                # https://stackoverflow.com/questions/24725593/mime-type-for-serialized-r-objects
                rds  = 'application/octet-stream'
              )

            if (is.null(file_type)) {
              stop(paste(tolower(sub('.*\\.([^.]+)$', '\\1', basename(x))),
                         'is not a supported file type.'))
            } else {
              res <- sendmailR:::.file_attachment(x, basename(x), type = file_type)

              if (remove == TRUE) {
                unlink(x)
              }

              return(res)
            }
          } else {
            return(x)
          }
        }
      )
  } else if (!is.character(body)) {
    stop('`body` should be either a string or a list.')
  }
  
  sendmailR::sendmail(from, to, subject, as.list(body))
}

# balance_proportional() distributes the imbalance of one country/item/year
# group across the elements that can be adjusted (can_balance == TRUE),
# proportionally to their moving-average shares (mov_share), falling back to
# shares of the current values when no moving shares exist. Adjustments are
# clipped to the [min_threshold, max_threshold] interval; the adjusted value
# is returned for each row of the group (NA for rows that cannot be adjusted).
balance_proportional <- function(data) {

  x <- copy(data)

  x <-
    x[
      can_balance == TRUE,
      c("measuredElementSuaFbs", "Value",
        "mov_share", "imbalance", "min_threshold",
        "max_threshold", "can_balance"),
      with = FALSE
    ]

  mov_share_sum <- sum(x$mov_share, na.rm = TRUE)

  if (mov_share_sum > 0) {
    # Recalculate mov_share, as we are excluding protected values
    x[, mov_share := mov_share / mov_share_sum]
  } else {
    # It means that there are no back shares, so use current values
    x[!is.na(Value) & can_balance == TRUE, mov_share := Value / sum(Value, na.rm = TRUE)]
  }

  x[is.na(Value), Value := 0]

  x[, adjusted_value := 0]

  x[Value + mov_share * imbalance >= 0, adjusted_value := Value + mov_share * imbalance]

  x[adjusted_value > Value & adjusted_value > max_threshold, adjusted_value := max_threshold]

  x[adjusted_value < Value & adjusted_value < min_threshold, adjusted_value := min_threshold]

  x <-  x[, c("measuredElementSuaFbs", "adjusted_value"), with = FALSE][data, on = "measuredElementSuaFbs"]

  return(as.numeric(x$adjusted_value))
}


# rollavg() is a rolling average function that uses computed averages
# to generate new values if there are missing values (and FOCB/LOCF).
# I.e.:
# myvec <- c(NA, 2, 3, 2.5, 4, 3, NA, NA, NA)
#
#> RcppRoll::roll_mean(myvec, 3, fill = 'extend', align = 'right')
#[1]       NA       NA       NA 2.500000 3.166667 3.166667       NA       NA       NA 
#
#> rollavg(myvec)
#[1] 2.000000 2.000000 3.000000 2.500000 4.000000 3.000000 3.166667 3.388889 3.185185

rollavg <- function(x, order = 3) {
  # order should be > 2
  stopifnot(order >= 3)
  
  non_missing <- sum(!is.na(x))
  
  # For cases that have just two non-missing observations
  order <- ifelse(order > 2 & non_missing == 2, 2, order)
  
  if (non_missing == 1) {
    x[is.na(x)] <- na.omit(x)[1]
  } else if (non_missing >= order) {
    n <- 1
    while(any(is.na(x)) & n <= 10) { # 10 is max tries
      movav <- suppressWarnings(RcppRoll::roll_mean(x, order, fill = 'extend', align = 'right'))
      movav <- data.table::shift(movav)
      x[is.na(x)] <- movav[is.na(x)]
      n <- n + 1
    }
    
    x <- zoo::na.fill(x, 'extend')
  }
  
  return(x)
}


newBalancing <- function(data, Utilization_Table) {
  
  # Contains a variable that indicates whether stocks changed
  data[, change_stocks := NA_integer_]
  
  # XXX: define as "food residual" those items whose only utilization is
  # food. Food processing can also be a possible utilization; if present,
  # that does not change the food-residual status, which is why the check
  # is done before assigning food processing.
  # NW: I have commented the conditions out.
  # Now it only checks that food is the only utilization.
  # We noticed that some food items were missing from the utilization table.
  # This is still different from the previous approach of assigning all of
  # the imbalance to food when "none of the other utilizations are activable".
  data[,
    food_resid :=
      # It's a food item & ...
      (measuredItemSuaFbs %chin% Utilization_Table[food_item == 'X', cpc_code] &
      # food exists & ...
      # !is.na(Value[measuredElementSuaFbs == 'food']) &
      Food_Median > 0 & !is.na(Food_Median)) &
      # ... is the only utilization
      all(is.na(Value[!(measuredElementSuaFbs %chin%
                          c('loss', 'food', 'production', 'imports',
                            'exports', 'stockChange','foodManufacturing', 'tourist'))])),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  # 0143	Cottonseed
  # 01442	Mustard seed
  # 01443	Rapeseed or colza seed
  # 01446	Safflower seed
  # 01449.90	Other oil seeds, n.e.
  # 01491.02	Palm kernels
  # 01801	Sugar beet
  # 01802	Sugar cane
  # 01809	Other sugar crops n.e.
  
  
  special_list <- c("0143", "01442", "01443", "01446", "01449.90", "01491.02", "01801", "01802", "01809")
  
  data[measuredItemSuaFbs %chin% special_list,
       food_resid :=
         # It's a food item & ...
         (measuredItemSuaFbs %chin% Utilization_Table[food_item == 'X', cpc_code] &
            # food exists & ...
            # !is.na(Value[measuredElementSuaFbs == 'food']) &
            Food_Median > 0 & !is.na(Food_Median)) &
         # ... is the only utilization
         all(is.na(Value[!(measuredElementSuaFbs %chin%
                             c('loss', 'food', 'production', 'imports',
                               'exports', 'stockChange','foodManufacturing', 'tourist'))])),
       by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
       ]
  
  # Check whether the commodity has a past value before assigning the
  # residual imbalance at the end of the balancing process
  data[,
    `:=`(
      feed_resid =
        # It's a feed item or has a past value & ...
        #(measuredItemSuaFbs %in% Utilization_Table[feed == 'X', cpc_code] |
        (Feed_Median > 0 & !is.na(Feed_Median)) &
        # feed is the only utilization...
        all(is.na(Value[!(measuredElementSuaFbs %chin%
                             c('feed', 'production', 'imports', 'exports',
                               'stockChange','foodManufacturing'))])),
      # It's an industrial item or has a past value & ...
      industrial_resid = Industrial_Median > 0 & !is.na(Industrial_Median)),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ] 
   
   
  data[,
    supply :=
      sum(
        Value[measuredElementSuaFbs %chin% c('production', 'imports')],
        - Value[measuredElementSuaFbs %chin% c('exports')],
        na.rm = TRUE
      ),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  calculateImbalance(data)
  
  # When production needs to be created. REMOVED!
  # data[
  #   Protected == FALSE &
  #     # Only primary
  #     measuredItemSuaFbs %chin% Utilization_Table[primary_item == "X"]$cpc_code &
  #     measuredElementSuaFbs == 'production' &
  #     supply < 0 &
  #     stockable == FALSE &
  #     !is.na(Value) & Value != 0,
  #   `:=`(
  #     Value = Value - imbalance,
  #     flagObservationStatus = "E",
  #     flagMethod = "c"
  #   )
  # ]
  # 
  # calculateImbalance(data)
  
  # Try to assign the maximum of the imbalance to stocks.
  # NOTE: in the conditions below, 2 was 0.2, indicating that no more than
  # 20% should go to stocks. The condition has now been relaxed a lot (200%).
  
  # If we have new production or new imports, we assign the imbalance to
  # food (if it is a food item) or to feed (if it is a feed item).
  
  data[imbalance>0 & (is.na(Production_Median)|Production_Median==0) & (is.na(Import_Median)|Import_Median==0)
       & Protected==FALSE & measuredElementSuaFbs== "food"
       & measuredItemSuaFbs %chin% Utilization_Table[food_item == "X"]$cpc_code, 
       `:=`(
         Value =
           ifelse(
             is.na(Value) & imbalance > 0,
             imbalance,
             ifelse(Value + imbalance >= 0, Value + imbalance, 0)
           ),
         flagObservationStatus = "E",
         flagMethod = "h"
       )
  ]
  
  # recalculate imbalance
  
  calculateImbalance(data)
  
  data[imbalance>0 & (is.na(Production_Median)|Production_Median==0)&(is.na(Import_Median)|Import_Median==0) & 
         Protected==FALSE & measuredElementSuaFbs== "feed"
       & measuredItemSuaFbs %chin% Utilization_Table[feed == "X"]$cpc_code, 
       `:=`(
         Value =
           ifelse(
             is.na(Value) & imbalance > 0,
             imbalance,
             ifelse(Value + imbalance >= 0, Value + imbalance, 0)
           ),
         flagObservationStatus = "E",
         flagMethod = "h"
       )
  ]
  
  
  # recalculate imbalance
  calculateImbalance(data)
  
  
  
  # For BC: create lag to ensure the 2014 restriction
  
  modified_opening <- all_opening_stocks[order(geographicAreaM49, measuredItemFbsSua, -timePointYears)]
  modified_opening[, stock_lag := dplyr::lag(Value, 1L)]
  
  data[supply < 0, supply := 0]
  
  data <-
    dt_left_join(
      data,
      modified_opening[,
        .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
          timePointYears, opening_stocks = Value, stock_lag = stock_lag)
      ],
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    )
  # Remove copy
  rm(modified_opening)
  
  data[,
    # NOTE: "imports" (plural) is the element name used in this module
    ProdImp := sum(Value[measuredElementSuaFbs %in% c("production", "imports")], na.rm = TRUE),
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ]
  
  data[ , Poslim := supply*2, by = c("geographicAreaM49","measuredItemSuaFbs", "timePointYears")]
  
  
  data[,
    Value_0 := ifelse(is.na(Value), 0, Value)
  ][
    Protected == FALSE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "stockChange" &
      stockable == TRUE,
    change_stocks :=
      # The numbers indicate the case; assignment (value and flags) is done below
      case_when(
        # case 1: we don't want stocks to change sign
        sign(Value_0) * sign(Value_0 + imbalance) == -1                                                                              ~ 1L,
        # case 2: a positive variation does not match the opening of the successive year
        Value_0 >= 0 & (stock_lag - (Value_0 + imbalance) < 0)                                                                       ~ 2L,
        # case 3: a negative variation would generate too high a stock and supply * 2 <= opening: variation will be set to 0
        Value_0 <= 0 & (Value_0 + imbalance <= 0) & (opening_stocks - (Value_0 + imbalance) > supply * 2) & Poslim <= opening_stocks ~ 3L,
        # case 4: a negative variation would generate too high a stock: set to max withdrawal = opening - supply * 2
        Value_0 <= 0 & (Value_0 + imbalance <= 0) & (opening_stocks - (Value_0 + imbalance) > supply * 2) & Poslim > opening_stocks  ~ 4L,
        # case 5: the negative variation is not too high: ok
        Value_0 <= 0 & (Value_0 + imbalance <= 0) & (opening_stocks - (Value_0 + imbalance) <= supply * 2)                           ~ 5L,
        # case 6: the positive variation is ok
        Value_0 >= 0 & (stock_lag - (Value_0 + imbalance) >= 0)                                                                      ~ 6L
      )
  ]

  data[change_stocks == 1L, Value := 0]
  
  ## All the other cases are left with no constraint for BC
  data[change_stocks == 2L, Value := stock_lag]
  data[change_stocks == 3L, Value := 0]
  data[change_stocks == 4L, Value := opening_stocks - (supply * 2)]
  data[change_stocks == 5L, Value := Value_0 + imbalance]
  data[change_stocks == 6L, Value := Value_0 + imbalance]
  
  data[
    change_stocks %in% 1L:6L,
    `:=`(flagObservationStatus = "E", flagMethod = "s")
  ]
  
  # For BC: ensure that the opening of year t is in line with the opening and
  # variation in t-1 and give the appropriate flag (this check has been
  # incorporated into the 6 conditions above)
  
  # data[measuredElementSuaFbs == "stockChange" & stock_lag - Value < 0, 
  #      `:=` (Value = stock_lag, flagObservationStatus = "E", flagMethod = "s")]
  # 
  
 
  data[, c("stock_lag", "Value_0", "opening_stocks", "ProdImp", "Poslim") := NULL]
  # Recalculate imbalance
  calculateImbalance(data)

  # Assign imbalance to food if food "only" (not "residual") item
  data[
    Protected == FALSE &
      food_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "food",
    `:=`(
      Value = ifelse(is.na(Value) & imbalance > 0, imbalance, ifelse(Value + imbalance >= 0, Value + imbalance, 0)),
      flagObservationStatus = "E",
      flagMethod = "h"
    )
  ]
  
  if (COUNTRY %in% TourismNoIndustrial) {
    data[measuredElementSuaFbs == "tourist" & flagMethod!= "f", Protected := FALSE]
    
    adj_tour_ind <-
      data.table::dcast(
        data[
          measuredItemSuaFbs %chin% Utilization_Table[food_item == "X"]$cpc_code &
            measuredElementSuaFbs %chin% c("industrial", "tourist")
          ],
        geographicAreaM49 + timePointYears + measuredItemSuaFbs ~ measuredElementSuaFbs,
        value.var = "Value"
      )
    
    adj_tour_ind <- adj_tour_ind[industrial > 0]
    
    adj_tour_ind[, new_tourist := industrial]
    adj_tour_ind[!is.na(tourist), new_tourist :=industrial]
    
    adj_tour_ind[, c("industrial", "tourist") := NULL]
    
    by_vars <- c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    
    data <- dt_left_join(data, adj_tour_ind, by = by_vars)
    
    data[
      measuredElementSuaFbs %in% c("industrial", "tourist"),
      any_protected := any(Protected == TRUE),
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
      ]
    
    data[
      !is.na(new_tourist) & measuredElementSuaFbs == "tourist" & any_protected == FALSE,
      `:=`(
        Value = new_tourist,
        flagObservationStatus = "E",
        flagMethod = "e"
      )
      ]
    
    data[
      !is.na(new_tourist) & measuredElementSuaFbs == "industrial" & any_protected == FALSE,
      `:=`(
        Value = 0,
        flagObservationStatus = "E",
        flagMethod = "e"
      )
      ]
    
    data[, c("any_protected", "new_tourist") := NULL]
    data[measuredElementSuaFbs == "tourist", Protected := TRUE]
    
  }

  for (j in 1:10) {

    # Recalculate imbalance
    calculateImbalance(data)

    data[, can_balance := FALSE]

    data[
      !is.na(Value) &
        Protected == FALSE &
        !(data.table::between(Value, min_threshold, max_threshold, incbounds = FALSE) %in% FALSE) &
        !(measuredElementSuaFbs %chin%
          c("production", "imports", "exports", "stockChange", "foodManufacturing", "seed")),
      can_balance := TRUE
    ]

    data[,
      elements_balance := any(can_balance),
      by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
    ]

    if (nrow(data[dplyr::near(imbalance, 0) == FALSE & elements_balance == TRUE]) == 0) {
      break
    } else {
      data[
        dplyr::near(imbalance, 0) == FALSE &
          elements_balance == TRUE,
        adjusted_value := balance_proportional(.SD),
        by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
      ]

      data[
        !is.na(adjusted_value) & !dplyr::near(adjusted_value, Value),
        `:=`(
          Value = adjusted_value,
          flagObservationStatus = "E",
          flagMethod = "-"
        )
      ]

      data[, adjusted_value := NULL]
    }

  }
      
  # At this point the imbalance (in the best case scenario) should be zero,
  # the following re-calculation is useful only for debugging
  
  calculateImbalance(data)

  # Assign imbalance to food if food "only" (not "residual") item
  data[
    Protected == FALSE &
      food_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "food",
    `:=`(
      Value =
        ifelse(
          is.na(Value) & imbalance > 0,
          imbalance,
          ifelse(Value + imbalance >= 0, Value + imbalance, 0)
        ),
      flagObservationStatus = "E",
      flagMethod = "h"
    )
  ]
  
  calculateImbalance(data)
  
  # Assign the residual imbalance to industrial if the conditions are met
  data[geographicAreaM49 %in% TourismNoIndustrial & measuredItemSuaFbs %in% Utilization_Table[food_item == 'X', cpc_code],
       industrial_resid := FALSE
       ]
  
  data[
    Protected == FALSE &
      industrial_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "industrial",
    `:=`(
      Value =
        ifelse(
          is.na(Value) & imbalance > 0,
          imbalance,
          ifelse(Value + imbalance >= 0, Value + imbalance, Value)
        ),
      flagObservationStatus = "E",
      flagMethod = "b"
    )
  ]
  

  calculateImbalance(data)
  
  # Assign the residual imbalance to feed if the conditions are met
  data[
    Protected == FALSE &
      feed_resid == TRUE &
      dplyr::near(imbalance, 0) == FALSE &
      measuredElementSuaFbs == "feed",
    `:=`(
      # XXX: this creates a warning when no assignment is done:
      # Coerced 'logical' RHS to 'double'
      Value =
        ifelse(
          is.na(Value) & imbalance > 0,
          imbalance,
          ifelse(Value + imbalance >= 0, Value + imbalance, Value)
        ),
      flagObservationStatus = "E",
      flagMethod = "b"
    )
  ]
 
  calculateImbalance(data)
  
  data[, c("supply", "utilizations", "imbalance", "mov_share_rebased") := NULL]
    
  return(data)
                  
}




############################## / FUNCTIONS ##################################


dbg_print("end functions")



#####################################  TREE #################################
message("Line 816")

dbg_print("download tree")

# tree <- getCommodityTreeNewMethod(COUNTRY, YEARS)
tree <- getCommodityTreeNewMethod(COUNTRY, as.character(2000:max(refYear)))
stopifnot(nrow(tree) > 0)

tree <- tree[geographicAreaM49 %chin% COUNTRY]


# The `tree_exceptions` will not be checked by validateTree()

# Exception: high share confirmed by official data
tree_exceptions <- tree[geographicAreaM49 == "392" & measuredItemParentCPC == "0141" & measuredItemChildCPC == "23995.01"]

if (nrow(tree_exceptions) > 0) {
  tree <- tree[!(geographicAreaM49 == "392" & measuredItemParentCPC == "0141" & measuredItemChildCPC == "23995.01")]
}

validateTree(tree)

if (nrow(tree_exceptions) > 0) {
  tree <- rbind(tree, tree_exceptions)
  rm(tree_exceptions)
}

## NA extraction rates are recorded in the SWS dataset as 0;
## for the standardization we need them to be treated as NA,
## therefore here we are changing them back

# TODO: Keep all protected 0 ERs
tree[Value == 0 & !(flagObservationStatus == "E" & flagMethod == "f"), Value := NA]

#proc_level_exceptions <- ReadDatatable("processing_level_exceptions")
#
#if (nrow(proc_level_exceptions) > 0) {
#  setnames(proc_level_exceptions, c("m49_code", "parent", "child"),
#           c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC"))
#
#  tree <-
#    tree[!proc_level_exceptions[is.na(level)],
#         on = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")]
#
#  proc_level_exceptions <- proc_level_exceptions[!is.na(level)]
#}

tree_to_send <- tree[is.na(Value) & measuredElementSuaFbs=="extractionRate"]
message("Line 864")

#if (FILL_EXTRACTION_RATES == TRUE) {

  expanded_tree <-
    merge(
      data.table(
        geographicAreaM49 = unique(tree$geographicAreaM49),
        timePointYears = as.character(sort(2000:max(refYear)))
      ),
      unique(
        tree[,
          c("geographicAreaM49", "measuredElementSuaFbs",
            "measuredItemParentCPC", "measuredItemChildCPC"),
          with = FALSE
        ]
      ),
      by = "geographicAreaM49",
      all = TRUE,
      allow.cartesian = TRUE
    )

  tree <- tree[expanded_tree, on = colnames(expanded_tree)]

  # flags for carry forward/backward
  tree[is.na(Value), c("flagObservationStatus", "flagMethod") := list("E", "t")]

  tree <-
    tree[!is.na(Value)][
      tree,
      on = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemParentCPC", "measuredItemChildCPC",
             "timePointYears"),
      roll = -Inf
    ]

  tree <-
    tree[!is.na(Value)][
      tree,
      on = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemParentCPC", "measuredItemChildCPC",
             "timePointYears"),
      roll = Inf
  ]

  # keep orig flags
  tree[, flagObservationStatus := i.i.flagObservationStatus]
  tree[, flagMethod := i.i.flagMethod]

  tree[, names(tree)[grep("^i\\.", names(tree))] := NULL]
# }
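
# Illustration only (not executed): the pair of rolling self-joins above is
# the data.table idiom used here for filling gaps along the time dimension,
# first carrying the next observation backward (roll = -Inf), then the last
# observation forward (roll = Inf). Toy series with made-up numbers:
if (FALSE) {
  z <- data.table(id = "a", year = 2000:2004, Value = c(NA, NA, 0.5, NA, NA))
  filled <- z[!is.na(Value)][z, on = c("id", "year"), roll = -Inf]
  filled <- filled[!is.na(Value)][filled, on = c("id", "year"), roll = Inf]
  filled$Value  # 0.5 0.5 0.5 0.5 0.5
}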


# XXX: connections removed here that should not exist in
# the commodity tree (so, they should be fixed there)
tree[
  #timePointYears >= 2014 &
    ((measuredItemParentCPC == "02211" & measuredItemChildCPC == "22212") |
      # cheese from whole cow milk cannot come from skim milk of cow
    (measuredItemParentCPC == "22110.02" & measuredItemChildCPC == "22251.01") |
      # NOTE: we deleted `_tree` from the variable name
      (measuredItemParentCPC == "02211" & measuredItemChildCPC == "22251.02")
    ),
  `:=`(
    Value = NA,
    flagObservationStatus = "M",
    flagMethod = "n"
  )
]

# Correction of milk tree for Czechia. TODO: generalize for the next round

tree[
  geographicAreaM49 == "203" &

  # "whole milk powder" from whole cow milk cannot come from skim milk of cow
    ((measuredItemParentCPC == "22110.02" & measuredItemChildCPC == "22211") |
       # "whole milk condensed" from whole cow milk cannot come from skim milk of cow
       (measuredItemParentCPC == "22110.02" & measuredItemChildCPC == "22222.01")),
  `:=`(
    Value = NA,
    flagObservationStatus = "M",
    flagMethod = "n"
  )
  ]


# saveRDS(
#   tree[
#     !is.na(Value) & measuredElementSuaFbs == "extractionRate",
#     -grepl("measuredElementSuaFbs", names(tree)),
#     with = FALSE
#   ],
#   file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "tree.rds")
# )

tree_to_send <-
  tree_to_send %>% 
  dplyr::anti_join(
    tree[is.na(Value) & measuredElementSuaFbs == "extractionRate"],
    by = c("geographicAreaM49", "measuredElementSuaFbs",
           "measuredItemParentCPC", "measuredItemChildCPC", "timePointYears",
           "Value", "flagObservationStatus", "flagMethod")
  ) %>%
  dplyr::select(-Value) %>%
  dplyr::left_join(
    tree,
    by = c("geographicAreaM49", "measuredElementSuaFbs",
           "measuredItemParentCPC", "measuredItemChildCPC",
           "timePointYears", "flagObservationStatus", "flagMethod")
  ) %>%
  setDT()

tree_to_send <-
  tree_to_send[,
    c("geographicAreaM49", "measuredElementSuaFbs", "measuredItemParentCPC",
      "measuredItemChildCPC", "timePointYears", "Value",
      "flagObservationStatus", "flagMethod"),
    with = FALSE
  ]

setnames(
  tree_to_send,
  c("measuredItemParentCPC", "measuredItemChildCPC"),
  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree")
)

tree_to_send <-
  nameData("suafbs", "ess_fbs_commodity_tree2", tree_to_send,
           except = c('measuredElementSuaFbs', 'timePointYears'))

tree_to_send[,
  `:=`(
    measuredItemParentCPC_tree = paste0("'", measuredItemParentCPC_tree),
    measuredItemChildCPC_tree = paste0("'", measuredItemChildCPC_tree))
]

# tmp_file_extr <- file.path(TMP_DIR, paste0("FILLED_ER_", COUNTRY, ".csv"))
# 
# write.csv(tree_to_send, tmp_file_extr)
# (deletefile)

# XXX remove NAs
tree <- tree[!is.na(Value)]

uniqueLevels <- unique(tree[, c("geographicAreaM49", "timePointYears"), with = FALSE])

levels <- list()

treeLevels <- list()

for (i in seq_len(nrow(uniqueLevels))) {
  filter <- uniqueLevels[i, ]
  
  treeCurrent <- tree[filter, on = c("geographicAreaM49", "timePointYears")]
  
  levels <- findProcessingLevel(treeCurrent, "measuredItemParentCPC", "measuredItemChildCPC")

  setnames(levels, "temp", "measuredItemParentCPC")
  
  treeLevels[[i]] <- dt_left_join(treeCurrent, levels, by = "measuredItemParentCPC")
}

tree <- rbindlist(treeLevels)

tree[,
  processingLevel := max(processingLevel, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears",
         "measuredElementSuaFbs", "measuredItemChildCPC")
]

# XXX Check if this one is still good or it can be obtained within the dataset
processed_item_datatable <- ReadDatatable("processed_item")
processedCPC <- processed_item_datatable[, measured_item_cpc]


# XXX what is this for?
itemMap <- GetCodeList(domain = "agriculture", dataset = "aproduction", "measuredItemCPC")
itemMap <- itemMap[, .(measuredItemSuaFbs = code, type)]

##################################### / TREE ################################


############################ POPULATION #####################################

key <-
  DatasetKey(
    domain = "population",
    dataset = "population_unpd",
    dimensions =
      list(
        geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
        measuredElementSuaFbs = Dimension(name = "measuredElement", keys = "511"), # 511 = Total population
        timePointYears = Dimension(name = "timePointYears", keys = YEARS)
      )
  )

dbg_print("download population")

popSWS <- GetData(key)

stopifnot(nrow(popSWS) > 0)

popSWS[geographicAreaM49 == "156", geographicAreaM49 := "1248"]

# Fix for missing regional official data in the country total
# Source: DEMOGRAPHIC SURVEY, Kurdistan Region of Iraq, July 2019, IOM UN Migration
# ("the KRI population at 5,122,747 individuals and the overall Iraqi
# population at 36,004,552 individuals", p. 14; it implies 14.22805%)
# https://iraq.unfpa.org/sites/default/files/pub-pdf/KRSO%20IOM%20UNFPA%20Demographic%20Survey%20Kurdistan%20Region%20of%20Iraq_0.pdf
popSWS[geographicAreaM49 == "368" & timePointYears %in% startYear:endYear, Value := Value * 0.8577195]


############################ / POPULATION ##################################

############################################################### DATA #######


# 5510 Production[t]
# 5610 Import Quantity [t]
# 5071 Stock Variation [t]
# 5113 Opening Stocks [t]
# 5023 Export Quantity [t]
# 5910 Loss [t]
# 5016 Industrial uses [t]
# 5165 Feed [t]
# 5520 Seed [t]
# 5525 Tourist Consumption [t]
# 5164 Residual other uses [t]
# 5141 Food [t]
# 664 Food Supply (/capita/day) [Kcal]

elemKeys <- c("5510", "5610", "5071", "5113", "5023", "5910", "5016",
              "5165", "5520", "5525", "5164", "5166", "5141")

itemKeys <- GetCodeList(domain = "suafbs", dataset = "sua_unbalanced", "measuredItemFbsSua")
itemKeys <- itemKeys$code


# NOTE: the definition of "food_resid" changed (see inside newBalancing)
# (so, the tables below are not used anymore)

#food_classification_country_specific <-
#  ReadDatatable("food_classification_country_specific",
#                where = paste0("geographic_area_m49 IN ('", COUNTRY, "')"))
#
#food_classification_country_specific <-
#  food_classification_country_specific[geographic_area_m49 == COUNTRY]
#
#food_only_items <- food_classification_country_specific[food_classification == 'Food Residual', measured_item_cpc]


Utilization_Table <- ReadDatatable("utilization_table_2018")

stockable_items <- Utilization_Table[stock == 'X', cpc_code]

zeroWeight <- ReadDatatable("zero_weight")[, item_code]

flagValidTable <- ReadDatatable("valid_flags")


# We decided to unprotect (E, e) (replaced outliers). Done after they are
# replaced (i.e., if an (E, e) exists initially, it should remain); they need
# to be unprotected for balancing.
flagValidTable[flagObservationStatus == 'E' & flagMethod == 'e', Protected := FALSE]


# XXX: we need to unprotect (I, c) because it was being assigned to the
# imputation of production of derived items, which is NOT protected.
# This will need to change in the next exercise.
flagValidTable[flagObservationStatus == 'I' & flagMethod == 'c', Protected := FALSE]


# Nutrients are:
# 1001 Calories
# 1003 Proteins
# 1005 Fats
nutrientCodes <- c("1001", "1003", "1005")

nutrientData <-
  getNutritiveFactors(
    measuredElement = nutrientCodes, 
    timePointYears = as.character(startYear:endYear),
    geographicAreaM49 = COUNTRY
  )

######### CREAM, SWEDEN

nutrientData[geographicAreaM49 == "752" & measuredItemCPC == "22120" & measuredElement == "1001", Value := 195]
nutrientData[geographicAreaM49 == "752" & measuredItemCPC == "22120" & measuredElement == "1003", Value := 3]
nutrientData[geographicAreaM49 == "752" & measuredItemCPC == "22120" & measuredElement == "1005", Value := 19]

### DAIRY ITEMS 22251.01 AND 22253, SELECTED COUNTRIES
nutrientData[geographicAreaM49 %in% c("756", "300", "250", "372", "276") & measuredItemCPC == "22251.01" & measuredElement == "1001", Value := 387]
nutrientData[geographicAreaM49 %in% c("756", "300", "250", "372", "276") & measuredItemCPC == "22251.01" & measuredElement == "1003", Value := 26]
nutrientData[geographicAreaM49 %in% c("756", "300", "250", "372", "276") & measuredItemCPC == "22251.01" & measuredElement == "1005", Value := 30]

nutrientData[geographicAreaM49 == "300" & measuredItemCPC == "22253" & measuredElement == "1001", Value := 310]
nutrientData[geographicAreaM49 == "300" & measuredItemCPC == "22253" & measuredElement == "1003", Value := 23]
nutrientData[geographicAreaM49 == "300" & measuredItemCPC == "22253" & measuredElement == "1005", Value := 23]


nutrientData[measuredElement=="1001",measuredElement:="664"]

nutrientData[measuredElement=="1003",measuredElement:="674"]

nutrientData[measuredElement=="1005",measuredElement:="684"]

if (CheckDebug()) {
  key <-
    DatasetKey(
      domain = "suafbs",
      dataset = "sua_unbalanced",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElementSuaFbs", keys = elemKeys),
          measuredItemFbsSua = Dimension(name = "measuredItemFbsSua", keys = itemKeys),
          timePointYears = Dimension(name = "timePointYears", keys = YEARS)
        )
    )
} else {
  key <- swsContext.datasets[[2]]
  
  key@dimensions$timePointYears@keys <- YEARS
  key@dimensions$measuredItemFbsSua@keys <- itemKeys
  key@dimensions$measuredElementSuaFbs@keys <- elemKeys
  key@dimensions$geographicAreaM49@keys <- COUNTRY
}

dbg_print("download data")


# LOAD
data <- GetData(key)

# Remove item that is not part of the agriculture domain
data <- data[measuredItemFbsSua != "F1223"]


dbg_print("convert sugar")

##############################################################
######### SUGAR RAW CODES TO BE CONVERTED IN 2351F ###########
##############################################################
data <- convertSugarCodes_new(data)

#################### FODDER CROPS ##########################################

# TODO: create SWS datatable for this.
# Some of these items may be missing in reference files,
# thus we carry forward the last observation.
fodder_crops_items <-
  tibble::tribble(
    ~description, ~code,
    "Maize for forage", "01911",
    "Alfalfa for forage", "01912",
    "Sorghum for forage", "01919.01",
    "Rye grass for forage", "01919.02",
    "Other grasses for forage", "01919.91",
    "Clover for forage", "01919.03",
    "Other oilseeds for forage", "01919.94",
    "Other legumes for  forage", "01919.92",
    "Cabbage for fodder", "01919.04",
    "Mixed grass and legumes for forage", "01919.93",
    "Turnips for fodder", "01919.05",
    "Beets for fodder", "01919.06",
    "Carrots for fodder", "01919.07",
    "Swedes for fodder", "01919.08",
    "Other forage products, nes", "01919.96",
    "Other forage crops, nes", "01919.95",
    "Hay for forage, from legumes", "01919.10",
    "Hay for forage, from grasses", "01919.09",
    "Hay for forage, from other crops nes", "01919.11"
  )


fodder_crops_availab <-
  data[
    measuredItemFbsSua %chin% fodder_crops_items$code &
      measuredElementSuaFbs == "5510"
  ]

if (nrow(fodder_crops_availab) > 0) {

  fodder_crops_complete <-
    CJ(
      geographicAreaM49 = unique(fodder_crops_availab$geographicAreaM49),
      measuredElementSuaFbs = "5510",
      timePointYears = unique(data$timePointYears),
      measuredItemFbsSua = unique(fodder_crops_availab$measuredItemFbsSua)
    )

  fodder_crops_complete <-
    fodder_crops_complete[order(geographicAreaM49, measuredItemFbsSua, timePointYears)]

  fodder_crops <-
    dt_left_join(
      fodder_crops_complete,
      fodder_crops_availab[, .(geographicAreaM49, measuredElementSuaFbs,
                               measuredItemFbsSua, timePointYears, Value)],
      by = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemFbsSua", "timePointYears")
  )

  fodder_crops[,
    # na.rm = FALSE keeps leading NAs: the default would shorten the vector
    # and fail to assign within groups whose first years are missing
    Value := zoo::na.locf(Value, na.rm = FALSE),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

  fodder_crops_new <-
    fodder_crops[
      !fodder_crops_availab,
      on = c("geographicAreaM49", "measuredElementSuaFbs",
             "measuredItemFbsSua", "timePointYears")
    ]


  if (nrow(fodder_crops_new) > 0) {

    fodder_crops_new[, `:=`(flagObservationStatus = "E", flagMethod = "t")]

    data <- rbind(data, fodder_crops_new)

  }
}

#################### / FODDER CROPS ########################################


################## STOCKS ##################################################


opening_stocks_2014 <-
  ReadDatatable(
    "opening_stocks_2014",
    where = paste0("m49_code IN (",
                   paste(shQuote(COUNTRY, type = "sh"), collapse = ", "), ")")
  )

# The procedure works even if no previous opening stocks exist, but in that
# case there is no way to know whether data is missing because there was an
# issue downloading it or because it actually does not exist. Luckily there
# are many calls to SWS, which should fail if there are issues: it is unlikely
# that only this one fails.
#stopifnot(nrow(opening_stocks_2014) > 0)
message("Line 1286")

non_null_prev_deltas <-
  unique(
    data[
      measuredElementSuaFbs == "5071" & timePointYears %in% refYear
    ][,
      .SD[sum(!dplyr::near(Value, 0)) > 0],
      by = c("geographicAreaM49", "measuredItemFbsSua")
    ][,
      .(geographicAreaM49, measuredItemFbsSua)
    ]
  )

opening_stocks_2014[opening_stocks < 0, opening_stocks := 0]

opening_stocks_2014 <-
  opening_stocks_2014[
    m49_code %in% COUNTRY,
    .(
      geographicAreaM49 = m49_code,
      measuredItemFbsSua = cpc_code,
      timePointYears = "2014",
      Value_cumulated = opening_stocks
    )
  ]

# Keep only those for which recent variations are available (for back
# compilation: stocks make sense if we have stocks in 2014-2019)
opening_stocks_2014 <-
  opening_stocks_2014[
    non_null_prev_deltas,
    on = c("geographicAreaM49", "measuredItemFbsSua"),
    nomatch = 0
  ]

original_opening_stocks <- data[measuredElementSuaFbs == "5113"]

original_opening_stocks <-
  flagValidTable[
    original_opening_stocks,
    on = c("flagObservationStatus", "flagMethod")
  ][,
    Valid := NULL
  ]


# Remove protected in 2014 from cumulated
opening_stocks_2014 <-
  opening_stocks_2014[
    !original_opening_stocks[
      timePointYears == "2014" & Protected == TRUE,
      .(geographicAreaM49, measuredItemFbsSua)
    ],
    on = c("geographicAreaM49", "measuredItemFbsSua")
  ]


all_opening_stocks <-
  merge(
    original_opening_stocks,
    opening_stocks_2014,
    by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
    all = TRUE
  )


all_opening_stocks[
  !Protected %in% TRUE & is.na(Value) & !is.na(Value_cumulated),
  `:=`(
    Value = Value_cumulated,
    flagObservationStatus = "I",
    flagMethod = "-",
    # We protect these, in any case, because they should not
    # be overwritten, even if not (semi) official or expert
    Protected = TRUE,
    measuredElementSuaFbs = "5113",
    timePointYears = "2014"
  )
][,
  Value_cumulated := NULL
]


### For back-compilation we make use of the fully validated 2014 stocks. No need to make up stocks if not available

# Now, for all remaining stockable items, we create opening
# stocks in 2014 as 20% of supply in 2013

# remaining_opening_stocks <-
#   data[
#     timePointYears == "2013" &
#       measuredItemFbsSua %chin%
#         setdiff(
#           stockable_items,
#           all_opening_stocks$measuredItemFbsSua
#         )
#   ][,
#     .(
#       opening_20 =
#         sum(
#           Value[measuredElementSuaFbs %chin% c("5510", "5610")],
#           - Value[measuredElementSuaFbs == "5910"],
#           na.rm = TRUE
#         ) * 0.2,
#       timePointYears = "2014"
#     ),
#     by = c("geographicAreaM49", "measuredItemFbsSua")
#   ]

# remaining_opening_stocks[opening_20 < 0, opening_20 := 0]

# all_opening_stocks <-
#   merge(
#     all_opening_stocks,
#     remaining_opening_stocks,
#     by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
#     all = TRUE
#   )



all_opening_stocks <- all_opening_stocks[!is.na(timePointYears)]

# all_opening_stocks[
#   !Protected %in% TRUE & is.na(Value) & !is.na(opening_20),
#   `:=`(
#     Value = opening_20,
#     flagObservationStatus = "I",
#     flagMethod = "i",
#     # We protect these, in any case, because they should not
#     # be overwritten, even if not (semi) official or expert
#     Protected = TRUE
#   )
# ][,
#   opening_20 := NULL
# ]

#min(all_opening_stocks$timePointYears)
complete_all_opening <-
  CJ(
    geographicAreaM49 = unique(all_opening_stocks$geographicAreaM49),
    timePointYears = as.character(2010:min(refYear)),
    measuredItemFbsSua = unique(all_opening_stocks$measuredItemFbsSua)
  )


all_opening_stocks <-
  merge(
    complete_all_opening,
    all_opening_stocks,
    by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
    all = TRUE
  )

all_opening_stocks[, orig_val := Value]

all_opening_stocks <-
  all_opening_stocks[!is.na(Value)][
    all_opening_stocks,
    on = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"),
    roll = Inf]

all_opening_stocks[
  is.na(orig_val) & !is.na(Value),
  `:=`(
    Protected = FALSE,
    flagObservationStatus = "E",
    flagMethod = "t"
  )
]

all_opening_stocks[, measuredElementSuaFbs := "5113"]

all_opening_stocks[, orig_val := NULL]

all_opening_stocks[, names(all_opening_stocks)[grep("^i\\.", names(all_opening_stocks))] := NULL]

all_opening_stocks <- all_opening_stocks[!is.na(timePointYears)]



# Generate stocks variations for items for which opening
# exists for ALL years and variations don't exist

## Changed for back compilation
opening_avail_all_years <-
  all_opening_stocks[
    !is.na(Value) & timePointYears < 2014,
    .SD[.N == (endYear - startYear + 1)],
    measuredItemFbsSua
  ]

delta_avail_for_open_all_years <-
  data[
    measuredElementSuaFbs == "5071" &
      timePointYears < 2014 &
      measuredItemFbsSua %chin% opening_avail_all_years$measuredItemFbsSua,
    .SD[.N == (endYear - startYear + 1)],
    measuredItemFbsSua
  ]

to_generate_by_ident <-
  setdiff(
    opening_avail_all_years$measuredItemFbsSua,
    delta_avail_for_open_all_years$measuredItemFbsSua
  )

data_rm_ident <- data[measuredElementSuaFbs == "5071" & measuredItemFbsSua %chin% to_generate_by_ident]

if (length(to_generate_by_ident) > 0) {

  opening_avail_all_years <-
    opening_avail_all_years[measuredItemFbsSua %chin% to_generate_by_ident]

  opening_avail_all_years[, Protected := NULL]

  next_opening_key <-
    DatasetKey(
      domain = "Stock",
      dataset = "stocksdata",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElement", keys = "5113"),
          measuredItemFbsSua = Dimension(name = "measuredItemCPC", keys = unique(opening_avail_all_years$measuredItemFbsSua)),
          timePointYears = Dimension(name = "timePointYears", keys = as.character(as.numeric(max(opening_avail_all_years$timePointYears)) + 1))
        )
    )

  next_opening <- GetData(next_opening_key)

  setnames(
    next_opening,
    c("measuredItemCPC", "measuredElement"),
    c("measuredItemFbsSua", "measuredElementSuaFbs")
  )

  opening_avail_all_years <-
    rbind(opening_avail_all_years, next_opening)

  opening_avail_all_years <-
    opening_avail_all_years[
      order(geographicAreaM49, measuredItemFbsSua, timePointYears)
    ]

  opening_avail_all_years[,
    delta := shift(Value, type = "lead") - Value,
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

  opening_avail_all_years[,
    delta :=
      ifelse(
        timePointYears == max(timePointYears) & is.na(delta),
        0,
        delta
      ),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

  opening_avail_all_years <-
    dt_left_join(
      opening_avail_all_years,
      flagValidTable,
      by = c("flagObservationStatus", "flagMethod")
    )

  opening_avail_all_years[shift(Protected, type = "lead") == TRUE & Protected == TRUE, `:=`(delta_flag_obs = "T", delta_flag_method = "p")]
  opening_avail_all_years[!(shift(Protected, type = "lead") == TRUE & Protected == TRUE), `:=`(delta_flag_obs = "I", delta_flag_method = "i")]

  delta_identity <-
    opening_avail_all_years[
      timePointYears %in% as.character(startYear:endYear), # changed for back compilation
      .(
        geographicAreaM49,
        measuredItemFbsSua,
        timePointYears,
        measuredElementSuaFbs = "5071",
        Value = delta,
        flagObservationStatus = delta_flag_obs,
        flagMethod = delta_flag_method
        )
    ]

  delta_identity <-
    delta_identity[!data_rm_ident,
                   on = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")]

  data <- rbind(data, delta_identity)

  data <-
    data[order(geographicAreaM49, measuredItemFbsSua,
               timePointYears, measuredElementSuaFbs)]

}

# / Generate stocks variations for items for which opening
# / exists for ALL years and variations don't exist



# Recalculate opening stocks


data_for_opening <-
  dt_left_join(
    all_opening_stocks[,
      .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
        timePointYears, new_opening = Value)
    ],
    data[
      measuredElementSuaFbs == '5071' &
        timePointYears %in% as.character(startYear:(endYear +1)) & # changed years for back compilation 
        !is.na(Value),
      .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
        timePointYears, delta = Value)
    ],
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  )

data_for_opening[is.na(delta), delta := 0]

data_for_opening <- data_for_opening[timePointYears <= endYear+1] # changed for back compilation (note: 2014 needs to be in there!!)


### Unprotect cumulative stock variation that resulted in negative opening in 2014

# Variation2013 = data[measuredElementSuaFbs == "5071" & timePointYears == "2013", .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua, variation2013 = Value, FlagVariation = paste0("(", flagObservationStatus, ",", flagMethod, ")"))]
# Opening2013 = data_for_opening[timePointYears == "2013", .(geographicAreaM49, measuredItemSuaFbs= measuredItemSuaFbs, opening2013 = new_opening)]
# Opening2014 = data_for_opening[timePointYears == "2014", .(geographicAreaM49, measuredItemSuaFbs= measuredItemSuaFbs, opening2014 = new_opening)]
# 
# FirstMerge = merge(Variation2013, Opening2013, by = c("geographicAreaM49", "measuredItemSuaFbs"))
# 
# StockCheck = merge(FirstMerge, Opening2014, by = c("geographicAreaM49", "measuredItemSuaFbs"))
# StockCheck[, ConsistencyCheck := dplyr::near(round(opening2013 + variation2013), round(opening2014)) ]
# StockCheckProblems = StockCheck[ConsistencyCheck == FALSE, ]

# Stock items to unprotect (unprotect all stock variations flagged (E, h) for round 2021)
CumulativeToUnprotect = data[flagObservationStatus== "E" & flagMethod== "h" & measuredElementSuaFbs == "5071" & timePointYears %in% as.character(startYear:endYear), measuredItemFbsSua]

# assign same flag as estimated stock variation
data[measuredItemFbsSua %in% CumulativeToUnprotect & measuredElementSuaFbs == "5071" & timePointYears %in% as.character(startYear:endYear), `:=` (flagObservationStatus = "E", flagMethod = "u") ]


# take care of NA stocks in 2014 to build the series for stocks
data_for_opening[, naOpening := new_opening]
data_for_opening[is.na(new_opening), new_opening := 0]

data_for_opening <- update_opening_stocks(data_for_opening)


# If there are manual insertions of stock variations that are not consistent
# with the opening of 2014, set them to 0. todo(giulia)
data_for_opening2 <- copy(data_for_opening)
data_for_opening2 <- data_for_opening2[order(geographicAreaM49, measuredItemSuaFbs, -timePointYears)]
data_for_opening2[, stock_lag := dplyr::lag(new_opening, 1L)]
data_for_opening2[, diff := stock_lag - delta]
data_for_opening2[, remove := ifelse(diff < 0 & !is.na(diff), TRUE, FALSE)]

RemoveVar <- unique(data_for_opening2[remove == TRUE])
RemoveVar <- RemoveVar[, c(1, 2, 3, 9), with = FALSE]  # country, item, year, `remove`


data_for_opening <- update_opening_stocks(data_for_opening)

# restore the original NA opening stocks in 2014
data_for_opening[is.na(naOpening) & timePointYears == "2014", new_opening := naOpening ]
data_for_opening[,naOpening := NULL ]

# reset all_opening_stocks with the recalculated openings
all_opening_stocks <-
  dt_left_join(
    all_opening_stocks,
    data_for_opening[,
      .(
        geographicAreaM49,
        measuredItemFbsSua = measuredItemSuaFbs,
        timePointYears,
        new_opening
      )
    ],
    by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
  )

all_opening_stocks[
  !is.na(new_opening) & (round(new_opening) != round(Value) | is.na(Value)),
  `:=`(
    Value = new_opening,
    flagObservationStatus = "E",
    flagMethod = "u",
    Protected = FALSE
  )
]

all_opening_stocks[, new_opening := NULL]

# / Recalculate opening stocks


### Add flag information
data <-
  dt_left_join(data, flagValidTable,
               by = c("flagObservationStatus", "flagMethod"))

data[flagObservationStatus %chin% c("", "T"), `:=`(Official = TRUE, Protected = TRUE)]
data[is.na(Official), Official := FALSE]
data[is.na(Protected), Protected := FALSE]


# We remove "5113" (opening stocks) as will be stored in separate table.
data <- data[measuredElementSuaFbs != "5113"]

dbg_print("elementToCodeNames")

# XXX FIXME: the elementCodesToNames below is
# not working properly, see issue #38
codes <- as.data.table(tibble::tribble(
  ~measuredElementSuaFbs,  ~name,
  "5910", "exports",
  "5520", "feed",
  "5141", "food",
  "5023", "foodManufacturing",
  "5610", "imports",
  "5165", "industrial",
  "5016", "loss",
  "5510", "production",
  "5525", "seed",
  "5164", "tourist",
  "5071", "stockChange"
))

# switch from element codes to element names
data <- dt_left_join(data, codes, by = "measuredElementSuaFbs")

data[, measuredElementSuaFbs := name]

data[, name := NULL]

if (nrow(RemoveVar) != 0) {

  setnames(RemoveVar, "measuredItemSuaFbs", "measuredItemFbsSua")

  data <- merge(data, RemoveVar,
                by = c("measuredItemFbsSua", "geographicAreaM49", "timePointYears"),
                all.x = TRUE)

  # zero out the inconsistent manually inserted stock variations
  data[remove == TRUE & measuredElementSuaFbs == "stockChange", Value := 0]

  data[, remove := NULL]

}


# NOTE: if this should be used again (#38), camelCase the element names (#45)
#data <-
#  elementCodesToNames(
#    data,
#    itemCol = "measuredItemFbsSua",
#    elementCol = "measuredElementSuaFbs"
#  )


# XXX: there are some NAs here, but probably there shouldn't be any
data <- data[!is.na(measuredElementSuaFbs)]

setnames(data, "measuredItemFbsSua", "measuredItemSuaFbs")
message("Line 1686")


########## Remove feed if new element and negative imbalance is huge

# Feed requires country-specific reference files, which are being updated.
# Until these get reviewed, the feed module will generate feed items in
# some countries, where it is not required/needed/appropriate. Below, we
# remove the NEW feed item if the NEGATIVE imbalance obtained by including
# it is more than 50% of supply (e.g., -72%).

# THIS PART IS NOT NEEDED FOR BACK COMPILATION, because new feed items are already generated for 2014-2019
# new_feed <-
#   data[
#     measuredElementSuaFbs == "feed",
#     .(
#       pre = sum(!is.na(Value[timePointYears %in% 2010:2013])),
#       post = sum(!is.na(Value[timePointYears >= 2014]))
#     ),
#     by = c("geographicAreaM49", "measuredItemSuaFbs")
#   ][
#     # pre == 0 & post > 0
#     pre > 0 & post == 0 # turned around for back compilation
#   ][,
#     c("pre", "post") := NULL
#   ]

# msg_new_feed_remove <- "No NEW feed removed"
# msg_new_feed_dubious <- "No other NEW items should be removed"

#  if (nrow(new_feed) > 0) {
# 
#   # prev_data_avg <-
#   #   data[
#   #     new_feed, on = c("geographicAreaM49", "measuredItemSuaFbs")
#   #   ][
#   #     timePointYears >= 2010 & timePointYears <= 2013
#   #   ][
#   #     order(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, timePointYears),
#   #     Value := na.fill_(Value),
#   #     by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
#   #   ][,
#   #     .(Value = sum(Value) / sum(!is.na(Value)), timePointYears = 0),
#   #     by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
#   #   ]
#   # 
#   # calculateImbalance(prev_data_avg, supply_subtract = c("exports", "stockChange"))
# 
#   # prev_processed_avg <-
#   #   prev_data_avg[
#   #     supply > 0 &
#   #       utilizations > 0 &
#   #       measuredElementSuaFbs == "foodManufacturing" &
#   #       !is.na(Value),
#   #     .(geographicAreaM49, measuredItemSuaFbs, proc_ratio = Value / supply)
#   #   ]
# 
# 
#   new_data_avg <-
#     data[
#       new_feed,
#       on = c("geographicAreaM49", "measuredItemSuaFbs")
#     ][
#       # measuredElementSuaFbs != "foodManufacturing" & timePointYears >= 2014
#     ][
#       order(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, timePointYears),
#       Value := na.fill_(Value),
#       by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
#     ][,
#       .(Value = sum(Value) / sum(!is.na(Value) & !dplyr::near(Value, 0)), timePointYears = 1),
#       by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
#     ][
#       !is.nan(Value)
#     ]
# 
#   # calculateImbalance(
#   #   new_data_avg,
#   #   keep_utilizations = FALSE,
#   #   supply_subtract = c("exports", "stockChange")
#   # )
# 
#   # new_data_supply <-
#   #   unique(
#   #     new_data_avg[,
#   #       c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "supply"),
#   #       with = FALSE
#   #     ]
#   #   )
# 
#   # new_data_proc <-
#   #   dt_left_join(
#   #     new_data_supply,
#   #     prev_processed_avg,
#   #     by = c("geographicAreaM49", "measuredItemSuaFbs"),
#   #     nomatch = 0
#   #   )
#   # 
#   # new_data_proc <-
#   #   new_data_proc[
#   #     supply > 0,
#   #     .(geographicAreaM49, measuredItemSuaFbs,
#   #       measuredElementSuaFbs = "foodManufacturing",
#   #       Value = supply * proc_ratio, timePointYears = 1)
#   #   ]
#   # 
#   # new_data_avg[, c("supply", "imbalance") := NULL]
# 
#   # new_data_avg <-
#   #   rbind(
#   #     new_data_avg,
#   #     new_data_proc
#   #   )
#   # 
#   # calculateImbalance(new_data_avg, supply_subtract = c("exports", "stockChange"))
# 
# 
#   new_feed_to_remove <-
#     unique(
#       new_data_avg[
#         imbalance / supply <= -0.3,
#         c("geographicAreaM49", "measuredItemSuaFbs"),
#         with = FALSE
#       ]
#     )
# 
#   new_feed_to_remove[, measuredElementSuaFbs := "feed"]
#   new_feed_to_remove[, remove_feed := TRUE]
# 
#   new_feed_dubious <-
#     unique(
#       new_data_avg[
#         imbalance / supply > -0.3 & imbalance / supply <= -0.05,
#         .(geographicAreaM49, measuredItemSuaFbs, x = imbalance / supply)
#       ][order(x)][, x := NULL]
#     )
# 
#   data <-
#     merge(
#       data,
#       new_feed_to_remove,
#       by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs"),
#       all.x = TRUE
#   )
# 
#   feed_to_remove <-
#     data[
#       remove_feed == TRUE & Protected == FALSE
#     ][,
#       .(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs, timePointYears)
#     ]
# 
#   data[, remove_feed := NULL]
# 
#   data <- data[!feed_to_remove, on = names(feed_to_remove)]
# 
#   if (nrow(feed_to_remove) > 0) {
#     msg_new_feed_remove <- paste(new_feed_to_remove$measuredItemSuaFbs, collapse = ", ")
#   }
# 
#   if (nrow(new_feed_dubious) > 0) {
#     msg_new_feed_dubious <- paste(new_feed_dubious$measuredItemSuaFbs, collapse = ", ")
#   }
# }


########## / Remove feed if new element and negative imbalance is huge


treeRestricted <-
  tree[,
    c("measuredItemParentCPC", "measuredItemChildCPC", "processingLevel"),
    with = FALSE
  ]

treeRestricted <- unique(treeRestricted[order(measuredItemChildCPC)])

primaryInvolved <- faoswsProduction::getPrimary(processedCPC, treeRestricted, p)

dbg_print("primary involved descendents")

# XXX: check here, 0111 is in results (why?)
primaryInvolvedDescendents <-
  getChildren(
    commodityTree = treeRestricted,
    parentColname = "measuredItemParentCPC",
    childColname = "measuredItemChildCPC",
    topNodes = primaryInvolved
  )
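# Illustration (not executed): getChildren() walks the commodity tree down
# from the given top nodes. With a toy two-level tree (child codes below
# are hypothetical):
#
#   toy_tree <- data.table(measuredItemParentCPC = c("0111", "23110"),
#                          measuredItemChildCPC  = c("23110", "23xx"))
#   getChildren(commodityTree = toy_tree,
#               parentColname = "measuredItemParentCPC",
#               childColname  = "measuredItemChildCPC",
#               topNodes      = "0111")
#   # expected to return the descendants "23110" and "23xx"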


# stocks need to be generated for those items for
# which "opening stocks" are available

items_to_generate_stocks <-
  unique(all_opening_stocks$measuredItemFbsSua)

stock <-
  CJ(
    measuredItemSuaFbs    = items_to_generate_stocks,
    measuredElementSuaFbs = 'stockChange',
    geographicAreaM49     = unique(data$geographicAreaM49),
    timePointYears        = unique(data$timePointYears)
  )

# rbind with anti_join
data <-
  rbind(
    data,
    stock[!data, on = c('measuredItemSuaFbs', 'measuredElementSuaFbs',
                        'geographicAreaM49', 'timePointYears')],
    fill = TRUE
  )
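# Illustration (not executed): stock[!data, on = ...] is a data.table
# anti-join, so only the synthetic stockChange cells that are NOT already
# in `data` are appended, and observed cells are never duplicated:
#
#   a <- data.table(k = c("x", "y"), v = 1:2)
#   b <- data.table(k = c("y", "z"))
#   b[!a, on = "k"]   # only "z" survives, so rbind() adds just that row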

# XXX what is primaryInvolvedDescendents?
#deriv <- CJ(measuredItemSuaFbs = primaryInvolvedDescendents, measuredElementSuaFbs = 'production', geographicAreaM49 = unique(data$geographicAreaM49), timePointYears = unique(data$timePointYears))
deriv <-
  CJ(
    measuredItemSuaFbs    = unique(tree$measuredItemChildCPC),
    measuredElementSuaFbs = 'production',
    geographicAreaM49     = unique(data$geographicAreaM49),
    timePointYears        = unique(data$timePointYears)
  )

# rbind with anti_join
data <-
  rbind(
    data,
    deriv[!data, on = c('measuredItemSuaFbs', 'measuredElementSuaFbs',
                        'geographicAreaM49', 'timePointYears')],
    fill = TRUE
  )



data[is.na(Official), Official := FALSE]
data[is.na(Protected), Protected := FALSE]



data[, stockable := measuredItemSuaFbs %chin% stockable_items]


# XXX: Remove items that have production, imports or exports only after 2013
# (no pre-2014 history to back-compile).
non_existing_for_imputation <-
  setdiff(
    data[
      measuredElementSuaFbs %chin% c('production', 'imports', 'exports') &
        timePointYears >= 2014,
      unique(measuredItemSuaFbs)
    ],
    data[
      measuredElementSuaFbs %chin% c('production', 'imports', 'exports') &
        timePointYears <= 2013,
      unique(measuredItemSuaFbs)
    ]
  )

data <- data[!(measuredItemSuaFbs %chin% non_existing_for_imputation)]

message("Line 1943")

############################################### Create derived production ###################################

dbg_print("derivation of shareDownUp")

# ############# ShareDownUp ----------------------------------------
# 
# # this table will be used to assign to zeroweight commodities
# # the processed quantities of their coproduct
coproduct_table <- ReadDatatable('zeroweight_coproducts')

stopifnot(nrow(coproduct_table) > 0)

# Can't do anything if this information is missing, so remove these cases
coproduct_table <- coproduct_table[!is.na(branch)]

### XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
### XXX removing these cases as '22242.01' appears as zero-weight XXXX
### XXX for main product = '22110.04'                             XXXX
### XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
coproduct_table <- coproduct_table[branch != '22242.01 + 22110.04']

coproduct_table <- coproduct_table[, .(measured_item_child_cpc, branch)]

coproduct_for_sharedownup <- copy(coproduct_table)

coproduct_for_sharedownup_easy <- coproduct_for_sharedownup[!grepl('\\+|or', branch)]

coproduct_for_sharedownup_plus <- coproduct_for_sharedownup[grepl('\\+', branch)]

coproduct_for_sharedownup_plus <-
  rbind(
    tidyr::separate(
      coproduct_for_sharedownup_plus,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *\\+ *')[, .(measured_item_child_cpc, branch= main1)],
    tidyr::separate(
      coproduct_for_sharedownup_plus,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *\\+ *')[, .(measured_item_child_cpc,branch = main2)]
  )

coproduct_for_sharedownup_plus <- unique(coproduct_for_sharedownup_plus)


coproduct_for_sharedownup_or <- coproduct_for_sharedownup[grepl('or', branch)]

coproduct_for_sharedownup_or <-
  rbind(
    #coproduct_table_or,
    tidyr::separate(
      coproduct_for_sharedownup_or,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *or *')[, .(measured_item_child_cpc, branch= main1)],
    tidyr::separate(
      coproduct_for_sharedownup_or,
      branch,
      into = c('main1', 'main2'),
      remove = FALSE,
      sep = ' *or *')[, .(measured_item_child_cpc,branch = main2)]
  )

coproduct_for_sharedownup_or <- unique(coproduct_for_sharedownup_or)

coproduct_for_sharedownup <-
  rbind(
    coproduct_for_sharedownup_easy,
    coproduct_for_sharedownup_plus,
    coproduct_for_sharedownup_or
  )
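# Illustration (not executed): a compound branch is split into one row per
# main product, so the zero-weight child is linked to each coproduct it can
# come from (codes below are hypothetical):
#
#   x <- data.table(measured_item_child_cpc = "39120.xx",
#                   branch = "21111 + 21112")
#   tidyr::separate(x, branch, into = c("main1", "main2"),
#                   remove = FALSE, sep = " *\\+ *")
#   # main1 = "21111", main2 = "21112"; the two rbind() arms above keep
#   # one of them at a time as the new `branch`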

## fix a wrong connection: bagasse (39140.02) should descend from raw cane and beet ('2351f'),
## not from raw centrifugal sugar ('23511.01')
coproduct_for_sharedownup[measured_item_child_cpc == "39140.02" & branch == "23511.01", branch := "2351f"]
# 
# 
# Use the whole tree, not level by level
ExtrRate <-
  tree[
    !is.na(Value) &
      measuredElementSuaFbs == 'extractionRate'
    ][,
      .(
        measuredItemParentCPC,
        geographicAreaM49,
        measuredItemChildCPC,
        timePointYears,
        extractionRate = Value,
        processingLevel
      )
    ]

## GET SUA BALANCED from here on to calculate Pshares
# Download also calories
elemKeys_all <- c(elemKeys, "664")

# Get ALL data from SUA BALANCED, anti-join it, and set to NA whatever was
# not generated here, so that non-existing cells are cleaned from the
# unbalanced session

if (CheckDebug()) {
  key_all <-
    DatasetKey(
      domain = "suafbs",
      dataset = "sua_balanced",
      dimensions =
        list(
          geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
          measuredElementSuaFbs = Dimension(name = "measuredElementSuaFbs", keys = elemKeys_all),
          measuredItemFbsSua = Dimension(name = "measuredItemFbsSua", keys = itemKeys),
          # Get from 2010, just for the SUA aggregation
          timePointYears = Dimension(name = "timePointYears", keys = as.character(2010:2019))
        )
    )
} else {
  key_all <- swsContext.datasets[[1]]
  
  key_all@dimensions$timePointYears@keys <- as.character(2010:2019)
  key_all@dimensions$measuredItemFbsSua@keys <- itemKeys
  key_all@dimensions$measuredElementSuaFbs@keys <- elemKeys_all
  key_all@dimensions$geographicAreaM49@keys <- COUNTRY
}


data_suabal <- GetData(key_all)

dataPshareBalanced = copy(data_suabal)

# merge in names (will also keep only elements for which we need thresholds, i.e. excl. stock openings)
dataPshareBalanced <- merge(dataPshareBalanced, codes, by = "measuredElementSuaFbs")

dataPshareBalanced[, measuredElementSuaFbs := name]

dataPshareBalanced[, name := NULL]


# We include utilizations to identify whether processed is the only utilization
data_tree <-
  dataPshareBalanced[
    measuredElementSuaFbs %chin%
      c('production', 'imports', 'exports',
        'stockChange', 'foodManufacturing', 'loss',
        'food', 'industrial', 'feed', 'seed')
  ]



# subset the tree to the parents and children present in the SUA data

ExtrRate <-
  ExtrRate[
    measuredItemChildCPC %chin% data_tree$measuredItemSuaFbs &
      measuredItemParentCPC %chin% data_tree$measuredItemSuaFbs
  ]


setnames(data_tree, "measuredItemFbsSua", "measuredItemParentCPC")


dataProcessingShare <- copy(data_tree)
# 
# data_tree <-
#   merge(
#     data_tree,
#     ExtrRate,
#     by = c(p$parentVar, p$geoVar, p$yearVar),
#     allow.cartesian = TRUE,
#     all.y = TRUE
#   )
# 
# data_tree <- as.data.table(data_tree)
# 
# # the availability of parents that have one child and only processed as utilization will
# # be entirely assigned to processed for their unique child, even for 2014 onwards
# data_tree[,
#   availability :=
#     sum(
#       Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
#       - Value[get(p$elementVar) %in% c(p$exportCode, "stockChange")],
#       na.rm = TRUE
#     ),
#   by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
# ]
# 
# data_tree[availability<0,availability:=0]
# 
# # used to check if a parent has processed as utilization
# data_tree[,
#   proc_Median :=
#     median(
#       Value[measuredElementSuaFbs == "foodManufacturing" & timePointYears %in% 2000:2019],
#       na.rm=TRUE
#     ),
#   by = c(p$parentVar, p$geoVar)
# ]
# 
# # boolean variable taking TRUE if the parent has only processed as utilization 
# data_tree[,
#   unique_proc :=
#     proc_Median > 0 &
#       !is.na(proc_Median) &
#       # ... is the only utilization
#       all(is.na(Value[!(measuredElementSuaFbs %chin%
#                         c('production', 'imports', 'exports',
#                           'stockChange','foodManufacturing'))])),
#   by = c(p$parentVar, p$geoVar, p$yearVar)
# ]
# 
# sel_vars <-
#   c("measuredItemParentCPC", "geographicAreaM49", "timePointYears",
#     "measuredElementSuaFbs", "flagObservationStatus", "flagMethod",
#     "Value", "Official", "measuredItemChildCPC", "extractionRate",
#     "processingLevel")
# 
# data_tree<-
#   unique(
#     data_tree[, c(sel_vars, "availability", "unique_proc"), with = FALSE],
#     by = sel_vars
#   )
# 
# message("Line 2118")
# 
# # dataset to calculate the number of parents of each child and the number of children of each parent
# # including zeroweight commodities
# sel_vars <- c("geographicAreaM49", "measuredItemParentCPC",
#               "measuredItemChildCPC","timePointYears")
# data_count <- unique(data_tree[, sel_vars, with = FALSE], by = sel_vars)
# 
# # Calculate the number of parents of each child
# data_count[,
#   number_of_parent := .N,
#   by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
# ]
# 
# # calculate the number of children of each parent
# # we exclude zeroweight to avoid double counting of children (processing)
# data_count[measuredItemChildCPC %!in% zeroWeight,
#   number_of_children := uniqueN(measuredItemChildCPC),
#   by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
# ]
# 
# data_tree <-
#   dt_left_join(
#     data_tree,
#     data_count,
#     by = c(p$parentVar, p$childVar, p$geoVar, p$yearVar),
#     allow.cartesian = TRUE
#   )
# 
# # dataset containing the processed quantity of parents
# food_proc <-
#   unique(
#     data_tree[
#       measuredElementSuaFbs == "foodManufacturing",
#       c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Value"),
#       with = FALSE
#     ],
#     by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Value")
#   )
# 
# setnames(food_proc, "Value", "parent_qty_processed")
# 
# # avoid recalculation of shareDownUp from 2014 onwards
# food_proc[timePointYears > 2013, parent_qty_processed := NA_real_]
# 
# data_tree <-
#   dt_left_join(
#     data_tree,
#     food_proc,
#     by = c(p$parentVar, p$geoVar, p$yearVar),
#     allow.cartesian = TRUE
#   )
# 
# sel_vars <- 
#   c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC",
#     "timePointYears", "extractionRate", "parent_qty_processed",
#     "processingLevel", "number_of_parent", "number_of_children")
# 
# data_tree <-
#   unique(
#     data_tree[, c(sel_vars, "availability", "unique_proc"), with = FALSE],
#     by = sel_vars
#   )
# 
# 
# # dataset containing the production of child commodities
# dataprodchild <- data[measuredElementSuaFbs %chin% c('production')]
# 
# setnames(dataprodchild, "measuredItemSuaFbs", "measuredItemChildCPC")
# 
# dataprodchild <-
#   unique(
#     dataprodchild[,
#       c("geographicAreaM49", "measuredItemChildCPC",
#         "timePointYears", "Value", "flagObservationStatus",
#         "flagMethod"),
#       with = FALSE
#     ]
#   )
# 
# setnames(dataprodchild, "Value", "production_of_child")
# 
# message("Line 2200")
# 
# # to avoid re-estimation based on estimated data (processed and production of child) from 2014 onwards
# dataprodchild[timePointYears > 2013, production_of_child := NA_real_]
# 
# data_tree <-
#   dt_left_join(
#     data_tree,
#     dataprodchild,
#     by = c(p$geoVar, p$childVar, p$yearVar)
#   )
# 
# # shareDownUps for zeroweights are calculated separately
# # to avoid double counting when aggregating processed quantities of parents
# 
# # dataset containing information on zeroweight commodities
# data_zeroweight <- data_tree[measuredItemChildCPC %chin% zeroWeight]
# 
# # import data for coproduct relation
zw_coproduct <-
  coproduct_for_sharedownup[,
    .(zeroweight = measured_item_child_cpc, measuredItemChildCPC = branch)
  ]
# 
# zw_coproduct <- unique(zw_coproduct, by = c("measuredItemChildCPC", "zeroweight"))
# 
# # We subset the zeroweight coproduct reference table, taking only zeroweights and their coproducts
# # that are child commodities in the tree of the country
# zw_coproduct <-
#   zw_coproduct[
#     measuredItemChildCPC %chin% data_tree$measuredItemChildCPC &
#       zeroweight %chin% data_tree$measuredItemChildCPC
#   ]
# 
# 
# # Computing information for non zeroweight commodities
# data_tree <- data_tree[measuredItemChildCPC %!in% zeroWeight]
# 
# # this dataset will be used when creating processing share and shareUpdown
# dataComplete <- copy(data_tree)
# 
# # Quantity of parent destined to the production of the given child (only for children with one parent, for the moment)
# data_tree[, processed_to_child := ifelse(number_of_parent == 1, production_of_child, NA_real_)]
# 
# # if a parent has one child, all the production of the child comes from that parent
# data_tree[
#   number_of_children == 1,
#   processed_to_child := parent_qty_processed * extractionRate,
#   processed_to_child
# ]
# 
# data_tree[production_of_child == 0, processed_to_child := 0]
# 
# # assigning the entire availability to processed for parents having only processed as utilization
# data_tree[
#   number_of_children == 1 & unique_proc == TRUE,
#   processed_to_child := availability * extractionRate
# ]
# 
# 
# # mirror assignment for imputing processed quantities of multiple-parent children
# # 5 loops are sufficient to deal with all the cases
# 
# for (k in 1:5) {
#   data_tree <- RemainingToProcessedParent(data_tree)
#   data_tree <- RemainingProdChildToAssign(data_tree)
# }
# 
# data_tree <- RemainingToProcessedParent(data_tree)
# 
# # proportional allocation of the remaining production of multiple-parent children
# data_tree[,
#   processed_to_child :=
#     ifelse(
#       number_of_parent > 1 & is.na(processed_to_child),
#       (remaining_to_process_child * is.na(processed_to_child) * remaining_processed_parent) / sum((remaining_processed_parent * is.na(processed_to_child)), na.rm = TRUE),
#       processed_to_child),
#   by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
# ]
# 
# # Update of remaining production to assign (should be zero for 2000:2013)
# data_tree[,
#   parent_already_processed :=
#     ifelse(
#       is.na(parent_qty_processed),
#       parent_qty_processed,
#       sum(processed_to_child / extractionRate,na.rm = TRUE)
#     ),
#   by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
# ]
# 
# data_tree[, remaining_processed_parent := round(parent_qty_processed - parent_already_processed)]
# 
# data_tree[
#   remaining_processed_parent < 0,
#   remaining_processed_parent := 0
# ]
# 
# 
# # Impute processed quantity for 2014 onwards using a 3-year average
# # (this is only to impute shareDownUp)
# data_tree <-
#   data_tree[
#     order(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, timePointYears),
#     processed_to_child_avg := rollavg(processed_to_child, order = 3),
#     by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")
#   ]
# 
# setkey(data_tree, NULL)
# 
# data_tree[timePointYears > 2013 & is.na(processed_to_child), processed_to_child := processed_to_child_avg]
# 
# 
# # Back to zeroweight cases (we assign to zeroweights the processed quantity of their coproduct, already calculated)
# 
# zw_coproduct_bis <-
#   merge(
#     data_tree,
#     zw_coproduct,
#     by = "measuredItemChildCPC",
#     allow.cartesian = TRUE,
#     all.y = TRUE
#   )
# 
# zw_coproduct_bis[,
#   `:=`(
#     measuredItemChildCPC = zeroweight,
#     processed_to_child = processed_to_child / extractionRate
#   )
# ]
# 
# sel_vars <- 
#   c("geographicAreaM49", "measuredItemParentCPC",
#     "measuredItemChildCPC", "timePointYears")
# 
# zw_coproduct_bis <-
#   zw_coproduct_bis[, c(sel_vars, "processed_to_child"), with = FALSE]
# 
# # Correction to milk tree issue ( zeroweight can be associated with 2 main products from the same parent)
# # example: butter of cow milk
# zw_coproduct_bis[,
#   processed_to_child := sum(processed_to_child, na.rm = TRUE),
#   by = sel_vars 
# ]
# 
# zw_coproduct_bis <-
#   unique(zw_coproduct_bis, by = colnames(zw_coproduct_bis))
# 
# data_zeroweight <-
#   dt_left_join(data_zeroweight, zw_coproduct_bis, by = sel_vars)
# 
# data_zeroweight[,
#   processed_to_child := processed_to_child * extractionRate
# ]
# 
# sel_vars <-
#   c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC",
#     "timePointYears", "number_of_parent", "parent_qty_processed",
#     "production_of_child", "processed_to_child")
# 
# data_zeroweight <- data_zeroweight[, sel_vars, with = FALSE]
# 
# data_tree <- data_tree[, sel_vars, with = FALSE]
# 
# #combining zeroweight and non zero weight commodities
# data_tree <- rbind(data_tree, data_zeroweight)
# 
# #calculate ShareDownUp
# data_tree[,
#   shareDownUp := processed_to_child / sum(processed_to_child, na.rm = TRUE),
#   by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")
# ]
# 
# #some corrections...
# 
# data_tree[is.na(shareDownUp) & number_of_parent == 1, shareDownUp := 1]
# 
# data_tree[
#   (production_of_child == 0 | is.na(production_of_child)) &
#     measuredItemChildCPC %!in% zeroWeight &
#     timePointYears < 2014,
#   shareDownUp := 0
# ]
# 
# data_tree[
#   (parent_qty_processed == 0 | is.na(parent_qty_processed)) &
#     timePointYears < 2014,
#   shareDownUp :=0
# ]
# 
# data_tree[is.na(shareDownUp), shareDownUp := 0]
# 
# data_tree <-
#   unique(
#     data_tree[,
#       c("geographicAreaM49", "measuredItemParentCPC",
#         "measuredItemChildCPC", "timePointYears", "shareDownUp"),
#       with = FALSE
#     ]
#   )

######################## GENERATING ProcessedShare for parents #####################################
######################## and ShareUpDown for children ##############################################
####################################################################################################

dataProcessingShare[,
                    availability :=
                      sum(
                        Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
                        - Value[get(p$elementVar) %in% c(p$exportCode, "stockChange")],
                        na.rm = TRUE
                      ),
                    #by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
                    by = c(p$geoVar, p$yearVar, p$parentVar)
                    ]
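# Worked example (comment only): availability here is
#   availability = production + imports - exports - stockChange
# e.g. with hypothetical values production = 120, imports = 30,
# exports = 40 and stockChange = 10:
#   availability = 120 + 30 - 40 - 10 = 100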


## For BC compilation we do not need to create the sums of 2014-2019 loss and processed
## (items not present in 2014-2019 are not deleted for 2010:2013, though)
dataProcessingShare[,
  SumLoss := sum(Value[measuredElementSuaFbs == "loss" & timePointYears %in% 2014:2019], na.rm = TRUE),
  by = c("geographicAreaM49", "measuredItemParentCPC")
]

dataProcessingShare[,
  SumProd := sum(Value[measuredElementSuaFbs == "production" & timePointYears %in% 2014:2019], na.rm = TRUE),
  by = c("geographicAreaM49", "measuredItemParentCPC")
]

dataProcessingShare[,
  parent_qty_processed := Value[measuredElementSuaFbs == "foodManufacturing"],
  by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
]

dataProcessingShare[,
  Prod := Value[measuredElementSuaFbs == "production"],
  by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
]

dataProcessingShare[, RatioLoss := SumLoss / SumProd]

dataProcessingShare[,
                    loss_Median_before :=
                      median(
                        Value[measuredElementSuaFbs == "loss" & timePointYears %in% 2000:2013],
                        na.rm=TRUE
                      ),
                    by = c(p$parentVar, p$geoVar)
                    ]

dataProcessingShare[,NewLoss:=ifelse(is.na(loss_Median_before) & !is.na(RatioLoss),TRUE,FALSE)]

# dataProcessingShare[,
#                     NewLoss :=
#        # have  loss for 2014-2019 & ...
#        !is.na(RatioLoss)&
#           # does not have past losses &
#        is.na(loss_Median_before) &
#        # ... loss,process and tourist are the only utilization
#        all(is.na(Value[!(measuredElementSuaFbs %chin%
#                            c('loss', 'production', 'imports',
#                              'exports', 'stockChange','foodManufacturing', 'tourist'))])),
#      by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
#      ]

# dataset containing the processed quantity of parents
dataProcessingShare <-
  unique(
    dataProcessingShare[,
      c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Prod",
        "availability","RatioLoss","NewLoss","parent_qty_processed"),
      with = FALSE
      ],
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears", "Prod",
           "availability","RatioLoss","NewLoss","parent_qty_processed")
  )
 #setnames(dataProcessingShare, "Value", "Prod")

# Processing share and ShareUpDown
dataProcessingShare <-
  unique(
    dataProcessingShare[,
                        c("geographicAreaM49", "timePointYears", "availability",
                          "measuredItemParentCPC", "parent_qty_processed",
                          "RatioLoss","NewLoss","Prod"),
                        with = FALSE
                        ],
    by = c("geographicAreaM49", "timePointYears",
           "measuredItemParentCPC", "parent_qty_processed")
  )

dataProcessingShare[timePointYears %in% as.character(startYear:endYear), parent_qty_processed := NA_real_]

# correction of Pshare to adjust processed in case of new loss

dataProcessingShare[is.na(RatioLoss),RatioLoss:=0]
dataProcessingShare[is.na(Prod),Prod:=0]


# dataProcessingShare[,
#   Pshare := ifelse(NewLoss==TRUE,(parent_qty_processed-RatioLoss*Prod) / availability,
#                    parent_qty_processed / availability),
#   by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
# ]


dataProcessingShare[,
  Pshare := parent_qty_processed / availability,
  by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
]

dataProcessingShare[Pshare > 1, Pshare := 1]
dataProcessingShare[Pshare < 0,Pshare := 0]
dataProcessingShare[is.nan(Pshare), Pshare := NA_real_]

dataProcessingShare <-
  dataProcessingShare[
    order(geographicAreaM49, measuredItemParentCPC, -timePointYears),
    Pshare_avr := rollavg(Pshare, order = 3),
    by = c("geographicAreaM49", "measuredItemParentCPC")
  ]

setkey(dataProcessingShare, NULL)

# we estimate Pshare for 2010-2013 (the back-compilation years), even if processed quantities were
# modified manually, because at this stage the availabilities are known (stocks are not generated yet)
dataProcessingShare[timePointYears %in% as.character(startYear:endYear), Pshare := Pshare_avr]
dataProcessingShare[, Pshare_avr := NULL]
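# Illustration (not executed): Pshare is the processed share of availability
# (parent_qty_processed / availability, clamped to [0, 1] above). Because the
# table is ordered by -timePointYears, rollavg() runs backwards in time:
# assuming it is faoswsUtil's order-3 moving average that extends a series
# over missing values, a toy series ordered 2019 -> 2010 such as
#   c(0.50, 0.40, 0.45, NA, NA, NA, NA, NA, NA, NA)
# has its NA positions (2016 back to 2010) filled recursively with the mean
# of the three most recent filled values; this is how the 2010-2013 Pshare
# gap is back-cast from the 2014-2019 estimates.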

data_Pshare <-
  dataProcessingShare[,
    c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
      "parent_qty_processed", "Pshare","NewLoss"),
    with = FALSE
  ]

data_Pshare <- merge(
  data_Pshare,
  data[
    measuredElementSuaFbs == "loss",
    .(geographicAreaM49, timePointYears,
      measuredItemParentCPC = measuredItemSuaFbs, Loss = Value)
  ],
  by = c(p$geoVar, p$parentVar, p$yearVar),
  all.x = TRUE
)
# 
# # calculating shareUpdown for each child
# data_ShareUpDoawn <-
#   dataComplete[,
#     c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
#       "availability", "parent_qty_processed", "measuredItemChildCPC",
#       "extractionRate", "production_of_child", "number_of_children"),
#     with = FALSE
#   ]
#   
# data_ShareUpDoawn <-
#   unique(
#     data_ShareUpDoawn,
#     by = colnames(data_ShareUpDoawn)
#   )
# 
# data_ShareUpDoawn <- merge(
#   data_ShareUpDoawn,
#   data_tree,
#   by = c("geographicAreaM49", "measuredItemParentCPC",
#          "measuredItemChildCPC", "timePointYears"),
#   all = TRUE
# )
# 
# data_ShareUpDoawn <- data_ShareUpDoawn[measuredItemChildCPC %!in% zeroWeight]
# data_ShareUpDoawn[, shareUpDown := NA_real_]
# 
# data_ShareUpDoawn[
#   !is.na(parent_qty_processed) & extractionRate>0,
#   shareUpDown := (production_of_child / extractionRate) * shareDownUp / sum(production_of_child / extractionRate * shareDownUp, na.rm = TRUE),
#   by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
# ]
# 
# data_ShareUpDoawn[is.nan(shareUpDown), shareUpDown := NA_real_]
# 
# data_ShareUpDoawn <-
#   data_ShareUpDoawn[
#     order(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, timePointYears),
#     shareUpDown_avg := rollavg(shareUpDown, order = 3),
#     by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")
#   ]
# 
# setkey(data_ShareUpDoawn, NULL)
# 
# data_ShareUpDoawn[timePointYears > 2013, shareUpDown := shareUpDown_avg]
# 
# data_ShareUpDoawn[, shareUpDown_avg := NULL]
# 
# data_ShareUpDoawn[
#   timePointYears > 2013,
#   shareUpDown := shareUpDown / sum(shareUpDown, na.rm = TRUE),
#   by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
# ]
# 
# sel_vars <-
#   c("geographicAreaM49", "measuredItemParentCPC",
#     "measuredItemChildCPC", "timePointYears", "shareUpDown")
# 
# data_ShareUpDoawn_final <- data_ShareUpDoawn[, sel_vars, with = FALSE]
# 
# shareUpDown_zeroweight <-
#   merge(
#     data_ShareUpDoawn_final,
#     zw_coproduct,
#     by = "measuredItemChildCPC",
#     allow.cartesian = TRUE,
#     all.y = TRUE
#   )
# 
# shareUpDown_zeroweight[, measuredItemChildCPC := zeroweight]
# 
# shareUpDown_zeroweight <- shareUpDown_zeroweight[, sel_vars, with = FALSE]
# 
# # Correction to milk tree issue ( zeroweight can be associated with 2 main products from the same parent)
# # example: butter of cow milk
# shareUpDown_zeroweight<-
#   shareUpDown_zeroweight[,
#      shareUpDown := sum(shareUpDown, na.rm = TRUE),
#      by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC", "timePointYears")
#   ]
# 
# shareUpDown_zeroweight <-
#   unique(
#     shareUpDown_zeroweight,
#     by = colnames(shareUpDown_zeroweight)
#   )
# 
# # /correction
# 
# 
# data_ShareUpDoawn_final <- rbind(data_ShareUpDoawn_final, shareUpDown_zeroweight)
# 
# # some correction
# data_ShareUpDoawn_final[is.na(shareUpDown), shareUpDown := 0]
# data_ShareUpDoawn_final[!is.na(shareUpDown), flagObservationStatus := "I"]
# data_ShareUpDoawn_final[!is.na(shareUpDown), flagMethod := "c"]
# 
# 
# # sAVE DATA TO SWS
# 
# shareUpDown_to_save <- copy(data_ShareUpDoawn_final)
# 
# shareUpDown_to_save[, measuredElementSuaFbs := "5431"]
# 
# setnames(
#   shareUpDown_to_save,
#   c("measuredItemParentCPC", "measuredItemChildCPC"),
#   c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree")
# )
# 
# setnames(shareUpDown_to_save, "shareUpDown", "Value")

#shareUpDown_to_save[, Value := round(Value, 3)]
message("Line 2628")

sessionKey_shareUpDown <- swsContext.datasets[[3]]


CONFIG <- GetDatasetConfig(sessionKey_shareUpDown@domain, sessionKey_shareUpDown@dataset)

## changed for back compilation (to include post-2014 years for backwards MA routine)
sessionKey_shareUpDown@dimensions$timePointYears@keys = as.character(startYear:max(refYear))

data_shareUpDown_sws <- GetData(sessionKey_shareUpDown, omitna = FALSE)

#data_shareUpDown_sws[timePointYears %in% as.character(2010:2013), `:=` (Value = NA_real_, flagObservationStatus = NA_character_, flagMethod = NA_character_)]
# consistency check: the sum of shareUpDown by parent should be 1.
message("Line 2690")
# items that have been manually changed
# changedShareItems = unique(data_shareUpDown_sws[flagObservationStatus == "E" & flagMethod == "f", 
#                                                 .(geographicAreaM49, measuredItemParentCPC_tree, timePointYears)])

data_ShareUpDoawn_final_invalid <-
  data_shareUpDown_sws[
    measuredItemChildCPC_tree %!in% zeroWeight,
    .(sum_shares = round(sum(Value, na.rm = TRUE),1)),
    by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC_tree")
  ][
    !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0) 
  ]

data_ShareUpDoawn_final_invalid <- data_ShareUpDoawn_final_invalid[timePointYears %in% startYear:endYear]
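# Illustration (not executed): dplyr::near() compares with a floating-point
# tolerance, so binary rounding noise does not trigger false alarms:
#
#   0.1 + 0.2 == 0.3            # FALSE: binary rounding noise
#   dplyr::near(0.1 + 0.2, 0.3) # TRUE: the tolerance absorbs it
#   dplyr::near(0.8, 1)         # FALSE -> that parent gets flagged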

if (nrow(data_ShareUpDoawn_final_invalid) > 0) {
  data_ShareUpDoawn_final_invalid[, measuredItemParentCPC_tree := paste0("'", measuredItemParentCPC_tree)]
  write.csv(
    data_ShareUpDoawn_final_invalid, 
    file.path(paste0("ShareUpDown_toCorrect_", COUNTRY, ".csv")), row.names = FALSE
  )
  
  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Some shareUpDown are invalid",
      body = c(paste("There are some invalid shareUpDown (they do not sum to 1). See attachment and fix them in the SWS shareUpDown dataset"),
               file.path(paste0("ShareUpDown_toCorrect_", COUNTRY, ".csv")))
    )
  }
  
  stop("Some shareUpDown can create conflict. Check your email.")
}




data_shareUpDown_sws[
  #timePointYears >= 2014 &
  ((measuredItemParentCPC_tree == "02211" & measuredItemChildCPC_tree == "22212") |
     # cheese from whole cow milk cannot come from skim milk of cow
     (measuredItemParentCPC_tree == "22110.02" & measuredItemChildCPC_tree == "22251.01")),
  Value := 0
]

# set the 2010-2013 periods NA
# data_shareUpDown_sws[timePointYears %in% as.character(startYear:endYear), Value := NA ]
setnames(data_shareUpDown_sws,  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree"),
         c("measuredItemParentCPC", "measuredItemChildCPC"))
BC_dataForProc <-
  data_shareUpDown_sws[
     order(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, -timePointYears),
     shareUpDown_avr := rollavg(Value, order = 3),
     by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")
   ]
#test = data_shareUpDown_sws[measuredItemParentCPC == "0112" & measuredItemChildCPC == "23120.03", ]
# creating the zeroweight table for back compilation
BC_dataForProc[timePointYears %in% as.character(startYear:endYear), Value := shareUpDown_avr ]

BC_dataForProc[, shareUpDown_avr := NULL]

BC_dataForProc[is.nan(Value) | is.na(Value), Value := 0]

# moved into the saving condition
BC_dataForProc[ , Value := Value / sum(Value[measuredItemChildCPC %!in% zeroWeight], na.rm = TRUE), 
                 by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")]
BC_zeroweight = BC_dataForProc[measuredItemChildCPC %!in% zeroWeight, ]

# coproduct is defined at the beginning of the shares script (commented out for BC)
setnames(coproduct_for_sharedownup, "branch", "measuredItemChildCPC")

BC_zeroweight =  merge(BC_zeroweight, coproduct_for_sharedownup, by = c("measuredItemChildCPC"), all.y=TRUE)

BC_zeroweight[, measuredItemChildCPC := measured_item_child_cpc]

BC_zeroweight[, measured_item_child_cpc := NULL]

BC_zeroweight[, flagObservationStatus := "I"]
BC_zeroweight[, flagMethod := "c"]

# take care of the skim milk multiple-child problem (we lose the flags)
BC_zeroweight = unique(BC_zeroweight[, Value := sum(Value, na.rm = T), by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC", "timePointYears")])


# BC_zeroweight[, Value := ifelse(measuredItemChildCPC == "22110.02", sum(Value), Value), 
#               by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC", "timePointYears")]

# BC_zeroweight = BC_zeroweight[!duplicated(BC_zeroweight[, .(geographicAreaM49, measuredItemChildCPC, measuredItemParentCPC, timePointYears)])]
# saving shareUpDown for the first time; all flags are (I, c), like production of derived items

BC_dataForProc = rbind(BC_dataForProc[measuredItemChildCPC %!in% zeroWeight, ], BC_zeroweight)

# if (nrow(data_shareUpDown_sws) == 0) {
#   faosws::SaveData(
#     domain = "suafbs",
#     dataset = "up_down_share",
#     data = shareUpDown_to_save,
#     waitTimeout = 20000
#   )
# } else {
#  
#   setnames(
#     data_shareUpDown_sws,
#     c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree", "Value"),
#     c("measuredItemParentCPC", "measuredItemChildCPC", "shareUpDown")
#   )
  
  # data_ShareUpDoawn_to_use <- copy(data_shareUpDown_sws)
  # 
  # data_ShareUpDoawn_to_use <-
  #   data_ShareUpDoawn_to_use[, colnames(data_ShareUpDoawn_final), with = FALSE]
  # 
  # data_ShareUpDoawn_final<-data_ShareUpDoawn_final[timePointYears %in% as.character(2014:2019)]
  
  #If a new parent-child connection is added, the shareUpDown of all the children is affected,
  #so we overwrite the shares of all the children of the parent in the new connection
sharesDataFromTree = copy(tree)

sharesDataFromTree = sharesDataFromTree[measuredElementSuaFbs == "extractionRate" & timePointYears %in% as.character(startYear:endYear), ]

sharesDataFromTree[, processingLevel := NULL]
sharesDataFromTree = unique(sharesDataFromTree)  
# initialize shares
sharesDataFromTree[,  `:=` (measuredElementSuaFbs = "5431", Value = 0, flagObservationStatus ="I", flagMethod = "c")]


new_connection <- sharesDataFromTree[!BC_dataForProc,
                                          on = c('geographicAreaM49', 'measuredItemParentCPC',
                                                 'measuredItemChildCPC', 'timePointYears')
                                          ]

BC_dataForProc = rbind(BC_dataForProc, new_connection)
  
if (nrow(new_connection) > 0) {
  setnames(new_connection, c("measuredItemParentCPC", "measuredItemChildCPC"), c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree"))
  faosws::SaveData(
    domain = "suafbs",
    dataset = "up_down_share",
    data = new_connection,
    waitTimeout = 20000
  )
}


#   # rbind with anti_join
#   data_ShareUpDoawn_final <-
#     rbind(
#       data_ShareUpDoawn_to_use[!new_connection,           #we exclude parent included in the new connection
#                                on = c('geographicAreaM49',
#                                       'measuredItemParentCPC',
#                                       'timePointYears')],
#       unique(
#         data_ShareUpDoawn_final[
#         unique(new_connection[,c('geographicAreaM49',
#                           'measuredItemParentCPC',
#                           'timePointYears'),with=FALSE]),
#         on = c('geographicAreaM49',
#                'measuredItemParentCPC',
#                'timePointYears')
#       ]
#       ),
#       fill = TRUE
#     )
# 
# }

# consistency check: the sum of shareUpDown by parent should be 1.
message("Line 2690")

data_ShareUpDoawn_final_invalid <-
  BC_dataForProc[
    measuredItemChildCPC %!in% zeroWeight,
    .(sum_shares = round(sum(Value, na.rm = TRUE),1)),
    by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
  ][
    !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0)
  ]

data_ShareUpDoawn_final_invalid<-data_ShareUpDoawn_final_invalid[timePointYears%in%startYear:endYear]


if (nrow(data_ShareUpDoawn_final_invalid) > 0) {
  
  write.csv(
    data_ShareUpDoawn_final_invalid,
    file.path(paste0("ShareUpDown_toCorrect_", COUNTRY, ".csv"))
  )
  
  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Some shareUpDown are invalid",
      body = c(paste("There are some invalid shareUpDown (they do not sum to 1). See attachment and fix them in the SWS shareUpDown dataset"),
               file.path(paste0("ShareUpDown_toCorrect_", COUNTRY, ".csv")))
    )
  }
  
  stop("Some shareUpDown can create conflict. Check your email.")
}
# 
# if(nrow(data_ShareUpDoawn_final)!=0){
#   
#   
# }

#/processingShare---------------------

message("Line 2729")


# / Tables that will be required by the co-product issue (fix processingShare)

sessionKey_shareDownUp <- swsContext.datasets[[4]]

CONFIG <- GetDatasetConfig(sessionKey_shareDownUp@domain, sessionKey_shareDownUp@dataset)

## changed for back compilation (to include post-2014 years for backwards MA routine)
sessionKey_shareDownUp@dimensions$timePointYears@keys = as.character(startYear:max(refYear))

data_shareDownUp_sws <- GetData(sessionKey_shareDownUp, omitna = FALSE)

# Check on consistency of shareDownUp
shareDownUp_invalid <-
  data_shareDownUp_sws[,
                .(sum_shares = round(sum(Value),1)),
                by = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC_tree")
  ][
    !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0)
  ][
    timePointYears  %in% as.character(startYear:endYear)
  ]

if (nrow(shareDownUp_invalid) > 0) {
  
  shareDownUp_invalid <-
    data_shareDownUp_sws[
      shareDownUp_invalid,
      on = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC_tree")
    ]
  
  write.csv(
    shareDownUp_invalid,
    file.path(paste0("shareDownUp_INVALID_", COUNTRY, ".csv"))
  )
  
  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Some shareDownUp are invalid",
      body = c(paste("There are some invalid shareDownUp (they do not sum to 1). See attachment and fix them in", sub("/work/SWS_R_Share/", "", shareDownUp_file)),
               file.path(paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
    )
  }
  
  unlink(file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
  
  stop("Some shares are invalid. Check your email.")
}

# ########### set parents present in sws and not present in the estimated data to zero ###########
# # unique(data_shareDownUp_sws$geographicAreaM49) created problems if the downUp session starts empty; changed to data_tree
# data_tree_test <-
#   merge(
#     data.table(
#       geographicAreaM49 = unique(data_tree$geographicAreaM49),
#       timePointYears = sort(unique(data_tree$timePointYears))
#     ),
#     unique(
#       data_shareDownUp_sws[,
#            list(geographicAreaM49, 
#                 measuredItemParentCPC = measuredItemParentCPC_tree, 
#                 measuredItemChildCPC = measuredItemChildCPC_tree)
#            ]
#     ),
#     by = "geographicAreaM49",
#     all = TRUE,
#     allow.cartesian = TRUE
#   )
# 
# # unsolved issue
# #diff_test <- data_tree[!data_tree_test, on = c("geographicAreaM49","measuredItemParentCPC",
# #                                              "measuredItemChildCPC","timePointYears")]
# 
# missing_connections <- data_tree_test[!data_tree, on = c("geographicAreaM49","measuredItemParentCPC",
#                                                "measuredItemChildCPC","timePointYears")]
# 
# missing_connections <- missing_connections[, shareDownUp := 0 ]
# 
# missing_connections <- missing_connections[, list(geographicAreaM49,measuredItemParentCPC,measuredItemChildCPC,
#                                         timePointYears,shareDownUp)]
# 
# 
# data_tree <- rbind(data_tree, missing_connections)
# 
# ### WE are here######
# 
# 
# 
# if (nrow(data_shareDownUp_sws)!=0) {
#   
#   SHAREDOWNUP_LOADED <- TRUE
#   
#   shareDownUp_previous <-copy(data_shareDownUp_sws)
#   setnames(shareDownUp_previous,c("Value","measuredItemParentCPC_tree","measuredItemChildCPC_tree"),
#            c("shareDownUp","measuredItemParentCPC","measuredItemChildCPC"))
#  
#   #/correction
  

### BACK COMPILATION DOWNUP SHARES

# TODO: clear the shares in pullData (or work with flags somehow). For now all shares will be created every time balancing is run
# set the 2010-2013 periods NA
# data_shareDownUp_sws[timePointYears %in% as.character(startYear:endYear), Value := NA ]

setnames(data_shareDownUp_sws,  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree"),
         c("measuredItemParentCPC", "measuredItemChildCPC"))
BC_dataDownUp <-
  data_shareDownUp_sws[
    order(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, -timePointYears),
    shareDownUp_avr := rollavg(Value, order = 3),
    by = c("geographicAreaM49", "measuredItemParentCPC", "measuredItemChildCPC")
  ]

# creating the zeroweight table for back compilation
BC_dataDownUp[timePointYears %in% as.character(startYear:endYear), Value := shareDownUp_avr ]

BC_dataDownUp[, shareDownUp_avr := NULL]

BC_dataDownUp[is.nan(Value) | is.na(Value), Value := 0]


 BC_dataDownUp[ , Value := Value / sum(Value, na.rm = TRUE), 
                by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears")]


  # Check on consistency of shareDownUp
  shareDownUp_invalid <-
    BC_dataDownUp[,
      .(sum_shares = round(sum(Value),1)),
      by = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC")
    ][
      !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0)
    ][
      timePointYears  %in% as.character(startYear:endYear)
    ]
  
  if (nrow(shareDownUp_invalid) > 0) {
    
    shareDownUp_invalid <-
      BC_dataDownUp[
        shareDownUp_invalid,
        on = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC")
      ]
    
    write.csv(
      shareDownUp_invalid,
      file.path(paste0("shareDownUp_INVALID_", COUNTRY, ".csv"))
    )
    
    if (!CheckDebug()) {
      send_mail(
        from = "do-not-reply@fao.org",
        to = swsContext.userEmail,
        subject = "Some shareDownUp are invalid",
        body = c(paste("There are some invalid shareDownUp (they do not sum to 1). See attachment and fix them in", sub("/work/SWS_R_Share/", "", shareDownUp_file)),
                 file.path(paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
      )
    }
    
    unlink(file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
    
    stop("Some shares are invalid. Check your email.")
  }
  
  # shareDownUp_previous[,
  #   `:=`(
  #     measuredItemParentCPC = sub("'", "", measuredItemParentCPC),
  #     measuredItemChildCPC = sub("'", "", measuredItemChildCPC)
  #   )
  # ]
  
# setnames(shareDownUp_previous, "shareDownUp", "shareDownUp_prev")
  
  
#} else {
  
#   SHAREDOWNUP_LOADED <- FALSE
#   
#   shareDownUp_previous <-
#     data.table(
#       geographicAreaM49 = character(),
#       timePointYears = character(),
#       measuredItemParentCPC = character(),
#       measuredItemChildCPC = character(),
#       shareDownUp_prev = numeric(),
#       protect_share = logical()
#     )
# }

# final shareDownUp

# if (nrow(shareDownUp_previous) > 0) {
#   data_tree_final <-
#     dt_left_join(
#       data_tree,
#       shareDownUp_previous,
#       by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
#     )
  

  # data_tree_final[flagObservationStatus == "E" & flagMethod=="f", shareDownUp := shareDownUp_prev]
  # 
  # data_tree_final[!is.na(shareDownUp) & is.na(flagMethod),`:=`(flagObservationStatus="I", flagMethod="i",
  #                                                              measuredElementSuaFbs="5432")]
  
  message("Line 2830")
  
  ## Automatic update of shareDownUp (to discuss with Salar)
  #data_tree_final[,shareDownUp1:=ifelse(protect_share==FALSE & sum(protect_share==TRUE)>0,
  #                                     shareDownUp/sum(shareDownUp[protect_share==FALSE],na.rm = TRUE)*(1-shareDownUp[protect_share==TRUE]),
  #                                     shareDownUp
  #),
  #by=c(p$geoVar,p$yearVar,p$childVar)
  #]
#   data_tree_final[, shareDownUp_prev := NULL]
#   data_tree_final_save <- copy(data_tree_final)
#   
#   data_tree_final[,`:=`(measuredElementSuaFbs=NULL,flagObservationStatus=NULL,flagMethod=NULL)]
#   data_tree_final <- unique(data_tree_final, by = colnames(data_tree_final))
# } else {
#   data_tree_final <- data_tree
#   data_tree_final_save <- copy(data_tree_final)
#   data_tree_final_save[,`:=`(flagObservationStatus="I", flagMethod="i",
#                              measuredElementSuaFbs="5432")]
#   # write.csv(data_tree_final_save, shareDownUp_file, row.names = FALSE)
# }


# # Check on consistency of shareDownUp
# shareDownUp_invalid <-
#   data_tree_final[,
#     .(sum_shares = round(sum(shareDownUp),1)),
#     by = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC")
#   ][
#     !dplyr::near(sum_shares, 1) & !dplyr::near(sum_shares, 0)
#   ][
#     timePointYears >= 2014
#   ]
# 
# if (nrow(shareDownUp_invalid) > 0) {
#   
#   shareDownUp_invalid <-
#     data_tree_final[
#       shareDownUp_invalid,
#       on = c("geographicAreaM49", "timePointYears", "measuredItemChildCPC")
#     ]
#   
#   write.csv(
#     shareDownUp_invalid,
#     file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_INVALID_", COUNTRY, ".csv"))
#   )
#   
#   if (!CheckDebug()) {
#     send_mail(
#       from = "do-not-reply@fao.org",
#       to = swsContext.userEmail,
#       subject = "Some shareDownUp are invalid",
#       body = c(paste("There are some invalid shareDownUp (they do not sum to 1). See attachment and fix them in", sub("/work/SWS_R_Share/", "", shareDownUp_file)),
#                file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
#     )
#   }
#   
#   unlink(file.path(R_SWS_SHARE_PATH, USER, paste0("shareDownUp_INVALID_", COUNTRY, ".csv")))
#   
#   stop("Some shares are invalid. Check your email.")
# }

# / Check on consistency of shareDownUp

# setnames(data_tree_final_save,c("measuredItemParentCPC","measuredItemChildCPC","shareDownUp"),
#          c("measuredItemParentCPC_tree","measuredItemChildCPC_tree","Value"))


# len_downUp <- nrow(data_tree_final_save)
# 
# i = 1
# 
# while(i < len_downUp){
#     j = i + 500
#     if(j > len_downUp){
#       j = len_downUp
#     }
#     
#     faosws::SaveData(
#       domain = "suafbs",
#       dataset = "down_up_share",
#       data = data_tree_final_save[i:j,],
#       waitTimeout = 20000
#     )
#   
#     i = j + 1
# }

# Prepare for save in SWS
setnames(BC_dataDownUp, c("measuredItemParentCPC", "measuredItemChildCPC"), c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree"))
BC_dataDownUp = BC_dataDownUp[timePointYears %in% as.character(startYear:endYear),]
BC_dataDownUp[is.na(Value), flagObservationStatus := NA]
BC_dataDownUp[is.na(Value), flagMethod := NA]
# for UpDown
setnames(BC_dataForProc, c("measuredItemParentCPC", "measuredItemChildCPC"), c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree"))
BC_dataForProc = BC_dataForProc[timePointYears %in% as.character(startYear:endYear),]
BC_dataForProc[is.na(Value), flagObservationStatus := NA]
BC_dataForProc[is.na(Value), flagMethod := NA]

sessionKey_downUp = swsContext.datasets[[4]]
CONFIG <- GetDatasetConfig(sessionKey_downUp@domain, sessionKey_downUp@dataset)
datatoClean=GetData(sessionKey_downUp)


# Check if shares are present, otherwise write them (for back compilation)
## save the share estimates back only once (we keep the reference-year structure)

if (nrow(datatoClean[timePointYears %in% as.character(startYear:endYear), ]) == 0) {

  # BC_dataDownUp[ , Value := Value / sum(Value, na.rm = TRUE),
  #                by = c("geographicAreaM49", "measuredItemChildCPC_tree", "timePointYears")]

  faosws::SaveData(
    domain = "suafbs",
    dataset = "down_up_share",
    data = BC_dataDownUp,
    waitTimeout = 20000
  )
}
data_tree_final <-
  BC_dataDownUp[,
    .(geographicAreaM49,
      measuredItemParentCPC = measuredItemParentCPC_tree,
      measuredItemChildCPC = measuredItemChildCPC_tree,
      timePointYears,
      shareDownUp = Value)
  ]

data_tree_final[is.nan(shareDownUp) | is.na(shareDownUp), shareDownUp := 0]

sessionKey_upDown = swsContext.datasets[[3]]
CONFIG <- GetDatasetConfig(sessionKey_upDown@domain, sessionKey_upDown@dataset)
datatoClean=GetData(sessionKey_upDown)


if (nrow(datatoClean[timePointYears %in% as.character(startYear:endYear), ]) == 0) {

  # BC_dataForProc[ , Value := Value / sum(Value[measuredItemChildCPC_tree %!in% zeroWeight], na.rm = TRUE),
  #                 by = c("geographicAreaM49", "measuredItemParentCPC_tree", "timePointYears")]

  BC_dataForProc[!is.na(Value), `:=`(flagObservationStatus = "I", flagMethod = "c")]

  faosws::SaveData(
    domain = "suafbs",
    dataset = "up_down_share",
    data = BC_dataForProc,
    waitTimeout = 20000
  )
}

data_ShareUpDown_final = BC_dataForProc[, list(geographicAreaM49,
                                               measuredItemParentCPC = measuredItemParentCPC_tree,
                                               measuredItemChildCPC = measuredItemChildCPC_tree,
                                               timePointYears = timePointYears,
                                               shareUpDown = Value,
                                               flagObservationStatus, flagMethod)]
data_ShareUpDown_final[is.nan(shareUpDown) | is.na(shareUpDown), shareUpDown := 0]

message("Line 2905")

# / ShareDownUp -------------------------------------------------------



# stockable items for which a historical series of at least
# 3 non-missing/non-zero data points exists (threshold changed in 2021)
historical_avail_stocks <-
  data[
    measuredElementSuaFbs == "stockChange" &
      timePointYears > endYear & # we reversed this for back compilation and check for stocks in 2014-2019
      !is.na(Value) &
      stockable == TRUE,
    .(n = sum(!dplyr::near(Value, 0))),
    by = c("geographicAreaM49", "measuredItemSuaFbs")
  ][
    n >= 3
  ][,
    n := NULL
  ]
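
# E.g., an item whose stockChange series over 2015-2019 is (10, 0, -5, NA, 3)
# contributes n = 3 non-zero, non-missing points and is therefore kept.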



# Keep processingShare and shareDownUp
computed_shares <- list()
computed_shares_send <- list()
# Keep negative availability
negative_availability <- list()

# Keep shareUpDown
updated_shareUpDown <- list()

fixed_proc_shares <- list()

original_stock_variation <-
  data[
    measuredElementSuaFbs == "stockChange" &
      # timePointYears >= 2014 &
      timePointYears %in% as.character(startYear:endYear) &
      !is.na(Value),
    c(
      "geographicAreaM49", "measuredItemSuaFbs",
      "timePointYears", "Value", "flagObservationStatus", "flagMethod"
    )
  ]

dbg_print("starting derived production loop")

#if (length(primaryInvolvedDescendents) == 0) {
  message("No primary commodity involved in this country")
#} else {
  
  #USED TO AVOID DUPLICATES
  data[, Processed_estimed := FALSE]
  
  for (lev in sort(unique(tree$processingLevel))) {
    #testing purpose
     #lev=0
    dbg_print(paste("derived production loop, level", lev))

    treeCurrentLevel <-
      tree[
        !is.na(Value) &
          processingLevel == lev &
          measuredElementSuaFbs == 'extractionRate',
        .(
          measuredItemParentCPC,
          geographicAreaM49,
          measuredItemChildCPC,
          timePointYears,
          extractionRate = Value
        )
      ]
    
    # tree containing children of all parents of the current level
    tree_parent_Level<-
      tree[
        !is.na(Value) &
          measuredElementSuaFbs == 'extractionRate' &
          measuredItemParentCPC %chin% treeCurrentLevel[[p$parentVar]],
        .(
          measuredItemParentCPC,
          geographicAreaM49,
          measuredItemChildCPC,
          timePointYears,
          extractionRate = Value,
          processingLevel
        )
      ]

    ############# Create stocks, if missing ####################
    # Stocks will also be generated for cases where data already exist
    condition_for_stocks <-
      data$stockable == TRUE &
      data$measuredItemSuaFbs %chin% items_to_generate_stocks &
      #data$measuredItemSuaFbs %chin% treeCurrentLevel$measuredItemParentCPC &
      data$measuredElementSuaFbs %chin% c('production', 'imports', 'exports') &
      !(is.na(data$production) & is.na(data$imports) & is.na(data$exports))

    if (any(condition_for_stocks)) {
      dbg_print("generating stocks")

      data_histmod_stocks <-
        data[
          measuredItemSuaFbs %chin% historical_avail_stocks$measuredItemSuaFbs
        ]

      if (nrow(data_histmod_stocks) > 0) {
        dbg_print("data for histmod available")

        data_histmod_stocks <-
          data_histmod_stocks[,
            .(
              supply_inc =
                sum(
                  Value[measuredElementSuaFbs %chin% c("production", "imports")],
                  - Value[measuredElementSuaFbs %chin% c("exports", "stockChange")],
                  na.rm = TRUE
                ),
              supply_exc =
                sum(
                  Value[measuredElementSuaFbs %chin% c("production", "imports")],
                  - Value[measuredElementSuaFbs == "exports"],
                  na.rm = TRUE
                )
            ),
            keyby = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
          ][,
            trend := seq_len(.N),
            by = c("geographicAreaM49", "measuredItemSuaFbs")
          ]

        dbg_print("coeffs_stocks_mod")

        data_histmod_stocks[,
          c("c_int", "c_sup", "c_trend") := coeffs_stocks_mod(.SD),
          by = c("geographicAreaM49", "measuredItemSuaFbs")
        ][,
          supply_inc_pred := c_int + c_sup * supply_exc + c_trend * trend,
        ][,
          `:=`(
            min_new_supply_inc = min(supply_inc[supply_inc > 0 & timePointYears >= min(refYear) & timePointYears <= max(refYear)], na.rm = TRUE),
            avg_new_supply_inc = mean(supply_inc[supply_inc > 0 & timePointYears >= min(refYear) & timePointYears <= max(refYear)], na.rm = TRUE)
          ),
          by = c("geographicAreaM49", "measuredItemSuaFbs")
        ][
          supply_inc_pred <= 0 & timePointYears %in% as.character(startYear:endYear),
          supply_inc_pred := min_new_supply_inc
        ][,
          delta_pred := supply_exc - supply_inc_pred
        ]
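
        # In words: per country/item, fit the linear model
        #   supply_inc ~ c_int + c_sup * supply_exc + c_trend * trend
        # (supply including stock changes explained by supply excluding them,
        # plus a time trend) and take
        #   delta_pred = supply_exc - supply_inc_pred
        # as the implied stock variation. E.g., supply_exc = 1000 with a
        # predicted supply_inc_pred = 950 implies a stock build-up of 50.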

        data_histmod_stocks[
          supply_inc > 100,
          abs_stock_perc := abs(supply_exc - supply_inc) / supply_inc
        ]

        data_histmod_stocks[,
          abs_delta_pred_perc := abs(delta_pred) / avg_new_supply_inc
        ]

        # Upper bound from post-2013 years (the commented variant instead drops the single maximum, to avoid extremes)
        data_histmod_stocks[,
          #upper_abs_stock_perc := max(abs_stock_perc[-(which(abs_stock_perc == max(abs_stock_perc, na.rm = TRUE)))], na.rm = TRUE),
          upper_abs_stock_perc := max(abs_stock_perc[timePointYears > 2013], na.rm = TRUE),
          by = c("geographicAreaM49", "measuredItemSuaFbs")
        ]

        # upper_abs_stock_perc can be infinite if abs_stock_perc is always missing
        # (note that abs_stock_perc is calculated only for supply > 100)
        data_histmod_stocks[
          abs_delta_pred_perc > upper_abs_stock_perc &
            timePointYears %in% as.character(startYear:endYear) &
            !is.infinite(upper_abs_stock_perc),
          delta_pred :=
            upper_abs_stock_perc * avg_new_supply_inc * sign(delta_pred)
        ]
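
        # Cap: when the predicted variation exceeds, in relative terms, the
        # largest post-2013 stock share, it is shrunk to that share of the
        # average supply while keeping its sign. E.g., upper_abs_stock_perc =
        # 0.2 and avg_new_supply_inc = 500 cap |delta_pred| at 100.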

        data_for_stocks <-
          data_histmod_stocks[
            timePointYears %in% as.character(startYear:(endYear + 1)) , # changed for BC to bring 2014 on board as well
            .(geographicAreaM49, measuredItemSuaFbs,
              timePointYears, delta = delta_pred)
          ]

        if (nrow(data_for_stocks) > 0) {
          # Attach newly computed opening stocks to the predicted deltas
          data_for_stocks <-
            dt_left_join(
              data_for_stocks,
              all_opening_stocks[,
                .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
                  timePointYears, new_opening = Value)
              ],
              by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
            )

          stockdata <-
            data[
              Protected == TRUE &
                measuredElementSuaFbs == "stockChange" &
                !is.na(Value) &
                measuredItemSuaFbs %chin% data_for_stocks$measuredItemSuaFbs,
              .(
                geographicAreaM49,
                measuredItemSuaFbs,
                timePointYears,
                stockvar = Value
              )
            ]

          data_for_stocks <-
            dt_left_join(
              data_for_stocks,
              stockdata,
              by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
            )

          data_for_stocks[!is.na(stockvar), delta := stockvar]
          
          data_for_stocks[, stockvar := NULL]

          # Stock withdrawal cannot exceed opening stocks in the first year
          data_for_stocks[
            timePointYears == as.character(startYear) & delta < 0 & abs(delta) > new_opening,
            delta := - new_opening
          ]
          
          data_for_stocks[, naOpening := new_opening]
          
          # temporarily zero NA openings (the original NAs are restored below via naOpening)
          data_for_stocks[is.na(new_opening), new_opening := 0]
          
          # NOTE: Data here should be ordered by country/item/year (CHECK)
          data_for_stocks <- update_opening_stocks(data_for_stocks)
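
          # update_opening_stocks() (sourced from R/) is expected to roll the
          # identity opening[t+1] = opening[t] + delta[t] forward within each
          # country/item series. A minimal sketch of that recursion, assuming
          # ordered data (illustration only, not the sourced implementation):
          #
          # update_opening_stocks <- function(dt) {
          #   dt[order(timePointYears),
          #      new_opening := new_opening[1] + cumsum(shift(delta, fill = 0)),
          #      by = .(geographicAreaM49, measuredItemSuaFbs)]
          #   dt
          # }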
          
          
          # For BC: get rid of 2014 year used for balancing
          data_for_stocks = data_for_stocks[timePointYears != 2014,]
          data_for_stocks[is.na(naOpening), new_opening := naOpening ]
          data_for_stocks[,naOpening := NULL ]
          
          data <-
            dt_left_join(
              data,
              data_for_stocks,
              by = c('geographicAreaM49', 'timePointYears', 'measuredItemSuaFbs')
            )
          
          # data<-data[measuredElementSuaFbs=="stockChange" & flagObservationStatus=="E" & flagMethod=="f", Protected:=FALSE]

          data[
            measuredElementSuaFbs == "stockChange" &
              Protected == FALSE &
              !is.na(delta),
            `:=`(
              Value = delta,
              flagObservationStatus = "E",
              flagMethod = "u",
              Protected = FALSE
            )
          ]

          data[, c("delta", "new_opening") := NULL]

          all_opening_stocks <-
            dt_full_join(
              all_opening_stocks,
              data_for_stocks[,
                .(
                  geographicAreaM49,
                  measuredItemFbsSua = measuredItemSuaFbs,
                  timePointYears,
                  new_opening
                )
              ],
              by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
            )

          all_opening_stocks[
            !is.na(new_opening) & (round(new_opening) != round(Value) | is.na(Value)),
            `:=`(
              Value = new_opening,
              flagObservationStatus = "E",
              flagMethod = "u",
              Protected = FALSE
            )
          ]

          all_opening_stocks[, new_opening := NULL]
        }
      }
    }

    ############# / Create stocks, if missing ##################
    
    
    # #Correcting stocks
    # 
    # data_st<-copy(data)
    # 
    # data_st[,
    #         supply :=
    #           sum(
    #             Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
    #             # XXX p$stockCode differs from "stockChange", hence the hardcoded literal
    #             - Value[get(p$elementVar) %in% c(p$exportCode)],
    #             na.rm = TRUE
    #           ),
    #         by = c(p$geoVar, p$yearVar, p$itemVar)
    #         ]
    # data_st<-data_st[measuredElementSuaFbs=="stockChange",]
    # 
    # 
    # data_stocks <-
    #   merge(
    #     data_st,
    #     all_opening_stocks[,list(geographicAreaM49,measuredItemSuaFbs=measuredItemFbsSua,
    #                              timePointYears,openingStocks=Value)],
    #     by = c(p$geoVar, p$itemVar, p$yearVar)
    #   )
    # 
    # data_stocks[,stockCheck:=ifelse(sign(Value) == 1 & Value+openingStocks>2*supply & supply>0,TRUE,FALSE)]
    # 
    # #create max_column:
    # data_stocks[, maxValue := 2*supply-openingStocks]
    # data_stocks[, stocks_checkVal := ifelse(stockCheck == T & sign(maxValue)==1, maxValue , 0)]
    # 
    # 
    # data_stocks[flagObservationStatus=="E" & flagMethod=="f",Protected:=FALSE]
    # 
    # #for(i in 2014:max(unique(as.numeric(data_stocks$timePointYears)))){
    # for(i in startYear:endYear){   # adjusted for back compilation
    # 
    #   data_stocks[, Value:=ifelse(timePointYears==i & stockCheck==TRUE & Protected==FALSE,stocks_checkVal,Value)]
    # 
    #   data_stocks[order(geographicAreaM49,measuredItemSuaFbs, timePointYears),prev_stocks:=c(NA, Value[-.N]),
    #               by= c(p$geoVar, p$itemVar)]
    # 
    #   data_stocks[order(geographicAreaM49,measuredItemSuaFbs, timePointYears),prev_opening:=c(NA, openingStocks[-.N]),
    #               by= c(p$geoVar, p$itemVar)]
    # 
    # 
    #   if(i<max(unique(as.numeric(data_stocks$timePointYears)))){
    # 
    #     data_stocks[,openingStocks:=ifelse(timePointYears==i+1 ,prev_opening+prev_stocks,openingStocks)]
    # 
    #   }
    # 
    # }
    # 
    # 
    # data <-
    #   merge(
    #     data,
    #     data_stocks[,list(geographicAreaM49,measuredItemSuaFbs,measuredElementSuaFbs,
    #                       timePointYears,corr_stocks=Value,stockCheck)],
    #     by = c(p$geoVar,p$elementVar, p$itemVar, p$yearVar),
    #     all.x = TRUE
    #   )


    # data[
    #   measuredElementSuaFbs == "stockChange" &
    #     Protected == FALSE &
    #     !is.na(corr_stocks) &
    #     stockCheck==TRUE,
    #   `:=`(
    #     Value = corr_stocks,
    #     flagObservationStatus = "E",
    #     flagMethod = "n",
    #     Protected = FALSE
    #   )
    #   ]
    # 
    # data[
    #   measuredElementSuaFbs == "stockChange" &
    #     flagObservationStatus=="E" &
    #     flagMethod=="f" &
    #     !is.na(corr_stocks) &
    #     stockCheck==TRUE,
    #   `:=`(
    #     Value = corr_stocks,
    #     flagObservationStatus = "E",
    #     flagMethod = "n",
    #     Protected = FALSE
    #   )
    #   ]
    # 
    # 
    # 
    # data[,corr_stocks:=NULL]
    # data[,stockCheck:=NULL]


    # stock_correction_send<-data[measuredElementSuaFbs == "stockChange" &
    #                               flagObservationStatus=="E" &
    #                               flagMethod=="n",]
    # 
    # write.csv(stock_correction_send,tmp_file_Stock_correction)






    # all_opening_stocks <-
    #   merge(
    #     all_opening_stocks,
    #     data_stocks[,list(geographicAreaM49,measuredItemFbsSua=measuredItemSuaFbs,
    #                       timePointYears,corr_op_stocks=openingStocks,stockCheck)],
    #     by = c(p$geoVar, "measuredItemFbsSua", p$yearVar),
    #     all.x = TRUE
    #   )
    # 
    # all_opening_stocks[, stockCheck:= dplyr::lag(stockCheck)]
    # 
    # all_opening_stocks[
    #   measuredElementSuaFbs == "5113" &
    #     Protected == FALSE &
    #     !is.na(corr_op_stocks) &
    #     stockCheck==TRUE,
    #   `:=`(
    #     Value = corr_op_stocks,
    #     flagObservationStatus = "E",
    #     flagMethod = "u",
    #     Protected = FALSE
    #   )
    #   ]
    # 
    # all_opening_stocks[,corr_op_stocks:=NULL]
    # all_opening_stocks[,stockCheck:=NULL]
    # 
    # 
    message("Line 3185")
    
    dataMergeTree <- data[measuredElementSuaFbs %chin% c('production', 'imports', 'exports', 'stockChange')]
    
    setnames(dataMergeTree, "measuredItemSuaFbs", "measuredItemParentCPC")
    
    dbg_print("dataMerge + treeCurrentLevel")

    dataMergeTree <-
      merge(
        dataMergeTree,
        tree_parent_Level,
        by = c(p$parentVar, p$geoVar, p$yearVar),
        allow.cartesian = TRUE
      )

    dbg_print("dataMerge + data_tree_final")

    dataMergeTree <-
      merge(
        dataMergeTree,
        data_tree_final,
        by = c(p$parentVar, p$childVar, p$geoVar, p$yearVar),
        allow.cartesian = TRUE
      )

    setkey(dataMergeTree, NULL)
    
    dbg_print("dataMerge availability")

    dataMergeTree[,
      availability :=
        sum(
          Value[get(p$elementVar) %in% c(p$productionCode, p$importCode)],
          # XXX p$stockCode differs from "stockChange", hence the hardcoded literal
          - Value[get(p$elementVar) %in% c(p$exportCode, "stockChange")],
          na.rm = TRUE
        ),
      by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
    ]
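
    # availability = production + imports - exports - stockChange, per
    # country/year/parent/child pair. E.g., production 80, imports 30,
    # exports 10 and stockChange 20 give availability = 80 + 30 - 10 - 20 = 80.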
    
    dbg_print("negative availability")

    negative_availability[[as.character(lev)]] <-
      unique(
        dataMergeTree[
          availability < -100,
          .(
            country            = geographicAreaM49,
            year               = timePointYears,
            measuredItemFbsSua = measuredItemParentCPC,
            element            = measuredElementSuaFbs,
            Value,
            flagObservationStatus,
            flagMethod,
            availability
          )
        ]
      )
    
    dbg_print("zero out negative availability")

    # XXX: Replace negative availability so we get zero production instead of negative. This, however, should be fixed upstream somehow.
    dataMergeTree[availability < 0, availability := 0]
    
    dbg_print("dataMergeTree + shareDownUp_previous")

    message("Line 3252")
    
    ########## Calculating and updating processed quantities of the parent --------------------------
    #amsata
    datamergeNew <- copy(dataMergeTree)
    
    datamergeNew <-
      datamergeNew[,
        c("geographicAreaM49", "measuredItemParentCPC", "availability",
          "measuredItemChildCPC", "timePointYears", "extractionRate",
          "processingLevel", "shareDownUp"),
        with = FALSE
      ]
    
    datamergeNew <- unique(datamergeNew, by = colnames(datamergeNew))
    
    datamergeNew <-
      merge(
        datamergeNew,
        data[
          measuredElementSuaFbs == 'production',
          list(geographicAreaM49, timePointYears,
               measuredItemChildCPC = measuredItemSuaFbs, production = Value,
               Protected, Official, flagObservationStatus, flagMethod)
          ],
        by = c('geographicAreaM49', 'timePointYears', 'measuredItemChildCPC')
      )

    setkey(datamergeNew, NULL)

    datamergeNew[, manual := flagObservationStatus == "E" & flagMethod == "f"]

    datamergeNew[, c("flagObservationStatus", "flagMethod") := NULL]
    
    datamergeNew_zw <- datamergeNew[measuredItemChildCPC %chin% zeroWeight]
    
    datamergeNew <- datamergeNew[measuredItemChildCPC %!in% zeroWeight]
    
    datamergeNew <-
      dt_left_join(
        datamergeNew,
        data_Pshare,
        by = c(p$parentVar, p$geoVar, p$yearVar),
        allow.cartesian = TRUE
      ) 
    
    datamergeNew <-
      dt_left_join(
        datamergeNew,
        data_ShareUpDown_final,
        by = c(p$geoVar, p$yearVar, p$parentVar, p$childVar)
      ) 
    
    datamergeNew <-
      datamergeNew[,
        c("geographicAreaM49", "measuredItemParentCPC", "availability", "Pshare","NewLoss","Loss",
          "measuredItemChildCPC", "timePointYears", "extractionRate",
          "processingLevel", "shareDownUp", "shareUpDown", "production",
          "Protected", "flagObservationStatus", "flagMethod", "Official", "manual"),
        with = FALSE
      ]

    datamergeNew <- unique(datamergeNew)
    
    # datamergeNew[,zeroweight:=measuredItemChildCPC %in% zeroWeight]
    
    
    # including processed of parent
    datamergeNew <-
      dt_left_join(
        datamergeNew,
        data[
          measuredElementSuaFbs == 'foodManufacturing',
          list(geographicAreaM49, timePointYears,
               measuredItemParentCPC = measuredItemSuaFbs,
               processedBis = Value, processedProt = Protected)
          ],
        by = c('geographicAreaM49', 'timePointYears', 'measuredItemParentCPC')
      )
    
    # datamergeNew[timePointYears >= 2014 & Protected == FALSE, Value := NA][, Protected := NULL]
    datamergeNew[timePointYears %in% as.character(startYear:endYear) & Protected == FALSE, production := NA]
    datamergeNew[timePointYears %in% as.character(startYear:endYear) & processedProt == FALSE, processedBis := NA]
    
    # Calculate, for each parent, the number of children with protected production
    datamergeNew[,
      Number_childPro := sum(Protected == TRUE & shareUpDown > 0, na.rm = TRUE),
      by = c(p$parentVar, p$geoVar, p$yearVar)
    ]
    
    
    datamergeNew[,
      AllProtected := sum(shareUpDown[Protected == TRUE], na.rm = TRUE),
      by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
    ]

    datamergeNew[is.na(shareUpDown), shareUpDown := 0]

    # Update of the shareUpDown (to be reviewed, Giulia)
    # # datamergeNew[extractionRate>0 ,
    #               shareUpDown:=ifelse(Protected==TRUE & AllProtected!=1,(production*shareDownUp/extractionRate) / sum(production[Protected==TRUE] * shareDownUp[Protected==TRUE] / extractionRate[Protected==TRUE], na.rm = TRUE)*
    #                                     (1-sum(shareUpDown[Protected==FALSE],na.rm = TRUE)),shareUpDown),
    #               by=c("geographicAreaM49","timePointYears","measuredItemParentCPC")
    #               ]
    datamergeNew[, AllProtected := NULL]
    # Calculate, for each parent, the number of children to which processed quantities used to be sent
    datamergeNew[,
      Number_child := sum(shareUpDown > 0, na.rm = TRUE),
      by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]

    # We estimate the processed quantity of the parent based on the historical
    # trend of the processed share (for BC, we do not need to check NewLoss, because we average backwards)
    #datamergeNew[, Processed := ifelse(NewLoss==TRUE,Pshare * (availability-ifelse(is.na(Loss),0,Loss)),Pshare * availability)] 
    datamergeNew[, Processed := Pshare * availability] # for BC
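    # E.g., Pshare = 0.4 and availability = 80 give Processed = 0.4 * 80 = 32
    # as the parent quantity estimated to go into processing.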
    
    # However, some production of child commodities may be official
    datamergeNew[sign(Processed) == -1, Processed := 0]
    # Case 1: some child production is official, but not all.
    # In this case we update the shareUpDown of the child commodities whose production is official;
    # if the sum of their shareUpDown is greater than 1, we update Processed using the official
    # production of those children and then update the shareUpDown of the other child commodities
    datamergeNew[, c("shareUpDown_NEW", "Processed_new") := NA_real_]
    
    # A child contributes to the parent's processed quantity if it has protected
    # production with shareUpDown > 0, shareDownUp > 0 and extractionRate > 0;
    # this avoids issues caused by children with multiple parents
    
    datamergeNew[,
      child_DownUp :=
        Protected == TRUE &
        shareUpDown > 0 &
        extractionRate > 0 &
        shareDownUp > 0
    ]
    
    # GianLuca suggestion----------------
    
    datamergeNew[Number_childPro > 0, #& extractionRate > 0,
      #sum_shareUD_high == TRUE,
      Processed_new :=
        sum(
          production[child_DownUp == TRUE] / extractionRate[child_DownUp == TRUE] * shareDownUp[child_DownUp == TRUE],
          na.rm = TRUE
        ) /
        sum(shareUpDown[child_DownUp == TRUE], na.rm = TRUE),
      by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
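
    # Back-computation from protected children: each contributing child implies
    # a parent input of production / extractionRate * shareDownUp; dividing the
    # summed inputs by the children's total shareUpDown scales the figure up to
    # the parent's whole processed quantity. E.g., a single protected child with
    # production = 20, extractionRate = 0.8 and shareDownUp = 1 that covers
    # shareUpDown = 0.5 of the parent gives Processed_new = (20 / 0.8) / 0.5 = 50.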

    datamergeNew[,
      Processed_new := ifelse(is.na(Processed_new), mean(Processed_new, na.rm = TRUE), Processed_new),
      by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
    
    datamergeNew[,
      processed_down_up := sum(child_DownUp, na.rm = TRUE) > 0,
      by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears")
    ]
    
    datamergeNew[is.na(Processed_new) & processed_down_up == TRUE, Processed_new := 0]

    datamergeNew[Processed_new == 0 & processed_down_up == FALSE, Processed_new := NA_real_]
    
    datamergeNew[
      Protected == TRUE & manual == FALSE,
      `:=`(flagObservationStatus = "T", flagMethod = "-")
    ]
   
    datamergeNew[
      Protected == TRUE & manual == TRUE,
      `:=`(flagObservationStatus = "E", flagMethod = "f")
    ]
   
    datamergeNew[
      Protected==FALSE,
      `:=`(flagObservationStatus = "I", flagMethod = "c")
    ]
    
    datamergeNew[!is.na(shareUpDown_NEW), shareUpDown := shareUpDown_NEW]
    
    # GianLuca------------------------
    
    
    #Update the processed quantities
    
    dataForProc <-
      datamergeNew[,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "availability","Pshare","Processed", "processedBis", "Processed_new"),
        with = FALSE
      ]

    dataForProc <- unique(dataForProc)
    
    dataForProc[
      timePointYears %in% as.character(startYear:endYear) & !is.na(Processed_new),
      processedBis := Processed_new
    ]
    
    # Cancel the automatic update of Pshare from 2014 onward;
    # however, these lines can be kept for future improvement
    
    # dataForProc[,
    #   Pshare := processedBis / availability,
    #   by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")
    # ]
    # dataForProc[is.nan(Pshare), Pshare := NA_real_]
    # dataForProc[Pshare > 1, Pshare := 1]
    # dataForProc[Pshare < 0, Pshare := 0]
    # dataForProc <-
    #   dataForProc[
    #     order(geographicAreaM49, measuredItemParentCPC, timePointYears),
    #     Pshare_avr := rollavg(Pshare, order = 3),
    #     by = c("geographicAreaM49", "measuredItemParentCPC")
    #   ]
    # 
    # setkey(dataForProc, NULL)
    # 
    # dataForProc[timePointYears > 2013 & is.na(Pshare), Pshare := Pshare_avr]
    # 
    # dataForProc[, Pshare_avr := NULL]
    # 
    # dataForProc[, Processed := Pshare * availability]

    dataForProc[!is.na(Processed_new), Processed := Processed_new]

    datamergeNew <-
      datamergeNew[,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "measuredItemChildCPC", "extractionRate", "processingLevel",
          "shareDownUp", "shareUpDown", "production", "Protected",
          "flagObservationStatus", "flagMethod", "Official", "manual",
          "processed_down_up"),
        with = FALSE
      ]
    
    datamergeNew <-
      merge(
        dataForProc,
        datamergeNew,
        by = c(p$geoVar, p$yearVar, p$parentVar),
        allow.cartesian = TRUE
      ) 
    
    setkey(datamergeNew, NULL)
    
    #if the shareUpDown of a zeroweight coproduct is updated, the shareUpDown of
    #the corresponding zeroweight should also be updated
    
    #merge zeroweight with the tree
    
    datamergeNew_zeroweight <-
      merge(
      datamergeNew,
        zw_coproduct ,
        by = "measuredItemChildCPC",
        allow.cartesian = TRUE
      )

    setkey(datamergeNew_zeroweight, NULL)
    
    datamergeNew_zeroweight[, measuredItemChildCPC := zeroweight]
    
    datamergeNew_zeroweight <-
      datamergeNew_zeroweight[,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "measuredItemChildCPC", "Pshare", "shareUpDown", "Processed",
          "flagObservationStatus", "flagMethod", "processed_down_up"),
        with = FALSE
      ]
    
    #correction
    datamergeNew_zeroweight<-
      datamergeNew_zeroweight[,
        shareUpDown := sum(shareUpDown, na.rm = TRUE),
        by = c("geographicAreaM49", "measuredItemParentCPC",
               "measuredItemChildCPC", "timePointYears")
      ]
    
    datamergeNew_zeroweight <- unique(datamergeNew_zeroweight)

    #/correction
    
    datamergeNew_zeroweight <-
      merge(
        datamergeNew_zeroweight,
        datamergeNew_zw,
        by = c("geographicAreaM49", "timePointYears",
               "measuredItemParentCPC", "measuredItemChildCPC")
      )
    
    datamergeNew <-
      datamergeNew[, colnames(datamergeNew_zeroweight), with = FALSE]
    
    datamergeNew_final <- rbind(datamergeNew, datamergeNew_zeroweight)
    
    # Ensure missing shareUpDown is numeric NA (defensive type normalization; values unchanged)
    datamergeNew_final[is.na(shareUpDown), shareUpDown := NA_real_]
    
    updated_shareUpDown[[as.character(lev)]] <-
      datamergeNew_final[
        processingLevel == lev,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "measuredItemChildCPC", "shareUpDown", "flagObservationStatus",
          "flagMethod"),
        with = FALSE
      ]

    #/processed of parent-----------------
    estimated_processed <-
      datamergeNew_final[,
        list(geographicAreaM49, timePointYears,
             measuredItemSuaFbs = measuredItemParentCPC,
             Processed, processed_down_up)
      ]

    estimated_processed <- unique(estimated_processed)
     
    complete_food_proc <-
      CJ(
        measuredItemSuaFbs    = unique(estimated_processed$measuredItemSuaFbs),
        measuredElementSuaFbs = 'foodManufacturing',
        geographicAreaM49     = unique(estimated_processed$geographicAreaM49),
        timePointYears        = unique(estimated_processed$timePointYears)
      )
    
    # rbind with anti_join
    data <-
      rbind(
        data,
        complete_food_proc[
          !data,
          on = c('measuredItemSuaFbs', 'measuredElementSuaFbs',
                 'geographicAreaM49', 'timePointYears')
        ],
        fill = TRUE
      )
    
    data <-
      dt_left_join(
        data,
        estimated_processed[timePointYears %in% as.character(startYear:endYear)],
        by = c('geographicAreaM49', 'timePointYears', 'measuredItemSuaFbs'),
        allow.cartesian = TRUE
      )
    
    data[is.na(Protected), Protected := FALSE]
    data[is.na(Official), Official := FALSE]
    data[is.na(Processed_estimed), Processed_estimed := FALSE]
    
    data[
      measuredElementSuaFbs == "foodManufacturing" &
        Protected == FALSE &!is.na(Processed) & Processed_estimed == FALSE,
      `:=`(
        Value = Processed,
        flagObservationStatus = "E",
        flagMethod = ifelse(processed_down_up == TRUE, "-", "i"),
        Processed_estimed = TRUE
      )
    ][,
      c("Processed", "processed_down_up") := NULL
    ]
    # if we want to save also the empty (that will overwrite previously estimated processed)
    # [is.na(Value),`:=`(
    #   flagObservationStatus = "",
    #   flagMethod ="") ]
    # 
    
    ####### estimating production of derived
    data_production <-
      datamergeNew_final[
        processingLevel == lev,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "availability", "Pshare", "measuredItemChildCPC", "extractionRate",
          "shareDownUp", "shareUpDown", "production", "Protected"),
        with = FALSE
      ]
    
    data_production <- unique(data_production)
    
    data_production <-
      dt_left_join(
        data_production,
        unique(
          data[
            measuredElementSuaFbs == "foodManufacturing" & !is.na(Value),
            .(geographicAreaM49, timePointYears,
              measuredItemParentCPC = measuredItemSuaFbs,
              Processed_parent = Value)
          ]
        ),
        by = c(p$geoVar, p$yearVar, p$parentVar)
      )
    
    data_production[,
      #new_imputation:=Processed_parent*shareUpDown*extractionRate*shareDownUp
      new_imputation := Processed_parent * shareUpDown * extractionRate #shareDownUp is not part of the formula
    ]
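
    # Downward imputation, the inverse of the back-computation above: the share
    # of the parent's processed quantity allocated to this child, times the
    # extraction rate, gives the child's production. E.g., Processed_parent = 50,
    # shareUpDown = 0.5 and extractionRate = 0.8 give
    # new_imputation = 50 * 0.5 * 0.8 = 20.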
    
    data_production <-
      data_production[,
        c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
          "availability", "Pshare", "measuredItemChildCPC", "production",
          "new_imputation", "extractionRate", "shareDownUp",
          "shareUpDown", "Processed_parent"),
        with = FALSE
      ]
    
    data_production <- unique(data_production)
    
    sel_vars <- 
      c("geographicAreaM49", "timePointYears", "measuredItemParentCPC",
        "availability", "Pshare", "measuredItemChildCPC", "extractionRate",
        "shareDownUp", "shareUpDown", "Processed_parent")
    
    computed_shares[[as.character(lev)]] <-
      data_production[, sel_vars, with = FALSE]
    
    # XXX: change this name
    computed_shares_send[[as.character(lev)]] <-
      data_production[, sel_vars, with = FALSE]
    
    dbg_print("sum unique of new_imputation")

    data_production <-
      data_production[,
        .(
          measuredElementSuaFbs = 'production',
          # NOTE: the sum(unique()) is just a temporary HACK: check how to do it properly
          imputed_deriv_value = sum(unique(round(new_imputation, 2)), na.rm = TRUE)
        ),
        .(geographicAreaM49, timePointYears, measuredItemSuaFbs = measuredItemChildCPC)
      ]
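
    # The unique() presumably guards against exact duplicate rows left over
    # from the cartesian merges above (see the XXX notes on stocks creating
    # duplicates); distinct per-parent contributions to a child still add up.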
    
    
    data_production_complete <-
      CJ(
        measuredItemSuaFbs    = unique(data_production$measuredItemSuaFbs),
        measuredElementSuaFbs = 'production',
        geographicAreaM49     = unique(data_production$geographicAreaM49),
        timePointYears        = unique(data_production$timePointYears)
      )

    setkey(data_production_complete, NULL)

    sel_vars <- 
      c('geographicAreaM49', 'timePointYears',
        'measuredItemSuaFbs', 'measuredElementSuaFbs') 

    # rbind with anti_join
    data <-
      rbind(
        data,
        data_production_complete[!data, on = sel_vars],
        fill = TRUE
      )
    
    data <- dt_left_join(data, data_production, by = sel_vars)

    dbg_print("z data for shares to send")

    z <-
      data[
        measuredElementSuaFbs %chin% c("production", "imports", "exports", "stockChange"),
        c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs",
          "measuredElementSuaFbs", "Value", "Protected", "imputed_deriv_value"),
        with = FALSE
      ]

    z[
      measuredElementSuaFbs == 'production' & Protected == FALSE,
      Value := imputed_deriv_value
    ]

    z[, imputed_deriv_value := NULL]

    # XXX stocks create duplicates
    z <- unique(z)

    dbg_print("dcast z data")

    z <-
      data.table::dcast(
        z,
        geographicAreaM49 + timePointYears + measuredItemSuaFbs ~ measuredElementSuaFbs,
        value.var = c("Value", "Protected")
      )
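
    # After dcast, z holds one row per country/year/item with Value_production,
    # Value_imports, Value_exports, Value_stockChange and the matching
    # Protected_* columns, ready to be joined onto the shares table below.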
    
    # XXX stockChange may change below (and also production, if required)

    dbg_print("z data + computed_shares_send")

    tmp <-
      z[computed_shares_send[[as.character(lev)]],
        on = c('geographicAreaM49'  = 'geographicAreaM49',
               'timePointYears'     = 'timePointYears',
               'measuredItemSuaFbs' = 'measuredItemChildCPC')]

    setkey(tmp, NULL)

    # XXX stocks create duplicates
    tmp <- unique(tmp)

    computed_shares_send[[as.character(lev)]] <- tmp
    
    dbg_print("assign production of derived to non protected/frozen data")

    # Assign only to non-protected, non-frozen data
    # (XXX: here only measuredItemSuaFbs that are children should be assigned)
    data[
      timePointYears %in% as.character(startYear:endYear) &
        Protected == FALSE &
        measuredElementSuaFbs == 'production' &
        !is.na(imputed_deriv_value),
      `:=`(
        Value = imputed_deriv_value,
        flagObservationStatus = "I", flagMethod = "c")
    ]
    
    data[, imputed_deriv_value := NULL]
  }
# }
  
  ### Processed check for BC: remove processed quantities that do not link to any child production
  
  ProcessedCheck = copy(data)
  setnames(ProcessedCheck, c("measuredItemSuaFbs", "Value"), c("measuredItemParentCPC", "foodManufacturing"))
  ProcessedCheck = ProcessedCheck[measuredElementSuaFbs == "foodManufacturing" & flagMethod == "i" , ]
  
  ProductionCheck = copy(data)
  setnames(ProductionCheck, c("measuredItemSuaFbs", "Value"), c("measuredItemChildCPC", "production"))
  ProductionCheck = ProductionCheck[measuredElementSuaFbs == "production" & flagMethod == "c" , ]
  
  CheckProcessedData = merge(
    ProcessedCheck[, .(geographicAreaM49, timePointYears, measuredItemParentCPC, foodManufacturing)],
    tree[measuredElementSuaFbs == "extractionRate", .(geographicAreaM49, measuredItemParentCPC, measuredItemChildCPC, timePointYears)],
    by = c("geographicAreaM49", "measuredItemParentCPC", "timePointYears"), all.x = TRUE
  )
  
  CheckProcessedData = merge(
    CheckProcessedData,
    ProductionCheck[, .(geographicAreaM49, timePointYears, measuredItemChildCPC, production)],
    by = c("geographicAreaM49", "measuredItemChildCPC", "timePointYears"), all.x = TRUE
  )
  
  CheckProcessedData[, 
                     ProcessedCheck := sum(production, na.rm = TRUE), 
                     by = c("geographicAreaM49", "timePointYears", "measuredItemParentCPC")]
  
  CheckProcessedData[ProcessedCheck == 0, foodManufacturing := 0  ]
  
  CheckProcessedData = CheckProcessedData[ProcessedCheck == 0]
  
  if (nrow(CheckProcessedData) > 0) {
    saveProcessedCheck = unique(CheckProcessedData[, list(geographicAreaM49,
                                                          measuredItemSuaFbs = measuredItemParentCPC,
                                                          measuredElementSuaFbs = "foodManufacturing",
                                                          Value_processedCheck = foodManufacturing,
                                                          timePointYears)])
    saveProcessedCheck = saveProcessedCheck[timePointYears %in% as.character(startYear:endYear), ]
  }
  
  # save these back to data
  if(exists("saveProcessedCheck")){
  data = merge(data,
          saveProcessedCheck,
          by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs", "measuredElementSuaFbs"), all.x=T) 
  
  data[measuredElementSuaFbs == "foodManufacturing" & !is.na(Value_processedCheck) & Value != 0, Value := Value_processedCheck ]
  data[, Value_processedCheck := NULL]
  }
  
  ## NB: the stock variation element in `data` is named "stockChange" (see
  ## above); with "stockChanges" the stocks would never match and be dropped here
testSave = data[measuredElementSuaFbs %in% c("production", "foodManufacturing", "stockChange", "stock") &
                  timePointYears %in% as.character(startYear:endYear), ]
testSave[ measuredElementSuaFbs == "production", measuredElementSuaFbs := "5510"]
testSave[ measuredElementSuaFbs == "foodManufacturing", measuredElementSuaFbs := "5023"]
testSave[ measuredElementSuaFbs == "stockChange", measuredElementSuaFbs := "5071"]
testSave[ measuredElementSuaFbs == "stock", measuredElementSuaFbs := "5113"]
testSave = testSave[, .(geographicAreaM49, measuredItemFbsSua = measuredItemSuaFbs, measuredElementSuaFbs, timePointYears, Value, flagObservationStatus, flagMethod)]

# faosws::SaveData(
#   domain = "suafbs",
#   dataset = "sua_unbalanced",
#   data = testSave,
#   waitTimeout = 20000
# )

# data[measuredElementSuaFbs == "production" & dplyr::near(Value,0) & flagObservationStatus == "I" &
#     flagMethod == "c", `:=` (Value = NA, flagObservationStatus = NA, flagMethod = NA) ]

setkey(data, NULL)

computed_shares <- rbindlist(computed_shares)

updated_shareUpDown <- rbindlist(updated_shareUpDown)


shareUpDown_to_save <- copy(updated_shareUpDown)
shareUpDown_to_save[, measuredElementSuaFbs := "5431"]

setnames(
  shareUpDown_to_save,
  c("measuredItemParentCPC", "measuredItemChildCPC", "shareUpDown"),
  c("measuredItemParentCPC_tree", "measuredItemChildCPC_tree", "Value")
)

# shareUpDown_to_save <- shareUpDown_to_save[!is.na(Value)]
shareUpDown_to_save[is.na(Value),Value:=0]

data_shareUpDown_sws <- GetData(sessionKey_shareUpDown)

#shareUpDown_to_save[, Value := round(Value, 3)]

faosws::SaveData(
  domain = "suafbs",
  dataset = "up_down_share",
  data = shareUpDown_to_save,
  waitTimeout = 20000
)


remove_connection<-data_shareUpDown_sws[((measuredItemParentCPC_tree == "02211" & measuredItemChildCPC_tree == "22212") |
                                           #cheese from whole cow milk cannot come from skim milk of cow
                                           (measuredItemParentCPC_tree == "22110.02" & measuredItemChildCPC_tree == "22251.01") |
                                           (measuredItemParentCPC_tree == "02211" & measuredItemChildCPC_tree == "22251.02"))]


if (nrow(remove_connection)>0){

  remove_connection[,`:=`(Value = NA, flagObservationStatus = NA, flagMethod = NA)]
  faosws::SaveData(
    domain = "suafbs",
    dataset = "up_down_share",
    data = remove_connection,
    waitTimeout = 20000
  )
}

dbg_print("end of derived production loop")


data_deriv <-
  data[
    measuredElementSuaFbs == 'production' &
      timePointYears %in% as.character(startYear:endYear),
    list(
      geographicAreaM49,
      measuredElementSuaFbs = "5510",
      measuredItemFbsSua = measuredItemSuaFbs,
      timePointYears,
      Value,
      flagObservationStatus,
      flagMethod
    )
  ]

# save processed data
data_processed <-
  data[
    measuredElementSuaFbs == 'foodManufacturing' &
      timePointYears %in% as.character(startYear:endYear),
    list(
      geographicAreaM49,
      measuredElementSuaFbs = "5023",
      measuredItemFbsSua = measuredItemSuaFbs,
      timePointYears,
      Value,
      flagObservationStatus,
      flagMethod
    )
  ]

# Save stock data in SUA Unbalanced

data_stock_to_save <-
  data[
    measuredElementSuaFbs == 'stockChange' &
      timePointYears %in% as.character(startYear:endYear) &
      !is.na(Value),
    list(
      geographicAreaM49,
      measuredElementSuaFbs = "5071",
      measuredItemFbsSua = measuredItemSuaFbs,
      timePointYears,
      Value,
      flagObservationStatus,
      flagMethod
    )
  ]

opening_stocks_to_save <- copy(all_opening_stocks)
opening_stocks_to_save[, Protected := NULL]

data_to_save_unbalanced <-
  rbind(
    data_deriv,
    data_processed,
    data_stock_to_save,
    opening_stocks_to_save
  )

data_to_save_unbalanced <- data_to_save_unbalanced[!is.na(Value)]



dbg_print("complete data elements")

data <-
  plyr::ddply(
    data,
    .variables = c('geographicAreaM49', 'timePointYears'),
    .fun = function(x) addMissingElements(as.data.table(x), p)
  )

setDT(data)


######################## OUTLIERS #################################
# re-writing of Cristina's outliers plugin with data.table syntax #

dbg_print("start outliers")

if (FIX_OUTLIERS == TRUE) {

  sel_vars <- c('geographicAreaM49', 'timePointYears',
                'measuredItemSuaFbs', 'measuredElementSuaFbs')

  commDef <- ReadDatatable("fbs_commodity_definitions") # XXX: updated?

  stopifnot(nrow(commDef) > 0)

  primaryProxyPrimary_items <-
    commDef[proxy_primary == "X" | primary_commodity == "X"]$cpc

  food_items <- commDef[food_item == "X"]$cpc

  
  
  
  dout <-
    CJ(
      geographicAreaM49 = unique(data$geographicAreaM49),
      timePointYears = unique(data$timePointYears),
      measuredItemSuaFbs = unique(data$measuredItemSuaFbs),
      measuredElementSuaFbs = unique(data$measuredElementSuaFbs)
    )

  dout <- data[dout, on = sel_vars]

  dout[is.na(Protected), Protected := FALSE]

  # Protecting here Ee as we should not correct already corrected outliers
  # dout[flagObservationStatus == "E" & flagMethod == "e", Protected := TRUE]

  
  ### FOR BC: Calculate thresholds from outlier routine based on sua balanced
  
  # Also download calories (needed later); moved upwards for the processing shares
  # elemKeys_all <- c(elemKeys, "664")
  # 
  # if (CheckDebug()) {
  #   key_all <-
  #     DatasetKey(
  #       domain = "suafbs",
  #       dataset = "sua_balanced",
  #       dimensions =
  #         list(
  #           geographicAreaM49 = Dimension(name = "geographicAreaM49", keys = COUNTRY),
  #           measuredElementSuaFbs = Dimension(name = "measuredElementSuaFbs", keys = elemKeys_all),
  #           measuredItemFbsSua = Dimension(name = "measuredItemFbsSua", keys = itemKeys),
  #           # Get from 2010, just for the SUA aggregation
  #           timePointYears = Dimension(name = "timePointYears", keys = YEARS)
  #         )
  #     )
  # } else {
  #   key_all <- swsContext.datasets[[1]]
  #   
  #   key_all@dimensions$timePointYears@keys <- YEARS
  #   key_all@dimensions$measuredItemFbsSua@keys <- itemKeys
  #   key_all@dimensions$measuredElementSuaFbs@keys <- elemKeys_all
  #   key_all@dimensions$geographicAreaM49@keys <- COUNTRY
  # }
  
  
  data_suabal <- GetData(key_all)
  
  
  suabalCJ <-
    CJ(
      geographicAreaM49 = unique(data_suabal$geographicAreaM49),
      timePointYears = unique(as.character(refYear)),
      measuredItemFbsSua = unique(data_suabal$measuredItemFbsSua),
      measuredElementSuaFbs = unique(data_suabal$measuredElementSuaFbs)
    )
  
  outlier_suabal = merge(suabalCJ, data_suabal, all.x=T, by = c("geographicAreaM49", "timePointYears", "measuredItemFbsSua", 
                                                                "measuredElementSuaFbs"))

  # merge in names (will also keep only elements for which we need thresholds, i.e. excluding stock openings)
  outlier_suabal <- merge(outlier_suabal, codes, by = "measuredElementSuaFbs")
  
  outlier_suabal[, measuredElementSuaFbs := name]
  
  outlier_suabal[, name := NULL]
  
  setnames(outlier_suabal, "measuredItemFbsSua", "measuredItemSuaFbs")
  
  outlier_suabal[,
    `:=`(
      production = Value[measuredElementSuaFbs == "production"],
      supply     = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
                       - Value[measuredElementSuaFbs %in% c("exports", "stockChange")],
                       na.rm = TRUE),
      domsupply  = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
                       na.rm = TRUE)
    ),
    by = c('geographicAreaM49', 'measuredItemSuaFbs', 'timePointYears')
  ]

  outlier_suabal[measuredElementSuaFbs %chin% c("feed", "industrial", "tourist"), element_supply := supply]
  outlier_suabal[measuredElementSuaFbs == "seed", element_supply := production]
  outlier_suabal[measuredElementSuaFbs == "loss", element_supply := domsupply]

  outlier_suabal[element_supply < 0, element_supply := NA_real_]

  outlier_suabal[, ratio := Value / element_supply]
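
  # The ratio expresses each utilization relative to its reference supply:
  # feed/industrial/tourist against total supply, seed against production,
  # and loss against domestic supply (production + imports).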

  

  # Calculate thresholds from sua balanced values
  outlier_suabal[,
                 `:=`(
                   mean_ratio = mean(ratio[timePointYears %in% refYear], na.rm = TRUE),
                   Meanold    = mean(Value[timePointYears %in% refYear], na.rm = TRUE)
                 ),
                 by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  ]
  
  # For BC, we don't need thresholds if there are no values in 2014-2019
  # dout[
  #   timePointYears %in% as.character(startYear:endYear) & (is.na(mean_ratio) | is.nan(mean_ratio) | is.infinite(mean_ratio)),
  #   mean_ratio := median(ratio[timePointYears %in% as.character(startYear:endYear)], na.rm = TRUE),
  #   by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  # ]
  # 
  # dout[
  #   timePointYears %in% as.character(startYear:endYear) & (is.na(Meanold) | is.nan(Meanold) | is.infinite(Meanold)),
  #   Meanold := median(Value[timePointYears %in% as.character(startYear:endYear)], na.rm = TRUE),
  #   by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  # ]
  
  # for BC we want NAs in 2014-2019 to delete the corresponding entries in 2010-2013
  outlier_suabal[mean_ratio > 1, mean_ratio := 1]
  outlier_suabal[mean_ratio < 0, mean_ratio := 0]
  outlier_suabal[is.na(mean_ratio) | is.nan(mean_ratio), mean_ratio := 0]
  outlier_suabal[is.na(Meanold) | is.nan(Meanold), Meanold := 0]
  outlier_suabal[, abs_diff_threshold := ifelse(measuredElementSuaFbs == "feed", 0.1, 0.05)]
  
  # In forward compilation, thresholds are derived from the balanced (but stored in sua unbalanced) past:
  # dout[,
  #      `:=`(
  #        production = Value[measuredElementSuaFbs  == "production"],
  #        supply     = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
  #                         - Value[measuredElementSuaFbs %in% c("exports", "stockChange")],
  #                         na.rm = TRUE),
  #        domsupply  = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
  #                         na.rm = TRUE)
  #      ),
  #      by = c('geographicAreaM49', 'measuredItemSuaFbs', 'timePointYears')
  # ]
  # 
  # dout[measuredElementSuaFbs %chin% c("feed", "industrial"), element_supply := supply]
  # dout[measuredElementSuaFbs == "seed", element_supply := production]
  # dout[measuredElementSuaFbs == "loss", element_supply := domsupply]
  # 
  # dout[element_supply < 0, element_supply := NA_real_]
  # 
  # dout[, ratio := Value / element_supply]
  # 
  # 
  # dout[,
  #   `:=`(
  #     mean_ratio = mean(ratio[timePointYears %in% refYear], na.rm = TRUE),
  #     Meanold    = mean(Value[timePointYears %in% refYear], na.rm = TRUE)
  #   ),
  #   by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  # ]

  # If the element is new, there will be no `mean_ratio` and `Meanold`, thus we
  # use the new values to re-calculate them.

  # dout[
  #   timePointYears %in% as.character(startYear:endYear) & (is.na(mean_ratio) | is.nan(mean_ratio) | is.infinite(mean_ratio)),
  #   mean_ratio := median(ratio[timePointYears %in% as.character(startYear:endYear)], na.rm = TRUE),
  #   by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  # ]
  # 
  # dout[
  #   timePointYears %in% as.character(startYear:endYear) & (is.na(Meanold) | is.nan(Meanold) | is.infinite(Meanold)),
  #   Meanold := median(Value[timePointYears %in% as.character(startYear:endYear)], na.rm = TRUE),
  #   by = c('geographicAreaM49', 'measuredItemSuaFbs', 'measuredElementSuaFbs')
  # ]
  # 
  # dout[mean_ratio > 1, mean_ratio := 1]
  # 
  # dout[, abs_diff_threshold := ifelse(measuredElementSuaFbs == "feed", 0.1, 0.05)]

  # now we merge dout with the thresholds calculated in sua balanced
  
  dout[,
    `:=`(
      production = Value[measuredElementSuaFbs == "production"],
      supply     = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
                       - Value[measuredElementSuaFbs %in% c("exports", "stockChange")],
                       na.rm = TRUE),
      domsupply  = sum(Value[measuredElementSuaFbs %in% c("production", "imports")],
                       na.rm = TRUE)
    ),
    by = c('geographicAreaM49', 'measuredItemSuaFbs', 'timePointYears')
  ]
  
  dout[measuredElementSuaFbs %chin% c("feed", "industrial", "tourist"), element_supply := supply]
  dout[measuredElementSuaFbs == "seed", element_supply := production]
  dout[measuredElementSuaFbs == "loss", element_supply := domsupply]
  
  dout[element_supply < 0, element_supply := NA_real_]
  
  dout[, ratio := Value / element_supply]
  
  
  
  
  dout = merge(dout,
               unique(outlier_suabal[, .(measuredElementSuaFbs, geographicAreaM49, measuredItemSuaFbs,
                                         mean_ratio, Meanold, abs_diff_threshold)]),
               by = c("measuredElementSuaFbs", "geographicAreaM49", "measuredItemSuaFbs"), all.x = TRUE)
  
  dout[is.na(ratio), ratio := 0]
  
  dout[
    Protected == FALSE &
      timePointYears %in% as.character(startYear:endYear) & # XXX: parameterise
      mean_ratio > 0 & 
       abs(element_supply) > 0 &
      measuredElementSuaFbs %chin% c("feed", "seed", "loss", "industrial", "tourist") &
      # the conditions below define the outlier, or cases that were NA
      (is.na(Value) |
         abs(ratio - mean_ratio) > abs_diff_threshold |
         mean_ratio == 1 |
         (mean_ratio != 0 & dplyr::near(ratio, 0)) |
         (outside(Value / Meanold, 0.5, 2) & outside(Value - Meanold, -10000, 10000))),
    impute := element_supply * mean_ratio
  ]
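
  # A value is re-imputed as element_supply * mean_ratio when it is missing,
  # its ratio drifts from the reference-period mean by more than the absolute
  # threshold, the mean ratio saturates at 1, the ratio collapses to zero
  # against a non-zero mean, or its level departs from Meanold both by more
  # than a factor of 2 and by more than 10000 units.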

  # get rid of utilizations that are zero in 2014-2019
  dout[Meanold == 0 & mean_ratio == 0 & measuredElementSuaFbs %in% c("feed", "seed", "industrial", "loss", "tourist"), impute := 0 ]

  # Remove imputation for loss that is non-food and non-primaryProxyPrimary
  dout[
    measuredElementSuaFbs == "loss" &
      !(measuredItemSuaFbs %chin% food_items &
          measuredItemSuaFbs %chin% primaryProxyPrimary_items),
    impute := NA_real_
  ]
  

  fbsTree <- ReadDatatable("fbs_tree")
  
  # Remove imputation for seed of Fruits and vegetables 
  dout[
    measuredElementSuaFbs == "seed" &
      measuredItemSuaFbs %chin% fbsTree[id3 == "2918"|id3=="2919"]$item_sua_fbs,
    impute := NA_real_
  ]
  

  # Exclude feed on the first run: feed still needs to be generated, and
  # outliers should not be saved to sua_unbalanced with the E,e flag (which is
  # protected...). In other words, if this is the first run, feed has not been
  # estimated yet.
  #STOP_AFTER_DERIVED = FALSE
  

  if (STOP_AFTER_DERIVED == TRUE) {
    dout <- dout[measuredElementSuaFbs != "feed"]
  }


  dout <-
    dout[
      !is.na(impute) & (is.na(Value) | round(Value, 1) != round(impute, 1) | Value==0),
      list(
        geographicAreaM49,
        measuredItemSuaFbs,
        measuredElementSuaFbs,
        timePointYears,
        Value_imputed = impute
      )
    ]

  if (nrow(dout) > 0) {

    data <- dt_full_join(data, dout, by = sel_vars)

    data[
      !is.na(Value_imputed) & (Protected == FALSE | is.na(Protected)),
      `:=`(
        Value = Value_imputed,
        flagObservationStatus = "E",
        flagMethod = "e",
        Protected = FALSE)
      ]
    
     data[Value == 0 & flagObservationStatus == "E"& flagMethod == "e", `:=` (Value = NA_real_, flagObservationStatus = NA_character_, flagMethod = NA_character_) ]
    
    
    data_outliers <-
      data[!is.na(Value_imputed) & timePointYears %in% as.character(startYear:endYear) &
           measuredElementSuaFbs %in% c("feed", "seed", "loss", "industrial", "tourist")]

    data[, Value_imputed := NULL]
  }
}

dbg_print("end outliers")

####################### / OUTLIERS #################################

#######################  STOCK CHECK (BC) #################################

Variation2013 = data[measuredElementSuaFbs == "stockChange" & timePointYears == "2013", .(geographicAreaM49, measuredItemSuaFbs, variation2013 = Value, FlagVariation = paste0("(", flagObservationStatus, ",", flagMethod, ")"))]
Opening2013 = opening_stocks_to_save[measuredElementSuaFbs == "5113" & timePointYears == "2013", .(geographicAreaM49, measuredItemSuaFbs= measuredItemFbsSua, opening2013 = Value, FlagOpening2013 = paste0("(", flagObservationStatus, ",", flagMethod, ")"))]
Opening2014 = opening_stocks_to_save[measuredElementSuaFbs == "5113" & timePointYears == "2014", .(geographicAreaM49, measuredItemSuaFbs= measuredItemFbsSua, opening2014 = Value, FlagOpening2014 = paste0("(", flagObservationStatus, ",", flagMethod, ")"))]

FirstMerge = merge(Variation2013, Opening2013, by = c("geographicAreaM49", "measuredItemSuaFbs"))

StockCheck = merge(FirstMerge, Opening2014, by = c("geographicAreaM49", "measuredItemSuaFbs"))
StockCheck[, ConsistencyCheck := dplyr::near(round(opening2013 + variation2013), round(opening2014)) ]
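
# The identity being checked: opening(2014) = opening(2013) + variation(2013),
# compared on rounded values. E.g., opening2013 = 100 and variation2013 = -30
# must give opening2014 = 70; mismatches are exported to a CSV below.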

StockCheckProblems = StockCheck[ConsistencyCheck == FALSE, ]
setnames(StockCheckProblems, "measuredItemSuaFbs", "measuredItemFbsSua")
StockCheckProblems = nameData("suafbs", "sua_unbalanced", StockCheckProblems)

StocksMismatch <- file.path(TMP_DIR, paste0("STOCKMISMATCH_", COUNTRY, ".csv"))

write.csv(StockCheckProblems, StocksMismatch)


#######################  / STOCK CHECK (BC) #################################

##################### INDUSTRIAL-TOURISM ###############################

dbg_print("Fix tourism/industrial")

# data[, Protected_orig := Protected]
# data[flagObservationStatus == "E" & flagMethod == "e", Protected := TRUE]

# In the past, industrial contained what is now tourism =>
# remove tourism from industrial.
# XXX: it seems (actually, is) slow.
if (COUNTRY %in% NonSmallIslands) {
  ind_tour_tab <-
    data[
      measuredItemSuaFbs %chin% Utilization_Table[food_item == "X"]$cpc_code &
        timePointYears %in% as.character(startYear:endYear) &
        measuredElementSuaFbs %chin% c("industrial", "tourist") &
        !is.na(Value),
      .SD[.N == 2], # both industrial and tourism need to be non-missing
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    ]

  ind_tour_tab[,
    any_protected := any(Protected == TRUE),
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ]

  ind_tour_tab <- ind_tour_tab[any_protected == FALSE]

  ind_tour_tab[, any_protected := FALSE]

  ind_tour_tab <-
    ind_tour_tab[,
      .(
        indNoTour =
          ifelse(sign(Value[measuredElementSuaFbs == "tourist"])==1,Value[measuredElementSuaFbs == "industrial"] -
                   Value[measuredElementSuaFbs == "tourist"],Value[measuredElementSuaFbs == "industrial"])
      ),
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    ]
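
  # If tourism is positive it is assumed to be part of the old industrial
  # figure and is subtracted out; otherwise industrial is kept as is.
  # Negative results are floored at zero just below.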

  ind_tour_tab[indNoTour < 0, indNoTour := 0]

  data <-
    dt_left_join(
      data,
      ind_tour_tab,
      by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
    )

  data[
    measuredElementSuaFbs == "industrial" &
      !is.na(indNoTour) &
      Protected == FALSE,
    `:=`(
      Value = indNoTour,
      # XXX: probably other flags (initially protected, then unprotected)
      flagObservationStatus = "E",
      flagMethod = "e"
    )
  ]

  data[, indNoTour := NULL]
}

# data[, Protected := Protected_orig]
# data[, Protected_orig := NULL]

##################### / INDUSTRIAL-TOURISM ###############################

data_ind_tour <-
  data[measuredElementSuaFbs %chin% c("industrial", "tourist")]

if (nrow(data_ind_tour) > 0) {

  setnames(data_ind_tour, "measuredItemSuaFbs", "measuredItemFbsSua")

  data_ind_tour <-
    data_ind_tour[!is.na(Value), names(data_to_save_unbalanced), with = FALSE]

  data_ind_tour[
    measuredElementSuaFbs == "industrial",
    measuredElementSuaFbs := "5165"
  ]

  data_ind_tour[
    measuredElementSuaFbs == "tourist",
    measuredElementSuaFbs := "5164"
  ]

  data_to_save_unbalanced <- rbind(data_to_save_unbalanced, data_ind_tour)
}

if (exists("data_outliers")) {
  
  # Note for back compilation: in some runs measuredItemFbsSua is already
  # present (probably due to an earlier change in the code); if so, use the
  # element-only rename (commented below) instead.
   setnames(data_outliers, c("measuredItemSuaFbs", "measuredElementSuaFbs"), c("measuredItemFbsSua", "name"))
  #setnames(data_outliers, c("measuredElementSuaFbs"), c("name"))
  
  data_outliers <- dt_left_join(data_outliers, codes, by = "name")

  data_outliers <- data_outliers[, names(data_to_save_unbalanced), with = FALSE]

  data_to_save_unbalanced <- rbind(data_to_save_unbalanced, data_outliers)
}

data_to_save_unbalanced <- data_to_save_unbalanced[timePointYears %in% as.character(startYear:endYear)]

data_to_save_unbalanced[
  measuredElementSuaFbs == "5510" & dplyr::near(Value, 0) &
    flagObservationStatus == "I" & flagMethod == "c",
  `:=`(Value = NA_real_, flagObservationStatus = NA_character_, flagMethod = NA_character_)
]


out <-
  SaveData(
    domain = "suafbs",
    dataset = "sua_unbalanced",
    data = data_to_save_unbalanced,
    waitTimeout = 20000
  )

if (STOP_AFTER_DERIVED == TRUE) {
  dbg_print("stop after production of derived")

  if (exists("out")) {

    print(paste(out$inserted + out$ignored, "derived products written"))

    if (!CheckDebug()) {
      send_mail(
        from = "do-not-reply@fao.org",
        to = swsContext.userEmail,
        subject = "Production of derived items created",
        body = paste("The plugin stopped after production of derived items.")

      )
    }

  } else {
    print("The SUA_bal_compilation plugin had a problem when saving derived data.")
  }
  
  dbg_print("Plugin stopped after derived, as requested. This is fine.")
  paste0("Plugin stopped after derived, as requested. This is fine.")
  print("Plugin stopped after derived, as requested. This is fine.")
  message("Plugin stopped after derived, as requested. This is fine.")
  stop("Plugin stopped after derived, as requested. This is fine.")
}


## CLEAN sua_balanced
message("wipe sua_balanced session")
sessionKey_suabal = swsContext.datasets[[1]]
CONFIG <- GetDatasetConfig(sessionKey_suabal@domain, sessionKey_suabal@dataset)
datatoClean=GetData(sessionKey_suabal)
datatoClean=datatoClean[timePointYears %in% as.character(startYear:endYear)]
datatoClean[, Value := NA_real_]
datatoClean[, CONFIG$flags := NA_character_]
SaveData(CONFIG$domain, CONFIG$dataset , data = datatoClean, waitTimeout = Inf)


computed_shares_send <- rbindlist(computed_shares_send, fill = TRUE)

# XXX: duplicates can appear after the rbindlist above; deduplicate here,
# but investigate why they arise upstream
computed_shares_send <- unique(computed_shares_send)

colnames(computed_shares_send) <-
  sub("Value_", "", colnames(computed_shares_send))

setnames(
  computed_shares_send,
  c("timePointYears", "measuredItemSuaFbs", "measuredItemParentCPC"),
  c("year", "measuredItemChildCPC_tree", "measuredItemParentCPC_tree")
)

computed_shares_send <-
  nameData(
    "suafbs",
    "ess_fbs_commodity_tree2",
    computed_shares_send
  )

setnames(
  computed_shares_send,
  c("geographicAreaM49", "geographicAreaM49_description",
    "measuredItemChildCPC_tree", "measuredItemChildCPC_tree_description",
    "measuredItemParentCPC_tree", "measuredItemParentCPC_tree_description"),
  c("Country", "Country_name", "Child", "Child_name", "Parent", "Parent_name")
)

computed_shares_send[, zero_weigth := Child %chin% zeroWeight]

computed_shares_send[, `:=`(Child = paste0("'", Child), Parent = paste0("'", Parent))]

negative_availability <- rbindlist(negative_availability)

if (nrow(negative_availability) > 0) {

  # FIXME: stocks may be generated twice for parents at multiple levels
  # (though the resulting figures are the same). Fix in the production-of-derived loop.
  negative_availability <- unique(negative_availability)

  negative_availability_var <-
    negative_availability[,
      .(
        country,
        year,
        measuredItemFbsSua,
        element = "availability",
        Value = availability,
        flagObservationStatus = "I",
        flagMethod = "i"
      )
    ]

  negative_availability[, availability := NULL]

  negative_availability <-
    rbind(
      negative_availability,
      unique(negative_availability_var) #FIXME
    )

  negative_availability <-
    data.table::dcast(
      negative_availability,
      country + measuredItemFbsSua + year ~ element,
      value.var = "Value"
    )

  negative_availability <-
    nameData("suafbs", "sua_unbalanced", negative_availability)

  negative_availability[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

}






message("Line 4233")




fbsTree <- ReadDatatable("fbs_tree")

######################## rm new loss in dairy/meat #################

dairy_meat_items <- fbsTree[id3 %chin% c("2948", "2943"), item_sua_fbs]

# TODO: Some of the code below is repeated. It should be rewritten so
# that there is a single computation of new elements.

data_complete_loss <-
  data.table(
    geographicAreaM49 = unique(data$geographicAreaM49),
    timePointYears = sort(unique(data$timePointYears)))[
      unique(
        data[
          measuredElementSuaFbs == "loss" & !is.na(Value),
          c("geographicAreaM49", "measuredItemSuaFbs"),
          with = FALSE
        ]
      ),
      on = "geographicAreaM49",
      allow.cartesian = TRUE
    ]

data_complete_loss <-
  dt_left_join(
    data_complete_loss,
    data[
      measuredElementSuaFbs == "loss" & Value > 0,
      c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears", "Value"),
      with = FALSE
    ],
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  )

data_complete_loss[, y := 1]


### NEED TO GO THROUGH IN DETAIL FOR BACK COMPILATION (we switched pre and post)
data_complete_loss <-
  data_complete_loss[,
    .(
      t_pre   = sum(y[timePointYears %in% refYear]),
      t_post  = sum(y[timePointYears %in% as.character(startYear:endYear)]),
      na_pre  = sum(is.na(Value[timePointYears %in% refYear])),
      na_post = sum(is.na(Value[timePointYears %in% as.character(startYear:endYear)]))
    ),
    by = c("geographicAreaM49", "measuredItemSuaFbs")
  ]


new_elements_loss <-
  data_complete_loss[
    na_pre == t_pre & na_post < t_post
  ][
    order(geographicAreaM49, measuredItemSuaFbs),
    c("geographicAreaM49", "measuredItemSuaFbs"),
    with = FALSE
  ]

new_loss_dairy_meat <-
  new_elements_loss[measuredItemSuaFbs %chin% dairy_meat_items]

new_loss_dairy_meat[, new_loss_dm := TRUE]

data <-
  new_loss_dairy_meat[data, on = c("geographicAreaM49", "measuredItemSuaFbs")]

data[
  new_loss_dm == TRUE &
    timePointYears %in% as.character(startYear:endYear) &
    measuredElementSuaFbs == "loss" &
    Protected == FALSE,
  `:=`(
    Value = NA_real_,
    flagObservationStatus = NA_character_,
    flagMethod = NA_character_
  )
]

data[, new_loss_dm := NULL]


######################## / rm new loss in dairy/meat #################




# Protect all loss data, to keep it consistent with SDG indicator,
# whatever the flag is.
data[measuredElementSuaFbs == "loss", Protected := TRUE]



data <- dt_left_join(data, itemMap, by = "measuredItemSuaFbs")


## Split data based on the two factors we need to loop over
uniqueLevels <- unique(data[, c("geographicAreaM49", "timePointYears"), with = FALSE])

uniqueLevels <- uniqueLevels[order(geographicAreaM49, timePointYears)]

data[, stockable := measuredItemSuaFbs %chin% stockable_items]


## Remove stocks for non stockable items
#data <- data[!(measuredElementSuaFbs == 'stockChange' &
#               stockable == FALSE &
#               flagObservationStatus != "E" &
#               flagMethod != "f")]

data <- data[measuredElementSuaFbs != 'residual']

# Tourism consumption will be in the data only for selected countries
# and needs to be protected
data[measuredElementSuaFbs == "tourist", Protected := TRUE]


######################### Save UNB for validation #######################

sua_unbalanced <-
  data[,
    c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs",
      "measuredElementSuaFbs", "Value", "flagObservationStatus", "flagMethod"),
    with = FALSE
  ]

sua_unbalanced_aux <-
  sua_unbalanced[,
    .(
      supply =
        sum(Value[measuredElementSuaFbs %chin% c("production", "imports")],
            - Value[measuredElementSuaFbs %chin% c("exports", "stockChange")],
            na.rm = TRUE),
      utilizations =
        sum(Value[!(measuredElementSuaFbs %chin%
                    c("production", "imports", "exports", "stockChange"))],
            na.rm = TRUE)
    ),
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ][,
    imbalance := supply - utilizations
  ][
    supply > 0,
    imbalance_pct := imbalance / supply * 100
  ]

sua_unbalanced_aux <-
  melt(
    sua_unbalanced_aux,
    c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs"),
    variable.name = "measuredElementSuaFbs",
    value.name = "Value"
  )

sua_unbalanced_aux[, `:=`(flagObservationStatus = "I", flagMethod = "i")]

sua_unbalanced <- rbind(sua_unbalanced, sua_unbalanced_aux)


# saveRDS(
#   sua_unbalanced,
#   file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "sua_unbalanced.rds")
# )

######################### Save UNB for validation #######################




data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  movsum_value := RcppRoll::roll_sum(shift(Value), 3, fill = 'extend', align = 'right'),
  by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
]

data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  mov_share := movsum_value / sum(movsum_value, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
]

# Impute share if missing
data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  mov_share := rollavg(mov_share),
  by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")
]

# Set sum of shares = 1
data[
  measuredElementSuaFbs %chin% c("feed", "food", "industrial", "loss", "seed", "tourist"),
  mov_share := mov_share / sum(mov_share, na.rm = TRUE),
  by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
]
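# A minimal sketch of the share logic above (hypothetical figures): for one
# item and year, if the lagged 3-year rolling sums are food = 600, feed = 300,
# seed = 100, then mov_share is 0.6 / 0.3 / 0.1; missing shares are imputed
# with rollavg() (sourced from R/) and the shares are rescaled to sum to 1.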


data[is.na(Official), Official := FALSE]
data[is.na(Protected), Protected := FALSE]


# Filter elements that appear for the first time

sel_vars <-
  c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs")

data_complete <-
  data.table(
    geographicAreaM49 = unique(data$geographicAreaM49),
    timePointYears = sort(unique(data$timePointYears))
  )[
    unique(data[, sel_vars, with = FALSE]),
    on = "geographicAreaM49",
    allow.cartesian = TRUE
  ]

sel_vars <-
  c("geographicAreaM49", "measuredItemSuaFbs",
    "measuredElementSuaFbs", "timePointYears")

data_complete <-
  dt_left_join(
    data_complete,
    data[Value > 0, c(sel_vars, "Value"), with = FALSE],
    by = sel_vars
  )

data_complete[, y := 1]


## We switched pre and post for back compilation
data_complete <-
  data_complete[,
    .(
      t_pre   = sum(y[timePointYears %in% as.character(startYear:endYear)]),
      t_post  = sum(y[timePointYears %in% refYear]),
      na_pre  = sum(is.na(Value[timePointYears %in% as.character(startYear:endYear)])),
      na_post = sum(is.na(Value[timePointYears %in% refYear]))
    ),
    by = c("geographicAreaM49", "measuredElementSuaFbs", "measuredItemSuaFbs")
  ]

new_elements <-
  data_complete[
    na_pre == t_pre & na_post < t_post
  ][
    order(geographicAreaM49, measuredElementSuaFbs, measuredItemSuaFbs),
    .(geographicAreaM49, measuredElementSuaFbs, measuredItemFbsSua = measuredItemSuaFbs)
  ]

new_loss <-
  new_elements[
    measuredElementSuaFbs == "loss",
    .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua, new_loss = TRUE)
  ]

new_elements <- nameData("suafbs", "sua_unbalanced", new_elements, except = "measuredElementSuaFbs")

new_elements[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

# tmp_file_new <- file.path(TMP_DIR, paste0("NEW_ELEMENTS_", COUNTRY, ".csv"))
# 
# write.csv(new_elements, tmp_file_new)
# (deletefile)

# / Filter elements that appear for the first time


dbg_print("set thresholds")


if (THRESHOLD_METHOD == 'share') {

  dbg_print("thresholds, share")
 # BC: we base thresholds on SUA balanced data
  suabalMinMax = copy(data_suabal)
  suabalMinMax <- merge(suabalMinMax, codes, by = "measuredElementSuaFbs")
  
  suabalMinMax[, measuredElementSuaFbs := name]
  
  suabalMinMax[, name := NULL]
  
  setnames(suabalMinMax, "measuredItemFbsSua", "measuredItemSuaFbs")
  
  
  suabalMinMax[,
    supply :=
      sum(
        Value[measuredElementSuaFbs %in% c('production', 'imports')],
        - Value[measuredElementSuaFbs %in% c('exports', 'stockChange')],
        na.rm = TRUE
      ),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]

  # NOTE: here we redefine what "supply" is just for seed.

  suabalMinMax[,
    supply := ifelse(measuredElementSuaFbs == "seed", Value[measuredElementSuaFbs == "production"], supply),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]

  suabalMinMax[supply < 0, supply := 0]

  suabalMinMax[
    !(measuredElementSuaFbs %chin% c("production", "imports", "exports", "stockChange")),
    util_share := Value / supply
  ]

  suabalMinMax[is.infinite(util_share) | is.nan(util_share), util_share := NA_real_]

  # share < 0 shouldn't happen. Also, tourist can be negative.
  suabalMinMax[util_share < 0 & measuredElementSuaFbs != "tourist", util_share := 0]

  suabalMinMax[util_share > 1, util_share := 1]

  suabalMinMax[,
    `:=`(
      min_util_share = min(util_share[timePointYears %in% refYear], na.rm = TRUE),
      max_util_share = max(util_share[timePointYears %in% refYear], na.rm = TRUE)
    ),
    by = c("measuredItemSuaFbs", "measuredElementSuaFbs", "geographicAreaM49")
  ]

  suabalMinMax[is.infinite(min_util_share) | is.nan(min_util_share), min_util_share := NA_real_]

  suabalMinMax[is.infinite(max_util_share) | is.nan(max_util_share), max_util_share := NA_real_]

  suabalMinMax[max_util_share > 1, max_util_share := 1]

  data = merge(
    data,
    unique(suabalMinMax[, .(geographicAreaM49, measuredItemSuaFbs, measuredElementSuaFbs,
                            min_util_share, max_util_share)]),
    by = c("geographicAreaM49", "measuredItemSuaFbs", "measuredElementSuaFbs"),
    all.x = TRUE
  )
  
  
  data[,
    supply :=
      sum(
        Value[measuredElementSuaFbs %in% c('production', 'imports')],
        - Value[measuredElementSuaFbs %in% c('exports', 'stockChange')],
        na.rm = TRUE
      ),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]

  # NOTE: here we redefine what "supply" is just for seed.

  data[,
    supply := ifelse(measuredElementSuaFbs == "seed", Value[measuredElementSuaFbs == "production"], supply),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]
  
  data[,
    `:=`(
      min_threshold = supply * min_util_share,
      max_threshold = supply * max_util_share
    )
  ]

  data[, supply := NULL]

} else {
  stop("Invalid method.")
}
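# Worked example of the 'share' thresholds (hypothetical figures): if, over
# the reference years, feed/supply for an item ranged between 0.10 and 0.30,
# and the current supply is 200, then min_threshold = 200 * 0.10 = 20 and
# max_threshold = 200 * 0.30 = 60 for feed in that year.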

# Adjustment factors: the ratio of each threshold to the current value,
# used below to bound how far a figure may move during balancing
data[,
  `:=`(
    min_adj = min_threshold / Value,
    max_adj = max_threshold / Value
  )
]


data[min_adj > 1, min_adj := 0.9]
data[min_adj < 0, min_adj := 0.01]

data[max_adj < 1, max_adj := 1.1]
data[max_adj > 10, max_adj := 10] # XXX Too much?
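# Worked example (hypothetical figures): with Value = 100, min_threshold = 80
# and max_threshold = 150, min_adj = 0.8 and max_adj = 1.5. The clamps above
# keep the factors sensible, e.g. a min_adj of 1.2 (threshold above the value)
# is pulled back to 0.9.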

# Fix for new "loss" element. If loss gets in as a new utilization,
# remove the thresholds for food.

data <-
  dt_left_join(
    data,
    new_loss,
    by = c('geographicAreaM49', 'measuredItemSuaFbs')
  )

data[
  new_loss == TRUE & measuredElementSuaFbs == "food",
  `:=`(min_adj = 0, max_adj = 10)
] # XXX is 10 enough?

data[, new_loss := NULL]

# / Fix for new "loss" element. If loss gets in as a new utilization,
# / remove the thresholds for food.



# Recalculate the levels, given the recalculation of adj factors
data[, min_threshold := Value * min_adj]
data[, max_threshold := Value * max_adj]



# We need the min and max to be "near" one (but not too near) in case of
# protected figures, so that they are not changed
data[Protected == TRUE, min_adj := 0.999]
data[Protected == TRUE, max_adj := 1.001]

# This fix should also be done in optim function?
data[
  dplyr::near(min_adj, 1) & dplyr::near(max_adj, 1),
  `:=`(
    min_adj = 0.999,
    max_adj = 1.001
  )
]


# Back compilation: we take the median food information from sua balanced (2014-2019)
# Note: in BC, the median was changed to the mean!!
suabal_Food_Median = copy(outlier_suabal)
suabal_Food_Median[,
     `:=`(
       Food_Median       = mean(Value[measuredElementSuaFbs == "food" & timePointYears %in% refYear], na.rm=TRUE),
       Feed_Median       = mean(Value[measuredElementSuaFbs == "feed" & timePointYears %in% refYear], na.rm=TRUE),
       Industrial_Median = mean(Value[measuredElementSuaFbs == "industrial" & timePointYears %in% refYear], na.rm=TRUE),
       Production_Median = mean(Value[measuredElementSuaFbs == "production" & timePointYears %in% refYear], na.rm=TRUE),
       Import_Median     = mean(Value[measuredElementSuaFbs == "imports" & timePointYears %in% refYear], na.rm=TRUE)
     ),
     by = c("geographicAreaM49", "measuredItemSuaFbs")
]

Medians_for_balancing = unique(suabal_Food_Median[,
  .(geographicAreaM49, measuredItemSuaFbs, Food_Median, Feed_Median,
    Industrial_Median, Production_Median, Import_Median)])

data = merge(data, Medians_for_balancing, by = c("geographicAreaM49", "measuredItemSuaFbs"))


# data[,
#   `:=`(
#     Food_Median       = median(Value[measuredElementSuaFbs == "food" & timePointYears %in% refYear], na.rm=TRUE),
#     Feed_Median       = median(Value[measuredElementSuaFbs == "feed" & timePointYears %in% refYear], na.rm=TRUE),
#     Industrial_Median = median(Value[measuredElementSuaFbs == "industrial" & timePointYears %in% refYear], na.rm=TRUE)
#   ),
#   by = c("geographicAreaM49", "measuredItemSuaFbs")
# ]
message("Line 4630")


z_comp_shares <- copy(computed_shares_send)

z_comp_shares[, Protected_exports := NULL]
z_comp_shares[, Protected_imports := NULL]
z_comp_shares[, Protected_production := NULL]
z_comp_shares[, Protected_stockChange := NULL]

# FIXME: fix above
z_comp_shares[, Parent := sub("'", "", Parent)]
z_comp_shares[, Child := sub("'", "", Child)]


setcolorder(
  z_comp_shares,
  c("Country", "Country_name", "Parent", "Parent_name",
    "Child", "Child_name", "year", "production", "imports", "exports",
    "stockChange", "extractionRate", "shareDownUp", "shareUpDown",
    "availability", "Pshare", "zero_weigth", "Processed_parent")
)

dbg_print("SHARES workbook, create")

wb <- createWorkbook()
addWorksheet(wb, "SHARES")
style_protected <- createStyle(fgFill = "pink")
style_formula <- createStyle(fgFill = "green")
style_text <- createStyle(numFmt = "TEXT")
style_comma <- createStyle(numFmt = "COMMA")
writeData(wb, "SHARES", z_comp_shares)

for (i in c("exports", "imports", "production", "stockChange")) {
  a_col <- grep(paste0("^", i, "$"), names(z_comp_shares))
  a_rows <- which(computed_shares_send[[paste0("Protected_", i)]] == TRUE) + 1

  if (all(length(a_col) > 0, length(a_rows) > 0)) {
    addStyle(wb, sheet = "SHARES", style_protected, rows = a_rows, cols = a_col)
  }
}

addStyle(wb, sheet = "SHARES", style_text, rows = 1:nrow(z_comp_shares) + 1, cols = grep("^(Parent|Child)$", names(z_comp_shares)), gridExpand = TRUE, stack = TRUE)

addFilter(wb, "SHARES", row = 1, cols = 1:ncol(z_comp_shares))

# Formulas: map each column name to its Excel column letter
pos <- lapply(colnames(z_comp_shares), function(x) letters[which(colnames(z_comp_shares) == x)])
names(pos) <- colnames(z_comp_shares)
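# The letters[] lookup above only covers columns A..Z (26 columns). Should the
# sheet ever grow wider, openxlsx::int2col() maps indices to Excel letters; a
# sketch of the equivalent mapping:
# pos <- lapply(seq_along(colnames(z_comp_shares)), openxlsx::int2col)
# names(pos) <- colnames(z_comp_shares)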

sheet_rows   <- 1 + seq_len(nrow(z_comp_shares))
frml_start <- which(colnames(z_comp_shares) == "Processed_parent") + 2

z_comp_shares[, frml_prot_no_zw := computed_shares_send$Protected_production == TRUE & zero_weigth == FALSE]
z_comp_shares[, frml_prot_no_zw_zero_updown := computed_shares_send$Protected_production == TRUE & zero_weigth == FALSE & dplyr::near(shareUpDown, 0)]
z_comp_shares[, prot := computed_shares_send$Protected_production]
z_comp_shares[, frml_n_prot_zero_updown := sum(prot == TRUE & dplyr::near(shareUpDown, 0) == TRUE), by = c("Parent", "year")]
z_comp_shares[, prot := NULL]
z_comp_shares[, frml_multi_parent := length(unique(Parent)) > 1, by = c("Child", "year")]
z_comp_shares[, frml_any_prot_zero_updown := any(frml_prot_no_zw_zero_updown == TRUE), by = c("Parent", "year")]

z_comp_shares[, frml_process_parent_num := paste0(pos$production, sheet_rows, "*", pos$shareDownUp, sheet_rows, "/", pos$extractionRate, sheet_rows)]
z_comp_shares[, frml_process_parent_den := paste0(pos$shareUpDown, sheet_rows)]

z_comp_shares[, frml_any_prot := any(frml_prot_no_zw == TRUE), by = c("Parent", "year")]

z_comp_shares[, frml_process_parent := paste0(pos$availability, sheet_rows, "*", pos$Pshare, sheet_rows)]

z_comp_shares[,
  frml_process_parent :=
    dplyr::case_when(
      frml_any_prot_zero_updown == TRUE & frml_n_prot_zero_updown == 1 ~ paste(frml_process_parent[frml_prot_no_zw == TRUE], collapse = " + "),
      frml_any_prot == TRUE             ~ paste("(", paste(frml_process_parent_num[frml_prot_no_zw == TRUE], collapse = " + "), ") / (", paste(frml_process_parent_den[frml_prot_no_zw == TRUE], collapse = " + "), ")"),
      TRUE                              ~ frml_process_parent
    ),
    by = c("Parent", "year")
]
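# The case_when above builds plain Excel formula strings; e.g. with
# 'availability' in column o and 'Pshare' in column p (their positions after
# the setcolorder above), a default row yields the string "o2*p2", which
# writeFormula() below writes into the cell as =o2*p2.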

#z_comp_shares[, frml_process_parent := paste0(pos$availability, sheet_rows, "*", pos$Pshare, sheet_rows)]
#z_comp_shares[frml_any_prot == TRUE, frml_process_parent := paste("(", paste(frml_process_parent_num[frml_prot_no_zw == TRUE], collapse = " + "), ") / (", paste(frml_process_parent_den[frml_prot_no_zw == TRUE], collapse = " + "), ")"), by = c("Parent", "year")]
## XXX Here we should actually check that it is protected AND has zero shareUpDown for just ONE row
#z_comp_shares[frml_any_prot_zero_updown == TRUE, frml_process_parent := paste(frml_process_parent[frml_prot_no_zw == TRUE], collapse = " + "), by = c("Parent", "year")]

z_comp_shares[, frml_prod := paste0(letters[frml_start], sheet_rows, "*", pos$extractionRate, sheet_rows, "*", pos$shareUpDown, sheet_rows)]
z_comp_shares[frml_multi_parent == TRUE, frml_prod := paste(frml_prod, collapse = " + "), by = c("Child", "year")]

z_comp_shares[, frml_updown := paste0(pos$shareUpDown, sheet_rows)]
z_comp_shares[, frml_check_updown := paste(frml_updown[zero_weigth == FALSE], collapse = " + "), by = c("Parent", "year")]

z_comp_shares[, frml_downup := paste0(pos$shareDownUp, sheet_rows)]
z_comp_shares[, frml_check_downup := paste(frml_downup[zero_weigth == FALSE], collapse = " + "), by = c("Child", "year")]

frml_prod           <- z_comp_shares$frml_prod
frml_process_parent <- z_comp_shares$frml_process_parent
frml_check_updown   <- z_comp_shares$frml_check_updown
frml_check_downup   <- z_comp_shares$frml_check_downup

z_comp_shares[, names(z_comp_shares)[grepl("frml_", names(z_comp_shares))] := NULL]

writeFormula(wb, sheet = "SHARES", x = frml_process_parent, startCol = frml_start + 0, startRow = 2)
writeFormula(wb, sheet = "SHARES", x = frml_prod,           startCol = frml_start + 1, startRow = 2)
writeFormula(wb, sheet = "SHARES", x = frml_check_updown,   startCol = frml_start + 2, startRow = 2)
writeFormula(wb, sheet = "SHARES", x = frml_check_downup,   startCol = frml_start + 3, startRow = 2)

writeData(wb, "SHARES", cbind("PROCESSED_PARENT", "PRODUCTION", "SUM UPdown", "SUM DOWNup"), startCol = frml_start, colNames = FALSE)
addStyle(wb, sheet = "SHARES", style_formula, rows = 1, cols = frml_start:(frml_start + 3))

addStyle(wb, sheet = "SHARES", style_comma, rows = 1:nrow(z_comp_shares) + 1, cols = c(grep("^(production|imports|exports|stockChange|availability|Processed_parent)$", names(z_comp_shares)), frml_start:(frml_start + 3)), gridExpand = TRUE, stack = TRUE)

dbg_print(paste("SHARES workbook, save", getwd(), tmp_file_shares))
message("Line 4740")

saveWorkbook(wb, tmp_file_shares, overwrite = TRUE)

# For testing purposes:
#send_mail(
#  from = "do-not-reply@fao.org",
#  to = swsContext.userEmail,
#  subject = "Results from SUA_bal_compilation plugin",
#  body = c("XXXXXXX", tmp_file_shares))
#
##stop("HI!!!!!!!!!!")

## slot 1 => year = 2014
i <- 1

dbg_print("starting balancing loop")

# XXX Only from 2004 onwards 
#### REVERT BALANCING SEQUENCE FOR BACK-COMPILATION
uniqueLevels <- uniqueLevels[timePointYears %in% as.character((startYear - 1):(endYear + 1))][order(-timePointYears)]

standData <- vector(mode = "list", length = nrow(uniqueLevels))

# set the first slot to 2014 (not to be touched)
standData[[1]] <- data[timePointYears == "2014", ]
standData[[1]]$change_stocks = NA_integer_


# for BC we leave 2014 as it is
for (i in 2:nrow(uniqueLevels)) {

  dbg_print(paste("in balancing loop, start", i))

  # For stocks, the first year does not need to look back in time. After the
  # first year is done, stocks may have changed, so opening stocks need to be
  # updated in "data".
  
  #if (i > 1) { #for BC we need to align with 2014 opening stocks, too
    dbg_print(paste("check stocks change in balancing", i))

    items_stocks_changed <-
      unique(standData[[i-1]][!is.na(change_stocks)]$measuredItemSuaFbs)

    if (length(items_stocks_changed) > 0) {

      dbg_print(paste("recalculate stocks changed", i))

      stocks_modif <-
        rbind(
          # Previous data (balanced)
          rbindlist(standData, fill = TRUE)[
            !is.na(Value) &
              measuredElementSuaFbs == 'stockChange' &
              measuredItemSuaFbs %chin% items_stocks_changed,
            list(geographicAreaM49, timePointYears, measuredItemSuaFbs, delta = Value)
          ],
          # New data (unbalanced)
          data[
            !is.na(Value) &
              timePointYears < unique(standData[[i-1]]$timePointYears) &
              measuredElementSuaFbs == 'stockChange' &
              measuredItemSuaFbs %chin% items_stocks_changed,
            list(geographicAreaM49, timePointYears, measuredItemSuaFbs, delta = Value)
          ]
        )

      data_for_opening <-
        dt_left_join(
          all_opening_stocks[
            timePointYears %in% as.character(startYear:(endYear + 1)) &
              measuredItemFbsSua %chin% items_stocks_changed,
            .(geographicAreaM49,  measuredItemSuaFbs =  measuredItemFbsSua,
              timePointYears, new_opening = Value)
          ],
          stocks_modif,
          by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
        )

      data_for_opening[is.na(delta), delta := 0]

      data_for_opening <- data_for_opening[order(geographicAreaM49, measuredItemSuaFbs, timePointYears)]

      dbg_print(paste("update opening stocks", i))
      
      # Take care of NA opening stocks in 2014: remember which were NA and
      # temporarily use 0 so the update can run
      data_for_opening[, naOpening := new_opening]
      data_for_opening[is.na(new_opening), new_opening := 0]
    
      
      data_for_opening <- update_opening_stocks(data_for_opening)
      # For BC: get rid of the 2014 year used for balancing
      data_for_opening = data_for_opening[timePointYears != 2014, ]
      # Restore the openings that were originally NA
      data_for_opening[is.na(naOpening), new_opening := NA_real_]
      data_for_opening[, naOpening := NULL]
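      # update_opening_stocks() comes from the sourced R/ helpers. A minimal
      # sketch of the assumed recursion (not the actual definition): within
      # each country/item, walking years in order,
      #   new_opening(t + 1) = new_opening(t) + delta(t)
      # starting from the first available opening.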
      
      dbg_print(paste("merge data in opening stocks", i))

      all_opening_stocks <-
        dt_left_join(
          all_opening_stocks,
          data_for_opening[,
            .(
              geographicAreaM49,
              measuredItemFbsSua = measuredItemSuaFbs,
              timePointYears,
              new_opening
            )
          ],
          by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
        )

      dbg_print(paste("new_opening", i))

      all_opening_stocks[
        !is.na(new_opening) & (round(new_opening) != round(Value) | is.na(Value)),
        `:=`(
          Value = new_opening,
          flagObservationStatus = "E",
          flagMethod = "u",
          Protected = FALSE
        )
      ]

      all_opening_stocks[, new_opening := NULL]
    }
  #}

  filter <- uniqueLevels[i, ]

  sel_vars <- c("geographicAreaM49", "timePointYears")

  treeSubset <- tree[filter, on = sel_vars]

  treeSubset[, (sel_vars) := NULL]

  dataSubset <- data[filter, on = sel_vars]

  dbg_print(paste("actual balancing", i))

  standData[[i]] <-
    newBalancing(
      data = dataSubset,
      #nutrientData = subNutrientData,
      #batchnumber = batchnumber,
      Utilization_Table = Utilization_Table
    )

  # FIXME: we are now assigning the "Protected" flag to ALL processing as
  # after the first loop it should have been computed and that value SHOULD
  # never be touched again.
  standData[[i]][measuredElementSuaFbs == "foodManufacturing", Protected := TRUE]

  dbg_print(paste("in balancing loop, end", i))
}


all_opening_stocks[, measuredElementSuaFbs := "5113"]

dbg_print("end of balancing loop")

# Keep only the back-compiled years: drop slot 1 (2014, left untouched)
# and the last slot (the startYear - 1 support year)
standData = standData[2:(nrow(uniqueLevels) - 1)]

standData <- rbindlist(standData)


# If there are manual insertions of stock variations that are not consistent
# with the 2014 opening stocks, set them to 0. TODO(giulia)

data_for_opening <-
  dt_left_join(
    all_opening_stocks[,
                       .(geographicAreaM49, measuredItemSuaFbs = measuredItemFbsSua,
                         timePointYears, new_opening = Value)
    ],
    standData[
      measuredElementSuaFbs == 'stockChange' &
        timePointYears %in% as.character(startYear:(endYear +1)) & # changed years for back compilation 
        !is.na(Value),
      .(geographicAreaM49, measuredItemSuaFbs,
        timePointYears, delta = Value)
    ],
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  )

data_for_opening[is.na(delta), delta := 0]

data_for_opening <- data_for_opening[timePointYears <= endYear+1] # changed for back compilation (note: 2014 needs to be in there!!)

data_for_opening2 <- copy(data_for_opening)
data_for_opening2 <- data_for_opening2[order(geographicAreaM49, measuredItemSuaFbs, -timePointYears)]
# XXX: the lag runs over the whole ordered table, not by country/item group
data_for_opening2[, stock_lag := dplyr::lag(new_opening, 1L)]
data_for_opening2[, diff := stock_lag - delta]
data_for_opening2[, remove := ifelse(diff < 0 & !is.na(diff), TRUE, FALSE)]
RemoveVar2 <- unique(data_for_opening2[remove == TRUE])
# Keep only the keys and the 'remove' flag
RemoveVar2 <- RemoveVar2[, .(geographicAreaM49, measuredItemSuaFbs, timePointYears, remove)]


if (nrow(RemoveVar2) != 0) {
  standData <- merge(standData, RemoveVar2,
                     by = c("measuredItemSuaFbs", "geographicAreaM49", "timePointYears"),
                     all.x = TRUE)
  standData[remove == TRUE & measuredElementSuaFbs == "stockChange", Value := 0]
  standData[, remove := NULL]
}
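# Illustration of the check above (hypothetical figures): with years ordered
# descending, stock_lag is the following year's opening, so
# diff = opening(t + 1) - delta(t) is the implied opening(t); e.g. an opening
# of 50 in t + 1 against a delta of 80 in t implies an opening of -30 in t,
# so that stock variation is set to 0.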
  



standData = standData[timePointYears != "2009",]
calculateImbalance(standData)

standData[
  supply > 0,
  imbalance_percent := imbalance / supply * 100
]
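# calculateImbalance() comes from the sourced R/ helpers; judging by its use
# here, it adds supply, utilizations and imbalance columns by reference, with
#   supply    = production + imports - exports - stockChange
#   imbalance = supply - utilizations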

# Filter stocks to find unusually high openings for the starting year
# (back compilation), to send by email

StocksTooHigh = merge(
  standData[measuredElementSuaFbs == "stockChange",
            .(geographicAreaM49, measuredItemFbsSua = measuredItemSuaFbs, timePointYears,
              Value, supply, flagObservationStatus, flagMethod)],
  all_opening_stocks[, .(geographicAreaM49, measuredItemFbsSua, timePointYears, opening = Value)],
  by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears"))

HighStocksToSend = StocksTooHigh[, StockExcessPercent := ((opening) / (supply + Value)) * 100 ]
HighStocksToSend = HighStocksToSend[StockExcessPercent > 200, ]
HighStocksToSend = nameData(domain = "suafbs", dataset = "sua_unbalanced", HighStocksToSend)
HighStocksToSend = HighStocksToSend[order(-supply), ]
HighStocksToSend = HighStocksToSend[supply > 0 & !is.infinite(StockExcessPercent), ]

tmp_file_highstocks <- file.path(TMP_DIR, paste0("HIGHSTOCKS_", COUNTRY, ".csv"))
write.csv(HighStocksToSend, tmp_file_highstocks)

# If the imbalance is relatively small (less than 5% in absolute value),
# a new allocation is done, this time with no limits.

# Here we need to protect seed, because of its link to production.
standData[measuredElementSuaFbs == "seed", Protected := TRUE]

dbg_print("balancing of imbalances < 5%")

standData[, small_imb := FALSE]

standData[
  data.table::between(imbalance_percent, -5, -0.01) |
    data.table::between(imbalance_percent, 0.01, 5),
  small_imb := TRUE
]

if (nrow(standData[small_imb == TRUE]) > 0) {

  standData[, can_balance := FALSE]

  # The following two statements effectively let the (small) imbalance be
  # allocated with no limits (except to food and seed, among utilizations)

  sel_vars <- c("production", "imports", "exports", "stockChange", "food", "seed")

  standData[
    measuredElementSuaFbs %!in% sel_vars &
      Protected == FALSE &
      !is.na(min_threshold),
    min_threshold := 0
  ]
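  # %!in% (used above) comes from the sourced R/ helpers; assumed to be
  # Negate(`%in%`), i.e. "not in".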

  standData[
    measuredElementSuaFbs %!in% sel_vars &
      Protected == FALSE &
      !is.na(max_threshold),
    max_threshold := Inf
  ]

  standData[
    !is.na(Value) &
      Protected == FALSE &
      !(data.table::between(Value, min_threshold, max_threshold, incbounds = FALSE) %in% FALSE) &
      !(measuredElementSuaFbs %chin%
        c("production", "imports", "exports", "stockChange", "foodManufacturing", "seed")),
    can_balance := TRUE
  ]

  standData[,
    elements_balance := any(can_balance),
    by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
  ]

  if (nrow(standData[small_imb == TRUE & elements_balance == TRUE]) > 0) {

    standData[
      small_imb == TRUE & elements_balance == TRUE,
      adjusted_value := balance_proportional(.SD),
      by = c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs")
    ]

    standData[
      !is.na(adjusted_value) & adjusted_value != Value,
      `:=`(
        Value = adjusted_value,
        flagObservationStatus = "E",
        flagMethod = "n"
      )
    ]

    standData[, adjusted_value := NULL]
  }
}
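# balance_proportional() comes from the sourced R/ helpers. A minimal sketch
# of the assumed behaviour (not the actual definition): within each
# country/item/year group, the residual imbalance is spread over the
# utilizations flagged can_balance in proportion to their current values,
# e.g. with imbalance I and balanceable values u_i,
#   adjusted_i = u_i + I * u_i / sum(u_i)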

calculateImbalance(standData)

standData[
  supply > 0,
  imbalance_percent := round(imbalance / supply * 100, 2)
]

standData[, `:=`(
  imbalance = round(imbalance),
  utilizations = round(utilizations),
  supply = round(supply)
)]


######################### Save BAL for validation #######################

dbg_print("sua_balanced for validation")

sel_vars <-
  c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs",
    "measuredElementSuaFbs", "Value", "flagObservationStatus", "flagMethod")

sua_balanced <-
  rbind(
    data[timePointYears %!in% as.character(startYear:endYear), sel_vars, with = FALSE],
    standData[, sel_vars, with = FALSE]
  )

sua_balanced_aux <-
  sua_balanced[,
    .(
      supply =
        sum(Value[measuredElementSuaFbs %chin% c("production", "imports")],
            - Value[measuredElementSuaFbs %chin% c("exports", "stockChange")],
            na.rm = TRUE),
      utilizations =
        sum(Value[!(measuredElementSuaFbs %chin%
                    c("production", "imports", "exports", "stockChange"))],
            na.rm = TRUE)
    ),
    by = c("geographicAreaM49", "measuredItemSuaFbs", "timePointYears")
  ][,
    imbalance := supply - utilizations
  ][
    supply > 0,
    imbalance_pct := imbalance / supply * 100
  ]

sua_balanced_aux <-
  melt(
    sua_balanced_aux,
    c("geographicAreaM49", "timePointYears", "measuredItemSuaFbs"),
    variable.name = "measuredElementSuaFbs",
    value.name = "Value"
  )

sua_balanced_aux[, `:=`(flagObservationStatus = "I", flagMethod = "i")]

sua_balanced <- rbind(sua_balanced, sua_balanced_aux)

# saveRDS(
#   sua_balanced,
#   file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "sua_balanced.rds")
# )


######################### / Save BAL for validation #######################


# saveRDS(
#   computed_shares_send,
#   file.path(R_SWS_SHARE_PATH, 'mongeau', paste0('computed_shares_send_', COUNTRY, '.rds'))
# )



imbalances <-
  unique(
    standData[,
      list(
        geographicAreaM49,
        measuredElementSuaFbs = "5166",
        measuredItemFbsSua = measuredItemSuaFbs,
        timePointYears,
        Value = imbalance,
        flagObservationStatus = "I",
        flagMethod = "i",
        Protected = FALSE
      )
    ]
  )

imbalances_to_send <-
  standData[
    !is.na(Value) &
      outside(imbalance, -100, 100) &
      timePointYears %in% as.character(startYear:endYear),
    .(country = geographicAreaM49, year = timePointYears, measuredItemSuaFbs,
      element = measuredElementSuaFbs, value = round(Value),
      flag = paste(flagObservationStatus, flagMethod, sep = ","))
  ]
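# outside() comes from the sourced R/ helpers; assumed semantics (not the
# actual definition): outside(x, lower, upper) is TRUE when x < lower or
# x > upper, i.e. the complement of between().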

if (nrow(imbalances_to_send) > 0) {

  imbalances_to_send <-
    data.table::dcast(
      imbalances_to_send,
      country + measuredItemSuaFbs + year ~ element,
      value.var = c("value", "flag")
    )

  names(imbalances_to_send) <- sub("value_", "", names(imbalances_to_send))

  imbalances_to_send <-
    dt_left_join(
      imbalances_to_send,
      unique(standData[, .(country = geographicAreaM49, year = timePointYears,
                           measuredItemSuaFbs, supply, utilizations, imbalance,
                           imbalance_percent)]),
      by = c("country", "year", "measuredItemSuaFbs")
    )

  d_imbal_info <-
    imbalances_to_send[,
      .(country, year, measuredItemSuaFbs,
        supply = round(supply, 2),
        imbalance = round(imbalance, 2),
        perc_imb = round(abs(imbalance / supply) * 100, 2))
    ]

  imbalances_info <-
    c(
      all_items = nrow(unique(standData[, .(geographicAreaM49, timePointYears, measuredItemSuaFbs)])),
      imb_tot = nrow(d_imbal_info),
      imb_pos_supply = nrow(d_imbal_info[supply > 0]),
      imb_gt_5percent = nrow(d_imbal_info[supply > 0][perc_imb > 5]),
      imb_avg_percent = d_imbal_info[supply > 0, mean(abs(perc_imb))]
    )

  setnames(imbalances_to_send, "measuredItemSuaFbs", "measuredItemFbsSua")

  imbalances_to_send <-
    nameData('suafbs', 'sua_unbalanced', imbalances_to_send,
             except = c('measuredElementSuaFbs'))



  data_negtrade <-
    imbalances_to_send[
      utilizations == 0 &
        imbalance < 0 &
        round(imbalance, 10) == round(supply, 10)
    ]

  data_negtrade[, imbalance_percent := NULL]
} else {
  imbalances_to_send <-
    data.table(
      info = c("Great, no imbalances!",
               "Well, at least not greater than 100 tonnes in absolute value")
    )

  data_negtrade <- imbalances_to_send

  imbalances_info <-
    c(
      all_items = 0,
      imb_tot = 0,
      imb_pos_supply = 0,
      imb_gt_5percent = 0,
      imb_avg_percent = 0
    )
}


non_existing_for_imputation <-
  data.table(measuredItemFbsSua = non_existing_for_imputation)

non_existing_for_imputation <-
  nameData('suafbs', 'sua_unbalanced', non_existing_for_imputation)

non_existing_for_imputation[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

# We want to exclude primary livestock items that have heads as units from the imbalance list
LivestockToExcludeFromFile = itemMap[type %in% c("LSPR", "POPR"), measuredItemSuaFbs]
if ("measuredItemFbsSua" %in% names(imbalances_to_send)) {
  imbalances_to_send = imbalances_to_send[measuredItemFbsSua %!in% LivestockToExcludeFromFile,]
  
}


# Move imbalance list from csv to xls
wb <- createWorkbook() 
addWorksheet(wb, "imbalance") 
writeData(wb, "imbalance", imbalances_to_send) 
dbg_print(paste("IMBALANCE workbook, save", getwd(), tmp_file_imb)) 
saveWorkbook(wb, tmp_file_imb, overwrite = TRUE) 

if("measuredItemFbsSua" %in% names(imbalances_to_send)) {
imbalances_to_send[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]
write.csv(imbalances_to_send,          tmp_file_imbCSV)}
# write.csv(data_negtrade,               tmp_file_NegNetTrade)
# write.csv(negative_availability,       tmp_file_negative)
# write.csv(non_existing_for_imputation, tmp_file_non_exist)
# write.csv(fixed_proc_shares,           tmp_file_fix_shares)
# (deletefile)


# FIXME: see also the one done for elementToCodeNames
codes <- as.data.table(tibble::tribble(
  ~code,  ~name,
  "5910", "exports",
  "5520", "feed",
  "5141", "food",
  "5023", "foodManufacturing",
  "5610", "imports",
  "5165", "industrial",
  "5016", "loss",
  "5510", "production",
  "5525", "seed",
  "5164", "tourist",
  "5071", "stockChange"
))

standData <- standData[codes, on = c('measuredElementSuaFbs' = 'name')]


standData <-
  standData[,
    c("geographicAreaM49", "code", "measuredItemSuaFbs", "timePointYears",
      "Value", "flagObservationStatus", "flagMethod", "Protected"),
    with = FALSE
  ]

setnames(
  standData,
  c("code", "measuredItemSuaFbs"),
  c("measuredElementSuaFbs", "measuredItemFbsSua")
)

standData <- standData[!is.na(Value)]

# These cases should not happen (i.e., all flags should already be
# set), but in any case add specific flags so they can be spotted.
standData[is.na(flagObservationStatus), flagObservationStatus := "M"]
standData[is.na(flagMethod), flagMethod := "q"]


# Calculate calories

calories_per_capita <-
  dt_left_join(
    # Food
    standData[
      measuredElementSuaFbs == '5141',
      list(
        geographicAreaM49,
        #measuredElementSuaFbs = "664",
        measuredItemFbsSua,
        timePointYears,
        food = Value,
        flagObservationStatus = "T",
        flagMethod = "i"
      )
    ],
    # Calories # where does the timePointYearsSP come from?
    nutrientData[,
      list(
        geographicAreaM49,
        measuredItemFbsSua = measuredItemCPC,
        measuredElementSuaFbs = measuredElement,
        timePointYears = timePointYearsSP,
        nutrient = Value
      )
    ],
    by = c('geographicAreaM49', 'timePointYears', 'measuredItemFbsSua'),
    allow.cartesian = TRUE
  )

calories_per_capita <-
  dt_left_join(
    calories_per_capita,
    popSWS[, list(geographicAreaM49, timePointYears, population = Value)],
    by = c('geographicAreaM49', 'timePointYears')
  )

data_for_foodGrams <- calories_per_capita[measuredElementSuaFbs == "664"]
data_for_foodGrams[, Value := (food * 1000000) / (365 * population * 1000)]
data_for_foodGrams[, c("food", "nutrient", "population") := NULL]
data_for_foodGrams[, measuredElementSuaFbs := "665"]
data_for_foodGrams[, Protected := FALSE]

# Add population to the SUA as an item
# (copy() so that popSWS itself is not modified by reference)
data_pop <- copy(popSWS)
data_pop[, measuredItemFbsSua := "F0001"]
data_pop[, `:=`(Protected = TRUE, flagObservationStatus = "T", flagMethod = "c")]

data_pop <- data_pop[
  timePointYears %in% as.character(startYear:endYear),
  list(geographicAreaM49, measuredItemFbsSua, timePointYears, flagObservationStatus,
       flagMethod, measuredElementSuaFbs = measuredElement, Value, Protected)]

calories_per_capita_total<-copy(calories_per_capita)

calories_per_capita[, Value := food * nutrient / population / 365 * 10]

calories_per_capita[, Protected := FALSE]

calories_per_capita[, c("food", "nutrient", "population") := NULL]

calories_per_capita_total[, Value := food * nutrient/100]

calories_per_capita_total[, Protected := FALSE]

calories_per_capita_total[, c("food", "nutrient", "population") := NULL]

calories_per_capita_total[measuredElementSuaFbs=="664",
                          measuredElementSuaFbs:="261"]

calories_per_capita_total[measuredElementSuaFbs=="674",
                          measuredElementSuaFbs:="271"]

calories_per_capita_total[measuredElementSuaFbs=="684",
                          measuredElementSuaFbs:="281"]

calories_per_capita<-rbind(calories_per_capita,calories_per_capita_total,data_for_foodGrams,data_pop)

standData <-
  rbind(
    standData,
    imbalances,
    all_opening_stocks,
    calories_per_capita
  )

standData <- standData[timePointYears %in% as.character(startYear:endYear) & !is.na(Value)]

standData[, Protected := NULL]


# Get ALL data from SUA BALANCED, do an anti-join, and set to NA whatever was
# not generated here, so as to clean the session of non-existing cells

## MOVED PULL FROM SUA BALANCED INTO OUTLIER ROUTINE

######### Combine old and new calories and data, and get outliers #############

combined_calories <-
  rbind(
    data_suabal[measuredElementSuaFbs == "664" & timePointYears %!in% as.character(startYear:endYear)],
    calories_per_capita[, names(data_suabal), with = FALSE]
  )

combined_calories <-
  combined_calories[order(geographicAreaM49, measuredItemFbsSua, timePointYears)]

### Changed for back compilation: the historical reference value
# is the 2014-2019 period
combined_calories <-
  combined_calories[,
    `:=`(
      perc.change = (Value / shift(Value) - 1) * 100,
      Reference_value_2014_2019 = mean(Value[timePointYears %in% refYear], na.rm = TRUE)
    ),
    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]

combined_calories[, outlier := ""]

# changed for back compilation
combined_calories[
  (shift(Value) > 5 | Value > 5) & abs(perc.change) > 10 & timePointYears %in% as.character(startYear:endYear),
  outlier := "OUTLIER",
  by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
]

combined_calories <-
  combined_calories[
    unique(combined_calories[outlier == "OUTLIER", .(geographicAreaM49, measuredItemFbsSua)]),
    on = c("geographicAreaM49", "measuredItemFbsSua")
  ]

old_sua_bal <- data_suabal[timePointYears %!in% as.character(startYear:endYear)]

old_sua_bal <-
  old_sua_bal[,
    supply :=
      sum(
        Value[measuredElementSuaFbs %chin% c("5510", "5610")],
        - Value[measuredElementSuaFbs %chin% c("5910", "5071")],
        na.rm = TRUE
      ),
  by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
]

old_sua_bal <-
  old_sua_bal[
    measuredElementSuaFbs %chin%
      c("5510", "5023", "5016", "5165", "5520", "5525", "5164", "5141")
  ]

old_sua_bal <-
  old_sua_bal[,
    .(
      mean_ratio = mean(Value / supply, na.rm = TRUE),
      Meanold = mean(Value, na.rm = TRUE)
    ),
    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]

new_sua_bal <-
  dt_left_join(
    standData,
    old_sua_bal,
    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  )

new_sua_bal[
  measuredElementSuaFbs %chin%
    c("5510", "5023", "5016", "5165", "5520", "5525", "5164", "5141"),
  outlier := outside(Value / Meanold, 0.5, 2)
]

new_sua_bal[,
  supply :=
    sum(
      Value[measuredElementSuaFbs %chin% c("5510", "5610")],
      - Value[measuredElementSuaFbs %chin% c("5910", "5071")],
      na.rm = TRUE
    ),
  by = c("geographicAreaM49", "measuredItemFbsSua", "timePointYears")
]

new_sua_bal[, outl_on_supp := abs(Value / supply - mean_ratio) >= 0.1]

new_sua_bal <-
  new_sua_bal[
    measuredElementSuaFbs %chin%
      c("5510", "5023", "5016", "5165", "5520", "5525", "5164", "5141")
  ]

new_sua_bal <-
  new_sua_bal[
    abs(Meanold - Value) > 10000 &
    ((measuredElementSuaFbs !=5510 & outlier == TRUE & outl_on_supp == TRUE &
      Value > 1000 & abs(Meanold - Value) > 10000) |
    (outlier == TRUE & measuredElementSuaFbs == 5510))
  ]

if (nrow(new_sua_bal) > 0) {

  new_sua_bal <-
    new_sua_bal[,
      .(geographicAreaM49, measuredItemFbsSua, measuredElementSuaFbs,
        timePointYears, Reference_value_2014_2019 = Meanold, outlier = "OUTLIER")
    ]

  sel_vars <-
    c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")

  out_elems_items <-
    rbind(
      data_suabal[timePointYears %in% refYear][unique(new_sua_bal[, sel_vars, with = FALSE]), on = sel_vars],
      standData[unique(new_sua_bal[, sel_vars, with = FALSE]), on = sel_vars]
    )

  out_elems_items <-
    dt_left_join(
      out_elems_items,
      new_sua_bal,
      by = c("geographicAreaM49", "measuredItemFbsSua",
             "measuredElementSuaFbs", "timePointYears")
    )

  out_elems_items[is.na(outlier), outlier := ""]

  out_elems_items[,
    Reference_value_2014_2019 := unique(na.omit(Reference_value_2014_2019)),
    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]

  out_elems_items <-
    out_elems_items[order(geographicAreaM49, measuredItemFbsSua, measuredElementSuaFbs, timePointYears)]

  out_elems_items[,
    perc.change := (Value / shift(Value) - 1) * 100,
    by = c("geographicAreaM49", "measuredItemFbsSua", "measuredElementSuaFbs")
  ]
} else {
  out_elems_items <-
    new_sua_bal[0, .(geographicAreaM49, measuredElementSuaFbs,
                     measuredItemFbsSua, timePointYears, Value,
                     flagObservationStatus, flagMethod,
                     perc.change = NA_real_,
                     Reference_value_2014_2019 = NA_real_, outlier)]
}

out_elems_items <- rbind(combined_calories, out_elems_items, fill = TRUE)

out_elems_items[, Value := round(Value, 2)]
out_elems_items[, perc.change := round(perc.change, 2)]
out_elems_items[, Reference_value_2014_2019 := round(Reference_value_2014_2019, 2)]


# tmp_file_outliers <- file.path(TMP_DIR, paste0("OUTLIERS_", COUNTRY, ".csv"))

out_elems_items <-
  nameData("suafbs", "sua_unbalanced", out_elems_items, except = "timePointYears")

# write.csv(out_elems_items, tmp_file_outliers)

message("Line 5462")


######### / Combine old and new calories and data, and get outliers ###########


if (nrow(data_suabal[timePointYears %in% as.character(startYear:endYear)]) > 0) {

  sel_vars <-
    c('geographicAreaM49', 'measuredElementSuaFbs',
      'measuredItemFbsSua', 'timePointYears')

  data_suabal_missing <-
    data_suabal[timePointYears %in% as.character(startYear:endYear)][!standData, on = sel_vars]

  if (nrow(data_suabal_missing) > 0) {

    data_suabal_missing[,
      `:=`(
        Value = NA_real_,
        flagObservationStatus = NA_character_,
        flagMethod = NA_character_
      )
    ]

    standData <- rbind(standData, data_suabal_missing)
  }
}


########## DES calculation

sel_vars <- c('geographicAreaM49', 'timePointYears', 'measuredItemFbsSua')

des <-
  rbind(
    data_suabal[
      measuredElementSuaFbs == '664' & timePointYears %!in% as.character(startYear:endYear),
      .(Value = sum(Value, na.rm = TRUE)),
      by = sel_vars
    ],
    standData[
      measuredElementSuaFbs == '664' & timePointYears %in% as.character(startYear:endYear),
      .(Value = sum(Value, na.rm = TRUE)),
      by =  sel_vars
    ]
  )

des <-
  rbind(
    des,
    des[,
      .(measuredItemFbsSua = 'S2901', Value = sum(Value)),
      by = c('geographicAreaM49', 'timePointYears')
    ]
  )

des[, Value := round(Value, 2)]

f_des <- file.path(R_SWS_SHARE_PATH, "FBSvalidation", COUNTRY, "des.rds")

if (file.exists(f_des)) {
  file.copy(f_des, sub("des\\.rds", "des_prev.rds", f_des))
}

# saveRDS(des, f_des)

##### Plot of main DES absolute variations

des_diff <-
  des[
    order(geographicAreaM49, measuredItemFbsSua, timePointYears),
    .(year = timePointYears, diff = Value - shift(Value)),
    .(geographicAreaM49, item = measuredItemFbsSua)
  ][
    year != min(year)
  ]

des_diff <- des_diff[item %chin% des_diff[abs(diff) > 20]$item]

des_diff[item == "S2901", item := "GRAND TOTAL"]

plot_main_des_diff <-
  ggplot(des_diff, aes(x = year, diff, group = item, color = item)) +
    geom_line(size = 1) +
    geom_point(size = 3) +
    ggtitle("Absolute variation of DES and main variations of items",
            subtitle = COUNTRY_NAME)

# tmp_file_plot_main_des_diff <-
#   file.path(TMP_DIR, paste0("PLOT_MAIN_DES_DIFF_", COUNTRY, ".pdf"))
# 
# ggsave(tmp_file_plot_main_des_diff, plot = plot_main_des_diff)

##### / Plot of main DES absolute variations

##### Plot of main DES

main_des_items <-
  des[,
    .(geographicAreaM49, year = timePointYears, item = measuredItemFbsSua, Value)
  ][
    item %chin% des[Value > 100]$measuredItemFbsSua
  ][
    order(geographicAreaM49, item, year)
  ]

main_des_items[item == "S2901", item := "GRAND TOTAL"]

# plot_main_des_items <-
#   ggplot(main_des_items, aes(x = year, Value, group = item, color = item)) +
#     geom_line(size = 1) +
#     geom_point(size = 3) +
#     ggtitle("Main DES items (> 100 Calories)", subtitle = COUNTRY_NAME)
# 
# tmp_file_plot_main_des_items <-
#   file.path(TMP_DIR, paste0("PLOT_MAIN_DES_ITEMS_", COUNTRY, ".pdf"))
# 
# ggsave(tmp_file_plot_main_des_items, plot = plot_main_des_items)

##### / Plot of main DES

# PRE AND POST ARE REVERSED FOR BACK COMPILATION:
#   post = refYear period
#   pre  = query period (plots are adjusted accordingly)
##### Plot of main diff avg DES
des_main_diff_avg <-
  des[
    measuredItemFbsSua != "S2901",
    .(post = mean(Value[timePointYears %in% refYear]), pre = mean(Value[timePointYears %in% as.character(startYear:endYear)])),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

des_main_diff_avg <- des_main_diff_avg[abs(post - pre) > 20]

des_main_diff_avg <- melt(des_main_diff_avg, c("geographicAreaM49", "measuredItemFbsSua"))

des_main_diff_avg <- nameData("suafbs", "sua_unbalanced", des_main_diff_avg, except = "geographicAreaM49")

# plot_des_main_diff_avg <-
#   ggplot(des_main_diff_avg,
#          aes(x = measuredItemFbsSua_description, y = value,
#              group = rev(variable), fill = variable)) +
#     geom_col(position = "dodge") +
#     coord_flip() +
#     ggtitle("Main diff (> 20 Cal) in DES average pre (<= 2013) and post (>= 2014)",
#             subtitle = COUNTRY_NAME)
# 
# 
# tmp_file_plot_des_main_diff_avg <-
#   file.path(TMP_DIR, paste0("PLOT_DES_MAIN_DIFF_AVG_", COUNTRY, ".pdf"))
# 
# ggsave(tmp_file_plot_des_main_diff_avg, plot = plot_des_main_diff_avg)


##### Plot of main diff avg DES

des_cast <-
  data.table::dcast(
    des,
    geographicAreaM49 + measuredItemFbsSua ~ timePointYears,
    fun.aggregate = sum,
    value.var = "Value"
  )

des_cast <- nameData("suafbs", "sua_balanced", des_cast)

setorderv(des_cast, names(des_cast)[ncol(des_cast)], order = -1)


# Main items, covering more than 90% of the total

des_main_90 <-
  des[
    measuredItemFbsSua != "S2901" & timePointYears %in% as.character(startYear:endYear),
    .(tot = sum(Value)),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ][
    order(-tot)
  ][,
    cumsum := cumsum(tot)
  ]

des_main_90 <-
  des_main_90[
    des[
      measuredItemFbsSua != "S2901" &  timePointYears %in% as.character(startYear:endYear),
      .(maintot = sum(Value)),
      by = "geographicAreaM49"
    ],
    on = "geographicAreaM49"
  ][
    cumsum < maintot * 0.9
  ][,
    c("tot", "cumsum", "maintot") := NULL
  ]

des_main <-
  rbind(
    des_cast[measuredItemFbsSua == "S2901"],
    des_cast[des_main_90, on = c("geographicAreaM49", "measuredItemFbsSua")]
  )

des_main <-
  rbind(
    des_main,
    # The first row is the S2901 grand total, so sum(x[-1]) / x[1] expresses
    # the main items as a percentage of the total
    des_main[,
      lapply(.SD, function(x) round(sum(x[-1]) / x[1] * 100, 2)),
      .SDcols = c(sort(unique(des$timePointYears)))
    ],
    fill = TRUE
  )

des_main[
  is.na(measuredItemFbsSua),
  measuredItemFbsSua_description := "PERCENTAGE OF MAIN OVER TOTAL"
]

########## create XLSX for main DES items

des_main_90_and_tot <-
  rbind(
    data.table(
      geographicAreaM49 = unique(des_main_90$geographicAreaM49),
      measuredItemFbsSua = "S2901"
    ),
    des_main_90
  )

des_level_diff <-
  des[
    order(geographicAreaM49, measuredItemFbsSua, timePointYears),
    .(
      Value = Value - shift(Value),
      timePointYears
    ),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

des_level_diff_cast <-
  data.table::dcast(
    des_level_diff,
    geographicAreaM49 + measuredItemFbsSua ~ timePointYears,
    fun.aggregate = sum,
    value.var = "Value"
  )


des_perc_diff <-
  des[
    order(geographicAreaM49, measuredItemFbsSua, timePointYears),
    .(
      Value = Value / shift(Value) - 1,
      timePointYears = timePointYears
    ),
    by = c("geographicAreaM49", "measuredItemFbsSua")
  ]

des_perc_diff[is.infinite(Value) | is.nan(Value), Value := NA_real_]

des_perc_diff_cast <-
  data.table::dcast(
    des_perc_diff,
    geographicAreaM49 + measuredItemFbsSua ~ timePointYears,
    fun.aggregate = sum,
    value.var = "Value"
  )

sel_vars <-
  c("geographicAreaM49", "geographicAreaM49_description",
    "measuredItemFbsSua", "measuredItemFbsSua_description")

by_vars <- c("geographicAreaM49", "measuredItemFbsSua")

des_level_diff_cast <-
  merge(des_level_diff_cast, des_cast[, sel_vars, with = FALSE ], by = by_vars)

setcolorder(des_level_diff_cast, names(des_cast))

des_perc_diff_cast <-
  merge(des_perc_diff_cast, des_cast[, sel_vars, with = FALSE], by = by_vars)

setcolorder(des_perc_diff_cast, names(des_cast))

des_level_diff_cast <- des_level_diff_cast[des_main_90_and_tot, on = by_vars]

des_perc_diff_cast <- des_perc_diff_cast[des_main_90_and_tot, on = by_vars]


wb <- createWorkbook()

addWorksheet(wb, "DES_MAIN")
addWorksheet(wb, "DES_MAIN_diff")
addWorksheet(wb, "DES_MAIN_diff_perc")

writeData(wb, "DES_MAIN", des_main)
writeData(wb, "DES_MAIN_diff", des_level_diff_cast)
writeData(wb, "DES_MAIN_diff_perc", des_perc_diff_cast)

style_cal_0      <- createStyle(fgFill = "black", fontColour = "white", textDecoration = "bold")
style_cal_gt_10  <- createStyle(fgFill = "lightyellow")
style_cal_gt_20  <- createStyle(fgFill = "yellow")
style_cal_gt_50  <- createStyle(fgFill = "orange")
style_cal_gt_100 <- createStyle(fgFill = "red")
style_percent    <- createStyle(numFmt = "PERCENTAGE")
style_mean_diff  <- createStyle(borderColour = "blue", borderStyle = "double", border = "TopBottomLeftRight")
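
# Illustrative sketch (not executed; toy workbook): addStyle() semantics as
# used in the loop below. Spreadsheet rows are offset by 1 relative to data
# rows because writeData() writes a header row first.
if (FALSE) {
  wb_toy <- createWorkbook()
  addWorksheet(wb_toy, "toy")
  df_toy <- data.frame(x = c(5, 150))
  writeData(wb_toy, "toy", df_toy)
  # highlight data rows where |x| > 100: data row 2 is sheet row 3
  addStyle(
    wb_toy, "toy",
    style = createStyle(fgFill = "red"),
    rows = 1 + which(abs(df_toy$x) > 100),
    cols = 1,
    gridExpand = TRUE
  )
}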

# Cells that are zero in all of 2010-2013 although the item is non-zero on
# average over the full series (back-compilation variant: the forward check
# looked for zeros in 2014-2019 instead)
zeros <-
  des_main[, .SD, .SDcols = as.character(startYear:endYear)] == 0 &
  des_main[, rowMeans(.SD), .SDcols = c(sort(unique(des$timePointYears)))] > 0

for (i in which(names(des_level_diff_cast) %in% min(startYear):max(refYear))) {
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 10], style = style_cal_gt_10, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 20], style = style_cal_gt_20, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 50], style = style_cal_gt_50, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 100], style = style_cal_gt_100, gridExpand = TRUE)

  if (names(des_level_diff_cast)[i] %in% as.character(startYear:endYear)) {
    j <- names(des_level_diff_cast)[i]
    rows_with_zero <- zeros[, j]
    if (any(rows_with_zero, na.rm = TRUE)) {
      addStyle(wb, "DES_MAIN", cols = i, rows = 1 + which(rows_with_zero), style = style_cal_0, gridExpand = TRUE)
    }
  }

  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 10], style = style_cal_gt_10, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 20], style = style_cal_gt_20, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 50], style = style_cal_gt_50, gridExpand = TRUE)
  addStyle(wb, "DES_MAIN_diff", cols = i, rows = 1 + (1:nrow(des_level_diff_cast))[abs(des_level_diff_cast[[i]]) > 100], style = style_cal_gt_100, gridExpand = TRUE)
}


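# Items whose average DES over 2010-2013 differs by more than 30% from the
# average over 2014 onwards get the blue double border defined above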
high_variation_mean <-
  abs(
    des_main[, rowMeans(.SD), .SDcols = as.character(2010:2013)] /
      des_main[, rowMeans(.SD), .SDcols = as.character(2014:max(unique(des$timePointYears)))] -
      1
  ) > 0.30

if (any(high_variation_mean, na.rm = TRUE)) {
  high_variation_mean <- 1 + which(high_variation_mean)
  addStyle(wb, sheet = "DES_MAIN", style_mean_diff, rows = high_variation_mean, cols = which(names(des_level_diff_cast) %in% 2014:2019), gridExpand = TRUE, stack = TRUE)
}


addStyle(wb, sheet = "DES_MAIN_diff_perc", style_percent, rows = 1:nrow(des_level_diff_cast) + 1, cols = which(names(des_level_diff_cast) %in% 2011:2019), gridExpand = TRUE, stack = TRUE)

setColWidths(wb, "DES_MAIN", 4, 40)
setColWidths(wb, "DES_MAIN_diff", 4, 40)
setColWidths(wb, "DES_MAIN_diff_perc", 4, 40)

tmp_file_des_main <- file.path(TMP_DIR, paste0("DES_MAIN_ITEMS_", COUNTRY, ".xlsx"))
dbg_print("saving DES main items workbook")

saveWorkbook(wb, tmp_file_des_main, overwrite = TRUE)


########## / create XLSX for main DES items


tmp_file_des <- file.path(TMP_DIR, paste0("DES_", COUNTRY, ".csv"))

#des_cast[, measuredItemFbsSua := paste0("'", measuredItemFbsSua)]

write.csv(des_cast, tmp_file_des)

########## / DES calculation


# Nutrients (except 664) are required only in the last round (when saving)
if (!(TRUE %in% swsContext.computationParams$save_nutrients)) {
  standData <- standData[measuredElementSuaFbs %!in% c('261', '271', '281', '674', '684')]
}

# set meaningless zeroes and their flags to NA 
standData[Value == 0 & flagObservationStatus %in% c("I") & flagMethod %in% c("i"),
          `:=` (Value = NA, flagObservationStatus = NA, flagMethod = NA)]

standData[Value == 0 & flagObservationStatus %in% c("E") & flagMethod %in% c("e"),
          `:=` (Value = NA, flagObservationStatus = NA, flagMethod = NA)]
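
# Illustrative sketch (not executed): the two updates above are equivalent to
# a single conditional update keyed on the (status, method) flag pair.
if (FALSE) {
  standData[
    Value == 0 &
      paste(flagObservationStatus, flagMethod) %in% c("I i", "E e"),
    `:=` (Value = NA_real_, flagObservationStatus = NA_character_, flagMethod = NA_character_)
  ]
}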

# Trap save errors so the failure branch below is reachable
out <- tryCatch(
  SaveData(domain = "suafbs", dataset = "sua_balanced", data = standData, waitTimeout = 20000),
  error = function(e) {
    dbg_print(paste("SaveData failed:", conditionMessage(e)))
    NULL
  }
)

if (!is.null(out)) {

  
  ## UNTIL FEED IS BROUGHT ON BOARD IN BACK COMPILATION
  
  msg_new_feed_remove = "No additional feed items currently implemented"
  msg_new_feed_dubious = "---"
  body_message <-
    sprintf(
      "Plugin completed in %1.2f minutes.

      ################################################
      # Imbalances (greater than 100 t in abs value) #
      ################################################

      unbalanced items: %s (out of %s)
      unbalanced items with positive supply: %s
      unbalanced items with imbalance > 5%%: %s
      average percent imbalance in absolute value: %1.2f

      ###############################################
      ############       Parameters       ###########
      ###############################################

      country = %s
      threshold method = %s
      fill_extraction_rates = %s

      ###############################################
      ###########       ShareDownUp       ###########
      ###############################################

      shareDownUp can be modified here:

      %s

      ###############################################
      ##############   Removed feed   ###############
      ###############################################

      The following NEW feed items were removed as including them
      would have created a huge negative imbalance:

      %s

      The following NEW feed items are dubious, they might be
      removed, but you will need to do it manually:

      %s

      ###############################################
      ##############       Flags       ##############
      ###############################################

      E,-: Balancing: utilization modified OR Processed created down up using official data
      E,b: Final balancing (industrial, Feed)
      E,c: Balancing: production modified to compensate net trade
      E,e: Outlier replaced
      E,h: Balancing: imbalance assigned to food, when food is the only utilization (Food Residual)
      E,i: Food processed generated, using availability
      E,n: Balancing of small imbalances OR Stock variation corrected for over-accumulation (to change)
      E,s: Balancing: stocks modified
      E,u: Stocks variation generated by the model OR updated opening stocks
      I,c: Derived production generated
      I,e: Module imputation
      I,i: Residual item (identity) OR stocks as 20 percent of supply in 2013
      I,-: Opening stocks as cumulated stocks variations 1961-2013
      M,q: Cases for which flags were not set (should never happen)
      T,i: Calories per capita created
      T,p: Oilworld stocks data
      T,h: USDA stocks data



      ###############################################
      ##############       Files       ##############
      ###############################################

      The following files are attached:

      - PLOT_MAIN_DES_ITEMS_*.pdf = Plot of main DES items (> 100 Calories)

      - PLOT_MAIN_DES_DIFF_*.pdf = Plot of main DES Calories variations

      - PLOT_DES_MAIN_DIFF_AVG_*.pdf = Plot of main variations (> 20 Calories)
          in the AVERAGE DES pre (year <= 2013) and post (year >= 2014)

      - DES_MAIN_ITEMS_*.xlsx = as DES_*.csv, but only with items that
          accounted for nearly 90%% on average over 2010-2013

      - DES_*.csv = calculation of DES (total and by items)

      - OUTLIERS_*.csv = outliers in Calories (defined as those that account
          for more than 5 Calories with an increase of more than 15%%)

      - SHARES_*.xlsx = Parents/Children shares, availability, etc.

      - FILLED_ER_*.csv = filled extraction rates

      - IMBALANCE_*.csv = imbalances, supply and utilizations

      - NEGATIVE_AVAILAB_*.csv = items with negative availability

      - NONEXISTENT_*.csv = items for which no production, imports,
          or exports exist after 2013

      - NEW_ELEMENTS_*.csv = elements that appear for the first time

      - FIXED_PROC_SHARES_*.csv = items for which the processing share
          was fixed, to keep it consistent with its main co-product

      - NEG_NET_TRADE_*.csv = negative net trade

      - STOCK_CORRECTION_*.csv = Corrected Stocks

      ",
      difftime(Sys.time(), start_time, units = "min"),
      imbalances_info[['imb_tot']],
      prettyNum(imbalances_info[['all_items']], big.mark = ","),
      imbalances_info[['imb_pos_supply']],
      imbalances_info[['imb_gt_5percent']],
      imbalances_info[['imb_avg_percent']],
      COUNTRY,
      THRESHOLD_METHOD,
      FILL_EXTRACTION_RATES,
      sub('/work/SWS_R_Share/', '', shareDownUp_file),
      msg_new_feed_remove,
      msg_new_feed_dubious
    )

  if (!CheckDebug()) {
    send_mail(
      from = "do-not-reply@fao.org",
      to = swsContext.userEmail,
      subject = "Results from SUA_bal_compilation plugin",
      body = c(body_message,
               # tmp_file_plot_main_des_items,
               # tmp_file_plot_main_des_diff,
               # tmp_file_plot_des_main_diff_avg,
               tmp_file_des_main,
               tmp_file_des,
               # tmp_file_outliers,
               tmp_file_shares,
               tmp_file_imb,
               tmp_file_imbCSV,
               # tmp_file_extr,
               # tmp_file_negative,
               # tmp_file_non_exist,
               # tmp_file_new,
               # tmp_file_fix_shares,
               # tmp_file_NegNetTrade,
               StocksMismatch,
               # tmp_file_Stock_correction,
               tmp_file_highstocks
              )
    )
  }
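
  # Hypothetical sketch (not executed; not part of the plugin): the flag
  # legend from the e-mail body could be kept as a lookup table and
  # anti-joined against the saved data to spot undocumented flag combinations.
  if (FALSE) {
    flag_legend <- data.table(
      flagObservationStatus = c("E", "E", "E", "I", "I", "T"),  # abridged
      flagMethod            = c("-", "b", "c", "c", "e", "i")
    )
    undocumented_flags <-
      standData[
        !flag_legend,
        on = c("flagObservationStatus", "flagMethod")
      ][, .N, by = .(flagObservationStatus, flagMethod)]
  }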

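  # SaveData() may report NA counts; treat them as zero before summarising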
  out[c("inserted", "appended", "ignored", "discarded")] <-
    lapply(
      out[c("inserted", "appended", "ignored", "discarded")],
      function(x) ifelse(is.na(x[[1]]), 0, x[[1]])
    )

  last_msg <- paste(out$inserted + out$appended, "observations written")

  if (out$discarded + out$ignored > 0) {
    last_msg <- paste(last_msg, "and issues with", out$discarded + out$ignored)
  }

  f <- file.path(R_SWS_SHARE_PATH, "FBSvalidation", "reference", "plugin_success.txt")

  if (file.exists(f)) {

    f <- read.delim(f, header = FALSE, stringsAsFactors = FALSE)

    last_msg <- paste0("<div>", last_msg, "</div><br />",
                       "<div style = \"color: red; font-size: 20px; text-align: center;\">",
                       f[, sample(2:ncol(f), 1)][1],
                       "</div><br /><div style = \"color: red; font-size: 20px; background-size: ",
                       "300px 200px; width: 300; height: 200px; background-image: url('",
                       f[1, 1], "')\"></div>")
  }

  print(last_msg)

} else {

  print("The SUA_bal_compilation plugin had a problem when saving data.")

}

# If we want the output sent to us as well
# if(!CheckDebug()){
#   send_mail(
#     from = "do-not-reply@fao.org",
#     to = c("bernhard.dalheimer@fao.org", "amsata.niang@fao.org", "pietro.marinelli@fao.org"),
#     subject = "Results from SUA_bal_compilation plugin",
#     body = c(sprintf("For analysis..."),
#              tmp_file_des_main,
#              tmp_file_des,
#              tmp_file_outliers,
#              tmp_file_imb,
#              StocksMismatch,
#              tmp_file_highstocks
#     )
#   )
# }

unlink(TMP_DIR, recursive = TRUE)



## TODO for back compilation:

# implement stock condition to have 2013 and 2014 aligned
# keep cumulative stock in pullData and take the shares as session data and clear 2010-2013

# revisit the stock implementation (with christian)

# start testing (China, India, Indonesia, Mexico, Bangladesh, Ethiopia, Senegal...)

# Stock mechanism

## Problems to debug:

# 1. Ef (manual estimates) in SUA unbalanced - done, waiting for feedback

# 2. Stocks do not align in Indonesia rice (Claudia, token present)

# 3. Loss numbers left to do

# 4. Processed solved!

# 5. Feed vs Food, Germ of Wheat left to do

# 01379.90 Mexico (Cristina)

# Note: if opening stocks are constant and stock variation is zero, compilers have wiggle room in 2010-2013









## Testing results

# Solved issues:
#   
# a) The Ef flags are wiped everywhere, plug-in is updated
# 
# b) Missing processed is due to settings of the session, documentation updated
# 
# c) Stocks mismatches between years, plug-in updated
# 
# d) Adjust outliers to be based on sua balanced, plug-in is updated
# 
# e) Germ of wheat appearing in Feed and/or Food (Gluten in Senegal, feed/industrial), addressed in the outlier routine, plug-in is updated
# 
# f) Strange loss estimates, due to wrong outlier reference, addressed in outlier routine, plug-in is updated
#
# g) Pull derived production as primary for meats
#
# h) Message about high stock values
#
#
#
#
# Pending issues
#
# a) Stock block (Sumeda)
#
# b) Clean codes from session (Giulia)
#
# 
# Countries: 
# China, India, Indonesia, Mexico, Bangladesh, Ethiopia, Senegal
#
# Additional tests...


# Issues to debug

# shares (no flags), solve

# sweet potatoes in China (Ef), solved
# Sumeda and Claudia issue, no food, solved 
# Rachele: flags changing from Th of stocks, solved 

# standardization, TODO
# calculate Pshares from sua balanced (TODO)
# strange loss estimates (probably from loss domain)


# high shares in Mexico for skim milk (high shares that cannot be changed)