tests/testthat/test-hierarchical.R

test_that("prep_hierarchical_data returns correct grouped hierarchies", {
  # Mock data setup
  data <- tibble::tibble(
    Segment = as.character(c(
      "Commercial", "Commercial", "Commercial", "Commercial", "Commercial", "Commercial",
      "Commercial", "Commercial", "Commercial", "Commercial", "Commercial", "Commercial",
      "Consumer", "Consumer", "Consumer", "Consumer", "Consumer", "Consumer",
      "Consumer", "Consumer", "Consumer", "Consumer", "Consumer", "Consumer"
    )),
    Country = as.character(c(
      "United (States)", "United (States)", "United (States)", "United (States)", "United (States)", "United (States)",
      "UK", "UK", "UK", "UK", "UK", "UK",
      "United (States)", "United (States)", "United (States)", "United (States)", "United (States)", "United (States)",
      "UK", "UK", "UK", "UK", "UK", "UK"
    )),
    Product = as.character(c(
      "Office", "Office", "Office", "Excel", "Excel", "Excel",
      "Office", "Office", "Office", "Excel", "Excel", "Excel",
      "Office", "Office", "Office", "Excel", "Excel", "Excel",
      "Office", "Office", "Office", "Excel", "Excel", "Excel"
    )),
    Date = as.Date(c(
      "1/1/2020", "2/1/2020", "3/1/2020", "1/1/2020", "2/1/2020", "3/1/2020",
      "1/1/2020", "2/1/2020", "3/1/2020", "1/1/2020", "2/1/2020", "3/1/2020",
      "1/1/2020", "2/1/2020", "3/1/2020", "1/1/2020", "2/1/2020", "3/1/2020",
      "1/1/2020", "2/1/2020", "3/1/2020", "1/1/2020", "2/1/2020", "3/1/2020"
    ), format = "%m/%d/%Y"),
    Target = c(1, 2, 3, 13, 14, 15, 25, 26, 27, 37, 38, 39, 1, 2, 3, 13, 14, 15, 25, 26, 27, 37, 38, 39),
    Value_Country = c(1, 2, 3, 1, 2, 3, 10, 20, 30, 10, 20, 30, 1, 2, 3, 1, 2, 3, 10, 20, 30, 10, 20, 30),
    Value_All = c(1, 2, 3, 37, 38, 39, 73, 74, 75, 109, 110, 111, 145, 146, 147, 181, 182, 183, 217, 218, 219, 253, 254, 255),
    Value_Product = c(1, 2, 3, 10, 11, 12, 1, 2, 3, 10, 11, 12, 1, 2, 3, 10, 11, 12, 1, 2, 3, 10, 11, 12),
    Value_Global = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
    Value_Segment_Product = c(100, 101, 102, 200, 201, 202, 100, 101, 102, 200, 201, 202, 300, 301, 302, 400, 401, 402, 300, 301, 302, 400, 401, 402)
  ) %>%
    tidyr::unite("Combo",
      c("Segment", "Country", "Product"),
      sep = "--",
      remove = F
    )

  # run prep hts function
  result_data <- prep_hierarchical_data(
    input_data = data,
    run_info = set_run_info(),
    combo_variables = c("Segment", "Country", "Product"),
    external_regressors = c("Value_Country", "Value_All", "Value_Product", "Value_Global", "Value_Segment_Product"),
    forecast_approach = "grouped_hierarchy",
    frequency_number = 12
  ) %>%
    dplyr::filter(Date == "2020-01-01")

  # Expected output setup
  expected_data <- tibble::tibble(
    Combo = as.character(c(
      "Total", "Segment_Commercial", "Segment_Consumer", "Country_United_States", "Country_UK",
      "Product_Office", "Product_Excel", "Commercial_United_States_Office", "Commercial_United_States_Excel",
      "Commercial_UK_Office", "Commercial_UK_Excel", "Consumer_United_States_Office", "Consumer_United_States_Excel",
      "Consumer_UK_Office", "Consumer_UK_Excel"
    )),
    Date = as.Date(c(
      "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01",
      "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01",
      "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01"
    )),
    Target = c(152, 76, 76, 28, 124, 52, 100, 1, 13, 25, 37, 1, 13, 25, 37),
    Value_Country = c(11, 11, 11, 1, 10, 11, 11, 1, 1, 10, 10, 1, 1, 10, 10),
    Value_All = c(1016, 220, 796, 364, 652, 436, 580, 1, 37, 73, 109, 145, 181, 217, 253),
    Value_Product = c(11, 11, 11, 11, 11, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10),
    Value_Global = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
    Value_Segment_Product = c(1000, 300, 700, 1000, 1000, 400, 600, 100, 200, 100, 200, 300, 400, 300, 400)
  )

  # Assertions
  expect_equal(result_data, expected_data)
})

test_that("prep_hierarchical_data returns correct standard hierarchies", {
  # Mock data setup
  data <- tibble::tibble(
    Area = as.character(c("EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "EMEA", "United States", "United States", "United States", "United States")),
    Country = as.character(c("Croatia", "Croatia", "Croatia", "Croatia", "Greece", "Greece", "Greece", "Greece", "United States", "United States", "United States", "United States")),
    Date = as.Date(c("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01", "2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01")),
    Target = c(1, 2, 3, 4, 100, 101, 102, 103, 1000, 1001, 1002, 1003),
    Value_All = c(10, 11, 12, 13, 46, 47, 48, 49, 82, 83, 84, 85),
    Value_Global = c(50, 51, 52, 53, 50, 51, 52, 53, 50, 51, 52, 53),
    Value_Area = c(20, 21, 22, 23, 20, 21, 22, 23, 70, 71, 72, 73)
  ) %>%
    tidyr::unite("Combo",
      c("Area", "Country"),
      sep = "--",
      remove = F
    )

  # run prep hts function for standard hierarchy
  result_data <- prep_hierarchical_data(
    input_data = data,
    run_info = set_run_info(),
    combo_variables = c("Area", "Country"),
    external_regressors = c("Value_All", "Value_Global", "Value_Area"),
    forecast_approach = "standard_hierarchy",
    frequency_number = 12
  ) %>%
    dplyr::filter(Date == "2020-01-01")

  # Expected output setup for a standard hierarchical forecast
  expected_data <- tibble::tibble(
    Combo = as.character(c("Total", "A", "B", "EMEA_Croatia", "EMEA_Greece", "United_States_United_States")),
    Date = as.Date(c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01", "2020-01-01")),
    Target = c(1101, 101, 1000, 1, 100, 1000),
    Value_All = c(138, 56, 82, 10, 46, 82),
    Value_Global = c(50, 50, 50, 50, 50, 50),
    Value_Area = c(90, 90, 90, 20, 20, 70)
  )

  # Assertions
  expect_equal(result_data, expected_data)
})
microsoft/finnts documentation built on Oct. 30, 2024, 9:34 p.m.