test_that("prep_hierarchical_data returns correct grouped hierarchies", {
  # Input fixture: 2 segments x 2 countries x 2 products x 3 monthly dates
  # (24 rows). "United (States)" deliberately contains a space and
  # parentheses; the expected Combo names below use "United_States", so the
  # test also pins how such characters are cleaned in the output names.
  month_strings <- c("1/1/2020", "2/1/2020", "3/1/2020")

  input_data <- tibble::tibble(
    Segment = rep(c("Commercial", "Consumer"), each = 12),
    Country = rep(rep(c("United (States)", "UK"), each = 6), times = 2),
    Product = rep(rep(c("Office", "Excel"), each = 3), times = 4),
    Date = as.Date(rep(month_strings, times = 8), format = "%m/%d/%Y"),
    Target = rep(c(1, 2, 3, 13, 14, 15, 25, 26, 27, 37, 38, 39), times = 2),
    Value_Country = rep(
      c(1, 2, 3, 1, 2, 3, 10, 20, 30, 10, 20, 30),
      times = 2
    ),
    Value_All = c(
      1, 2, 3, 37, 38, 39, 73, 74, 75, 109, 110, 111,
      145, 146, 147, 181, 182, 183, 217, 218, 219, 253, 254, 255
    ),
    Value_Product = rep(c(1, 2, 3, 10, 11, 12), times = 4),
    Value_Global = rep(c(1, 2, 3), times = 8),
    Value_Segment_Product = c(
      rep(c(100, 101, 102, 200, 201, 202), times = 2),
      rep(c(300, 301, 302, 400, 401, 402), times = 2)
    )
  ) %>%
    tidyr::unite(
      "Combo",
      c("Segment", "Country", "Product"),
      sep = "--",
      remove = FALSE # spelled out: `F` is an ordinary, reassignable variable
    )

  # Run the hierarchy prep and keep only the first period for comparison.
  result_data <- prep_hierarchical_data(
    input_data = input_data,
    run_info = set_run_info(),
    combo_variables = c("Segment", "Country", "Product"),
    external_regressors = c(
      "Value_Country", "Value_All", "Value_Product",
      "Value_Global", "Value_Segment_Product"
    ),
    forecast_approach = "grouped_hierarchy",
    frequency_number = 12
  ) %>%
    # Compare Date against a Date rather than relying on string coercion.
    dplyr::filter(Date == as.Date("2020-01-01"))

  # Expected output for 2020-01-01: one row per hierarchy node, ordered as
  # Total, per-Segment, per-Country, per-Product, then the 8 bottom-level
  # series. Target at each aggregate node is the sum of its bottom-level
  # targets (e.g. Total = 1 + 13 + 25 + 37 + 1 + 13 + 25 + 37 = 152).
  expected_data <- tibble::tibble(
    Combo = c(
      "Total",
      "Segment_Commercial", "Segment_Consumer",
      "Country_United_States", "Country_UK",
      "Product_Office", "Product_Excel",
      "Commercial_United_States_Office", "Commercial_United_States_Excel",
      "Commercial_UK_Office", "Commercial_UK_Excel",
      "Consumer_United_States_Office", "Consumer_United_States_Excel",
      "Consumer_UK_Office", "Consumer_UK_Excel"
    ),
    Date = as.Date(rep("2020-01-01", times = 15)),
    Target = c(152, 76, 76, 28, 124, 52, 100, 1, 13, 25, 37, 1, 13, 25, 37),
    Value_Country = c(11, 11, 11, 1, 10, 11, 11, 1, 1, 10, 10, 1, 1, 10, 10),
    Value_All = c(
      1016, 220, 796, 364, 652, 436, 580,
      1, 37, 73, 109, 145, 181, 217, 253
    ),
    Value_Product = c(11, 11, 11, 11, 11, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10),
    Value_Global = rep(1, times = 15),
    Value_Segment_Product = c(
      1000, 300, 700, 1000, 1000, 400, 600,
      100, 200, 100, 200, 300, 400, 300, 400
    )
  )

  expect_equal(result_data, expected_data)
})
test_that("prep_hierarchical_data returns correct standard hierarchies", {
  # Input fixture: a two-level Area > Country hierarchy with three bottom
  # series (EMEA/Croatia, EMEA/Greece, United States/United States) over four
  # monthly dates (12 rows).
  month_strings <- c("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01")

  input_data <- tibble::tibble(
    Area = rep(c("EMEA", "United States"), times = c(8, 4)),
    Country = rep(c("Croatia", "Greece", "United States"), each = 4),
    Date = as.Date(rep(month_strings, times = 3)),
    Target = c(1, 2, 3, 4, 100, 101, 102, 103, 1000, 1001, 1002, 1003),
    Value_All = c(10, 11, 12, 13, 46, 47, 48, 49, 82, 83, 84, 85),
    Value_Global = rep(c(50, 51, 52, 53), times = 3),
    Value_Area = c(rep(c(20, 21, 22, 23), times = 2), 70, 71, 72, 73)
  ) %>%
    tidyr::unite(
      "Combo",
      c("Area", "Country"),
      sep = "--",
      remove = FALSE # spelled out: `F` is an ordinary, reassignable variable
    )

  # Run the hierarchy prep and keep only the first period for comparison.
  result_data <- prep_hierarchical_data(
    input_data = input_data,
    run_info = set_run_info(),
    combo_variables = c("Area", "Country"),
    external_regressors = c("Value_All", "Value_Global", "Value_Area"),
    forecast_approach = "standard_hierarchy",
    frequency_number = 12
  ) %>%
    # Compare Date against a Date rather than relying on string coercion.
    dplyr::filter(Date == as.Date("2020-01-01"))

  # Expected output for 2020-01-01: Total, the two middle-level nodes (which
  # the expected result labels "A" and "B"), then the three bottom-level
  # series. Target at each aggregate node is the sum of its bottom-level
  # targets (Total = 1 + 100 + 1000 = 1101; A = 1 + 100; B = 1000).
  expected_data <- tibble::tibble(
    Combo = c(
      "Total", "A", "B",
      "EMEA_Croatia", "EMEA_Greece", "United_States_United_States"
    ),
    Date = as.Date(rep("2020-01-01", times = 6)),
    Target = c(1101, 101, 1000, 1, 100, 1000),
    Value_All = c(138, 56, 82, 10, 46, 82),
    Value_Global = rep(50, times = 6),
    Value_Area = c(90, 90, 90, 20, 20, 70)
  )

  expect_equal(result_data, expected_data)
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.