#' Iterative proportional fitting to scale seed values to multiple target subtotals.
#'
#' Iterative proportional fitting to scale seed values to multiple target subtotals.
#'
#' @param datatable A data frame of values to be scaled to targets.
#' @param targets A list of data frames containing subtotal targets for the \code{datatable}. All totals in each target should be equal.
#' @param datatable.value.names The name of the series in \code{datatable} containing the values to be scaled.
#' @param target.value.names The names of the series in \code{targets} containing subtotals to scale. Can be string or array of strings.
#' @param max.error The maximum total absolute difference allowed between final scaled values and targets.
#' Iterative scaling will complete once the error is below this threshold or \code{max.iterations} occur.
#' @param max.iterations The maximum number of iterations of scaling. Iterative scaling with end once this value is reached, even if the error is above \code{max.error}.
#' @param freeze_cells Optional data frame of values with same series columns as \code{datatable}, specifying exact values to hit in the scaling.
#' Any rows or values not listed, or \code{NA}s, will be scaled as normal.
#' @param freeze_cells.value.name The name of the series of iced values in \code{freeze_cells}.
#' @param freeze_slice Optional data frame containing subtotal targets for the \code{datatable}.
#' Unlike \code{targets}, these data frames can be subsets only containing subtotals for one or more rows.
#' Any rows or values not listed, or \code{NA}s, will be scaled as normal.
#' Using \code{freeze_slice} for partial targets will increase the number of required iterations for scaling. This may require the user to increase the value of \code{max.iterations}.
#' Separate elements of a dimension within a single cell using " + " to scale subtotals.
#' @param freeze_slice.value.name The name or names of the series of iced values in \code{freeze_slice}.
#' @param minmax_cells Optional data frame of values with same series columns as \code{datatable}, specifying bounded values to hit in the scaling.
#' Provide minimumn and maximum values for a cell to be scaled.
#' Any rows or values not listed, or \code{NA}s, will be scaled as normal.
#' @param minmax_cells.value.names An array of length 2 of the names of the minimum and maximum values in \code{minmax_cells}.
#' @param minmax_slice Optional data frame containing subtotal targets for the \code{datatable}, specifying bounded values to hit in the scaling.
#' Provide minimumn and maximum values for a slice of the data frame to be scaled.
#' Unlike \code{targets}, these data frames can be subsets only containing subtotals for one or more rows.
#' Any rows or values not listed, or \code{NA}s, will be scaled as normal.
#' Using \code{minmax_slice} for partial targets will increase the number of required iterations for scaling. This may require the user to increase the value of \code{max.iterations}.
#' @param minmax_slice.value.name The name or names of the series of the minimum and maximum values in \code{minmax_slice}.
#' @param minmax.smash.param Numeric value of 0 < x < 1. Following an out-of-bounds occurence for \code{minmax_cells}, the \code{minmax.smash.param} is the additional value added to the scaled value to bring it back into bounds.
#' Values close to 0 bind the value to the violated bound, while close to 1 bind the value to the other bound.
#' Values closer to0 will require more iterations to complete.
#' @param growth_targets A data frame of cell or slice targets with growth rates over listed dimensions. See \code{ipfitr::ip_growth_transform} for detail.
#' @return A dataframe with the same dimensionality as \code{datatable}, with all values scaled to the subtotals specified in each data frame in \code{targets}.
#' @examples
#' tar1 <- data.frame(x = letters[1:2], value = c(50, 50))
#' tar2 <- data.frame(y = letters[3:5], value = c(20, 40, 40))
#' tar3 <- data.frame(z = letters[6:10], value = c(10, 20, 30, 40, 10))
#'
#' tar.list <- list(tar1, tar2, tar3)
#' df <- ip_create_seed(tar.list) %>% ip_fit(tar.list)
#' @export
ip_fit <- function(datatable, targets,
datatable.value.name = "value", target.value.names = "value",
max.error = 0.01, max.iterations = 25,
freeze_cells = NULL, freeze_cells.value.name = "value",
freeze_slice = NULL, freeze_slice.value.names = "value",
minmax_cells = NULL, minmax_cells.value.names = c("value_min", "value_max"),
minmax_slice = NULL, minmax_slice.value.names = c("value_min", "value_max"),
minmax.smash.param = 1/3,
growth_targets = NULL,
save.tars = FALSE, show.messages = TRUE) {
#Warnings
if(is.null(targets) | !is.list(targets) | !is.data.frame(targets[[1]])) {
stop("Targets must be a list of data frames.")
}
#Single Target
if(length(targets) == 1) {
if(show.messages) {
message(paste("Initializing IPF...",
"Only 1 target supplied. Data will be scaled to single target, along with freeze and minmax conditions."))
}
targets[[2]] <- targets[[1]]
} else {
message(paste("Initializing IPF...", length(targets), "targets supplied."))
}
#Check targets
target.checker <- vector(length = length(targets)) #Use Targets, not tar.list
for(i in seq_along(targets)){
x <- targets[[i]]
target.checker[i] <- sum(x[, target.value.names], na.rm=T)
}
if(length(unique(round(target.checker, 4))) > 1){
message("Warning: Supplied targets do not have the same totals. IPF may not converge.")
}
#Set initial conditions
current.error <- 10^9
current.iteration <- 1
minmax_cells.oob <- TRUE
minmax_slice.oob <- TRUE
tar.list <- targets
#New for 0.0.0.9005 - Slice targets now supplied as single data frame
#Freeze 2- Ice Slices
if(!is.null(freeze_slice)){
if(!is.data.frame(freeze_slice)) {stop("Parameter freeze_slice must be a data frame containing partial targets or subtotals.")}
if(show.messages) {
message(paste(nrow(freeze_slice), "Frozen slices (partial) targets supplied."))
}
freeze_slice_list <- ip_load_slice_a(freeze_slice, slice.value.name = freeze_slice.value.names)
tar.list <- c(tar.list, freeze_slice_list)
} else {
for(i in seq_along(tar.list)){
names(tar.list)[i] <- paste0("tar__", i)
}
}
#initialize
df0 <- datatable
names(df0)[names(df0) == datatable.value.name] <- "value"
for(i in seq_along(tar.list)){
if(names(tar.list)[i] == "" | is.null(names(tar.list))){
names(tar.list)[i] <- paste0("tar__", i)
}
}
#Freeze 1 - Ice Cells
if(!is.null(freeze_cells)){
if(show.messages) {
message(paste(nrow(freeze_cells), "Frozen cell values will be hit."))
}
names(freeze_cells)[names(freeze_cells) == freeze_cells.value.name] <- "frz__c"
df0 <- df0 %>%
left_join(freeze_cells, by = names(freeze_cells %>% select(-frz__c))) %>%
#Iced cells are not unknown, so we don't need to include them in the IPF
mutate(value = ifelse(is.na(frz__c), value, 0))
#Since we 0'd out the seed, we should also remove all the iced values from the targets
tar.list2 <- lapply(seq_along(tar.list), function(i){
x <- tar.list[[i]]
names(x)[names(x) %in% c(target.value.names, freeze_slice.value.names)] <- "value"
nm_tar <- names(tar.list)[i]
ice_target <- freeze_cells %>%
group_by_(.dots = as.list(names(x)[!(names(x) == "value")])) %>%
summarize(iced = sum(frz__c, na.rm=T)) %>%
ungroup()
df <- x %>%
left_join(ice_target, by = names(ice_target %>% select(-iced))) %>%
mutate(iced = ifelse(is.na(iced), 0, iced)) %>%
mutate(value = value - iced) %>%
select(-iced)
if(any(df$value < 0)){stop("Frozen cell values exceed supplied targets. Unable to rationalize.")}
#For Subtotal slices, need to set them all equal to the minimum
if(grepl("frz__subtl", nm_tar, fixed = TRUE)){
df <- df %>%
mutate(value = min(value, na.rm=T))
}
return(df)
})
names(tar.list2) <- names(tar.list)
tar.list <- tar.list2
} #End ice cells
#Freeze 3 - Slush Cells. Similar to Ice, but cells have a min/max bound, not specific value
if(!is.null(minmax_cells)) {
if(show.messages) {
message(paste(nrow(minmax_cells), "Min/max cell values supplied."))
}
names(minmax_cells)[names(minmax_cells) == minmax_cells.value.names[1]] <- "minmax__c_min"
names(minmax_cells)[names(minmax_cells) == minmax_cells.value.names[2]] <- "minmax__c_max"
df0 <- df0 %>%
left_join(minmax_cells, by = names(minmax_cells %>% select(-starts_with("minmax__"))))
}
#Freeze 4 - Slush Slices. Partial (or complete?) targets with min/max bound
if(!is.null(minmax_slice)) {
if(!is.data.frame(minmax_slice)) {
stop("Parameter minmax_slice must be a data frame containing partial min/max targets.")
}
slush.list <- ip_load_slice_a(minmax_slice, slice.value.name = minmax_slice.value.names, prefix = "mm")
if(show.messages) {
message(paste(nrow(slush.list), "Min/max slice targets supplied."))
}
#Freeze 1 inside Freeze 4 - Ice Cells
if(!is.null(freeze_cells)){
#Like the normal targets, need to subtract the frozen values for the min/max
slush.list <- lapply(slush.list, function(x){
names(x)[names(x) == minmax_slice.value.names[1]] <- "value_min"
names(x)[names(x) == minmax_slice.value.names[2]] <- "value_max"
ice_slush <- freeze_cells %>%
group_by_(.dots = as.list(names(x)[!(names(x) %in% c("value_min", "value_max"))])) %>%
summarize(iced = sum(frz__c, na.rm=T)) %>%
ungroup()
df <- x %>%
left_join(ice_slush, by = names(ice_slush %>% select(-iced))) %>%
mutate(iced = ifelse(is.na(iced), 0, iced)) %>%
mutate(value_min = value_min - iced,
value_max = value_max - iced) %>%
select(-iced)
if(any(df$value_min < 0)){stop("Frozen cell values exceed supplied minmax_slice targets. Unable to rationalize.")}
names(x)[names(x) == "value_min"] <- minmax_slice.value.names[1]
names(x)[names(x) == "value_max"] <- minmax_slice.value.names[2]
return(df)
})
} #End ice cells
#Format each slush slice df - give each value unique names
for( i in seq_along(slush.list)){
x <- slush.list[[i]]
names(x)[names(x) == minmax_slice.value.names[1]] <- paste0("minmax__s_min", i)
names(x)[names(x) == minmax_slice.value.names[2]] <- paste0("minmax__s_max", i)
names(slush.list[[i]]) <- names(x)
}
#Add each of those slice targets to the master data frame
for(x in slush.list){
df0 <- df0 %>%
left_join(x, by = names(x %>% select(-starts_with("minmax__"))))
}
} #End Freeze 4 setup
#Format targets - give each value unique names
for( i in seq_along(tar.list)){
x <- tar.list[[i]]
nm <- names(tar.list)[i]
#If it's a target, give it the prefix tar__
if(i <= length(targets)) {
names(x)[names(x) %in% target.value.names] <- paste0("tar__", i)
names(tar.list[[i]]) <- names(x)
} else if(grepl("__slice", nm, fixed=T)) { #Else it's a freeze slice
names(x)[names(x) %in% target.value.names] <- paste0("tar__frz__slice", i)
names(tar.list[[i]]) <- names(x)
} else {
names(x)[names(x) %in% target.value.names] <- paste0("tar__frz__subtl", i)
names(tar.list[[i]]) <- names(x)
}
}
#Format input seed
for(i in seq_along(tar.list)){
x <- tar.list[[i]]
df0 <- df0 %>%
left_join(x, by = names(x %>% select(-dplyr::contains("__"))))
#If it's a target and has NAs, replace them with 0
if(i <= length(targets)) {
df0[, paste0("tar__", i)][is.na(df0[, paste0("tar__", i)])] <- 0
}
}
# Growth targets
# These targets are for structure. The actual values come later
if(!is.null(growth_targets)){
if(show.messages) {
message(paste(nrow(growth_targets), "Growth rate cell/slice targets supplied."))
}
}
#IPF Loop
while((current.error > max.error | minmax_cells.oob == TRUE | minmax_slice.oob == TRUE) &
current.iteration <= max.iterations ) {
if(show.messages) {
message(paste("Iteration Summary:", current.iteration))
}
#Reset Freeze 3+4 - Slush Cells/Slice - to off
minmax_cells.oob <- FALSE
minmax_slice.oob <- FALSE
##
# Scaling
##
df1 <- df0
for(i in seq_along(tar.list)){
x <- tar.list[[i]]
nm <- names(tar.list)[i]
#If it's a target
if(i <= length(targets)) {
df1 <- df1 %>%
ip_scale_a(target_series = names(x)[!(names(x) %in% c("value"))], series_target = paste0("tar__", i))
} else if(grepl("__slice", nm, fixed=T)) { #Else it's a freeze slice
df1 <- df1 %>%
ip_scale_a(target_series = names(x)[!(names(x) %in% c("value"))],
series_target = paste0("tar__frz__slice", i), series_type = "slice")
} else {
df1 <- df1 %>%
ip_scale_a(target_series = names(x)[!(names(x) %in% c("value"))],
series_target = paste0("tar__frz__subtl", i), series_type = "subtl")
}
}
###
# Growth targets
###
if(!is.null(growth_targets)){
df1 <- df1[, !grepl("tar__growth__", names(df1), fixed = T)] #Remove previous yy value targets
#Create list of target dataframes, with growth rates converted to levels
growth_tars_current_df <- df1
#Need to add back in frozen cells for Growth calc
if(!is.null(freeze_cells)){
growth_tars_current_df <- growth_tars_current_df %>%
mutate(value = ifelse(is.na(frz__c), value, frz__c))
}
growth_tars_current <- growth_tars_current_df %>% ip_growth_transform(growth_targets)
for(i in seq_along(growth_tars_current)) {
names(growth_tars_current)[i] <- paste0("tar__growth__", i)
}
#Scale each growth value target
for(i in seq_along(growth_tars_current)){
x <- growth_tars_current[[i]]
df1 <- df1 %>%
left_join(x, by = names(x)[!(names(x) %in% paste0("tar__growth__", i))]) %>%
ip_scale_a(target_series = names(x)[!(names(x) %in% paste0("tar__growth__", i))],
series_target = paste0("tar__growth__", i))
}
} else {
growth_tars_current <- NULL
}
###
#Error
###
#For each target (except final one), calculate the error over each element in the target,
# Save results to a list of dfs
#err.list <- lapply(seq_along(head(tar.list, -1)), function(i){
err.list <- lapply(seq_along(tar.list), function(i){
x <- tar.list[[i]]
nm <- names(tar.list)[i]
#If it's a target
if(i <= length(targets) | grepl("__growth", nm, fixed=T)) {
df <- df1 %>%
ip_miss_a(names(x)[!(names(x) %in% c("value"))], series_target = paste0("tar__", i))
} else if(grepl("__slice", nm, fixed=T)) { #Else it's a freeze slice
df <- df1 %>%
ip_miss_a(names(x)[!(names(x) %in% c("value"))],
series_target = paste0("tar__frz__slice", i), series_type = "slice")
# } else if(grepl("__subtl", nm, fixed=T)){
} else {
df <- df1 %>%
ip_miss_a(names(x)[!(names(x) %in% c("value"))],
series_target = paste0("tar__frz__subtl", i), series_type = "subtl")
}
})
#Growth error
if(!is.null(growth_targets)){
err.list.growth <- lapply(seq_along(growth_tars_current), function(i){
x <- growth_tars_current[[i]]
nm <- names(growth_tars_current)[i]
df <- df1 %>%
ip_miss_a(names(x)[!(names(x) %in% c("value"))], series_target = paste0("tar__growth__", i))
})
err.list <- c(err.list, err.list.growth)
}
#For each error df, sum the error column, then sum those sums for total abs error
current.error <- sum(sapply(err.list, function(x){
return(sum(abs(x$error), na.rm=T))
}), na.rm=T)
if(show.messages) {
message(paste(" Iteration Error: ", round(current.error, 5)))
}
###
# Freeze 4 - Min/Max Slice
###
#Similar to Ice, but cells have a min/max bound, not specific value
#Calculate AFTER error, as this is its own deal breaker. If everything is in bounds, no problem
if(!is.null(minmax_slice)) {
#1) For each minmax_slice target, check to see there are any OOB
for(i in seq_along(slush.list)) {
x <- slush.list[[i]]
if(grepl("__subtl", names(slush.list)[i], fixed = T)){
type <- "subtl"
} else{
type <- "slice"
}
#Roll-up version of df1 to check slush targets. Use generic target names for easier programming
names(df1)[names(df1) == paste0("minmax__s_min", i)] <- "minmax__s_min"
names(df1)[names(df1) == paste0("minmax__s_max", i)] <- "minmax__s_max"
df1_slushs <- df1
names(x)[names(x) == paste0("minmax__s_min", i)] <- "minmax__s_min"
names(x)[names(x) == paste0("minmax__s_max", i)] <- "minmax__s_max"
df1_slushs <- df1_slushs %>%
#Group by names of the target, INCLUDING the target value names, since they are the same for each group element
do(
if(type == "subtl"){
group_by_(., .dots = as.list(c("minmax__s_min", "minmax__s_max")))
} else {
group_by_(., .dots = as.list(names(x)))
}
) %>%
summarize(value = sum(value, na.rm = T)) %>%
ungroup() %>%
mutate(check_slush_min = value < minmax__s_min,
check_slush_max = value > minmax__s_max) %>%
rowwise() %>%
mutate(check_slush_s = any(c(check_slush_max, check_slush_min), na.rm=T)) %>% #Look for OOB
ungroup() %>%
select(-check_slush_min, -check_slush_max)
#If any of the bounded cells are OOB,
# Then proceed to edit the df1 values to smash them into bounds
# Then adjust the other values in the opposite direction
if(any(df1_slushs$check_slush_s, na.rm = T)){
df1_slushs$check_slush_s <- NULL
if(show.messages) {
message(paste(" Out of bounds conditions present for minmax_slice: Target", i))
}
#First update the trigger to True - There needs to be another iterations
minmax_slice.oob <- TRUE
#Then figure out by how much to move the values
df1_slushs <- df1_slushs %>%
#Allow for no lower bound
mutate(minmax__s_min = ifelse(is.na(minmax__s_min) & !is.na(minmax__s_max), 0 , minmax__s_min)) %>%
mutate(
value_o_slush = ifelse(is.na(minmax__s_min) & is.na(minmax__s_max), NA, ifelse((value >= minmax__s_min) | is.na(minmax__s_min), value,
#Allow for no upper bound
ifelse(!is.na(minmax__s_max), minmax__s_min + (minmax.smash.param)*(minmax__s_max - minmax__s_min),
(1+minmax.smash.param)*minmax__s_min))),
value_o_slush = ifelse((value_o_slush <= minmax__s_max) | is.na(minmax__s_max), value_o_slush, minmax__s_max - (minmax.smash.param)*(minmax__s_max - minmax__s_min))
) %>%
#How much the remaining values have to move to compensate for that shift
mutate(value_o_remainder = ifelse(is.na(value_o_slush), value, NA),
ratio_remainder = (sum(value, na.rm = T) - sum(value_o_slush, na.rm=T))/sum(value_o_remainder, na.rm=T)) %>%
#Combine into a single ratio to apply to data cube
mutate(ratio = ifelse(is.na(value_o_slush), ratio_remainder, value_o_slush/value)) %>%
do(
if(type == "subtl"){
select(., -value, -value_o_slush, -value_o_remainder, -ratio_remainder)
} else {
select(., -value, -minmax__s_min, -minmax__s_max, -value_o_slush, -value_o_remainder, -ratio_remainder)
}
)
#Apply this df of ratios to our main data frame
df1 <- df1 %>%
left_join(df1_slushs, by = names(df1_slushs %>% select(-ratio))) %>%
mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
mutate(value = value * ratio) %>%
select(-ratio)
}
names(df1)[names(df1) == "minmax__s_min"] <- paste0("minmax__s_min", i)
names(df1)[names(df1) == "minmax__s_max"] <- paste0("minmax__s_max", i)
df1_slushs <- NULL
}
} #End Freeze 4
###
# Freeze 3 - Min/Max Cells
###
#Similar to Ice, but cells have a min/max bound, not specific value
#Calculate AFTER error, as this is its own deal breaker. If everything is in bounds, no problem
#Do Freeze 3 AFTER Freeze 4 because this is more restrictive
if(!is.null(minmax_cells)) {
df1 <- df1 %>%
mutate(check_slush_min = value < minmax__c_min,
check_slush_max = value > minmax__c_max) %>%
rowwise() %>%
mutate(check_slush_c = any(c(check_slush_max, check_slush_min), na.rm=T)) %>% #Look for OOB
ungroup() %>%
select(-check_slush_min, -check_slush_max)
#If any of the bounded cells are OOB, first update the trigger to True
minmax_cells.oob <- any(df1$check_slush_c, na.rm = T)
df1$check_slush_c <- NULL
#Then replace those values with 1/3 closer above/below that missed bound
if(minmax_cells.oob == TRUE) {
if(show.messages) {
message(" Out of bounds conditions present for minmax_cells.")
}
df1 <- df1 %>%
#Allow for no lower bound
mutate(minmax__c_min = ifelse(is.na(minmax__c_min) & !is.na(minmax__c_max), 0 , minmax__c_min)) %>%
mutate(value = ifelse((value >= minmax__c_min) | is.na(minmax__c_min), value,
#Allow for no upper bound
ifelse(!is.na(minmax__c_max), minmax__c_min + (minmax.smash.param)*(minmax__c_max - minmax__c_min),
(1+minmax.smash.param)*minmax__c_min)),
value = ifelse((value <= minmax__c_max) | is.na(minmax__c_max), value, minmax__c_max - (minmax.smash.param)*(minmax__c_max - minmax__c_min))
)
}
} #End Freeze 3
###
# Prepare for next loop
###
current.iteration <- current.iteration + 1
df0 <- df1
}
#Freeze 1 - Iced Cells
#Add back Iced Cells
if(!is.null(freeze_cells)){
df0 <- df0 %>%
mutate(value = ifelse(is.na(frz__c), value, frz__c))
}
#Save/print all the assumption columns?
# Currently, tars are reduced by Freeze tars.
# TODO: Add frozen values back into tars... or just replace tars. That's easier
if(!save.tars){
df0 <- df0 %>%
select(-dplyr::contains("__"))
}
return(df0)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.