This document presents validation results of the `r year`
summary-level state IO model.
# Chunk defaults for the whole report: hide code and messages, and emit chunk
# output "asis".
knitr::opts_chunk$set(echo = FALSE, message = FALSE, results = "asis")
# NOTE(review): level 40 presumably corresponds to ERROR in the logging
# package's numeric scale -- confirm.
logging::basicConfig(level = 40)

# Load the package under development.
library(devtools)
load_all()

# Metadata definitions are sourced into the knit global environment.
source("DefineMetadata.R", local = knitr::knit_global())

# Fall back to 2012 when no `year` was defined before knitting.
if (!exists("year")) {
  year <- 2012
}

# Load US, single-region and two-region data, printing a status line after
# each script has been sourced.
source("LoadUSdata.R", local = knitr::knit_global())
cat(paste(year, "US IO data for states successfully loaded."))

source("Load1Rdata.R", local = knitr::knit_global())
cat(paste(year, "single-region IO data for states successfully loaded."))

source("Load2Rdata.R", local = knitr::knit_global())
cat(paste(year, "two region IO data for states successfully loaded."))
# Industry output check: for every state except "Overseas", compare industry
# output derived from the state Use table with that derived from the state
# Make table, and collect the entries reported as failures.
failures <- c()
for (state in states[states != "Overseas"]) {
  # Industry output (x): row sums of Make, and column sums of Use restricted
  # to the columns named by the US Make row names.
  x_make <- rowSums(State_Summary_Make_ls[[state]])
  x_use <- colSums(
    State_Summary_Use_ls[[state]][, rownames(US_Summary_Make)]
  )
  # Relative difference of the Use-based value from the Make-based value.
  rel_diff <- (x_use - x_make) / x_make
  # Validate rel_diff with a tolerance of 0.01 and keep the "Failure" part.
  failures_state <- formatValidationResult(
    rel_diff,
    abs_diff = TRUE,
    tolerance = 0.01
  )[["Failure"]]
  # Tag failures with the state they belong to.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  colnames(failures_state)[1:3] <- c("Industry", "Relative Diff", "Validation")
  failures <- rbind(failures, failures_state)
}
cat("\n")
reportValidationResult(failures)
# Compare the element-wise sum of all state Make tables with the US Make table.
df0 <- US_Summary_Make
df1 <- Reduce("+", State_Summary_Make_ls)
# Drop everything up to the last "." in the row names so they can be matched
# against the US row names.
rownames(df1) <- gsub(".*\\.", "", rownames(df1))
# Put the summed table in the same row/column order as the US table.
df1 <- df1[rownames(df0), colnames(df0)]
# Validate the difference with a tolerance of 1E-3 and report the failures.
failures <- formatValidationResult(
  df1 - df0,
  abs_diff = TRUE,
  tolerance = 1E-3
)[["Failure"]]
reportValidationResult(failures)
#### (DEPRECATED) 2. Check commodity output
#### (DEPRECATED) 2.1. Check if commodity output from state Make and Use are almost equal (relative difference <= 0.01).
# NOTE(review): the headings above mark this check as deprecated -- confirm
# whether this chunk is still meant to be evaluated.
failures <- c()
for (state in states[states != "Overseas"]) {
  # Commodity output (q): column sums of Make, and row sums of Use restricted
  # to the rows/columns of the US Use table.
  q_make <- colSums(State_Summary_Make_ls[[state]])
  q_use <- rowSums(
    State_Summary_Use_ls[[state]][
      rownames(US_Summary_Use),
      colnames(US_Summary_Use)
    ]
  )
  # Relative difference of the Use-based value from the Make-based value.
  rel_diff <- (q_use - q_make) / q_make
  # Validate rel_diff with a tolerance of 0.01 and keep the "Failure" part.
  failures_state <- formatValidationResult(
    rel_diff,
    abs_diff = TRUE,
    tolerance = 0.01
  )[["Failure"]]
  # Tag failures with the state they belong to.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  colnames(failures_state)[1:3] <- c("Commodity", "Relative Diff", "Validation")
  failures <- rbind(failures, failures_state)
}
cat("\n")
reportValidationResult(failures)
#### (DEPRECATED) 2.2. Check if sum of commodity output from state Use and sum demand from Domestic Use are almost equal (relative difference <= 0.01).
# NOTE(review): the heading above marks this check as deprecated -- confirm
# whether this chunk is still meant to be evaluated.
# Sum commodity output over all states, and sum the row totals of every state
# Domestic Use table (restricted to `commodities`).
q_sum <- Reduce("+", State_Summary_CommodityOutput_ls)
demand_sum <- Reduce("+", lapply(State_Summary_DomesticUse_ls, rowSums))[commodities]
# Relative difference of summed output from summed demand.
rel_diff <- (q_sum - demand_sum) / demand_sum
# Validate rel_diff with a tolerance of 0.01 and keep the "Failure" part.
failures <- formatValidationResult(
  rel_diff,
  abs_diff = TRUE,
  tolerance = 0.01
)[["Failure"]]
# Label the columns of the result that is actually reported. The original
# renamed `failures_state`, a leftover from an earlier loop, so the reported
# table kept its default column names.
colnames(failures)[1:3] <- c("Commodity", "Relative Diff", "Validation")
cat("\n")
reportValidationResult(failures)
Note: the only exceptions are Overseas, which isn't used for further calculations, and cells where the same cell in the US Make table is also negative.
# Cells of the US Make table that formatValidationResult reports under "Pass"
# for abs_diff = TRUE, tolerance = 0; used below as the set of accepted cells.
# NOTE(review): the accompanying note talks about negative values while the
# comments in this chunk talk about zeros -- confirm which condition
# abs_diff = TRUE with tolerance = 0 is meant to detect.
US_failures <- formatValidationResult(
  US_Summary_Make,
  abs_diff = TRUE,
  tolerance = 0
)[["Pass"]]
colnames(US_failures) <- c("Industry", "Commodity")

# Run the same check on every state Make table (Overseas excluded) and keep
# only the cells that are not also flagged in the US table.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  df <- as.data.frame(State_Summary_Make_ls[[state]])
  # Flagged cells in the state Make table.
  failures_state <- formatValidationResult(
    df,
    abs_diff = TRUE,
    tolerance = 0
  )[["Pass"]]
  colnames(failures_state) <- c("Industry", "Commodity")
  # Drop cells that are flagged in the US Make table as well.
  if (nrow(failures_state) > 0) {
    # Key each cell by "<industry> <commodity>", removing everything up to the
    # last "." of the industry label so it matches the US labels.
    failures_state$index <- paste(
      sub(".*\\.", "", failures_state$Industry),
      failures_state$Commodity
    )
    US_state_diff <- setdiff(
      failures_state$index,
      paste(US_failures$Industry, US_failures$Commodity)
    )
    failures_state <- failures_state[failures_state$index %in% US_state_diff, ]
  }
  # Remove the helper key and reset the row names before accumulating.
  failures_state$index <- rownames(failures_state) <- NULL
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
The threshold is set to 1E7 because there are differences (within +/- $10 million) between US industry output summed from Make and that summed from Use. Comparing the sum of state industry output (summed from state Use) to US industry output summed from US Make should therefore account for those inherent differences at the national level.
# US industry output: row sums of the US Make table.
df0 <- as.data.frame(rowSums(US_Summary_Make))
# Industry output summed over all states.
df1 <- Reduce("+", State_Summary_IndustryOutput_ls)
# Drop everything up to the last "." in the row names, then order the rows
# like the US table.
rownames(df1) <- gsub(".*\\.", "", rownames(df1))
df1 <- df1[rownames(df0), ]
# Validate the difference with a tolerance of 1E7 and report the failures.
failures <- formatValidationResult(
  df1 - df0,
  abs_diff = TRUE,
  tolerance = 1E7
)[["Failure"]]
colnames(failures) <- c("Industry", "")
reportValidationResult(failures)
The threshold is set to 1E7 because there are differences (within +/- $10 million) between US industry output summed from Make and that summed from Use. Comparing the sum of state industry output (summed from state Use) to US industry output summed from US Make should therefore account for those inherent differences at the national level.
# US commodity output: column sums of the US Make table.
df0 <- as.data.frame(colSums(US_Summary_Make))
# Commodity output summed over all states.
df1 <- Reduce("+", State_Summary_CommodityOutput_ls)
# Drop everything up to the last "." in the row names, then order the rows
# like the US table.
rownames(df1) <- gsub(".*\\.", "", rownames(df1))
df1 <- df1[rownames(df0), ]
# Validate the difference with a tolerance of 1E7 and report the failures.
failures <- formatValidationResult(
  df1 - df0,
  abs_diff = TRUE,
  tolerance = 1E7
)[["Failure"]]
colnames(failures) <- c("Commodity", "")
reportValidationResult(failures)
Note: even if the threshold is met, track the difference for each commodity. Save result as a type of quality control check.
# Row sums of the US Use table.
# NOTE(review): the original comment described this as the US *domestic* Use
# table, but the code reads US_Summary_Use -- confirm which one is intended.
df0 <- as.data.frame(rowSums(US_Summary_Use))
# Commodity output summed over all states.
df1 <- Reduce("+", State_Summary_CommodityOutput_ls)
# Drop everything up to the last "." in the row names, then order the rows
# like the US table.
rownames(df1) <- gsub(".*\\.", "", rownames(df1))
df1 <- df1[rownames(df0), ]
# Validate the difference with a tolerance of 1.11E7 and report the failures.
failures <- formatValidationResult(
  df1 - df0,
  abs_diff = TRUE,
  tolerance = 1.11E7
)[["Failure"]]
colnames(failures) <- c("Commodity", "")
reportValidationResult(failures)

# Print the per-commodity difference as a table regardless of the outcome.
diff <- df1 - df0
colnames(diff) <- "q_state_sum - q_US_use"
knitr::kable(diff, "simple")
# Cells of the US Make table reported under "Pass" for abs_diff = TRUE,
# tolerance = 0 (the original comments call these the zero cells).
US_zeros <- formatValidationResult(
  US_Summary_Make - 0,
  abs_diff = TRUE,
  tolerance = 0
)[["Pass"]]
colnames(US_zeros) <- c("Industry", "Commodity")
# "<industry> <commodity>" key for every US zero cell.
US_zeros_index <- paste(US_zeros$Industry, US_zeros$Commodity)

failures <- data.frame()
for (state in states) {
  # Zero cells of the state Make table, keyed like the US ones. Everything up
  # to the last "." of the industry label is removed so it matches the US labels.
  state_zeros <- formatValidationResult(
    State_Summary_Make_ls[[state]] - 0,
    abs_diff = TRUE,
    tolerance = 0
  )[["Pass"]]
  colnames(state_zeros)[1:2] <- c("Industry", "Commodity")
  state_zeros$index <- paste(
    sub(".*\\.", "", state_zeros$Industry),
    state_zeros$Commodity
  )
  # Zeros in US that are not zeros in state.
  US_state_diff <- setdiff(US_zeros_index, state_zeros$index)
  # The failing cells are, by construction, absent from state_zeros, so they
  # must be taken from US_zeros. The original filtered state_zeros by
  # US_state_diff, which always produced an empty result and made the check
  # unable to fail.
  failures_state <- US_zeros[US_zeros_index %in% US_state_diff, , drop = FALSE]
  # US rows carry no state label, so record the state explicitly.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  rownames(failures_state) <- NULL
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
Note: failures associated with 'F050 - Imports' are acceptable. Because state imports are not directly derived from US imports, a gap in imports between state sum and national total is reasonable.
# Run the state-vs-national Use validation for all four combinations of
# table (total / domestic) and difference type (absolute / relative).
StateUseValidationFailures_absdiff <-
  validateStateUseAgainstNationlUse(domestic = FALSE, rel_diff = FALSE)
StateDomesticUseValidationFailures_absdiff <-
  validateStateUseAgainstNationlUse(domestic = TRUE, rel_diff = FALSE)
StateUseValidationFailures_reldiff <-
  validateStateUseAgainstNationlUse(domestic = FALSE, rel_diff = TRUE)
StateDomesticUseValidationFailures_reldiff <-
  validateStateUseAgainstNationlUse(domestic = TRUE, rel_diff = TRUE)

# 8.1: state Use tables, absolute differences.
cat("##### 8.1 State Use tables (checking absolute differences)\n")
reportValidationResult(StateUseValidationFailures_absdiff)

# 8.2: state Domestic Use tables, absolute differences.
cat("##### 8.2 State Domestic Use tables (checking absolute differences)\n")
reportValidationResult(StateDomesticUseValidationFailures_absdiff)

# 8.3: state Use tables, relative differences.
cat("##### 8.3 State Use tables (checking relative differences)\n")
reportValidationResult(StateUseValidationFailures_reldiff)

# 8.4: state Domestic Use tables, relative differences.
cat("##### 8.4 State Domestic Use tables (checking relative differences)\n")
reportValidationResult(StateDomesticUseValidationFailures_reldiff)
# Two-region commodity output check: for every state except "Overseas",
# compare commodity output from the two-region Make table with the row totals
# of the two-region domestic Use tables.
failures <- c()
for (state in states[states != "Overseas"]) {
  # Make side: column sums of the two-region Make table.
  q_make <- colSums(TwoRegionMake_ls[[state]])
  use_2r <- TwoRegionDomesticUsewithTrade_ls[[state]]
  # Use side: first the SoI commodities (SoI2SoI incl. ExportResidual, plus
  # SoI2RoUS), then the RoUS commodities (RoUS2RoUS incl. ExportResidual,
  # plus RoUS2SoI).
  q_use <- c(
    rowSums(
      use_2r[["SoI2SoI"]][, c(industries, FD_cols, ITA_col, "ExportResidual")]
    ) +
      rowSums(use_2r[["SoI2RoUS"]][, c(industries, FD_cols, ITA_col)]),
    rowSums(
      use_2r[["RoUS2RoUS"]][, c(industries, FD_cols, ITA_col, "ExportResidual")]
    ) +
      rowSums(use_2r[["RoUS2SoI"]][, c(industries, FD_cols, ITA_col)])
  )
  # Relative difference of the Use-based value from the Make-based value.
  rel_diff <- (q_use - q_make) / q_make
  # Validate rel_diff with a tolerance of 0.01 and keep the "Failure" part.
  failures_state <- formatValidationResult(
    rel_diff,
    abs_diff = TRUE,
    tolerance = 0.01
  )[["Failure"]]
  # Tag failures with the state they belong to.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  colnames(failures_state)[1:3] <- c("Commodity", "Relative Diff", "Validation")
  failures <- rbind(failures, failures_state)
}
cat("\n")
reportValidationResult(failures)
# For every state except "Overseas": commodities flagged as zero in state
# commodity output that are not zero in the SoI2SoI ICF ratio.
failures <- c()
for (state in states[states != "Overseas"]) {
  # Find zero values in SoI commodity output
  CO_zeros <- formatValidationResult(
    State_Summary_CommodityOutput_ls[[state]] - 0,
    abs_diff = TRUE,
    tolerance = 0
  )[["Pass"]]
  colnames(CO_zeros) <- c("Commodity", "")
  # Find zero values in SoI2SoI ICF ratio (first column of the rows where
  # SoI2SoI equals 0)
  ICF <- generateDomestic2RegionICFs(
    state,
    year,
    ioschema = 2012,
    iolevel = "Summary",
    ICF_sensitivity_analysis = FALSE,
    adjust_by = 0
  )
  SoI2SoI_ICF_zeros <- ICF[ICF$SoI2SoI == 0, 1]
  # Zeros in CO that are not zeros in SoI2SoI_ICF. Everything up to the last
  # "." is removed from the commodity labels first, as done elsewhere in this
  # report, so a prefixed label cannot cause a spurious mismatch.
  # NOTE(review): assumes the first ICF column holds unprefixed commodity
  # codes -- confirm against generateDomestic2RegionICFs.
  diff <- setdiff(sub(".*\\.", "", CO_zeros$Commodity), SoI2SoI_ICF_zeros)
  # The set difference itself is the list of failing commodities. The original
  # selected from SoI2SoI_ICF_zeros the entries contained in `diff`, which is
  # always empty because `diff` excludes every element of SoI2SoI_ICF_zeros.
  failures_state <- diff
  failures <- c(failures, failures_state)
}
cat("\n")
# NOTE(review): `failures` is a plain vector of commodity codes without state
# attribution, the same shape the original passed on -- confirm that
# reportValidationResult renders a non-empty vector as intended.
reportValidationResult(failures)
# For every state except "Overseas": validate interregional imports and exports
# of the SoI2SoI and RoUS2RoUS tables against 0.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Domestic two-region Use tables of this state.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  # Interregional import/export columns of SoI2SoI, followed by those of
  # RoUS2RoUS.
  df <- cbind(
    TwoRegionTable_state[["SoI2SoI"]][
      , c("InterregionalImports", "InterregionalExports")
    ],
    TwoRegionTable_state[["RoUS2RoUS"]][
      , c("InterregionalImports", "InterregionalExports")
    ]
  )
  # Round to one decimal place; row names are restored afterwards.
  df <- as.data.frame(sapply(df, round, 1))
  rownames(df) <- rownames(TwoRegionTable_state[["SoI2SoI"]])
  # Column labels become "<state>_<table>$<column>".
  colnames(df) <- paste(
    rep(paste(state, c("SoI2SoI", "RoUS2RoUS"), sep = "_"), each = 2),
    colnames(df),
    sep = "$"
  )
  # Cells reported under "Pass" for abs_diff = FALSE, tolerance = 0 are
  # collected as failures.
  failures_state <- formatValidationResult(
    df,
    abs_diff = FALSE,
    tolerance = 0
  )[["Pass"]]
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# For every state except "Overseas": validate SoI net exports plus RoUS net
# exports against 0.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Domestic two-region Use tables of this state.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  # Sum of the NetExports columns of SoI2SoI and RoUS2RoUS.
  df <- TwoRegionTable_state[["SoI2SoI"]][, "NetExports", drop = FALSE] +
    TwoRegionTable_state[["RoUS2RoUS"]][, "NetExports", drop = FALSE]
  # Round to one decimal place; row names are restored afterwards.
  df <- as.data.frame(sapply(df, round, 1))
  rownames(df) <- rownames(TwoRegionTable_state[["SoI2SoI"]])
  # Cells reported under "Pass" for abs_diff = TRUE, tolerance = 0 are
  # collected as failures.
  failures_state <- formatValidationResult(
    df,
    abs_diff = TRUE,
    tolerance = 0
  )[["Pass"]]
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# For every state except "Overseas": compare row sums of the SoI2SoI and
# RoUS2RoUS tables with state and rest-of-US commodity supply.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Commodity supply: the state's "Output" column next to US commodity output
  # (column sums of US Make); RoUS supply is US minus state.
  df0 <- cbind.data.frame(
    State_Summary_CommodityOutput_ls[[state]][, "Output", drop = FALSE],
    colSums(US_Summary_Make)
  )
  colnames(df0) <- c("StateCommOutput", "USCommOutput")
  df0$RoUSCommOutput <- df0$USCommOutput - df0$StateCommOutput
  df0$USCommOutput <- NULL

  # Domestic two-region Use tables of this state, summed over industry,
  # final demand and ExportResidual columns.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  columns <- c(
    getVectorOfCodes("Summary", "Industry"),
    getFinalDemandCodes("Summary"),
    "ExportResidual"
  )
  df1 <- cbind.data.frame(
    rowSums(TwoRegionTable_state[["SoI2SoI"]][, columns]),
    rowSums(TwoRegionTable_state[["RoUS2RoUS"]][, columns])
  )
  colnames(df1) <- c(state, paste0(state, "'s RoUS"))

  # Validate the difference (abs_diff = FALSE, tolerance 1E7) and keep the
  # "Failure" part.
  failures_state <- formatValidationResult(
    df1 - df0,
    abs_diff = FALSE,
    tolerance = 1E7
  )[["Failure"]]
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# Cells of the US Domestic Use table reported under "Pass" for
# abs_diff = FALSE, tolerance = 0 (the original comments call these the
# negative values).
US_negatives <- formatValidationResult(
  US_Summary_DomesticUse,
  abs_diff = FALSE,
  tolerance = 0
)[["Pass"]]
colnames(US_negatives) <- c("Commodity", "Industry")

# For every state except "Overseas": collect flagged cells of the SoI2SoI and
# RoUS2RoUS tables that are not flagged in the US table.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Domestic two-region Use tables of this state, restricted to industry and
  # final demand columns.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  columns <- c(
    getVectorOfCodes("Summary", "Industry"),
    getFinalDemandCodes("Summary")
  )

  # Flagged cells in SoI2SoI, keyed by "<commodity> <industry>".
  df_SoI <- TwoRegionTable_state[["SoI2SoI"]][, columns]
  SoI_negatives <- formatValidationResult(
    df_SoI,
    abs_diff = FALSE,
    tolerance = 0
  )[["Pass"]]
  colnames(SoI_negatives)[1:2] <- c("Commodity", "Industry")
  SoI_negatives$index <- paste(SoI_negatives$Commodity, SoI_negatives$Industry)
  SoI_negatives$table <- paste(state, "SoI2SoI Use")
  # Negatives in SoI2SoI that are not negatives in US Use.
  US_SoI_diff <- setdiff(
    SoI_negatives$index,
    paste(US_negatives$Commodity, US_negatives$Industry)
  )

  # Flagged cells in RoUS2RoUS, keyed the same way.
  df_RoUS <- TwoRegionTable_state[["RoUS2RoUS"]][, columns]
  RoUS_negatives <- formatValidationResult(
    df_RoUS,
    abs_diff = FALSE,
    tolerance = 0
  )[["Pass"]]
  colnames(RoUS_negatives)[1:2] <- c("Commodity", "Industry")
  RoUS_negatives$index <- paste(
    RoUS_negatives$Commodity,
    RoUS_negatives$Industry
  )
  RoUS_negatives$table <- paste(state, "RoUS2RoUS Use")
  # Negatives in RoUS2RoUS that are not negatives in US Use.
  US_RoUS_diff <- setdiff(
    RoUS_negatives$index,
    paste(US_negatives$Commodity, US_negatives$Industry)
  )

  # Stack the failing rows of both tables, then remove the helper key and
  # reset the row names.
  failures_state <- rbind(
    SoI_negatives[SoI_negatives$index %in% US_SoI_diff, ],
    RoUS_negatives[RoUS_negatives$index %in% US_RoUS_diff, ]
  )
  failures_state$index <- rownames(failures_state) <- NULL
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# For every state except "Overseas": compare SoI interregional imports with
# RoUS interregional exports.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Domestic two-region Use tables of this state.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  # df0: SoI interregional imports; df1: RoUS interregional exports.
  df0 <- TwoRegionTable_state[["SoI2SoI"]][
    , "InterregionalImports",
    drop = FALSE
  ]
  df1 <- TwoRegionTable_state[["RoUS2RoUS"]][
    , "InterregionalExports",
    drop = FALSE
  ]
  # Validate the difference (abs_diff = FALSE, tolerance 1E-3) and keep the
  # "Failure" part.
  failures_state <- formatValidationResult(
    df0 - df1,
    abs_diff = FALSE,
    tolerance = 1E-3
  )[["Failure"]]
  # Tag failures with the state they belong to.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# For every state except "Overseas": compare state commodity supply with state
# demand taken from the SoI2SoI table.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Domestic two-region Use tables of this state.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  # Demand columns: industries, final demand codes without the import codes,
  # plus "F051", interregional exports and the export residual.
  columns <- c(
    getVectorOfCodes("Summary", "Industry"),
    setdiff(
      getFinalDemandCodes("Summary"),
      getVectorOfCodes("Summary", "Import")
    ),
    "F051",
    "InterregionalExports",
    "ExportResidual"
  )
  # df0: state commodity supply; df1: row sums of SoI2SoI over the demand
  # columns.
  df0 <- State_Summary_CommodityOutput_ls[[state]][, "Output", drop = FALSE]
  df1 <- as.data.frame(rowSums(TwoRegionTable_state[["SoI2SoI"]][, columns]))
  # Validate supply minus demand with a tolerance of 1E-3 and keep the
  # "Failure" part.
  failures_state <- formatValidationResult(
    df0 - df1,
    abs_diff = TRUE,
    tolerance = 1E-3
  )[["Failure"]]
  # Tag failures with the state they belong to.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# For every state except "Overseas": compare two-region commodity output with
# the total built from two-region domestic Use, ITA and the export residual.
failures <- c()
for (state in states[states != "Overseas"]) {
  # Two-region commodity output.
  q <- TwoRegionCommodityOutput_ls[[state]]
  # Use-based total: row sums of two-region domestic Use, plus ITA, plus the
  # ExportResidual columns of SoI2SoI followed by RoUS2RoUS.
  q_2r_use <- rowSums(TwoRegionDomesticUse_ls[[state]]) +
    TwoRegionITA_ls[[state]] +
    c(
      TwoRegionDomesticUsewithTrade_ls[[state]][["SoI2SoI"]][
        , "ExportResidual"
      ],
      TwoRegionDomesticUsewithTrade_ls[[state]][["RoUS2RoUS"]][
        , "ExportResidual"
      ]
    )
  # Relative difference of q from the Use-based total, aligned by the names
  # of q.
  rel_diff <- (q - q_2r_use[names(q)]) / q_2r_use[names(q)]
  # Validate rel_diff with a tolerance of 0.01 and keep the "Failure" part.
  failures_state <- formatValidationResult(
    rel_diff,
    abs_diff = TRUE,
    tolerance = 0.01
  )[["Failure"]]
  # Tag failures with the state they belong to.
  if (nrow(failures_state) > 0) {
    failures_state$State <- state
  }
  colnames(failures_state)[1:3] <- c("Commodity", "Relative Diff", "Validation")
  failures <- rbind(failures, failures_state)
}
cat("\n")
reportValidationResult(failures)
# Cells of the US Domestic Use table reported under "Pass" for
# abs_diff = FALSE, tolerance = 0 (the original comments call these the
# negative values).
US_negatives <- formatValidationResult(
  US_Summary_DomesticUse,
  abs_diff = FALSE,
  tolerance = 0
)[["Pass"]]
colnames(US_negatives) <- c("Commodity", "Industry")

# Start from an empty accumulator. The original initialised `failure` but then
# appended to `failures`, so results left over from the previous chunk were
# carried into this report.
failures <- data.frame()
for (state in states[states != "Overseas"]) {
  # Domestic two-region Use tables of this state, restricted to industry and
  # final demand columns.
  TwoRegionTable_state <- TwoRegionDomesticUsewithTrade_ls[[state]]
  columns <- c(
    getVectorOfCodes("Summary", "Industry"),
    getFinalDemandCodes("Summary")
  )
  failures_state <- data.frame()
  # The first four elements of the state's list are checked one by one.
  for (table in names(TwoRegionTable_state)[1:4]) {
    # Flagged cells in this two-region Use table, keyed by
    # "<commodity> <industry>".
    df <- TwoRegionTable_state[[table]][, columns]
    state_negatives <- formatValidationResult(
      df,
      abs_diff = FALSE,
      tolerance = 0
    )[["Pass"]]
    colnames(state_negatives) <- c("Commodity", "Industry")
    state_negatives$index <- paste(
      state_negatives$Commodity,
      state_negatives$Industry
    )
    # rep() keeps the assignment valid when no cell is flagged: assigning a
    # length-1 value to a column of a zero-row data frame is an error.
    state_negatives$table <- rep(
      paste(state, table, "Use"),
      nrow(state_negatives)
    )
    # Negatives in this table that are not negatives in US Use.
    US_state_diff <- setdiff(
      state_negatives$index,
      paste(US_negatives$Commodity, US_negatives$Industry)
    )
    failures_table <- state_negatives[state_negatives$index %in% US_state_diff, ]
    failures_table$index <- NULL
    # Accumulate into the per-state result. The original assigned the rbind()
    # back to `failures_table`, so `failures_state` stayed empty and no failure
    # from this check was ever reported.
    failures_state <- rbind(failures_state, failures_table)
  }
  failures <- rbind(failures, failures_state)
}
reportValidationResult(failures)
# Per-state failures of the two-region "L against output" validation, stored
# by state name: one list for absolute and one for relative differences.
Failures_ls <- list()
Failures_RelDiff_ls <- list()
for (state in state.name) {
  # Run the validation for this state.
  SoI_2r_LagaintsOutput_Validation <- validateTwoRegionLagainstOutput(
    state,
    year,
    ioschema = 2012,
    iolevel = "Summary"
  )
  # Absolute difference: "L*y-output" column, tolerance 1E6.
  Failures_ls[[state]] <- formatValidationResult(
    SoI_2r_LagaintsOutput_Validation$Validation[, "L*y-output", drop = FALSE],
    abs_diff = TRUE,
    tolerance = 1E6
  )[["Failure"]]
  # Relative difference: "rel_diff" column, tolerance 1E-2.
  Failures_RelDiff_ls[[state]] <- formatValidationResult(
    SoI_2r_LagaintsOutput_Validation$Validation[, "rel_diff", drop = FALSE],
    abs_diff = TRUE,
    tolerance = 1E-2
  )[["Failure"]]
}
# Print one numbered subsection per state, followed by its absolute and
# relative difference failures.
for (state in state.name) {
  # Subsection number is the position of the state within `states`.
  cat(
    paste0("##### 18.", which(states == state)),
    state,
    "and Rest of the US\n"
  )
  cat("Absolute Difference: \n")
  reportValidationResult(Failures_ls[[state]])
  cat("Relative Difference: \n")
  reportValidationResult(Failures_RelDiff_ls[[state]])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.