#' chunk_readylist
#'
#' @return Returns a list of disabled chunks: their lines of code, inputs and available inputs, dependencies
#' @importFrom dplyr filter mutate select
#' @importFrom tidyr gather spread
#' @export
chunk_readylist <- function() {
disabled <- output <- name <- available <- from_file <- module <- chunk <-
filename <- . <- NULL # silence notes in package check
chunklist <- find_chunks(include_disabled = TRUE)
ci <- chunk_inputs(chunklist$name)
co <- chunk_outputs(chunklist$name)
filter(chunklist, !disabled) %>%
left_join(co, by = "name") %>%
select(output) %>%
mutate(available = TRUE) ->
enabled_outputs
# 'Ready list' is disabled chunks all of whose inputs are supplied by an enabled chunk
chunklist %>%
filter(disabled) %>%
left_join(ci, by = "name") %>%
select(-name) %>%
left_join(enabled_outputs, by = c("input" = "output")) %>%
mutate(available = if_else(is.na(available) & !from_file, FALSE, TRUE)) %>%
group_by(module, chunk, disabled) %>%
summarise(n_inputs = length(available),
n_avail = sum(available),
all_avail = all(available),
n_deps = count_downstream_dependencies(chunk[1], chunklist, ci, co)[["deps"]],
n_deps_total = count_downstream_dependencies(chunk[1], chunklist, ci, co, TRUE)[["deps"]]) ->
readylist
# Add number of code lines
admin.LINEDATA %>%
mutate(chunk = gsub("chunk_", "", filename) %>%
gsub("\\.R$", "_DISABLED", .)) %>%
select(-filename) %>%
right_join(readylist, by = "chunk")
}
# internal function, used by chunk_readylist above
count_downstream_dependencies <- function(chunkname, chunklist, ci, co, recurse = FALSE, excludes = NA) {
name <- chunk <- output <- NULL # silence notes in package check
chunklist %>%
select(name, chunk) %>%
right_join(co, by = "name") %>%
filter(chunk == chunkname) %>%
select(output) %>%
inner_join(ci, by = c("output" = "input")) %>%
left_join(chunklist, by = "name") ->
outputlist
depnames <- unique(outputlist$name)
deps <- length(setdiff(depnames, excludes))
if(recurse) {
for(i in unique(outputlist$chunk)) {
x <- count_downstream_dependencies(i, chunklist, ci, co,
recurse = recurse,
excludes = depnames)
deps <- deps + x$deps
depnames <- c(depnames, x$depnames)
}
}
list("deps" = deps, "depnames" = depnames)
}
# This list of file line counts is generated by `chunk-generator.R`
# Just pasted in here for use by chunk_readylist above
admin.LINEDATA <- structure(list(
filename = c("chunk_LA100.0_LDS_preprocessing.R",
"chunk_LA100.FAO_downscale_ctry.R", "chunk_LA100.GTAP_downscale_ctry.R",
"chunk_LA100.IMAGE_downscale_ctry_yr.R", "chunk_LA101.ag_FAO_R_C_Y.R",
"chunk_LA102.ag_LDS_R_C_GLU.R", "chunk_LA103.ag_R_C_Y_GLU.R",
"chunk_LA105.an_FAO_R_C_Y.R", "chunk_LA106.ag_an_NetExp_FAO_R_C_Y.R",
"chunk_LA107.an_IMAGE_R_C_Sys_Fd_Y.R", "chunk_LA108.ag_Feed_R_C_Y.R",
"chunk_LB109.ag_an_ALL_R_C_Y.R", "chunk_LB110.For_FAO_R_Y.R",
"chunk_LB111.ag_resbio_R_C.R", "chunk_LB112.ag_prodchange_R_C_Y_GLU.R",
"chunk_LB113.bio_Yield_R_GLU.R", "chunk_LB120.LC_GIS_R_LTgis_Yh_GLU.R",
"chunk_LB121.Carbon_LT.R", "chunk_LB122.LC_R_Cropland_Yh_GLU.R",
"chunk_LB123.LC_R_MgdPastFor_Yh_GLU.R", "chunk_LB124.LC_R_UnMgd_Yh_GLU.R",
"chunk_LB125.LC_tot.R", "chunk_LB131.LV_R_GLU.R", "chunk_LB132.ag_an_For_Prices_USA_C_2005.R",
"chunk_LB133.ag_Costs_USA_C_2005.R", "chunk_LB134.Diet_Rfao.R",
"chunk_LB141.ag_Fert_IFA_ctry_crop.R", "chunk_LB142.ag_Fert_IO_R_C_Y_GLU.R",
"chunk_LB151.ag_MIRCA_ctry_C_GLU_irr.R", "chunk_LB152.ag_GTAP_R_C_GLU_irr.R",
"chunk_LB161.ag_R_C_Y_GLU_irr.R", "chunk_LB162.ag_prodchange_R_C_Y_GLU_irr.R",
"chunk_LB163.bio_Yield_R_GLU_irr.R", "chunk_LB164.ag_Costs_USA_C_2005_irr.R",
"chunk_LB165.ag_water_R_C_Y_GLU_irr.R", "chunk_LB171.LC_R_Cropland_Yh_GLU_irr.R",
"chunk_LB181.ag_R_C_Y_GLU_irr_mgmt.R", "chunk_L201.ag_For_Past_bio_input.R",
"chunk_L2011.ag_For_Past_bio_input_irr.R", "chunk_L2012.ag_For_Past_bio_input_irr_mgmt.R",
"chunk_L202.an_input.R", "chunk_L203.demand_input.R", "chunk_L204.resbio_input.R",
"chunk_L2041.resbio_input_irr.R", "chunk_L2042.resbio_input_irr_mgmt.R",
"chunk_L205.ag_prodchange_cost_input.R", "chunk_L2051.ag_prodchange_cost_input_irr.R",
"chunk_L2052.ag_prodchange_cost_irr_mgmt.R", "chunk_L206.ag_Fert.R",
"chunk_L2061.ag_Fert_irr.R", "chunk_L2062.ag_Fert_irr_mgmt.R",
"chunk_L2071.ag_water_irr.R", "chunk_L2072.ag_water_irr_mgmt.R",
"chunk_L221.land_input_1.R", "chunk_L222.land_input_2.R", "chunk_L223.land_input_3.R",
"chunk_L2231.land_input_3_irr.R", "chunk_L2241.land_input_4_irr.R",
"chunk_L2242.land_input_4_irr_mgmt.R", "chunk_L2252.land_input_5_irr_mgmt.R",
"chunk_L241.trade_input.R", "chunk_L242.ssp34_pasture.R", "chunk_L243.bio_trade_input.R",
"chunk_L101.nonghg_en_USA_S_T_Y.R", "chunk_L102.ghg_en_USA_S_T_Y.R",
"chunk_L103.ghg_an_USA_S_T_Y.R", "chunk_L104.bcoc_en_USA_S_T_Y.R",
"chunk_L105.nh3_an_USA_S_T_Y.R", "chunk_L111.nonghg_en_R_S_T_Y.R",
"chunk_L112.ghg_en_R_S_T_Y.R", "chunk_L113.ghg_an_R_S_T_Y.R",
"chunk_L114.bcoc_en_R_S_T_Y.R", "chunk_L115.nh3_an_R_S_T_Y.R",
"chunk_L121.nonco2_awb_R_S_T_Y.R", "chunk_L1211.nonco2_awb_R_S_T_Y_IRR.R",
"chunk_L122.ghg_agr_R_S_T_Y.R", "chunk_L1221.ghg_agr_R_S_T_Y_IRR.R",
"chunk_L123.bcoc_awb_R_S_T_Y.R", "chunk_L124.nonco2_unmgd_R_S_T_Y.R",
"chunk_L125.bcoc_unmgd_R_S_T_Y.R", "chunk_L131.nonco2_proc_R_S_T_Y.R",
"chunk_L141.hfc_R_S_T_Y.R", "chunk_L142.pfc_R_S_T_Y.R", "chunk_L151.ctrl_R_en_S_T.R",
"chunk_L152.MACC.R", "chunk_L161.nonghg_en_ssp_R_S_T_Y.R", "chunk_L201.en_nonco2.R",
"chunk_L211.ag_nonco2.R", "chunk_L2111.ag_nonco2_IRR.R", "chunk_L2112.ag_nonco2_IRR_MGMT.R",
"chunk_L212.unmgd_nonco2.R", "chunk_L231.proc_sector.R", "chunk_L232.prc_nonco2.R",
"chunk_L241.en_newtech_nonco2.R", "chunk_L241.fgas.R", "chunk_L251.en_ssp_nonco2.R",
"chunk_L252.MACC.R", "chunk_L2521.MACC_IRR.R", "chunk_L2522.ag_MACC_IRR_MGMT.R",
"chunk_LA100.CDIAC_downscale_ctry.R", "chunk_LA100.IEA_downscale_ctry.R",
"chunk_LA101.en_bal_IEA.R", "chunk_LA1011.en_bal_adj.R", "chunk_LA102.en_emiss_CDIAC.R",
"chunk_LA111.rsrc_fos_Prod.R", "chunk_LA112.U.R", "chunk_LA113.MSW.R",
"chunk_LA114.wind.R", "chunk_LA115.roofPV.R", "chunk_LA116.geo.R",
"chunk_LA117.tradbio.R", "chunk_LA118.hydro.R", "chunk_LA119.solar.R",
"chunk_LA121.oil.R", "chunk_LA122.gasproc_refining.R", "chunk_LA123.electricity.R",
"chunk_LA1231.elec_tech.R", "chunk_LA124.heat.R", "chunk_LA126.distribution.R",
"chunk_LA131.enduse.R", "chunk_LA132.industry.R", "chunk_LA1321.cement.R",
"chunk_LA142.building_agg.R", "chunk_LA143.HDDCDD.R", "chunk_LA144.building_det_en.R",
"chunk_LA144.building_det_flsp.R", "chunk_LA152.transportation.R",
"chunk_LA154.transportation_UCD.R", "chunk_LA161.Cstorage.R",
"chunk_LB1322.Fert.R", "chunk_L202.Ccoef.R", "chunk_L210.resources.R",
"chunk_L221.en_supply.R", "chunk_L222.en_transformation.R", "chunk_L223.electricity.R",
"chunk_L224.heat.R", "chunk_L225.hydrogen.R", "chunk_L226.en_distribution.R",
"chunk_L232.industry.R", "chunk_L2321.cement.R", "chunk_L2322.Fert.R",
"chunk_L242.building_agg.R", "chunk_L244.building_det.R", "chunk_L252.transportation.R",
"chunk_L254.transportation_UCD.R", "chunk_L261.Cstorage.R", "chunk_LA100.Socioeconomics.R",
"chunk_LA101.EIA_SEDS.R", "chunk_LA114.Wind.R", "chunk_LA115.RooftopPV.R",
"chunk_LA119.Solar.R", "chunk_LA122.Refining.R", "chunk_LA132.Industry.R",
"chunk_LA1321.Cement.R", "chunk_LA1322.Fert.R", "chunk_LA142.Building.R",
"chunk_LA144.Commercial.R", "chunk_LA144.Residential.R", "chunk_LA154.Transport.R",
"chunk_LB123.Electricity.R", "chunk_LB1231.Elec_tech.R", "chunk_LB1232.Elec_subregions.R",
"chunk_LB1233.Elec_water.R", "chunk_LB126.Gas_ElecTD.R", "chunk_L201.socioeconomics_USA.R",
"chunk_L210.resources_USA.R", "chunk_L222.en_transformation_USA.R",
"chunk_L223.electricity_USA.R", "chunk_L2232.electricity_FERC_USA.R",
"chunk_L225.hydrogen_USA.R", "chunk_L226.en_distribution_USA.R",
"chunk_L232.industry_USA.R", "chunk_L2321.cement_USA.R", "chunk_L2322.Fert_USA.R",
"chunk_L244.building_USA.R", "chunk_L254.transportation_USA.R",
"chunk_L261.carbon_storage_USA.R", "chunk_L200.modeltime.R",
"chunk_L100.GDP_hist.R", "chunk_L100.Population_downscale_ctry.R",
"chunk_L101.Population.R", "chunk_L102.GDP.R", "chunk_L201.Pop_GDP_scenarios.R",
"chunk_L242.Bld_Inc_Elas_scenarios.R", "chunk_L252.Trn_Inc_Elas_scenarios.R",
"chunk_L102.water.supply.unlimited.R", "chunk_L110.water.demand.primary.R",
"chunk_L1233.Elec_water.R", "chunk_L132.water.demand.manufacturing.R",
"chunk_L133.water.demand.livestock.R", "chunk_L145.water.demand.municipal.R",
"chunk_L202.water.resources.unlimited.R", "chunk_L203.water.mapping.R",
"chunk_L210.water.demand.primary.R", "chunk_L2233.electricity_water.R",
"chunk_L232.water.demand.manufacturing.R", "chunk_L233.water.demand.livestock.R",
"chunk_L245.water.demand.municipal.R"),
lines = c(86L, 238L,
74L, 59L, 139L, 63L, 110L, 139L, 140L, 94L, 186L, 155L, 89L,
69L, 199L, 81L, 119L, 115L, 237L, 231L, 108L, 88L, 59L, 165L,
135L, 361L, 148L, 154L, 143L, 78L, 131L, 220L, 108L, 95L, 308L,
90L, 350L, 268L, 170L, 140L, 413L, 552L, 137L, 67L, 69L, 185L,
166L, 94L, 120L, 93L, 62L, 226L, 64L, 152L, 189L, 402L, 72L,
283L, 59L, 172L, 119L, 98L, 236L, 265L, 79L, 72L, 83L, 82L, 177L,
130L, 102L, 124L, 98L, 126L, 71L, 188L, 49L, 78L, 221L, 157L,
131L, 156L, 104L, 82L, 98L, 330L, 207L, 150L, 109L, 99L, 269L,
233L, 79L, 134L, 141L, 113L, 256L, 90L, 85L, 93L, 185L, 164L,
122L, 98L, 154L, 56L, 73L, 138L, 75L, 113L, 60L, 128L, 114L,
99L, 211L, 89L, 127L, 117L, 96L, 119L, 109L, 236L, 69L, 137L,
377L, 193L, 44L, 366L, 73L, 290L, 79L, 381L, 350L, 277L, 591L,
221L, 149L, 197L, 430L, 294L, 270L, 231L, 698L, 180L, 474L, 179L,
89L, 73L, 69L, 142L, 58L, 167L, 159L, 66L, 66L, 89L, 319L, 366L,
103L, 131L, 72L, 47L, 181L, 237L, 85L, 164L, 268L, 511L, 304L,
37L, 228L, 236L, 197L, 214L, 541L, 243L, 140L, 87L, 49L, 139L,
101L, 263L, 158L, 78L, 77L, 58L, 57L, 210L, 64L, 88L, 92L, 65L,
109L, 58L, 539L, 54L, 53L, 144L)),
.Names = c("filename", "lines"),
row.names = c(NA, -198L), class = c("tbl_df", "tbl", "data.frame"))
# ---------------
# Following functions are extremely specific administrative ones
# Don't worry about covering them in tests
# nocov start
#' normalize_files
#'
#' Normalize line endings for all package input data.
#'
#' @param root Folder root to scan, character
#' @return Nothing - run for side effects only.
#' @note Set \code{root} to "./extdata" in the git directory, not the package root, to make changes that 'stick'.
#' @details Some GCAM input datafiles have bad line endings, and/or
#' don't have a final newline. This utility script converts all files to have Unix line endings (\code{\\n}) and a final newline.
#' @author BBL
normalize_files <- function(root = system.file("extdata", package = "gcamdata")) {
assert_that(is.character(root))
message("Root: ", root)
# Get a list of all CSV input files
files <- list.files(root, pattern = "\\.csv$", full.names = TRUE, recursive = TRUE)
for(f in seq_along(files)) {
shortfn <- gsub(root, "", files[f])
size <- round(file.size(files[f]) / 1024 / 1024, 3) # MB
message(f, "/", length(files), ": ", shortfn, ", ", size, " Mb ", appendLF = FALSE)
# Read file and then write it back out
message("\tReading...", appendLF = FALSE)
txt <- readLines(files[f], warn = FALSE)
uc_size <- format(utils::object.size(txt), units = "Mb")
message("OK. ", uc_size, " uncompressed")
message("\tWriting...", appendLF = FALSE)
writeLines(txt, files[f])
message("OK")
}
}
#' add_column_types_header_line
#'
#' One-off function to insert a `Column types` header line into input files.
#'
#' @param overwrite Overwrite any previous column type lines? Logical
#' @return Nothing.
add_column_types_header_line <- function(overwrite = FALSE) {
files <- list.files(path = "inst/extdata/", pattern = "csv$", full.names = TRUE, recursive = TRUE)
colchars <- c("character" = "c", "integer" = "i", "numeric" = "n", "double" = "d", "logical" = "I")
for(f in files) {
cat(f, "...")
header <- find_header(f)
original_header_length <- length(header)
# Remove any previous Column types entry
if(overwrite) {
column_rows <- grepl("^# Column types", header)
if(length(column_rows)) {
header <- header[!column_rows]
}
}
# A few files are custom and don't have headers; skip
if(length(header)) {
# Remopve any existing separator line
if(header[length(header)] == "# ----------") {
header <- header[-length(header)]
}
# Read the data and infer column classes
dat <- utils::read.csv(f, comment.char = "#", stringsAsFactors = F)
coltypes_list <- lapply(dat, class)
allNAs <- unlist(lapply(dat, function(x) all(is.na(x))))
coltypes_list[allNAs] <- "character" # we want all-NA columns to be character, not logical
coltypes <- unlist(coltypes_list)
coltypes <- paste(colchars[coltypes], collapse = "")
cat(coltypes, "\n")
# Now we want to write the header, the new columns line, the separator, and then the data
outfile <- f
fullfile <- readLines(f)
cat(header, file = outfile, sep = "\n")
cat(paste("# Column types:", coltypes, "\n"), file = outfile, append = TRUE)
cat("# ----------", file = outfile, sep = "\n", append = TRUE)
cat(fullfile[(original_header_length + 1):length(fullfile)],
file = outfile, sep = "\n", append = TRUE)
} else {
cat("SKIP\n")
next
}
}
}
# nocov end
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.