spew: SPEW algorithm to generate synthetic ecosystems
In spew: SPEW Framework for Generating Synthetic Ecosystems

Description Usage Arguments Value Examples

SPEW algorithm to generate synthetic ecosystems

spew(pop_table, shapefile, pums_h, pums_p, schools = NULL,
  workplaces = NULL, marginals = NULL, output_type = "console",
  output_dir = NULL, convert_count = FALSE, run_type = "SEQ",
  sampling_method = "uniform", locations_method = "uniform",
  outfile_loc = "", road_noise = 2e-04, timer = FALSE, verbose = FALSE)

`pop_table`	dataframe where rows correspond to places where populations should be generated. Other required columns are "n_house" and "puma_id"
`shapefile`	sp class object used for assigning households to particular locations
`pums_h`	dataframe with microdata corresponding to households
`pums_p`	dataframe with microdata corresponding to people
`schools`	list with names "public" and "private" with a dataframe of schools corresponding to public or private, respectively
`workplaces`	dataframe of workplaces with a workplace_id column, employees column, and stcotr column
`marginals`	list of marginal population totals. Each element of the list contains the marginal totals of a separate variable
`output_type`	Default is "console", used when we want the resulting population as an R variable. Alternative is "write", which is used on Olympus for writing out .csv files of the population
`output_dir`	character specifying the ecosystem directory to write to. Only used if output_type = "write"
`convert_count`	logical meant to indicate if we are going to convert population totals from people to household counts. Default: FALSE, assumes the population is the total number of households
`run_type`	Whether to run sequentially or in parallel. Default is "SEQ", for a sequential run. If parallel, back-end is either "MPI", "SOCK", or "MC"
`sampling_method`	character indicating the type of sampling method to use, defaults to "uniform". Can also be "ipf" with appropriate marginal data.
`locations_method`	character indicating the type of location sampling to use, defaults to "uniform", can also be "roads".
`outfile_loc`	Defaults to "", so we print out the parallel run information. Only set to "/dev/null" for internal testing purposes.
`road_noise`	Noise added to households during road-based sampling
`timer`	logical indicating we want to time the run
`verbose`	logical indicating we want to print output during the run. Default is FALSE.

logical indicating whether or not this run of spew ended successfully

# Basic usage: run spew on the bundled tartanville data, leaving every
# optional argument at its default ---
data(tartanville)
tartanville_syneco <- spew(
  pop_table = tartanville$pop_table,
  shapefile = tartanville$shapefile,
  pums_h = tartanville$pums_h,
  pums_p = tartanville$pums_p
)
                           
# Road-based location sampling: the shapefile argument is a list holding
# both the boundaries and the roads from tartanville ---
roads_shapefile <- list(
  boundaries = tartanville$shapefile,
  roads = tartanville$roads
)
tartanville_syneco_roads <- spew(
  pop_table = tartanville$pop_table,
  shapefile = roads_shapefile,
  pums_h = tartanville$pums_h,
  pums_p = tartanville$pums_p,
  locations_method = "roads",
  road_noise = 0.05
)

# IPF agent sampling: build one marginal object per variable, then hand
# them to spew through the `marginals` argument ---

# Marginal 1: household income (HHINC), ordinal, two brackets
var_name <- "HHINC"
type <- "ord"
bounds <- data.frame(lower = c(0, 50), upper = c(49, Inf))
category_name <- c("HHINC_0-49", "HHINC_50-Inf")
df <- data.frame(
  place_id = paste0("T", 1:9),
  v1 = c(30, 0, 5, 10, 13, 9, 2, 1, 5)
)
# v2 is each place's household count minus v1
df$v2 <- tartanville$pop_table$n_house - df$v1
ipf_obj_hhinc <- make_ipf_obj(var_name, type, bounds, category_name, df = df)

# Marginal 2: head-of-household race (RAC1P), categorical, two categories
var_name <- "RAC1P"
type <- "cat"
bounds <- data.frame(lower = c(1, 2), upper = c(1, 2))
category_name <- c("Tartan", "Argyle")
df2 <- data.frame(
  place_id = paste0("T", 1:9),
  v1 = c(28, 0, 4, 1, 5, 8, 2, 1, 3)
)
df2$v2 <- tartanville$pop_table$n_house - df2$v1
ipf_obj_rac1p <- make_ipf_obj(var_name, type, bounds, category_name, df = df2)

# Combine the first element of each make_ipf_obj() result into one named list
ipf_obj <- list(
  HHINC = ipf_obj_hhinc[[1]],
  RAC1P = ipf_obj_rac1p[[1]]
)
supplementary_data <- list(moments = ipf_obj)

tartanville_syneco_ipf <- spew(
  pop_table = tartanville$pop_table,
  shapefile = tartanville$shapefile,
  pums_h = tartanville$pums_h,
  pums_p = tartanville$pums_p,
  marginals = supplementary_data$moments,
  sampling_method = "ipf"
)

# Moment-matching agent sampling: supply one NP value per place through a
# make_mm_obj() object passed as `marginals` ---
NP_avg <- c(3.2, 0, 6.0, 2.0, 3.2, 3.1, 4.0, 4.8, 3.9)
supplementary_data <- list(
  moments = make_mm_obj(
    moments_list = list(
      mom1 = data.frame(
        place_id = paste0("T", 1:9),
        puma_id = "T",
        NP = NP_avg
      )
    ),
    assumption = "independence",
    nMom = 1,
    type = "cont"
  )
)
tartanville_syneco_mm <- spew(
  pop_table = tartanville$pop_table,
  shapefile = tartanville$shapefile,
  pums_h = tartanville$pums_h,
  pums_p = tartanville$pums_p,
  marginals = supplementary_data$moments,
  sampling_method = "mm"
)