*Make sure you've followed the instructions under "Setting up your API key" in
the ipumsr API vignette (`vignette("ipums-api", package = "ipumsr")`) before
running this template.*
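This template is for two types of IPUMS users: the analyst, who creates an analysis of IPUMS data and wants to share it, and the collaborator, who receives that analysis and wants to run it.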
This template uses the IPUMS API to help the analyst and collaborator work with the same dataset. It helps the analyst by including code to download their IPUMS data extract and save the extract definition in a shareable format, and it helps the collaborator by including code to create and download a new extract matching that shared definition.
If you're reading this, you are probably the analyst, because we recommend that the analyst delete this section before sharing their analysis.
If you are the analyst, follow these steps to make your analysis shareable:
Click the Knit button or use `rmarkdown::render()` to run the
template. Repeat this step until your extract is ready.

#### Key Parameters #####
# If you change any of these parameters after running the template, delete all
# files in `data_dir` to ensure a fresh start

# The IPUMS data collection of your extract; run `ipums_data_collections()`
# for a list of supported collections
collection <- "usa"

# The extract number, or leave as `NULL` for your most recent extract
extract_num <- NULL

# A descriptive label for your extract; used to rename your data files
descriptive_name <- "my_ipums_extract"

# The folder in which to save data, codebook, and .json files
data_dir <- "data"
This next code chunk pulls down your extract definition and saves it to a JSON
file in `data_dir`. If the extract is ready, it downloads the data and codebook
files to `data_dir` and renames them according to `descriptive_name`. If the
extract is not ready, the code throws an error to inform you that your extract
is not ready yet, and that you should try re-running this template again later.
# Analyst-side chunk: save the extract definition as JSON in `data_dir`, then
# download and rename the data and codebook files if the extract is ready.
# Reads the parameters `collection`, `extract_num`, `descriptive_name`, and
# `data_dir`; stops with a message if the extract is stale or not yet ready.

# Load ipumsr
suppressPackageStartupMessages(
  library(ipumsr)
)

# Create data_dir if it doesn't exist; `recursive = TRUE` also creates any
# missing parent folders, so a nested path such as "data/raw" works
if (!dir.exists(data_dir)) {
  dir.create(data_dir, recursive = TRUE)
}

# Define file paths
json_path <- file.path(data_dir, paste0(descriptive_name, ".json"))
renamed_data_path <- file.path(data_dir, paste0(descriptive_name, ".dat.gz"))
renamed_ddi_path <- file.path(data_dir, paste0(descriptive_name, ".xml"))
gitignore_path <- file.path(data_dir, ".gitignore")

# Get info on the designated extract (most recent extract if extract_num is
# NULL)
if (is.null(extract_num)) {
  extract_definition <- get_last_extract_info(collection)
} else {
  extract_definition <- get_extract_info(c(collection, extract_num))
}

# Do we already have the data and/or the JSON? These are scalar flags, so use
# the scalar operator `&&` rather than elementwise `&`
no_json <- !file.exists(json_path)
no_data_yes_json <- !file.exists(renamed_data_path) && file.exists(json_path)
yes_data_yes_json <- file.exists(renamed_data_path) && file.exists(json_path)

# If no JSON file, create it
if (no_json) {
  save_extract_as_json(extract_definition, file = json_path)
  # A freshly written definition always triggers the download step below
  no_data_yes_json <- TRUE
}

# If we don't yet have the data, check whether the extract is ready
if (no_data_yes_json) {
  extract_is_ready <- is_extract_ready(extract_definition)
  # "completed" but not ready is treated as stale (files no longer available);
  # `isTRUE()` keeps the flag a single TRUE/FALSE even if `status` is missing
  extract_is_stale <- !extract_is_ready &&
    isTRUE(extract_definition$status == "completed")

  if (extract_is_stale) {
    stop(
      paste0(
        "The data files for ", collection, " extract number ",
        extract_definition$number, " have been removed from IPUMS servers. ",
        "Resubmit this extract by running `submit_extract(get_extract_info(\"",
        collection, ":", extract_definition$number, "\"))` and update the ",
        "`extract_num` parameter before re-running the template."
      ),
      call. = FALSE
    )
  }

  # If extract is ready, download files and rename according to
  # `descriptive_name`
  if (extract_is_ready) {
    ddi_file <- download_extract(extract_definition, download_dir = data_dir)
    # The data file path is derived from the returned codebook (.xml) path
    data_file <- gsub("\\.xml$", ".dat.gz", ddi_file)
    ddi_file_successfully_renamed <- file.rename(ddi_file, renamed_ddi_path)
    data_file_successfully_renamed <- file.rename(data_file, renamed_data_path)
    if (!ddi_file_successfully_renamed || !data_file_successfully_renamed) {
      stop(
        "Problem renaming DDI and/or data file; please report bug at ",
        "https://github.com/ipums/ipumsr/issues, including a copy of this ",
        "file if possible.",
        call. = FALSE
      )
    }

    # Add the data and codebook files to .gitignore, keeping existing lines
    # untouched and appending only entries that are not already listed
    files_to_gitignore <- c(
      basename(renamed_data_path),
      basename(renamed_ddi_path)
    )
    if (file.exists(gitignore_path)) {
      existing_gitignore_lines <- readLines(gitignore_path)
      files_to_gitignore <- c(
        existing_gitignore_lines,
        setdiff(files_to_gitignore, existing_gitignore_lines)
      )
    }
    writeLines(files_to_gitignore, con = gitignore_path)
    yes_data_yes_json <- TRUE
  } else {
    # If extract isn't ready, stop execution
    stop(
      "NOT AN ERROR: ", collection, " extract number ",
      extract_definition$number, " is not yet ready to download. Try ",
      "re-running again later.",
      call. = FALSE
    )
  }
}

# If data are downloaded, copy file paths, then delete this section
if (yes_data_yes_json) {
  cat(
    paste0(
      "```\n",
      "Data, codebook, and .json extract definition files have been saved to ",
      "folder \"", data_dir, "\".\n\nNext, copy the code below into the ",
      "\"Define File Paths\" code chunk, overwriting the existing code:\n\n",
      "extract_definition_path <- \"", json_path, "\"\n",
      "data_path <- \"", renamed_data_path, "\"\n",
      "ddi_path <- \"", renamed_ddi_path, "\"\n\n",
      "Finally, delete all text and code in the section \"Delete this section ",
      "before sharing\"\n",
      "```"
    )
  )
}
# Attach the packages used by the analysis without printing startup messages
suppressPackageStartupMessages({
  library(ipumsr)
  # library() additional packages as necessary
})
# Path to the .json extract definition; the data and codebook paths share its
# base name and differ only in the file extension
extract_definition_path <- json_path
data_path <- sub("\\.json$", ".dat.gz", extract_definition_path)
ddi_path <- sub("\\.json$", ".xml", extract_definition_path)
This analysis of IPUMS data is designed to be shared, and thus does not assume that you have already downloaded the data used in the analysis. The code below checks whether the data are already downloaded, and if they aren't, it submits a new IPUMS extract request according to the specifications in the included extract definition JSON file.
# Collaborator-side chunk: if the data file is missing, submit a new extract
# built from the JSON definition (once), then on each run check its status and
# download and rename the files when ready. A "waiting_for_extract.txt" flag
# file in the JSON's folder records the submitted extract's ID between runs.
# Reads `extract_definition_path`, `data_path`, and `ddi_path`.

# Define path to "waiting_for_extract" flag file
data_dir <- dirname(extract_definition_path)
waiting_for_extract_path <- file.path(data_dir, "waiting_for_extract.txt")

# Ensure the JSON extract definition is present
json_file_exists <- file.exists(extract_definition_path)
if (!json_file_exists) {
  stop(
    "File '", extract_definition_path, "' not found; make sure that ",
    "`extract_definition_path` is the path to the .json extract definition ",
    "file.",
    call. = FALSE
  )
}

# Are the data downloaded, or are we waiting for an extract? Check the file
# system once and derive the complementary flag from that single result
data_downloaded <- file.exists(data_path)
data_not_downloaded <- !data_downloaded
waiting_for_extract <- file.exists(waiting_for_extract_path)

# Ensure that IPUMS_API_KEY environment variable is defined (only needed when
# the data still have to be requested or downloaded)
ipums_api_key_undefined <- Sys.getenv("IPUMS_API_KEY") == ""
if (data_not_downloaded && ipums_api_key_undefined) {
  stop(
    "Environment variable 'IPUMS_API_KEY' is undefined. Make sure you've ",
    "followed the instructions under 'Setting up your API key' in the ",
    "ipumsr API vignette (`vignette(\"ipums-api\", package = \"ipumsr\")`) ",
    "before running this script.",
    call. = FALSE
  )
}

# If not yet waiting for extract, create and submit a new extract and create
# the "waiting_for_extract" flag file
if (data_not_downloaded && !waiting_for_extract) {
  extract_definition <- define_extract_from_json(extract_definition_path)
  submitted_extract <- submit_extract(extract_definition)
  # Store the ID as "collection:number" so a later run can look it up
  writeLines(
    paste0(submitted_extract$collection, ":", submitted_extract$number),
    con = waiting_for_extract_path
  )
  waiting_for_extract <- TRUE
}

# If waiting for an extract, read extract ID from flag file and check the
# status
if (data_not_downloaded && waiting_for_extract) {
  extract_id <- readLines(waiting_for_extract_path)
  extract_info <- get_extract_info(extract_id)
  extract_is_ready <- is_extract_ready(extract_info)
  # "completed" but not ready is treated as stale (files no longer available);
  # `isTRUE()` keeps the flag a single TRUE/FALSE even if `status` is missing
  extract_is_stale <- !extract_is_ready &&
    isTRUE(extract_info$status == "completed")

  if (extract_is_stale) {
    stop(
      paste0(
        "The data files for ", extract_info$collection, " extract number ",
        extract_info$number, " have been removed from IPUMS servers. ",
        "Please delete the file '", waiting_for_extract_path, "' and re-run ",
        "the template."
      ),
      call. = FALSE
    )
  }

  # If the extract is ready, download files and rename to match the JSON file,
  # then delete the waiting_for_extract flag file
  if (extract_is_ready) {
    orig_ddi_path <- download_extract(extract_info, download_dir = data_dir)
    # The data file path is derived from the returned codebook (.xml) path
    orig_data_path <- gsub("\\.xml$", ".dat.gz", orig_ddi_path)
    ddi_file_successfully_renamed <- file.rename(orig_ddi_path, ddi_path)
    data_file_successfully_renamed <- file.rename(orig_data_path, data_path)
    if (!ddi_file_successfully_renamed || !data_file_successfully_renamed) {
      stop(
        "Problem renaming DDI and/or data file; please report bug at ",
        "https://github.com/ipums/ipumsr/issues, including a copy of this ",
        "file if possible.",
        call. = FALSE
      )
    }
    data_downloaded <- TRUE
    waiting_file_successfully_removed <- file.remove(waiting_for_extract_path)
    if (!waiting_file_successfully_removed) {
      stop(
        "Unable to remove 'waiting_for_extract.txt'; please report bug at ",
        "https://github.com/ipums/ipumsr/issues, including a copy of this ",
        "file if possible.",
        call. = FALSE
      )
    }
  } else {
    # If extract is not ready, stop execution
    stop(
      "NOT AN ERROR: ", extract_info$collection, " extract number ",
      extract_info$number, " is not yet ready to download. Try ",
      "re-running again later.",
      call. = FALSE
    )
  }
}
# Read the codebook (DDI) first, then use it to read the data file
ddi <- read_ipums_ddi(ddi_path)
data <- read_ipums_micro(ddi, data_file = data_path)

# Display the loaded data
data
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.