#' @title Join US ACS data and US geo files on FIPS
#' @description Join the processed tables of estimates and MOE (margin of error) for American Community Survey (ACS)
#' 5-year summary file data (obtained from the US Census FTP site) with geographic information from the geo file.
#' @param mygeo Required geo file. See [get.acs()] and [get.read.geo()]
#' @param my.list.of.tables List of data tables resulting from prior steps in [get.acs()]
#' @param folder Default is current working directory.
#' @param save.csv FALSE by default. Specifies whether to save each data table as a csv file.
#' @param testing Default is FALSE. If TRUE, prints more information.
#' @param sumlevel Default is "both"; specifies whether "tracts", "bg" (block groups), or "both" should be kept in the results.
#' @param end.year Default is set by acsdefaultendyearhere_func(); used in naming the output file if save.csv = TRUE.
#' @return Returns a list of data.frames, where each element of the list is one ACS table, such as table B01001.
#' @seealso [get.acs()] and [get.read.geo()]
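#' @examples
#' \dontrun{
#' # Illustrative sketch only: the objects geo and tablist are hypothetical stand-ins
#' # for the outputs of the geo-reading and table-parsing steps of get.acs().
#' merged <- join.geo.to.tablist(mygeo = geo, my.list.of.tables = tablist,
#'                               sumlevel = 'tracts', save.csv = FALSE)
#' names(merged)    # one data.frame per ACS table, e.g., "B01001"
#' str(merged[[1]])
#' }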
#' @export
join.geo.to.tablist <-
  function(mygeo,
           my.list.of.tables,
           save.csv = FALSE,
           sumlevel = 'both',
           folder = getwd(),
           testing = FALSE,
           end.year = acsdefaultendyearhere_func()) {
    # Join (merge) the US ACS data tables and the US geo file on FIPS (the KEY column).
    # The geo join is done one table at a time (one table per sequence file).
    # That is somewhat inefficient, since the geo merge is repeated instead of done once,
    # but it can be useful to have one output file per table.
    validate.end.year(end.year)
    for (i in seq_along(my.list.of.tables)) {
      if (testing) {
        print('length of my.list.of.tables')
        print(length(my.list.of.tables))
        print('structure of my.list.of.tables')
        print(str(my.list.of.tables))
        print('structure of mygeo')
        print(str(mygeo))
      }
      bigtable <- my.list.of.tables[[i]]
      # remove the redundant columns
      bigtable <-
        bigtable[, !(names(bigtable) %in% c("STUSAB", "SEQUENCE", "LOGRECNO"))]
      # NOTE: This is very slow as written and takes too much RAM, so it can fail. It can take 5 minutes on a slow machine.
      # plyr::join is much faster than merge (and data.table merge is faster also), according to
      # http://stackoverflow.com/questions/1299871/how-to-join-data-frames-in-r-inner-outer-left-right/1300618#1300618
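      # A possibly faster alternative (untested sketch, commented out; assumes the
      # data.table package is installed and that both inputs share the "KEY" column):
      #   geo.dt <- data.table::as.data.table(mygeo)
      #   tab.dt <- data.table::as.data.table(bigtable)
      #   my.list.of.tables[[i]] <- as.data.frame(merge(geo.dt, tab.dt, by = "KEY"))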
      my.list.of.tables[[i]] <-
        merge(mygeo, bigtable, by = "KEY")
      # DROP ROWS WE DON'T NEED, IF ANY
      if (sumlevel == 'tracts') {
        my.list.of.tables[[i]] <-
          subset(my.list.of.tables[[i]],
                 my.list.of.tables[[i]]$SUMLEVEL == '140')
      }
      if (sumlevel == 'bg') {
        my.list.of.tables[[i]] <-
          subset(my.list.of.tables[[i]],
                 my.list.of.tables[[i]]$SUMLEVEL == '150')
      }
      # MIGHT WANT TO DO ERROR CHECKING HERE FOR LENGTH & HOW MANY FAIL TO MATCH
      # print(overlaps(mygeo$KEY, bigtable$KEY))
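      # One possible check (sketch only, commented out): count geo rows with no
      # matching data row, while bigtable is still available.
      #   n.unmatched <- sum(!(mygeo$KEY %in% bigtable$KEY))
      #   if (n.unmatched > 0) {
      #     warning(n.unmatched, ' geo rows did not match table ', names(my.list.of.tables)[i])
      #   }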
      rm(bigtable)
      if (save.csv) {
        this.tab <- names(my.list.of.tables)[i]
        write.csv(
          my.list.of.tables[[i]],
          file = file.path(
            folder,
            paste("ACS", end.year, "-", this.tab, ".csv", sep = "")
          ),
          row.names = FALSE
        )
        # save(bigtable, file=paste("ACS", end.year, "-", this.tab, ".RData", sep=""))
      }
    }
    return(my.list.of.tables)
  }