#' @title Get information on U.S. State(s)
#' @description Query information about States, from proxistat package data in data(lookup.states, package='proxistat')
#' @details
#' See proxistat package for data source (<http://ejanalysis.github.io/proxistat/>)
#' For 1+ or all US States plus DC, PR, Island Areas (and USA overall for use in FTP URL):\cr\cr
#' EPA Region, FIPS, State name, abbreviation for State(s); based on any of these query methods: \cr\cr
#' State's FIPS, State's name, OR State's abbreviation, (i.e., FIPS.ST, statename, or ST). \cr\cr
#' Also see data in packages [acs] and [choroplethr] \cr\cr
#' Also see <http://www.census.gov/geo/reference/docs/state.txt> and <http://www.census.gov/geo/reference/ansi.html> \cr
#' # Note on definitions of is.usa, is.contiguous.us, etc.: \cr
#' <https://www.census.gov/geo/reference/gtc/gtc_usa.html> \cr
#' <https://www.census.gov/geo/reference/gtc/gtc_codes.html> \cr
#' <https://www.census.gov/geo/reference/gtc/gtc_island.html> \cr
#' <http://en.wikipedia.org/wiki/Contiguous_United_States> \cr\cr
#' Also note this other possible list of abbreviations (not used) lacks US, PR, DC: \cr
#' require(datasets); state.abb \cr\cr
#' Note another possible list of States, abbrev, FIPS \cr
#' which has island areas but not US total and not leading zeroes on FIPS: \cr
#' require(acs) \cr
#' print(fips.state) \cr\cr
#' Note FIPS were also available here: \cr
#' State: <http://www.census.gov/geo/reference/ansi_statetables.html> \cr
#' County: <http://www.census.gov/geo/www/codes/county/download.html> \cr\cr
#' Also see <https://www.census.gov/geo/reference/state-area.html> for info on state area and internal point
#' @param query vector of 1+ elements, which can be \cr
#' state FIPS code(s) (as numbers or strings with numbers), \cr
#' state name(s) (exactly matching formats here), or \cr
#' 2-letter state abbreviation(s) (case insensitive). \cr
#' @param fields vector of 1+ character string names of the fields available here:
#' FIPS.ST, ST, statename, ftpname, REGION, is.usa.plus.pr, is.usa, is.state, is.contiguous.us, is.island.areas, and others (see below)
#' @return A data.frame with one row per query term, consisting of a QUERY column (the query terms as given)
#' followed by all or specified fields of information for the matching states/dc/pr/island areas (NA in rows where a query term is not recognized),
#' or NA if none of the query terms can be matched.\cr\cr
#' If no query term, or fields not specified, then all information fields are returned: \cr
#' get.state.info()[1:2, ]\cr\cr
#' statename FIPS.ST ST ftpname REGION is.usa.plus.pr is.usa is.state is.contiguous.us \cr
#' 1 Alabama 01 AL Alabama 4 TRUE TRUE TRUE TRUE \cr
#' 2 Alaska 02 AK Alaska 10 TRUE TRUE TRUE FALSE \cr \cr
#'
#' is.island.areas area.sqmi area.sqkm landarea.sqmi landarea.sqkm waterarea.sqmi waterarea.sqkm \cr
#' 1 FALSE 52420 135767 50645 131171 1775 4597 \cr
#' 2 FALSE 665384 1723337 570641 1477953 94743 245383 \cr \cr
#'
#' inland.sqmi inland.sqkm coastal.sqmi coastal.sqkm greatlakes.sqmi greatlakes.sqkm \cr
#' 1 1058 2740 517 1340 0 0 \cr
#' 2 19304 49997 26119 67647 0 0 \cr \cr
#'
#' territorial.sqmi territorial.sqkm lat lon \cr
#' 1 199 516 32.73963 -86.84346 \cr
#' 2 49320 127739 63.34619 -152.83707 \cr
#' @seealso [clean.fips()]
#' @examples
#' # data(lookup.states, package='proxistat')
#' # x <- get.state.info(); str(x); cat('\n'); x[ 1:2, ]
#' # get.state.info(c('alaska','north carolina', 'montana', "hawaii"),
#' #   fields=c('ST','statename','REGION'))
#' # get.state.info('DC'); get.state.info('U.S. Virgin Islands'); get.state.info(4)
#' # get.state.info(c('New york','alaska','North Carolina','MONTANA', 'typo'))
#' # get.state.info(c('ny','DC','AK','mt', 'PR'))
#' # get.state.info( c(36, 36, 'ny', ' ny', 'ny ', 'California', 'DC','AK','mt', 'PR',
#' #   '02', 2, 'North carolina') )
#' # get.state.info(1:80)
#' @export
get.state.info <- function(query, fields='all') {
  # Look up rows of the proxistat lookup.states table by state FIPS code,
  # state name, or 2-letter abbreviation, and return the requested fields.
  #
  # query:  vector of state FIPS codes (numbers or digit strings), state names,
  #         or 2-letter abbreviations (case, spaces and commas are ignored).
  #         If missing, the table itself is returned (restricted to `fields`).
  # fields: column names of lookup.states to return, or 'all' for every column.
  #
  # Returns a data.frame with a QUERY column followed by the requested fields,
  # one row per query term (all-NA fields where a term was not recognized),
  # or NA (with a warning) if no query term matched at all.
  # Stops with an error if any requested field is not a column of the table.

  # Load the table into this function's own frame. data() defaults to
  # envir = .GlobalEnv, which would create or overwrite a global lookup.states
  # as a side effect of every call.
  data("lookup.states", package = 'proxistat', envir = environment())

  available <- names(lookup.states)

  # %in% never yields NA, so an NA among `fields` falls through to the
  # validity check below instead of breaking the if() condition.
  if ('all' %in% fields) {
    fields <- available
  }
  if (!all(fields %in% available)) {
    stop(
      'fields requested must all be among fields available: ',
      paste(available, collapse = ' '),
      call. = FALSE
    )
  }

  # No query term: return the entire table (or just the specified fields).
  if (missing(query)) {
    return(lookup.states[, fields])
  }

  if (anyNA(query)) {
    warning('some NA values in input query', call. = FALSE)
  }

  # Work on a character copy so factor and numeric queries behave alike.
  x <- as.character(query)
  # The USA-overall row has an NA FIPS code; map NA queries to '0' (never a
  # valid FIPS code) so they cannot match that row.
  x[is.na(x)] <- '0'
  # Strip leading/trailing whitespace so that ' NY' still matches.
  x <- gsub('^\\s+|\\s+$', '', x)

  # Comparison key: upper case with spaces and commas removed.
  normalize <- function(z) {
    gsub(',', '', gsub(' ', '', toupper(z), fixed = TRUE), fixed = TRUE)
  }
  key <- normalize(x)

  # Row of lookup.states matched by each query term (NA = no match yet).
  matched_row <- rep(NA_integer_, length(x))

  # 1. FIPS codes. Require at least one digit: an empty string would become
  #    NA under as.numeric() and match the NA FIPS code of the USA row.
  is_fips <- grepl('^[0-9]+$', x)
  table_fips <- as.numeric(as.character(lookup.states$FIPS.ST))
  matched_row[is_fips] <- match(as.numeric(x[is_fips]), table_fips)

  # 2. State names (letters, spaces, punctuation only); overrides step 1.
  name_row <- match(key, normalize(lookup.states$statename))
  name_row[!grepl('^[[:space:][:alpha:][:punct:]]*$', key)] <- NA
  by_name <- !is.na(name_row)
  matched_row[by_name] <- name_row[by_name]

  # 3. Two-letter abbreviations (letters and spaces only); overrides 1 and 2.
  st_row <- match(key, normalize(lookup.states$ST))
  st_row[!grepl('^[[:space:][:alpha:]]*$', key)] <- NA
  by_st <- !is.na(st_row)
  matched_row[by_st] <- st_row[by_st]

  # Decide "nothing matched" from the row index itself, not from the first
  # requested field, which can legitimately be NA for a matched row.
  if (all(is.na(matched_row))) {
    warning('No matches found for what should be state identifiers.', call. = FALSE)
    return(NA)
  }

  # A single subset keeps the table's column types; NA indices give NA rows.
  results <- lookup.states[matched_row, fields, drop = FALSE]
  rownames(results) <- NULL

  # The query may contain duplicates, so it is reported as a column rather
  # than as row names.
  data.frame(QUERY = query, results, stringsAsFactors = FALSE)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.