# R/get.county.info.R
#
# Defines functions: get.county.info
#
# Documented in: get.county.info

#' @title Get info on US Counties
#'
#' @description Reports some or all columns of a table of data about
#'   US Counties (and county equivalents): for each queried county,
#'   for every county in each queried state, or (if there is no query) for all counties.
#'   Query terms can be a 5-digit county FIPS,
#'   or  countyname, statename   or  countyname, ST   or just   statename   or just   2-letter state abbrev.
#'   Matching ignores case, spaces and commas.
#'    Montgomery, MD   will not work.   Montgomery County, Maryland   will work.
#'
#'   Updated 6/2022 to the ACS 2020 county list (this relies on proxistat::countiesall).
#'
#'   Requested fields can include any of these: ST, countyname, FIPS.COUNTY, statename, fullname
#' @details  The county table is stored as package data, so one can also say data(counties, package="proxistat")
#'   or x <- countiesall via lazy loading. \cr
#'   Also see various packages like acs package or help( package="choroplethr") \cr
#'   Also see:
#'
#'  <https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.html>
#'
#'   Note this other possible list of abbreviations (not used) lacks US, PR, DC:  \cr
#'   require(datasets); state.abb \cr
#'   Note another possible list of States, abbrev, FIPS
#'   which has island areas but not US total and not leading zeroes on FIPS: \cr
#'   require(acs) \cr
#'   print(fips.state) \cr
#' @param query Vector of search terms. Optional; if missing, all counties are returned.
#'   Can be county 5-digit FIPS code(s) (as numbers or strings with numbers, leading zero optional),
#'   or 'countyname, statename' (matching countiesall$fullname), or 'countyname, ST'.
#'   Alternatively every term can be a whole state (statename or 2-letter abbreviation),
#'   in which case all counties in those states are returned.
#'   Whole-state terms cannot be mixed with individual-county terms:
#'   in a mixed query the whole-state terms are ignored, with a warning.
#' @param fields Character vector, optional, defaults to 'all'. Can specify any of the
#'   column names of the county table, such as 'countyname', 'ST' and/or 'FIPS.COUNTY'.
#'   It is an error to request a field that is not a column of the county table.
#' @return If there is no query, the requested fields of the whole county table
#'   (a vector if only one field is requested, otherwise a data.frame).
#'
#'   If the query is individual counties, a data.frame with one row per query term,
#'   'QUERY' as the first column and then the requested fields; rows for unmatched terms are NA.
#'
#'   If every query term is a state, a data.frame with one row per county in those states,
#'   'QUERY' as the first column and then the requested fields, with states in the order queried.
#'
#'   NA (with a warning) if no query term matched anything.
#'
#' @examples
#'  testdata <- c('01001', 1001, '1001', "Montgomery County, Maryland", "Montgomery County, MD",
#'    'montgomery county, maryland', "Montgomery County MD", "MontgomeryCountyMD",
#'    "Montgomery County", "NY")
#'  testonlystates <- c('NY', 'NJ')
#'  get.county.info(testdata)
#'  get.county.info(testonlystates)
#'  get.county.info(c('New Jersey'))
#'
#' @export
get.county.info <- function(query, fields = 'all') {

  # County table, lazy loaded from the proxistat package. One row per county, e.g.
  #   head(get.county.info())
  #     ST     countyname FIPS.COUNTY statename                fullname
  #   1 AL Autauga County       01001   Alabama Autauga County, Alabama
  #   2 AL Baldwin County       01003   Alabama Baldwin County, Alabama
  lookup.county <- proxistat::countiesall

  # 'all' anywhere in fields means every column.
  if ('all' %in% fields) {
    fields <- names(lookup.county)
  }

  # If any bad fieldnames are specified, show what is available and stop.
  if (!all(fields %in% names(lookup.county))) {
    cat('Available field names:\n'); cat(names(lookup.county)); cat('\n')
    stop('fields requested must all be among fields available')
  }

  # If no query term specified, return entire table of data (or just specified fields).
  if (missing(query)) {
    return(lookup.county[, fields])
  }

  if (anyNA(query)) {
    warning('some NA values in input query', call. = FALSE)
  }

  # Text matching ignores case, commas and all whitespace, so that
  # 'Montgomery County, MD', 'montgomery county md' and ' MontgomeryCountyMD' are equivalent.
  squash <- function(z) toupper(gsub('[,[:space:]]+', '', z))

  x <- as.character(query)  # so that factors are matched by label, not by level code
  upx <- squash(x)

  ####### INDIVIDUAL COUNTIES: find the row of the county table matching each query term (NA if none)

  county.row <- rep(NA_integer_, length(x))

  # 1) County FIPS, compared as numbers so that '01001', '1001' and 1001 all match.
  looks.numeric <- grepl('^[0-9]+$', x)
  county.row[looks.numeric] <- match(
    as.numeric(x[looks.numeric]),
    as.numeric(lookup.county$FIPS.COUNTY)
  )

  # 2) 'countyname, statename'. County names are not unique without the state,
  #    so only the full name is accepted: "St. Croix County, Wisconsin" will work, "St. Croix County" will not.
  hit <- match(upx, squash(lookup.county$fullname))
  county.row[!is.na(hit)] <- hit[!is.na(hit)]

  # 3) 'countyname, ST' (2-letter state abbreviation).
  hit <- match(upx, squash(paste0(lookup.county$countyname, lookup.county$ST)))
  county.row[!is.na(hit)] <- hit[!is.na(hit)]

  ####### WHOLE STATES: a state returns multiple rows / counties, not just one, so this is
  # allowed only if EVERY query term is a state (any mix of ST and statename, like c('NY', 'Alabama')).

  state.row <- match(upx, squash(lookup.county$statename))
  is.valid.statename <- !is.na(state.row)
  # get.state.info() has one row per state or territory (plus the US overall), with columns
  #   FIPS.ST ST statename ftpname REGION is.usa.plus.pr is.usa is.state is.contiguous.us is.island.areas
  # e.g.    01 AL   Alabama Alabama      4           TRUE   TRUE     TRUE             TRUE           FALSE
  # Only its ST column is used here.
  is.valid.ST <- upx %in% ejanalysis::get.state.info()$ST
  is.state <- is.valid.ST | is.valid.statename

  if (any(is.state)) {
    if (all(is.state)) {
      # Convert all to standardized ST. A term that is already an ST is used as is; a statename is
      # translated via the county table, using the same normalized names that just validated it.
      st <- upx
      st[!is.valid.ST] <- as.character(lookup.county$ST)[state.row[!is.valid.ST]]

      key <- data.frame(QUERY = query, upx = st, stringsAsFactors = FALSE)
      results <- merge(lookup.county, key, by.x = 'ST', by.y = 'upx')
      # merge() sorts by ST, so put the states back in the order they were queried.
      results <- results[order(match(results$QUERY, query)), c('QUERY', fields), drop = FALSE]
      rownames(results) <- NULL
      return(results)  # stops here since format is different than if each query element returns one county
    }
    # mix of states and counties - not good
    warning('All or none of queried places must be a full state abbreviation like NY or statename like New York - if others are individual counties, the full state queries will be ignored')
  }

  if (all(is.na(county.row))) {
    warning('No matches found for what should be county or state identifiers.', call. = FALSE)
    return(NA)
  }

  # An NA row index yields a row of NAs, so unmatched terms keep their place in the output.
  results <- lookup.county[county.row, fields, drop = FALSE]
  rownames(results) <- NULL
  data.frame(QUERY = query, results, stringsAsFactors = FALSE)
}
# ejanalysis/ejanalysis documentation built on April 2, 2024, 10:12 a.m.