knitr::opts_chunk$set(echo = TRUE)
I'm implementing a timezone lookup for arbitrary locations within the US. The data I have to work with looks like this:
stainfo <- read.csv("inst/data/stainfo.csv") glimpse(stainfo)
The useable information is containe in the columns dec_lat_va
, dec_lon_va
, and StateCode
. There are services available to lookup timezone given a latitude/longitude pair (e.g. here, here, and here), but I'd rather not rely on these. My limited experience has shown them to be slow--only processing one location per request--and anyway I'd prefer to keep this project as self-contained as possible.
To minimize the number of API calls I'll rely as much as I can on the statecode, and use a simple lookup table, scraped from Wikipedia using the rvest package.
This will require a function I've often found handy when splitting strings:
# First, a handy function for vectorized splitting of strings splitPiece <- function (strvec, split, piece, ...) { spl <- strsplit(strvec, split = split, ...) out <- vapply(spl, `[`, character(1), piece) out } # Now get that table library(rvest) library(dplyr) url <- "https://en.wikipedia.org/wiki/List_of_time_offsets_by_U.S._state" tzTable <- url %>% read_html(encoding = "UTF-8") %>% html_nodes(xpath = '//*[@id="mw-content-text"]/table[1]') %>% html_table() %>% `[[`(1) %>% setNames(make.names(names(.))) %>% mutate(Time.Offsets = gsub("−", "-", Time.Offsets), singleOffset = ifelse(grepl("\n", Time.Offsets), NA, markstats::splitPiece(Time.Offsets, " ", 1))) glimpse(tzTable)
Now I'll make a table to replace offsets with Olson names (not technically an exact mapping, as Olson names reflect more nuance than offset, but this is a necessary approximation.)
unique(tzTable$Time.Offsets) olsonTbl <- data.frame(offset = sprintf("UTC%03+d:00", c(-10:-5)), olson = c("Pacific/Honolulu", "America/Anchorage", "America/Los_Angeles", "America/Denver", "America/Chicago", "America/New_York"), stringsAsFactors = FALSE) kable(olsonTbl)
I'll also need a table of FIPS state codes, since that's how state info is given in the siteInfo table. Once again I'll use rvest and Wikipedia:
library(xml2) library(rvest) scUrl <- "https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code" scTable <- scUrl %>% read_html(encoding = "UTF-8") %>% html_nodes(xpath = '//*[@id="mw-content-text"]/table[1]') %>% html_table() %>% `[[`(1) %>% setNames(make.names(names(.))) #%>% # mutate(Time.Offsets = gsub("−", "-", Time.Offsets)) glimpse(scTable)
The workflow is then:
stateNames <- scTable$Name[match(stainfo$StateCode, scTable$Numeric.code)] %>% gsub("District of Columbia", "Washington, D.C.", .) stateOffsets <- tzTable$singleOffset[match(stateNames, tzTable$State)] stateOlson <- olsonTbl$olson[match(stateOffsets, olsonTbl$offset)]
Now put this into a function.
geonamesTZ <- function(lat, lon, geonamesUser) { force(geonamesUser) oneTZ <- function(lat, lon) { url <- sprintf("http://api.geonames.org/timezoneJSON?lat=%s&lng=%s&radius=10&username=%s", lat, lon, geonamesUser) res <- GET(url) stop_for_status(res) out <- fromJSON(content(res, as = "text", encoding = "UTF-8"))$timezoneID if (is.null(out)) stop("No timezone recognized for location.") out } out <- Map(oneTZ, lat = lat, lon = lon) out } lookupTZ <- function(lat, lon, statecode, geonamesUser = "markwh") { data(scTable) data(tzTable) data(olsonTbl) state <- scTable$Name[match(statecode, scTable$Numeric.code)] state <- gsub("District of Columbia", "Washington, D.C.", state) stateOffsets <- tzTable$singleOffset[match(state, tzTable$State)] stateOlson <- olsonTbl$olson[match(stateOffsets, olsonTbl$offset)] if (sum(is.na(stateOlson)) > 0) { nas <- is.na(stateOlson) stateOlson[nas] <- geonamesTZ(lat = lat, lon = lon, geonamesUser = geonamesUser) } stateOlson }
Try it out!
lookupTZ(lat = 43, lon = -73, statecode = 1:50)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.