#Weather Data Functions...
#' Read data from NOAA's Climate Data Online Web Services
#'
#' Queries NOAA's Web Services according to the following documentation:
#' \url{http://www.ncdc.noaa.gov/cdo-web/webservices/v2}.
#'
#' To use this function you need to get a key for NOAA's API.
#' This is free and you can generate one here: \url{https://www.ncdc.noaa.gov/cdo-web/token}.
#' All weather functions in rterm look for an object in the global environment
#' called "noaa_key". You can deal with this by taking the following line:
#'
#' noaa_key <- "your_key_here"
#'
#' and placing it either before the library(rterm) statement, or, preferably, in
#' a .Rprofile that gets run when you load R.
#' \url{http://www.statmethods.net/interface/customizing.html}
#'
#' By default the queries return only 25 entries with a maximum limit of
#' 1000, so be mindful that often much of what you thought you were
#' querying is not returned.
#'
#' @param table the data table to query from, examples include
#' "locations", "stations", "data"
#' @param param extra parameters for the search
#'
#' @seealso \code{\link{read.ghcn}}
#'
#' @examples
#' # Show available datasets
#' read.noaa("datasets")
#'
#' # Show available data types of category "TEMP"
#' read.noaa("datatypes", "datacategoryid=TEMP&limit=100")
#'
#' # Show locations for the location category of US State
#' read.noaa("locations", "locationcategoryid=ST&limit=51")
#'
#' # Show Stations in Washington State recording Temperature GHCN
#' # daily datasets since 2010
#' tmp <- read.noaa("stations", paste("locationid=FIPS:53",
#' "datacategoryid=TEMP",
#' "datasetid=GHCND",
#' "startdate=2010-01-01",
#' "limit=1000", sep = "&"))
#'
read.noaa <- function(table, param = NULL, quietly = TRUE) {
if(!exists("noaa_key")) {
stop("Must have a noaa key to read weather data. See help(read.noaa)")
}
urlx <- paste("http://www.ncdc.noaa.gov/cdo-web/api/v2/", table, sep = "")
if(!is.null(param)) {
urlx <- paste(urlx, "?", param, sep = "")
}
if(!quietly) print(paste("Attempting to query:", urlx))
tmp <- httr::GET(urlx, httr::add_headers(token = noaa_key))
if(!quietly) {
print(httr::content(tmp))
print(noaa_key)
}
if(length(grep("We are sorry, but the website you were looking for is currently unavailable",
httr::content(tmp, "text")))) {
stop("Uh oh, it looks like the NOAA webservices are down")
}
jsonlite::fromJSON(httr::content(tmp, "text"))$results
}
read.noaa.json <- function(table, param = NULL) {
if(!exists("noaa_key")) {
stop("Must have a noaa key to read weather data. See help(read.noaa)")
}
urlx <- paste("http://www.ncdc.noaa.gov/cdo-web/api/v2/", table, sep = "")
if(!is.null(param)) {
urlx <- paste(urlx, "?", param, sep = "")
}
print(paste("Attempting to query:", urlx))
tmp <- httr::GET(urlx, httr::add_headers(token = noaa_key))
jsonlite::fromJSON(httr::content(tmp, "text"))
}
# Here is a stationid... for testing...
# stationid <- "GHCND:USW00024233"
# startdate <- "2012-01-01"
# enddate <- "2012-02-01"
read.one.ghcn <- function(stationid, startdate, enddate) {
# Set-up the parameters for the call to NOAA
startdate <- as.character(startdate)
enddate <- as.character(enddate)
param <- paste0("datasetid=GHCND&",
"stationid=", stationid, "&",
"startdate=", startdate, "&",
"enddate=", enddate, "&",
"datatypeid=TMIN,TMAX&",
"limit=1000")
# Query the NOAA API for the data
dset <- read.noaa("data", param)
if(is.null(dset)) {
warning(paste("No data found from NOAA w/ param", param))
return(NULL)
}
# Clean it up and return
dset <- dset %>%
dplyr::mutate(date = as.Date(date)) %>%
dplyr::mutate(value = C_to_F(value / 10)) %>%
dplyr::select(date, value, datatype) %>%
tidyr::spread(datatype, value) %>%
dplyr::mutate(aveTemp = (TMIN + TMAX) / 2)
names(dset)[names(dset) == "TMIN"] <- "tmin"
names(dset)[names(dset) == "TMAX"] <- "tmax"
dset
}
read.one.ghcn.monthly <- function(stationid, startdate, enddate) {
# Set-up the parameters for the call to NOAA
startdate <- as.character(startdate)
enddate <- as.character(enddate)
param <- paste0("datasetid=GHCNDMS&",
"stationid=", stationid, "&",
"startdate=", startdate, "&",
"enddate=", enddate, "&",
"datatypeid=MNTM&",
"limit=1000")
# Query the NOAA API for the data
dset <- read.noaa("data", param)
if(is.null(dset)) {
warning(paste("No data found from NOAA w/ param", param))
return(NULL)
}
# Clean it up and return
dset <- dset %>%
dplyr::mutate(date = as.Date(date)) %>%
dplyr::mutate(value = C_to_F(value / 10)) %>%
dplyr::select(date, value, datatype) %>%
tidyr::spread(datatype, value)
names(dset)[names(dset) == "MNTM"] <- "aveTemp"
dset
}
# startdate <- "2010-01-01"
# enddate <- "2014-01-01"
#' Retrive GHCN Weather Data
#'
#' Given a station id and start and end dates, retrieves daily
#' temperature data from NOAA's API. This wraps the function
#' read.one.ghcn, which reads data one year at a time (that's the
#' maximum you're allowed to get in a single query), which itself
#' gets raw data with the \code{\link{read.noaa}} function.
#'
#' Two things to note: 1) you need a key from NOAA to access the data.
#' This is free and you can generate one here: \url{https://www.ncdc.noaa.gov/cdo-web/token}.
#' All weather functions in rterm look for an object in the global environment
#' called "noaa_key". You can deal with this by taking the following line:
#'
#' noaa_key <- "your_key_here"
#'
#' and placing it either before the library(rterm) statement, or, preferably, in
#' a .Rprofile. \url{http://www.statmethods.net/interface/customizing.html}
#'
#' 2) You need to find a weather station to use! Probably the best way
#' to do this for now is to use the NOAA web search at
#' \url{http://www.ncdc.noaa.gov/cdo-web/search}, where you search for
#' "Daily Summaries" and "Stations" with the appropriate date range and a
#' keyword. This will bring up a map where you can click on icons for weather
#' stations and it will report an "ID" that looks something like
#' "GHCND:US1WAKG0179"
#'
#' @param stationid the identifier for the weather station
#' @param startdate the first date of data requested
#' @param enddate the last date of data requested. Dates should
#' be POSIX date class or coercible to such with as.Date
#'
#'
#' @return a data frame with variables "date", "TMIN", "TMAX", and
#' "aveTemp". Temperatures in Fahrenheit
#'
#' @seealso \code{\link{read.noaa}} \code{\link{stationSearch}}
#'
#' @examples
#' weather <- read.ghcn("GHCND:USW00024233", "2010-01-01", "2014-12-31")
#'
read.ghcn <- function(stationid, startdate, enddate) {
stDate <- try(as.Date(startdate))
if(inherits(stDate, "try-error")) {
stop(paste("Could not parse start date", startdate))
}
edDate <- try(as.Date(enddate))
if(inherits(edDate, "try-error")) {
stop(paste("Could not parse end date", enddate))
}
# We can download 1 year at a time
interval <- as.numeric(difftime(edDate, stDate))
nCalls <- ceiling(interval / 365)
dsets <- lapply(1:nCalls, function(i) {
read.one.ghcn(stationid, stDate + (i - 1) * 365, min(edDate, stDate + i * 365 - 1))
})
dset <- do.call('rbind', dsets)
if(is.null(dset)) return(NULL)
# Fill in NA values...
allDates <- seq(from = stDate, to = edDate, by = 1)
isMissing <- setdiff(allDates, dset$date)
if(length(isMissing)) {
isMissing <- as.Date(isMissing, origin = "1970-01-01")
dset <- rbind(dset,
data.frame("date" = isMissing, "tmin" = NA, "tmax" = NA, "aveTemp" = NA))
}
dset <- plyr::arrange(dset, date)
dset$interpolated <- FALSE
if(sum(is.na(dset$aveTemp))) {
xx <- 1:nrow(dset)
missingTemps <- is.na(dset$aveTemp)
newvals <- approx(xx, dset$aveTemp, xout = xx[missingTemps])$y
dset$aveTemp[missingTemps] <- newvals
dset$interpolated[missingTemps] <- TRUE
}
return(dset)
}
read.ghcn.monthly <- function(stationid, startdate, enddate) {
stDate <- try(as.Date(startdate))
if(inherits(stDate, "try-error")) {
stop(paste("Could not parse start date", startdate))
}
edDate <- try(as.Date(enddate))
if(inherits(edDate, "try-error")) {
stop(paste("Could not parse end date", enddate))
}
fullInt <- lubridate::interval(stDate, edDate)
monthsToGet <- lubridate::as.period(fullInt) %/% months(1)
# We can download 100 months at a time
nCalls <- ceiling(monthsToGet / 100)
dsets <- lapply(1:nCalls, function(i) {
stTmp <- stDate + (i - 1) * months(101)
edTmp <- min(edDate, stDate + i * months(100))
read.one.ghcn.monthly(stationid, stTmp, edTmp)
})
dset <- do.call('rbind', dsets)
if(is.null(dset)) return(NULL)
# Fill in NA values...
allDates <- stDate + months(0:monthsToGet)
isMissing <- setdiff(allDates, dset$date)
if(length(isMissing)) {
isMissing <- as.Date(isMissing, origin = "1970-01-01")
dset <- rbind(dset,
data.frame("date" = isMissing, "aveTemp" = NA))
}
dset <- plyr::arrange(dset, date)
return(dset)
}
calcDistance <- function(lat1, lon1, lat2, lon2) {
if(lat1 == lat2 & lon1 == lon2) {
return(0)
}
degRad<-pi/180
phi1 <- (90 - lat1) * degRad
phi2 <- (90 - lat2) * degRad
theta1 <- lon1 * degRad
theta2 <- lon2 * degRad
cosTotal <- sin(phi1) * sin(phi2) * cos(theta1 - theta2) + cos(phi1) * cos(phi2)
arc <- acos(cosTotal)
return(arc * 3960)
}
#' Search for GHCN Weather Stations
#'
#' Search by location name, station name or latitude/longitude for GHCN weather stations
#' that can be loaded with \code{\link{read.ghcn}}. Note that you need to
#' acquire a \href{https://developers.google.com/maps/documentation/geocoding/?hl=en_US#api_key}{Google Maps API key} for the default functionality, which is to
#' search for stations close to a latitude/longitude as returned by a Google
#' Maps geocode.
#'
#' @param geocode The default input. A string that will be sent to
#' Google Maps to get a latitude/longitude. This must be loaded into
#' your R session as "google_key"
#' @param name A name to search for in the list of station names.
#' Evaluated as a regular expression.
#' @param lat Latitude
#' @param lon Longitude
#' @param nClosest The number of stations to return when searching
#' on latitude & longitude. Example: nClosest = 10 will show the
#' 10 closest stations. Optional, defaults to 5.
#' @param country an optional specification of country
#' @param state an optional specification of us state
#' @param type specify either GHCN or TMY for the type of station to search for
#'
#' @return a data frame with information about the relevant stations found.
#' In the case that you specified lat/lon instead of a name this will
#' include station distance in miles from the searched location.
#'
#' @seealso \code{\link{read.ghcn}} \code{\link{stationCompare}}
#' \code{\link{read.noaa}}
#'
#' @examples
#' # Look for a weather station in Bend, Oregon
#' stationSearch("Bend, OR")
#'
#' # Look for a TMY station near Bend, Oregon
#' stationSearch("Bend, OR", type = "TMY")
#'
#' # Find the closest weather stations to a lat/lon pair
#' # specifying a location in the Columbia Basin
#' stationSearch(lat = 46.943, lon = -119.240)
#'
#' # It can help to specify state for a common name
#' stationSearch("Madison")
#' stationSearch("Madison", state = "wisconsin")
#'
#' # Do we have weather sites in Switzerland?
#' stationSearch(country = "Switzerland")
#'
#'
stationSearch <- function(geocode = NULL, stationName = NULL, lat = NULL, lon = NULL, nClosest = 5, country = NULL, state = NULL, elevThreshold = NULL, type = "GHCN") {
if(type == "GHCN") {
stationsTmp <- stations
} else if(type == "TMY") {
stationsTmp <- tmyStations
} else {
stop("Must specify either GHCN or TMY for stationSearch")
}
# Check for a country and/or state
if(!is.null(country)) {
stationsTmp <- stationsTmp[grep(country, stationsTmp$country, ignore.case = TRUE), ]
}
if(!is.null(state)) {
stationsTmp <- stationsTmp[grep(state, stationsTmp$state, ignore.case = TRUE), ]
}
if(!nrow(stationsTmp)) {
print("No stationsTmp found")
return(stationsTmp)
}
stationsTmp$milesDistant <- NA
# Check if we need to geocode
elev <- NULL
if(!is.null(geocode)) {
locs <- geocode(geocode)
if(is.null(locs)) {
return(NULL)
}
lat <- as.numeric(locs[1])
lon <- as.numeric(locs[2])
elev <- as.numeric(locs[3])
}
# Now search by either stationName or lat/lon
if(!is.null(stationName)) {
if(type == "GHCN") {
varTmp <- "name"
} else if(type == "TMY") {
varTmp <- "site"
}
ind <- grep(stationName, stationsTmp[, varTmp], ignore.case = TRUE)
if(!length(ind)) {
return(NULL)
}
} else if(!is.null(lat) & !is.null(lon)) {
stationsTmp$milesDistant <- sapply(1:nrow(stationsTmp), function(i) {
calcDistance(stationsTmp$latitude[i], stationsTmp$longitude[i], lat, lon)
})
stationsTmp <- plyr::arrange(stationsTmp, milesDistant)
ind <- seq(from = 1, to = min(nrow(stationsTmp), nClosest), by = 1)
# Assign lat and lon of search center to stationsTmp
} else {
ind <- 1:nrow(stationsTmp)
}
if(!is.null(elevThreshold)) {
stationsTmp <- stationsTmp[abs(stationsTmp$elevation - elev) < elevThreshold, ]
}
stationsTmp <- stationsTmp[, !(names(stationsTmp) %in% c("country", "state"))]
# stationsTmp <- subset(stationsTmp, select = -c(country, state))
attr(stationsTmp, "lat") <- lat
attr(stationsTmp, "lon") <- lon
attr(stationsTmp, "elev") <- elev
attr(stationsTmp, "gmapsSearch") <- geocode
stationsTmp <- stationsTmp[ind, ]
stationsTmp
}
tmySearch <- function(geocode = NULL, stationName = NULL, lat = NULL, lon = NULL, nClosest = 5, country = NULL, state = NULL, elevThreshold = NULL) {
stationSearch(geocode, stationName, lat, lon, nClosest, country, state, elevThreshold, type = "TMY")
}
ghcnSearch <- function(geocode = NULL, stationName = NULL, lat = NULL, lon = NULL, nClosest = 5, country = NULL, state = NULL, elevThreshold = NULL) {
stationSearch(geocode, stationName, lat, lon, nClosest, country, state, elevThreshold, type = "GHCN")
}
smoothTemps <- function(dset, days = 14, var = "aveTemp") {
# Make sure we have enough records to smooth
if(days >= min(table(dset$id))) {
days <- 1
}
# Take a rolling mean for presentation
fsmooth <- rep(1 / days, days)
dset <- do.call('rbind', by(dset, dset$id, function(x) {
x$maTemp <- stats::filter(x[var], fsmooth, sides = 2)
x
}))
# Scale by the mean at each date, to make the
# comparison easier to see
dset <- do.call('rbind', by(dset, dset$date, function(x) {
x$scaledTemp <- scale(x$maTemp, center = TRUE, scale = FALSE)
x$scaledTempRaw <- scale(x[var], center = TRUE, scale = FALSE)
x
}))
dset
}
#' Compare GHCN Weather Stations
#'
#' Compare weather stations returned by \code{\link{stationSearch}}
#'
#'
#'
#' @param st Data frame of stations returned by \code{\link{stationSearch}}
#' @param startdate the start date of the interval to compare
#' @param enddate the end date of the interval to compare
#'
#' The start and end date arguments should either be POSIX date objects,
#' or coercible to with as.Date. So for example "2013-01-01" is appropriate.
#'
#' @return An object of S3 class stationComp. This is a list with two entries
#' 1) data - the weather data for the requested stations over the requested interval
#' and 2) stations - the station info data frame.
#'
#' @seealso \code{\link{summary.stationComp}} \code{\link{plot.stationComp}}
#' \code{\link{read.ghcn}}
#'
#' @examples
#' # Compare 5 closest weather stations to Ecotope's Seattle office
#' stations <- stationSearch(lat = 47.6569326, lon = -122.3184546)
#' comp <- stationCompare(stations, "2014-01-01", "2014-12-31")
#' summary(comp)
#' plot(comp)
#'
#'
stationCompare <- function(st, startdate, enddate) {
ids <- stations$id
dsets <- lapply(1:nrow(st), function(i) {
dset <- try(read.ghcn(st$id[i], startdate, enddate))
if(inherits(dset, "try-error")) {
return(NULL)
}
if(!is.null(dset)) {
dset <- merge(dset, st[i, ])
}
dset
})
dset <- do.call("rbind", dsets)
stationComp <- list("data" = dset, "stations" = st)
class(stationComp) <- "stationComp"
stationComp
}
stationCompareMonthly <- function(st, startdate, enddate) {
ids <- stations$id
dsets <- lapply(1:nrow(st), function(i) {
dset <- try(read.ghcn.monthly(st$id[i], startdate, enddate))
if(inherits(dset, "try-error")) {
return(NULL)
}
if(!is.null(dset)) {
dset <- merge(dset, st[i, ])
}
dset
})
dset <- do.call("rbind", dsets)
stationComp <- list("data" = dset, "stations" = st)
class(stationComp) <- "stationComp"
stationComp
}
#' Summarize a comparison of GHCN weather stations
#'
#' Summarize a comparison of weather stations returned by
#' \code{\link{stationCompare}}
#'
#'
#' @param sc stationComp object as returned by \code{\link{stationCompare}}
#'
#'
#' @return a data frame summarizing the weather stations. This includes
#' the station name and id, as well as some summary stats to hopefully
#' help you choose. "dataFrac" refers to the fraction of non-missing data
#' in the observed interval ("datacoverage" from stationSearch refers to
#' the entire history of the weather station). "relativeTemp" refers to
#' the average temperature of that station, with respect to the average
#' temperature of all stations in the comparison. If you searched by
#' latitude/longitude, you also get "milesDistant", which is miles from your
#' search coordinates to the weather station.
#'
#' @seealso \code{\link{stationSearch}} \code{\link{stationComp}}
#' \code{\link{plot.stationComp}} \code{\link{read.ghcn}}
#'
#' @examples
#' # Compare 5 closest weather stations to Ecotope's Seattle office
#' stations <- stationSearch(lat = 47.6569326, lon = -122.3184546)
#' comp <- stationCompare(stations, "2014-01-01", "2014-12-31")
#' summary(comp)
#'
#'
summary.stationComp <- function(sc, days = 14) {
# We want to report the following...
# 1) Observed Data Fraction
# 2) Degrees above or below average between selected stations
dset <- smoothTemps(sc$data, days)
sumStats <- do.call('rbind', by(dset, dset$id, function(x) {
data.frame("dataFrac" = sum(!x$interpolated) / nrow(x),
"relativeTemp" = mean(x$scaledTempRaw, na.rm = TRUE),
"aveTemp" = mean(x$aveTemp, na.rm = TRUE),
"id" = x$id[1])
}))
results <- merge(sc$stations, sumStats)
results <- dplyr::mutate(results,
relTempScaled = 1 - abs(relativeTemp) / sum(abs(relativeTemp)))
if(is.null(sc$stations$milesDistant)) {
results <- results %>%
dplyr::mutate(useIndex = (dataFrac + relTempScaled) / 2)
} else {
results <- results %>%
dplyr::mutate(distInd = 1 - milesDistant / sum(milesDistant)) %>%
dplyr::mutate(useIndex = (dataFrac + relTempScaled + distInd) / 3)
}
results <- plyr::arrange(results, -useIndex)
results <- results[, names(results) %in% c("id", "name", "milesDistant", "dataFrac", "relativeTemp", "aveTemp")]
results
}
#' Plot a comparison of GHCN weather stations
#'
#' Plot a comparison of weather stations returned by
#' \code{\link{stationCompare}}
#'
#'
#' @param sc stationComp object as returned by \code{\link{stationCompare}}
#' @param days The number of days for the rolling average. Optional,
#' defaults to 14.
#' @param var The variable to plot. Optional, defaults to "aveTemp". Can
#' also be "tmin" or "tmax". See \code{\link{read.ghcn}}
#' @param type The type of view, can be "actual" or "relative". "actual"
#' will plot the recorded temperatures, while "relative" will plot the
#' temperatures with reference to the mean temperature. "relative" tends
#' to be more useful in visualizing the differences between stations.
#' This argument is optional and defaults to "relative".
#' @param xvar What variable to plot against. Defaults to "date" but can
#' also be "doy" - day of year. "date" gives a long rolling line, while
#' "doy" gives multiple lines on top of each other per station.
#'
#'
#' @return a ggplot object of the graphic
#'
#' @seealso \code{\link{stationSearch}} \code{\link{stationComp}}
#' \code{\link{plot.stationComp}} \code{\link{read.ghcn}}
#'
#' @examples
#' # Compare 5 closest weather stations to Ecotope's Seattle office
#' stations <- stationSearch("Seattle")
#' comp <- stationCompare(stations, "2014-01-01", "2014-12-31")
#' plot(comp)
#' plot(comp, "days" = 60)
#' plot(comp, var = "tmin", type = "actual")
#'
#'
plot.stationComp <- function(sc, days = 14, var = "aveTemp", type = "actual", xvar = "date") {
dset <- smoothTemps(sc$data, days, var)
if(var == "aveTemp") {
varLong <- "Average Temperature"
} else if(var == "tmin") {
varLong <- "Minimum Temperature"
} else if(var == "tmax") {
varLong <- "Maximum Temperature"
} else {
stop(paste("Unrecognized variable", var))
}
title1 <- paste(days, "day Rolling", varLong)
if(type == "relative") {
title2 <- paste(title1, "\nRelative to Mean Across Selected Weather Stations", sep = "")
} else {
title2 <- title1
}
if(xvar == "doy") {
dset$dofm <- format(dset$date, "%d")
dset$m <- format(dset$date, "%m")
dset$doy <- as.Date(paste("2000", dset$m, dset$dofm, sep = "-"))
dset$year <- as.numeric(format(dset$date, format = "%Y"))
dset$groupvar <- interaction(dset$name, dset$year)
}
if(type == "actual") {
yvar = "maTemp"
} else if(type == "relative") {
yvar = "scaledTemp"
}
p <- ggplot2::ggplot(dset) + ggplot2::theme_bw() +
ggplot2::ggtitle(title2) +
ggplot2::xlab("") + ggplot2::ylab(paste(title1, "(F)"))
if(xvar == "doy") {
p <- p + ggplot2::geom_line(ggplot2::aes_string(x = xvar, y = yvar, col = "name", group = "groupvar", size = "year"), alpha = .8) +
scale_x_date(breaks = "2 months", labels = scales::date_format("%B")) +
scale_size_continuous(range = c(.2, 1.5))
} else {
p <- p + ggplot2::geom_line(ggplot2::aes_string(x = xvar, y = yvar, col = "name"))
}
p
}
removeStation <- function(comp, name) {
tmp <- grep(name, comp$stations$name, ignore.case = TRUE)
if(length(tmp) > 0) {
badName <- comp$stations$name[tmp]
for(name in badName) {
comp$stations <- comp$stations[-tmp, ]
comp$data <- comp$data[comp$data$name != name, ]
}
} else {
stop(paste("Could not find station", name))
}
comp
}
# Bonus function to compare weather at a given site for x years...
#' Look at a time trend within a station, for example the past 5 years
#'
#' Plot a comparison of weather stations returned by
#' \code{\link{stationCompare}}
#'
#'
#' @param weather a weather dataset returned by read.ghcn
#' @param var The variable to plot. Optional, defaults to "aveTemp". Can
#' also be "tmin" or "tmax". See \code{\link{read.ghcn}}
#' @param days The number of days for the rolling average. Optional,
#' defaults to 14.
#' @param type The type of view, can be "actual" or "relative". "actual"
#' will plot the recorded temperatures, while "relative" will plot the
#' temperatures with reference to the mean temperature. "relative" tends
#' to be more useful in visualizing the differences between stations.
#' This argument is optional and defaults to "relative".
#'
#' @return a list containing "data" the dataset w/ additional smoothed
#' variable(s) and "plot", the ggplot object
#'
#' @seealso \code{\link{stationSearch}} \code{\link{stationComp}}
#' \code{\link{plot.stationComp}} \code{\link{read.ghcn}}
#'
#' @examples
#' # Look at the last 5 years from Ted Stevens Intl Airport in Alaska
#' anc <- read.ghcn("GHCND:USW00026451", "2010-01-01", "2015-04-15")
#' ancTrend <- stationTrend(anc)
#' ancTrend$plot
#'
#'
stationTrend <- function(weather, var = "aveTemp", days = 60, type = "actual") {
weather$year <- as.numeric(format(weather$date, format = "%Y"))
weather$day <- as.numeric(format(weather$date, format = "%j"))
# Add a relative temperature variable
weather <- do.call("rbind", by(weather, weather$day, function(x) {
x$relTemp <- as.numeric(scale(x[, var], center = TRUE, scale = FALSE))
x
}))
weather <- arrange(weather, date)
if(days > 1) px <- "Smoothed" else px <- ""
if(var == "aveTemp") {
vname <- "Average Temp"
} else if(var == "tmin") {
vname <- "Min Temp"
} else if(var == "tmax") {
vname <- "Max Temp"
}
tx <- paste(px, type, vname)
fsmooth <- rep(1 / days, days)
weather$maTemp <- stats::filter(weather$relTemp, fsmooth, sides = 2)
weather$actualSmoothed <- stats::filter(weather[var], fsmooth, sides = 2)
weather$dofm <- format(weather$date, "%d")
weather$m <- format(weather$date, "%m")
weather$dateDummy <- as.Date(paste("2000", weather$m, weather$dofm, sep = "-"))
# p <- ggplot(weather[!is.na(weather$maTemp), ]) + theme_bw() +
p <- ggplot(weather) + theme_bw() +
scale_x_date(breaks = "2 months", labels = scales::date_format("%B")) +
ggtitle(tx) +
xlab("") + ylab(paste(tx, "(F)")) +
scale_colour_discrete(name = "Year")
if(type == "relative") {
p <- p + geom_line(aes(x = dateDummy, y = maTemp, col = factor(year)))
} else if(type == "actual") {
p <- p + geom_line(aes(x = dateDummy, y = actualSmoothed, col = factor(year)))
}
p
# list("data" = weather, "plot" = p)
}
geocode <- function(name, quietly = TRUE) {
if(!exists("google_key")) {
stop("Must Register a Google Maps API Key to Search by City/Address")
}
name <- enc2utf8(gsub(" ", "%20", name))
urlx <- paste("https://maps.google.com/maps/api/geocode/json?address=",
name, "&key=", google_key, sep = "")
if(!quietly) print(paste("Attempting to query:", urlx))
tmp <- httr::GET(urlx)
results <- jsonlite::fromJSON(httr::content(tmp, "text"))$results
if(length(results) == 0) {
warning(paste("Could Not Find City/Address", name))
return(NULL)
}
if(nrow(results) != 1) {
warning(paste("Ambiguous City/Address", name))
return(NULL)
}
code <- as.numeric(results$geometry$location)
names(code) <- c("lat", "lon")
urly <- paste("https://maps.googleapis.com/maps/api/elevation/json?locations=",
code['lat'], ",", code['lon'], "&key=", google_key, sep = "")
tmp <- httr::GET(urly)
check_results <- jsonlite::fromJSON(httr::content(tmp, "text"))
if(check_results$status == "REQUEST_DENIED"){
stop("Google Maps API Key request denied, check your key")
}
results <- check_results$results
code <- c(code, "elevation" = as.numeric(results['elevation']))
Sys.sleep(.15)
code
}
getElevation <- function(name, quietly = TRUE) {
if(!exists("google_key")) {
stop("Must Register a Google Maps API Key to Search by City/Address")
}
name <- enc2utf8(gsub(" ", "%20", name))
# urlx <- paste("https://maps.google.com/maps/api/elevation/json?sensor=false&address=",
# name, "&key=", google_key, sep = "")
urlx <- paste("https://maps.googleapis.com/maps/api/elevation/json?address=",
name, sep = "")
if(!quietly) print(paste("Attempting to query:", urlx))
tmp <- httr::GET(urlx)
results <- jsonlite::fromJSON(httr::content(tmp, "text"))$results
if(nrow(results) != 1) {
warning(paste("Ambiguous City/Address", name))
return(NULL)
}
geocode <- as.numeric(results$geometry$location)
names(geocode) <- c("lat", "lon")
Sys.sleep(.15)
geocode
}
assignStations <- function(dset, varname, quietly = TRUE) {
locs <- unique(dset[, varname])
stationInfo <- do.call('rbind', lapply(locs, function(x) {
if(!quietly) {
print(paste("Assigning Station for", x))
}
locInfo <- geocode(x)
stations <- stationSearch(x, nClosest = 1, elevThreshold = 200)
if(is.null(stations)) return(NULL)
names(stations)[names(stations) == "id"] <- "stationid"
stations[varname] <- x
stations
}))
merge(dset,
stationInfo[, c(varname, "stationid", "name", "datacoverage", "elevation", "milesDistant")],
by = varname, all.x = TRUE)
}
# -------------Mapping utilities
stationMap <- function(x, ...) {
UseMethod("stationMap")
}
# Map just the station locations
stationMap.data.frame <- function(stations, zoom = 10) {
location = c("lon" = attributes(stations)$lon,
"lat" = attributes(stations)$lat)
df <- subset(stations, select = c(name, longitude, latitude))
df <- rbind(df, data.frame("name" = "Search Location",
"longitude" = location[1],
"latitude" = location[2]))
mapTmp <- ggmap::get_map(location, maptype = "terrain", color = "bw", zoom = zoom)
ggmap::ggmap(mapTmp) +
ggplot2::geom_point(data = df[-nrow(df), ], ggplot2::aes(longitude, latitude, col = name), size = 6) +
ggplot2::geom_point(data = df[nrow(df), ], ggplot2::aes(longitude, latitude, col = name), shape = 17, size = 4) +
ggplot2::scale_colour_discrete(name = "Weather Station") +
ggplot2::ggtitle(paste(" NOAA GHCN Weather Stations Near", attributes(stations)$gmapsSearch)) +
ggplot2::xlab("Longitude") + ggplot2::ylab("Latitude")
}
stationMap.stationComp <- function(comp, zoom = 10, type = "relative", art = FALSE) {
location = c("lon" = attributes(comp$stations)$lon,
"lat" = attributes(comp$stations)$lat)
dfl <- data.frame("lon" = attributes(comp$stations)$lon,
"lat" = attributes(comp$stations)$lat)
df <- merge(summary(comp, days = 1),
comp$stations[, c("id", "longitude", "latitude", "elevation")])
df$elevation <- df$elevation * 3.28
df$lat2 <- df$latitude + .015
if(type == "relative") {
legLabel <- "Relative Temp(F)"
vname <- "relativeTemp"
} else if(type == "actual") {
legLabel <- "Average Temp (F)"
vname <- "aveTemp"
} else {
stop(paste("Unrecognized plot type", type))
}
mapTmp <- ggmap::get_map(location, maptype = "terrain", color = "bw", zoom = zoom)
p <- ggmap::ggmap(mapTmp) +
ggplot2::geom_point(data = dfl, ggplot2::aes(lon, lat), col = "black", size = 4, shape = 2) +
ggplot2::scale_colour_continuous(low = "blue", high = "red", name = legLabel) +
ggplot2::ggtitle(paste("NOAA GHCN Weather Stations Near", attributes(comp$stations)$gmapsSearch, "\n",
min(comp$data$date), "to", max(comp$data$date))) +
ggplot2::xlab("Longitude") + ggplot2::ylab("Latitude")
if(art) {
p <- p + ggplot2::geom_point(data = df, ggplot2::aes_string(x = "longitude", y = "lat2", col = vname, size = "elevation")) +
ggplot2::scale_size_continuous(name = "Elevation", range = c(2.5, 5))
} else {
p <- p + ggplot2::geom_text(data = df, ggplot2::aes_string(x = "longitude", y = "lat2", col = vname, label = "name", size = "dataFrac")) +
ggplot2::geom_point(data = df, ggplot2::aes_string(x = "longitude", y = "latitude", col = vname), size = 4, shape = 2) +
ggplot2::scale_size_continuous(name = "Data Fraction", range = c(2, 4))
}
p
}
# In case you wanted precipitation
read.one.precip <- function(stationid, startdate, enddate) {
param <- paste0("datasetid=GHCND&",
"stationid=", stationid, "&",
"startdate=", startdate, "&",
"enddate=", enddate, "&",
"datatypeid=PRCP&",
"limit=1000")
# Query the NOAA API for the data
dset <- read.noaa("data", param)
if(is.null(dset)) {
print(paste("Null dset for", stationid, startdate, enddate))
return(NULL)
}
dset$date <- as.Date(dset$date)
dset$precip <- dset$value / 10 * 0.0393701
dset[, c("date", "precip")]
}
read.precip <- function(stationid, startdate, enddate) {
stDate <- try(as.Date(startdate))
if(inherits(stDate, "try-error")) {
stop(paste("Could not parse start date", startdate))
}
edDate <- try(as.Date(enddate))
if(inherits(edDate, "try-error")) {
stop(paste("Could not parse end date", enddate))
}
# We can download 1 year at a time
interval <- as.numeric(difftime(edDate, stDate, units = "days"))
nCalls <- ceiling(interval / 365)
dsets <- lapply(1:nCalls, function(i) {
read.one.precip(stationid, stDate + (i - 1) * 365, min(edDate, stDate + i * 365 - 1))
})
dset <- do.call('rbind', dsets)
allDates <- seq(from = stDate, to = edDate, by = 1)
isMissing <- setdiff(allDates, dset$date)
if(length(isMissing)) {
isMissing <- as.Date(isMissing, origin = "1970-01-01")
dset <- rbind(dset,
data.frame("date" = isMissing, "precip" = NA))
}
dset <- plyr::arrange(dset, date)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.