## Utility functions: date handling, time-series padding, GAM smoothing
## helpers and plotting support.
#
## Author: David Carslaw
## useful utility functions
## with some updates and modification by Karl Ropkins
###############################################################################
## Extract the calendar year/month at either end of a date vector.
## Each helper orders the dates (keeping NAs, as order() does by
## default), takes the relevant extreme and returns it as a number.
startYear <- function(dat) {
  first <- min(dat[order(dat)])
  as.numeric(format(first, "%Y"))
}
endYear <- function(dat) {
  last <- max(dat[order(dat)])
  as.numeric(format(last, "%Y"))
}
startMonth <- function(dat) {
  first <- min(dat[order(dat)])
  as.numeric(format(first, "%m"))
}
endMonth <- function(dat) {
  last <- max(dat[order(dat)])
  as.numeric(format(last, "%m"))
}
## these are pre-defined type that need a field "date"; used by cutData
## Conditioning types derived from the "date" field (used by cutData);
## anything not in this list must be an existing column in the data.
dateTypes <- c(
  "year", "hour", "month", "season", "weekday", "weekend",
  "monthyear", "gmtbst", "bstgmt", "dst", "daylight",
  "seasonyear", "yearseason"
)
###############################################################################
## function to find averaging period of data, returns "xx sec"
## for use in filling in gaps in time series data
## it finds the table of values of time gaps and picks the biggest
## can't think of better way unless user specifies what the time interval is meant to be
find.time.interval <- function(dates) {
  ## Estimate the sampling interval of a (possibly unordered, multi-site)
  ## date vector, returned as a string "xx sec" for use with
  ## seq(..., by = ) when padding time-series gaps. The most common gap
  ## between unique, ordered dates is taken as the underlying interval.
  dates <- unique(dates) ## several sites => duplicated time stamps
  ## most common gap, in the native units of the date class
  ## (removed a redundant second unique() that was applied here before)
  id <- which.max(table(diff(as.numeric(dates[order(dates)]))))
  seconds <- as.numeric(names(id))
  if (inherits(dates, "POSIXt")) seconds <- paste(seconds, "sec")
  if (inherits(dates, "Date")) {
    ## Date differences are in days; convert to seconds
    seconds <- seconds * 3600 * 24
    seconds <- paste(seconds, "sec")
  }
  seconds
}
date.pad2 <- function(mydata, type = NULL, interval = "month") {
  ## Pad a data frame so every date between the first and last
  ## observation appears at the given interval. Data are assumed to
  ## have already been split by type, so type columns on rows created
  ## by the join are filled from the first row.
  dates <- mydata$date
  full.seq <- seq(min(dates, na.rm = TRUE), max(dates, na.rm = TRUE),
                  by = interval)
  mydata <- full_join(mydata, data.frame(date = full.seq), by = "date")
  ## fill in the type columns on any newly created rows
  if (!is.null(type)) {
    mydata[type] <- mydata[1, type]
  }
  ## return in date order
  arrange(mydata, date)
}
## #################################################################
# Function to pad out missing time data
# assumes data have already been split by type, so just take first
# tries to work out time interval of input based on most common gap
# can print assumed gap to screen
date.pad <- function(mydata, type = NULL, print.int = FALSE) {
  ## Pad out missing rows in a regular time series so that every
  ## expected time stamp between the first and last date is present.
  ## The data are assumed to have been split by type already, so type
  ## columns on padded rows are copied from the first row. The time
  ## interval is inferred from the most common gap in the data (see
  ## find.time.interval).
  ##
  ## Args:
  ##   mydata    data frame with a "date" column (Date or POSIXct)
  ##   type      optional column name(s) to fill on padded rows
  ##   print.int print the assumed interval to the console?
  ## keep the original time zone so it can be restored at the end
  ## (the attribute is NULL on Windows for BST, hence the GMT fallback)
  TZ <- attr(mydata$date, "tzone")
  if (is.null(TZ)) TZ <- "GMT"
  start.date <- min(mydata$date, na.rm = TRUE)
  end.date <- max(mydata$date, na.rm = TRUE)
  ## interval as "xx sec" (previously find.time.interval was called a
  ## second time for POSIXct input, returning the same value)
  interval <- find.time.interval(mydata$date)
  ## equivalent number of days, used to refine the interval for
  ## month/year data and for Date (rather than POSIXct) input
  days <- as.numeric(strsplit(interval, split = " ")[[1]][1]) /
    24 / 3600
  if (inherits(mydata$date, "Date")) {
    interval <- paste(days, "day")
  }
  ## a dominant 31 day / 365-366 day gap implies monthly or annual
  ## data, where the gap length varies between periods
  if (days == 31) interval <- "month"
  if (days %in% c(365, 366)) interval <- "year"
  ## only pad if there are gaps in the sequence
  if (length(unique(diff(mydata$date))) != 1L) {
    all.dates <- data.frame(date = seq(start.date, end.date, by = interval))
    mydata <- mydata %>% full_join(all.dates, by = "date")
    ## fill type columns on the padded rows, if requested
    if (!is.null(type)) {
      mydata[type] <- mydata[1, type]
    }
  }
  ## restore the original time zone
  attr(mydata$date, "tzone") <- TZ
  if (print.int) {
    print(paste0("Input data time interval assumed is ", interval))
  }
  ## make sure date-sorted
  mydata <- arrange(mydata, date)
  mydata
}
#############################################################################################
## utility function to convert decimal date to POSIXct
decimalDate <- function(x, date = "date") {
  ## Convert a decimal-year date column (e.g. 2001.5) to POSIXct (GMT),
  ## truncated to the hour. Returns the input data frame with its
  ## "date" column replaced by the converted values.
  result <- x
  dec <- x[, date]
  yr <- floor(dec)
  frac <- dec - yr                       # fraction of the year elapsed
  ## length of each year in seconds (handles leap years)
  yr.start <- ISOdate(yr, 1, 1, 0, 0, 0)
  secs.in.year <- unclass(ISOdate(yr + 1, 1, 1, 0, 0, 0)) - unclass(yr.start)
  actual <- yr.start + frac * secs.in.year
  result$date <- as.POSIXct(trunc(actual, "hours"), "GMT")
  result
}
##' Calculate rollingMean values
##'
##' Calculate rollingMean values taking account of data capture thresholds
##'
##' This is a utility function mostly designed to calculate rolling
##' mean statistics relevant to some pollutant limits e.g. 8 hour
##' rolling means for ozone and 24 hour rolling means for
##' PM10. However, the function has a more general use in helping to
##' display rolling mean values in flexible ways e.g. with the rolling
##' window width left, right or centre aligned.
##'
##' The function will try and fill in missing time gaps to get a full
##' time sequence but return a data frame with the same number of rows
##' supplied.
##'
##' @param mydata A data frame containing a \code{date}
##' field. \code{mydata} must contain a \code{date} field in
##' \code{Date} or \code{POSIXct} format. The input time series must
##' be regular e.g. hourly, daily.
##' @param pollutant The name of a pollutant e.g. \code{pollutant = "o3"}.
##' @param width The averaging period (rolling window width) to use
##' e.g. \code{width = 8} will generate 8-hour rolling mean values
##' when hourly data are analysed.
##' @param new.name The name given to the new rollingMean variable. If
##' not supplied it will create a name based on the name of the
##' pollutant and the averaging period used.
##' @param data.thresh The data capture threshold in %. No values are
##' calculated if data capture over the period of interest is less
##' than this value. For example, with \code{width = 8} and
##' \code{data.thresh = 75} at least 6 hours are required to calculate
##' the mean, else \code{NA} is returned.
##' @param align specifies how the moving window should be
##' aligned. \code{"right"} means that the previous \code{hours}
##' (including the current) are averaged. This seems to be the default
##' for UK air quality rolling mean statistics. \code{"left"} means
##' that the forward \code{hours} are averaged, and \code{"centre"} or
##' \code{"center"}, which is the default.
##' @param ... other arguments, currently unused.
##' @export
##' @author David Carslaw
##' @keywords methods
##' @examples
##'
##' ## rolling 8-hour mean for ozone
##' mydata <- rollingMean(mydata, pollutant = "o3", width = 8, new.name =
##' "rollingo3", data.thresh = 75, align = "right")
##'
##'
rollingMean <- function(mydata, pollutant = "o3", width = 8, new.name = "rolling",
                        data.thresh = 75, align = "centre", ...) {
  ## Calculate rolling means via the C++ backend, respecting a data
  ## capture threshold and window alignment; see the roxygen block
  ## above for the argument details. Data are padded to a regular
  ## series first and any padded rows are removed again on return.
  ## get rid of R check annoyances
  site <- NULL
  if (!align %in% c("left", "right", "centre", "center")) {
    stop("align should be one of 'right', 'left', 'centre' or 'center'.")
  }
  ## default name encodes the window and pollutant, e.g. "rolling8o3"
  if (missing(new.name)) {
    new.name <- paste("rolling", width, pollutant, sep = "")
  }
  ## scalar condition, so use || rather than element-wise |
  if (data.thresh < 0 || data.thresh > 100) {
    stop("Data threshold must be between 0 and 100.")
  }
  calc.rolling <- function(mydata, ...) {
    ## rolling means only make sense for numeric data
    if (!is.numeric(mydata[[pollutant]])) {
      warning("Data are not numeric.")
      return(mydata)
    }
    ## remember the original dates so padded rows can be dropped again
    dates <- mydata$date
    ## pad missing hours to obtain a regular series
    mydata <- date.pad(mydata)
    ## window longer than the data: nothing to calculate
    if (width > nrow(mydata)) return(mydata)
    mydata[[new.name]] <- .Call("rollingMean", mydata[[pollutant]],
                                width, data.thresh, align,
                                PACKAGE = "ggopenair")
    if (length(dates) != nrow(mydata)) {
      ## return only what was put in; avoids adding missing data
      ## e.g. for factors
      mydata <- mydata[mydata$date %in% dates, ]
    }
    mydata
  }
  ## apply per site when a site column is present
  if ("site" %in% names(mydata)) {
    mydata <- group_by(mydata, site) %>%
      do(calc.rolling(., ...))
    mydata
  } else {
    mydata <- calc.rolling(mydata, ...)
    mydata
  }
}
convert.date <- function(mydata, format = "%d/%m/%Y %H:%M") {
  ## Parse a character date column into POSIXct (GMT) using the
  ## supplied strptime format; returns the modified data frame.
  parsed <- strptime(mydata$date, format = format)
  mydata$date <- as.POSIXct(parsed, "GMT")
  mydata
}
#############################################################################################
## splits data frame into date chunks. Allows users to supply simple dates and labels
## useful for type = "site", interventions
##' Divide up a data frame by time
##'
##' Utility function to prepare input data for use in openair functions
##'
##' This function partitions a data frame up into different time segments. It
##' produces a new column called controlled by \code{name} that can be used in many
##' \code{openair} functions. Note that there must be one more label than there
##' are dates. See examples below and in full \code{openair} documentation.
##'
##' @param mydata A data frame containing a \code{date} field in hourly or high
##' resolution format.
##' @param dates A date or dates to split data by.
##' @param labels Labels for each time partition.
##' @param name The name to give the new column to identify the periods split
##' @export
##' @author David Carslaw
##' @keywords methods
##' @examples
##'
##' ## split data up into "before" and "after"
##' mydata <- splitByDate(mydata, dates = "1/04/2000",
##' labels = c("before", "after"))
##'
##' ## split data into 3 partitions:
##' mydata <- splitByDate(mydata, dates = c("1/1/2000", "1/3/2003"),
##' labels = c("before", "during", "after"))
##'
##'
splitByDate <- function(mydata, dates = "1/1/2003",
                        labels = c("before", "after"), name = "split.by") {
  ## Partition a data frame into time periods, adding an ordered factor
  ## column (named by `name`) labelling each period. There must be
  ## exactly one more label than there are split dates.
  if (missing(mydata)) stop("No data frame was supplied!")
  mydata <- checkPrep(mydata, names(mydata), "default", remove.calm = FALSE)
  ## check there are sufficient labels for the number of dates
  if (length(dates) != length(labels) - 1) {
    stop("There is a mis-match between dates and labels. There should be
one more label than date")
  }
  ## dates may be dd/mm/yyyy or yyyy-mm-dd; match the class of the date
  ## column so the numeric comparison below is consistent
  if (length(grep("/", as.character(dates))) > 0) {
    ## UK format dd/mm/yyyy
    if (inherits(mydata$date, "Date")) {
      dates <- as.Date(as.POSIXct(strptime(dates, "%d/%m/%Y"), "GMT"))
    } else {
      dates <- as.POSIXct(strptime(dates, "%d/%m/%Y"), "GMT")
    }
  } else {
    ## assume format yyyy-mm-dd
    if (inherits(mydata$date, "Date")) {
      dates <- as.Date(dates)
    } else {
      dates <- as.POSIXct(dates, "GMT")
    }
  }
  ## -Inf/Inf end breaks ensure every row is labelled; the previous
  ## fixed lower break at 0 silently dropped dates before 1970 (whose
  ## numeric representation is negative)
  mydata[, name] <- cut(as.numeric(mydata$date),
                        breaks = c(-Inf, as.numeric(dates), Inf),
                        labels = labels, ordered_result = TRUE)
  mydata
}
#############################################################################################
## function to make it easy to use d/m/y format for subsetting by date
##' Subset a data frame based on date
##'
##' Utility function to make it easier to select periods from a data frame
##' before sending to a function
##'
##' This function makes it much easier to select periods of interest from a
##' data frame based on dates in a British format. Selecting date/times in R
##' format can be intimidating for new users. This function can be used to
##' select quite complex dates simply - see examples below.
##'
##' Dates are assumed to be inclusive, so \code{start = "1/1/1999"}
##' means that times are selected from hour zero. Similarly, \code{end
##' = "31/12/1999"} will include all hours of the 31st
##' December. \code{start} and \code{end} can also be in standard R
##' format as a string i.e. "YYYY-mm-dd", so \code{start =
##' "1999-01-01"} is fine.
##'
##' All options are applied in turn making it possible to select quite complex
##' dates
##'
##' @param mydata A data frame containing a \code{date} field in hourly or high
##' resolution format.
##' @param start A start date string in the form d/m/yyyy
##' e.g. \dQuote{1/2/1999} or in \sQuote{R} format
##' i.e. \dQuote{YYYY-mm-dd}, \dQuote{1999-02-01}
##' @param end See \code{start} for format.
##' @param year A year or years to select e.g. \code{year = 1998:2004} to
##' select 1998-2004 inclusive or \code{year = c(1998, 2004)} to select 1998
##' and 2004.
##' @param month A month or months to select. Can either be numeric e.g.
##' \code{month = 1:6} to select months 1-6 (January to June), or by name
##' e.g. \code{month = c("January", "December")}. Names can be abbreviated to
##' 3 letters and be in lower or upper case.
##' @param day A day name or or days to select. \code{day} can be
##' numeric (1 to 31) or character. For example \code{day =
##' c("Monday", "Wednesday")} or \code{day = 1:10} (to select the 1st
##' to 10th of each month). Names can be abbreviated to 3 letters and
##' be in lower or upper case. Also accepts \dQuote{weekday} (Monday -
##' Friday) and \dQuote{weekend} for convenience.
##' @param hour An hour or hours to select from 0-23 e.g. \code{hour = 0:12} to
##' select hours 0 to 12 inclusive.
##' @importFrom lubridate dst year month hour force_tz day
##' @export
##' @author David Carslaw
##' @keywords methods
##' @examples
##'
##' ## select all of 1999
##' data.1999 <- selectByDate(mydata, start = "1/1/1999", end = "31/12/1999")
##' head(data.1999)
##' tail(data.1999)
##'
##' # or...
##' data.1999 <- selectByDate(mydata, start = "1999-01-01", end = "1999-12-31")
##'
##' # easier way
##' data.1999 <- selectByDate(mydata, year = 1999)
##'
##'
##' # more complex use: select weekdays between the hours of 7 am to 7 pm
##' sub.data <- selectByDate(mydata, day = "weekday", hour = 7:19)
##'
##' # select weekends between the hours of 7 am to 7 pm in winter (Dec, Jan, Feb)
##' sub.data <- selectByDate(mydata, day = "weekend", hour = 7:19, month =
##' c("dec", "jan", "feb"))
##'
selectByDate <- function(mydata, start = "1/1/2008", end = "31/12/2008",
                         year = 2008, month = 1, day = "weekday", hour = 1) {
  ## Subset a data frame by date/time in flexible ways; each option the
  ## caller supplies (start/end, year, month, day, hour) is applied in
  ## turn. See the roxygen block above for the accepted formats.
  vars <- names(mydata)
  ## check data - mostly date format
  mydata <- checkPrep(mydata, vars, "default", remove.calm = FALSE,
                      strip.white = FALSE)
  ## full weekday names in the current locale, Monday first
  weekday.names <- format(ISOdate(2000, 1, 3:9), "%A")
  ## scalar conditions, so use && rather than element-wise &
  if (!missing(start) && !missing(end)) {
    if (length(grep("/", start)) > 0 && length(grep("/", end)) > 0) {
      ## assume UK date format
      start <- as.Date(start, "%d/%m/%Y")
      end <- as.Date(end, "%d/%m/%Y")
    }
    if (length(grep("-", start)) > 0 && length(grep("-", end)) > 0) {
      ## assume R date format
      start <- as.Date(start)
      end <- as.Date(end)
    }
    ## inclusive at both ends
    mydata <- subset(mydata, as.Date(date) >= start & as.Date(date) <= end)
  }
  if (!missing(year)) {
    mydata <- mydata[which(year(mydata$date) %in% year), ]
  }
  if (!missing(month)) {
    if (is.numeric(month)) {
      if (any(month < 1 | month > 12)) {
        stop("Month must be between 1 to 12.")
      }
      mydata <- mydata[which(month(mydata$date) %in% month), ]
    } else {
      ## match month names on the first three letters, case-insensitive
      mydata <- subset(mydata, substr(tolower(format(date,
        "%B")), 1, 3) %in% substr(tolower(month), 1, 3))
    }
  }
  if (!missing(hour)) {
    if (any(hour < 0 | hour > 23)) stop("Hour must be between 0 to 23.")
    mydata <- mydata[which(hour(mydata$date) %in% hour), ]
  }
  if (!missing(day)) {
    days <- day
    if (is.numeric(day)) {
      if (any(day < 1 | day > 31)) {
        stop("Day must be between 1 to 31.")
      }
      mydata <- mydata[which(day(mydata$date) %in% day), ]
    } else {
      ## "weekday"/"weekend" are convenience shortcuts
      if (day[1] == "weekday") days <- weekday.names[1:5]
      if (day[1] == "weekend") days <- weekday.names[6:7]
      ## match day names on the first three letters, case-insensitive
      mydata <- subset(mydata, substr(tolower(format(date, "%A")), 1, 3) %in%
                         substr(tolower(days), 1, 3))
    }
  }
  mydata
}
### panel functions for plots based on lattice ####################################################
## Lattice panel function that adds a GAM (mgcv) smooth to a plot,
## optionally with a shaded confidence band, or with bootstrap-simulated
## uncertainties (a simple block bootstrap is available for correlated
## data via autocor = TRUE). Relies on lattice panel state
## (current.panel.limits, panel.lines, panel.polygon).
## NOTE(review): the default `k = k` is self-referential; callers must
## supply k (possibly NULL) or evaluating `k` errors — confirm all call
## sites pass it.
panel.gam <- function (x, y, form = y ~ x, method = "loess", k = k, Args, ..., simulate = FALSE, n.sim = 200,
autocor = FALSE, se = TRUE,
level = 0.95, n = 100, col = plot.line$col, col.se = col,
lty = plot.line$lty, lwd = plot.line$lwd, alpha = plot.line$alpha,
alpha.se = 0.20, border = NA, subscripts, group.number, group.value,
type, col.line, col.symbol, fill, pch, cex, font, fontface,
fontfamily)
{
## panel function to add a smooth line to a plot
## Uses a GAM (mgcv) to fit smooth
## Optionally can plot 95% confidence intervals and run bootstrap simulations
## to estimate uncertainties. Simple block bootstrap is also available for correlated data
## get rid of R check annoyances#
plot.line = NULL
## fit on complete cases only
thedata <- data.frame(x = x, y = y)
thedata <- na.omit(thedata)
tryCatch({
if (!simulate) {
## single GAM fit; k (if given) fixes the basis dimension
if (is.null(k)) {
mod <- suppressWarnings(gam(y ~ s(x), select = TRUE, data = thedata, ...))
} else {
mod <- suppressWarnings(gam(y ~ s(x, k = k), select = TRUE, data = thedata, ...))
}
## predict only over the x range visible in the panel
lims <- current.panel.limits()
xrange <- c(max(min(lims$x), min(x, na.rm = TRUE)), min(max(lims$x), max(x, na.rm = TRUE)))
xseq <- seq(xrange[1], xrange[2], length = n)
## normal-theory multiplier for the requested confidence level
std <- qnorm(level / 2 + 0.5)
pred <- predict(mod, data.frame(x = xseq), se = se)
results <- data.frame(date = xseq, pred = pred$fit,
lower = pred$fit - std * pred$se,
upper = pred$fit + std * pred$se)
if (se) {
## shaded confidence band around the smooth
panel.polygon(x = c(xseq, rev(xseq)), y = c(pred$fit -
std * pred$se, rev(pred$fit + std * pred$se)),
col = col.se, alpha = alpha.se, border = border)
pred <- pred$fit
}
panel.lines(xseq, pred, col = col, alpha = alpha, lty = lty, lwd = 2)
} else { ## simulations required
sam.size <- length(x)
lims <- current.panel.limits()
xrange <- c(max(min(lims$x), min(x)), min(max(lims$x), max(x)))
xseq <- seq(xrange[1], xrange[2], length = sam.size)
boot.pred <- matrix(nrow = sam.size, ncol = n.sim)
print ("Taking bootstrap samples. Please wait...")
## set up bootstrap; block resampling when autocorrelation is assumed
block.length <- 1
if (autocor) block.length <- round(sam.size ^ (1 / 3))
index <- samp.boot.block(sam.size, n.sim, block.length)
## predict first
if (is.null(k)) {
mod <- gam(y ~ s(x), data = thedata, ...)
} else {
mod <- gam(y ~ s(x, k = k), data = thedata, ...)
}
residuals <- residuals(mod) ## residuals of the model
pred.input <- predict(mod, thedata)
for (i in 1:n.sim) {
## refit to fitted values plus resampled residuals
new.data <- data.frame(x = xseq, y = pred.input + residuals[index[, i]])
mod <- gam(y ~ s(x), data = new.data, ...)
pred <- predict(mod, new.data)
boot.pred[, i] <- as.vector(pred)
}
## calculate percentiles
percentiles <- apply(boot.pred, 1, function(x) quantile(x, probs = c(0.025, 0.975)))
results <- as.data.frame(cbind(pred = rowMeans(boot.pred),
lower = percentiles[1, ], upper = percentiles[2, ]))
if (se) {
## shaded band from the bootstrap percentiles
panel.polygon(x = c(xseq, rev(xseq)), y = c(results$lower, rev(results$upper)),
col = col.se, alpha = alpha.se, border = border)
}
panel.lines(xseq, pred.input, col = col, alpha = alpha, lty = lty, lwd = 2)
}
results
## NOTE(review): the error handler returns the `return` primitive
## itself rather than a value, so on failure the panel yields a
## function object — presumably an empty result was intended; confirm.
}, error = function(x) return)
}
## version of GAM fitting not for plotting - need to rationalise both...
fitGam <- function (thedata, x = "date", y = "conc", form = y ~ x, k = k,
                    Args, ..., simulate = FALSE, n.sim = 200, autocor = FALSE,
                    se = TRUE, level = 0.95, n = 100) {
  ## Fit a GAM (mgcv) smooth of y against x and return a data frame of
  ## predictions with uncertainty bounds, either from normal theory
  ## (simulate = FALSE) or from a (block) bootstrap of the residuals.
  ## On any fitting error the original data are returned unchanged.
  ##
  ## NOTE(review): the default `k = k` is self-referential; callers are
  ## expected to pass k (possibly NULL) explicitly.
  data.orig <- thedata ## return this if all else fails
  ## work on generic column names x/y internally
  id <- which(names(thedata) == x)
  names(thedata)[id] <- "x"
  id <- which(names(thedata) == y)
  names(thedata)[id] <- "y"
  ## gam() can only fit numeric, so remember the class and convert back
  ## after fitting
  class_x <- class(thedata$x)
  thedata$x <- as.numeric(thedata$x)
  tryCatch({
    if (!simulate) {
      if (is.null(k)) {
        mod <- suppressWarnings(gam(y ~ s(x), select = TRUE, data = thedata, ...))
      } else {
        mod <- suppressWarnings(gam(y ~ s(x, k = k), select = TRUE, data = thedata, ...))
      }
      xseq <- seq(min(thedata$x, na.rm = TRUE), max(thedata$x, na.rm = TRUE),
                  length = n)
      ## normal-theory multiplier for the requested confidence level
      std <- qnorm(level / 2 + 0.5)
      pred <- predict(mod, data.frame(x = xseq), se = se)
      results <- data.frame(date = xseq, pred = pred$fit,
                            lower = pred$fit - std * pred$se,
                            upper = pred$fit + std * pred$se)
    } else { ## simulations required
      sam.size <- nrow(thedata)
      xseq <- seq(min(thedata$x, na.rm = TRUE), max(thedata$x, na.rm = TRUE),
                  length = n)
      ## NOTE(review): xseq has length n but boot.pred and the residual
      ## resampling are sized by sam.size = nrow(thedata); unless
      ## n == sam.size this branch errors and falls through to the
      ## error handler (compare panel.gam, which sizes xseq by
      ## sam.size). Left unchanged pending confirmation.
      boot.pred <- matrix(nrow = sam.size, ncol = n.sim)
      print("Taking bootstrap samples. Please wait...")
      ## set up bootstrap; block resampling when autocorrelation assumed
      block.length <- 1
      if (autocor) block.length <- round(sam.size ^ (1 / 3))
      index <- samp.boot.block(sam.size, n.sim, block.length)
      ## predict first
      if (is.null(k)) {
        mod <- gam(y ~ s(x), data = thedata, ...)
      } else {
        mod <- gam(y ~ s(x, k = k), data = thedata, ...)
      }
      residuals <- residuals(mod) ## residuals of the model
      pred.input <- predict(mod, thedata)
      for (i in 1:n.sim) {
        ## refit to fitted values plus resampled residuals
        new.data <- data.frame(x = xseq, y = pred.input + residuals[index[, i]])
        mod <- gam(y ~ s(x), data = new.data, ...)
        pred <- predict(mod, new.data)
        boot.pred[, i] <- as.vector(pred)
      }
      ## calculate percentiles
      percentiles <- apply(boot.pred, 1,
                           function(x) quantile(x, probs = c(0.025, 0.975)))
      results <- as.data.frame(cbind(pred = rowMeans(boot.pred),
                                     lower = percentiles[1, ],
                                     upper = percentiles[2, ]))
    }
    ## convert the x axis back to its original class. BUG FIX: the old
    ## code set class(results$x), but `results` has no "x" column (the
    ## non-bootstrap branch names it "date"), so setting an attribute
    ## on NULL raised an error and the function always returned the
    ## input unchanged via the tryCatch handler below.
    if ("date" %in% names(results)) {
      class(results$date) <- class_x
    }
    results
  }, error = function(x) {
    data.orig
  })
}
#########################################################################################################
## error in mean from Hmisc
errorInMean <- function (x, mult = qt((1 + conf.int)/2, n - 1), conf.int = 0.95,
                         na.rm = TRUE)
{
  ## Mean with a t-based confidence interval (after Hmisc). Returns
  ## c(Mean, Lower, Upper); the bounds are NA with fewer than two
  ## non-missing values. `mult` is evaluated lazily, after n is known.
  if (na.rm) x <- x[!is.na(x)]
  n <- length(x)
  if (n < 2) {
    return(c(Mean = mean(x), Lower = NA, Upper = NA))
  }
  xbar <- sum(x) / n
  ## standard error of the mean
  se <- sqrt(sum((x - xbar)^2) / n / (n - 1))
  half.width <- mult * se
  c(Mean = xbar, Lower = xbar - half.width, Upper = xbar + half.width)
}
## bootsrap confidence intervals in the mean from Hmisc
bootMean <- function (x, conf.int = 0.95, B = 1000, na.rm = TRUE, reps = FALSE, ...)
{
  ## Bootstrap confidence interval for the mean (after Hmisc). Returns
  ## c(Mean, Lower, Upper); with reps = TRUE the B bootstrap means are
  ## attached as the "reps" attribute.
  if (na.rm) x <- x[!is.na(x)]
  n <- length(x)
  xbar <- mean(x)
  if (n < 2) {
    return(c(Mean = xbar, Lower = NA, Upper = NA))
  }
  ## B resampled means; each draw is a with-replacement sample of size n
  z <- vapply(seq_len(B),
              function(i) sum(x[sample.int(n, n, TRUE, NULL)]),
              numeric(1)) / n
  quant <- quantile(z, c((1 - conf.int) / 2, (1 + conf.int) / 2))
  names(quant) <- NULL
  res <- c(Mean = xbar, Lower = quant[1], Upper = quant[2])
  if (reps) attr(res, "reps") <- z
  res
}
bootMeanDiff <- function (mydata, x = "x", y = "y", conf.int = 0.95, B = 1000, na.rm = TRUE, reps = TRUE)
{
  ## Bootstrap mean difference, taken as y - x. Returns a three-row
  ## data frame: the (bootstrap) mean of x, of y, and of the
  ## difference, each with Lower/Upper confidence bounds.
  x.name <- x
  y.name <- y
  x <- na.omit(mydata[[x]])
  y <- na.omit(mydata[[y]])
  Mean <- mean(y) - mean(x)
  lo <- (1 - conf.int) / 2
  hi <- (1 + conf.int) / 2
  if (nrow(mydata) < 2) {
    ## too few rows for a bootstrap: NA bounds throughout
    res1 <- data.frame(variable = x.name, Mean = mean(x), Lower = NA, Upper = NA)
    res2 <- data.frame(variable = y.name, Mean = mean(y), Lower = NA, Upper = NA)
    res <- data.frame(variable = paste(y.name, "-", x.name),
                      Mean = Mean, Lower = NA, Upper = NA)
    res <- bind_rows(res1, res2, res)
    res$variable <- factor(res$variable)
    return(res)
  }
  ## bootstrap replicates of each mean (x/y now hold the replicates)
  x <- attr(bootMean(x, B = B, reps = TRUE), "reps")
  y <- attr(bootMean(y, B = B, reps = TRUE), "reps")
  quant1 <- unname(quantile(x, c(lo, hi)))
  quant2 <- unname(quantile(y, c(lo, hi)))
  quant <- unname(quantile(y - x, c(lo, hi)))
  res1 <- data.frame(variable = x.name, Mean = mean(x),
                     Lower = quant1[1], Upper = quant1[2])
  res2 <- data.frame(variable = y.name, Mean = mean(y),
                     Lower = quant2[1], Upper = quant2[2])
  res <- data.frame(variable = paste(y.name, "-", x.name),
                    Mean = Mean, Lower = quant[1], Upper = quant[2])
  res <- bind_rows(res1, res2, res)
  res$variable <- factor(res$variable)
  res
}
###########################################################################################################
## list update function
## for lattice type object structure and ... handling
## (currently used by)
## (all openair plots that include colorkey controlled by drawOpenKey)
##listUpdate function
#[in development]
listUpdate <- function(a, b, drop.dots = TRUE,
                       subset.a = NULL, subset.b = NULL) {
  ## Merge list b into list a (b wins on shared names), optionally
  ## dropping "..." entries and/or restricting each list to a subset of
  ## names first. Used for lattice-style argument handling (e.g. by
  ## plots whose colour key is drawn with drawOpenKey).
  if (drop.dots) {
    a <- a[names(a) != "..."]
    b <- b[names(b) != "..."]
  }
  if (!is.null(subset.a)) a <- a[names(a) %in% subset.a]
  if (!is.null(subset.b)) b <- b[names(b) %in% subset.b]
  ## BUG FIX: was length(names(b) > 0) — a misplaced parenthesis that
  ## only produced the right truthiness by accident
  if (length(names(b)) > 0) a <- modifyList(a, b)
  a
}
#############################################################################################################
## makeOpenKeyLegend v0.1
##common code for making legend list
##objects for use with drawOpenkey outputs
##uses listUpdate in utilities
makeOpenKeyLegend <- function(key, default.key, fun.name = "function") {
  ## Build a lattice legend list for drawOpenKey. `key` may be logical
  ## (use/suppress the default key), a list of key settings (merged onto
  ## default.key via listUpdate), or anything else (ignored with a
  ## warning). Returns NULL when no key should be drawn.
  if (is.logical(key)) {
    legend <- if (key) default.key else NULL
  } else if (is.list(key)) {
    legend <- listUpdate(default.key, key)
  } else {
    if (!is.null(key)) {
      warning(paste("In ", fun.name, "(...):\n unrecognised key not exported/applied\n",
                    " [see ?drawOpenKey for key structure/options]", sep = ""),
              call. = FALSE)
    }
    legend <- NULL
  }
  ## wrap in the structure lattice expects, honouring any requested
  ## key space (e.g. "left", "top") via the legend's list name
  if (!is.null(legend)) {
    legend <- list(right = list(fun = drawOpenKey,
                                args = list(key = legend),
                                draw = FALSE))
    if ("space" %in% names(legend$right$args$key)) {
      names(legend)[[1]] <- legend$right$args$key$space
    }
  }
  legend
}
## polygon that can deal with missing data for use in lattice plots with groups
poly.na <- function(x1, y1, x2, y2, group.number, myColors,
                    alpha = 0.4, border = NA) {
  ## Draw a shaded band between (x1, y1) and (x2, y2) in a lattice
  ## panel, one quadrilateral per adjacent pair of points, skipping any
  ## segment with missing y2 values (so gaps appear for missing data).
  ## seq_along(...)[-1] (rather than seq(2, length(x1))) correctly
  ## yields an empty loop when fewer than two points are supplied;
  ## seq(2, 1) would have iterated over c(2, 1).
  for (i in seq_along(x1)[-1]) {
    if (!any(is.na(y2[c(i - 1, i)]))) {
      lpolygon(c(x1[i - 1], x1[i], x2[i], x2[i - 1]),
               c(y1[i - 1], y1[i], y2[i], y2[i - 1]),
               col = myColors[group.number], border = border, alpha = alpha)
    }
  }
}
## gives names of lattice strips
## gives names of lattice strips
## Builds the strip label text for lattice panels: one vector of labels
## per conditioning variable in `type`, formatted via quickText.
## Returns list(pol.name, pol.name2); pol.name2 is NULL when there is a
## single conditioning variable.
strip.fun <- function(results.grid, type, auto.text) {
## proper names of labelling ###################################################
pol.name <- sapply(levels(factor(results.grid[[type[1]]])),
function(x) quickText(x, auto.text, expression = FALSE))
pol.name2 <- NULL
if (length(type) != 1 ) {
## two conditioning variables
pol.name2 <- sapply(levels(factor(results.grid[[type[2]]])),
function(x) quickText(x, auto.text, expression = FALSE))
}
## NOTE(review): `strip` is assigned here but never used or returned —
## presumably callers were meant to receive it to suppress the strip
## for the "default" type; confirm intent before removing.
if (length(type) == 1 & type[1] == "default") strip <- FALSE ## remove strip
list(pol.name, pol.name2)
}
## from lattice
chooseFace <- function (fontface = NULL, font = 1)
{
  ## From lattice: prefer `fontface` when supplied, otherwise fall back
  ## to the (numeric) `font` specification.
  if (is.null(fontface)) font else fontface
}
## .smoothScatterCalcDensity() is also in graphics, but not exported.
.smoothScatterCalcDensity <- function(x, nbin, bandwidth, range.x)
{
  ## 2-D kernel density estimate for smoothScatter-style plots (this
  ## helper also exists, unexported, in graphics). Returns the
  ## KernSmooth::bkde2D result with the bandwidth used attached.
  if (!("KernSmooth" %in% loadedNamespaces())) {
    ns <- try(loadNamespace("KernSmooth"))
    if (isNamespace(ns)) {
      message("(loaded the KernSmooth namespace)")
    } else {
      stop("panel.smoothScatter() requires the KernSmooth package, but unable to load KernSmooth namespace")
    }
  }
  ## a single nbin applies to both dimensions
  if (length(nbin) == 1) nbin <- c(nbin, nbin)
  if (!is.numeric(nbin) || (length(nbin) != 2)) {
    stop("'nbin' must be numeric of length 1 or 2")
  }
  if (missing(bandwidth)) {
    ## default bandwidth from the central 90% spread of each column
    bandwidth <- diff(apply(x, 2, quantile,
                            probs = c(0.05, 0.95), na.rm = TRUE)) / 25
  } else if (!is.numeric(bandwidth)) {
    stop("'bandwidth' must be numeric")
  }
  ## a zero bandwidth would make bkde2D fail
  bandwidth[bandwidth == 0] <- 1
  ## create density map
  if (missing(range.x)) {
    rv <- KernSmooth::bkde2D(x, gridsize = nbin, bandwidth = bandwidth)
  } else {
    rv <- KernSmooth::bkde2D(x, gridsize = nbin, bandwidth = bandwidth,
                             range.x = range.x)
  }
  rv$bandwidth <- bandwidth
  rv
}
Cquantile <- function(x, probs = 0.5) {
  ## Quick (but basic) quantile function backed by the C++ Cquant
  ## routine. Validates input and returns NA when x is entirely missing.
  if (!is.numeric(x)) {
    stop("Quantiles only work for numeric data")
  }
  ## scalar condition, so || rather than |; also fixes the "betweem"
  ## typo in the error message
  if (any(probs < 0) || any(probs > 1)) {
    stop("Probabilities should be between 0 and 1")
  }
  if (all(is.na(x))) return(NA)
  Cquant(x, probs)
}
## simple rounding function from plyr
round_any <- function(x, accuracy, f = round) {
f(x / accuracy) * accuracy
}
## pretty gap calculator
prettyGap <- function(x, n = 100) {
return(diff(pretty(x, n))[1])
}
# function to check variables are numeric, if not force with warning
checkNum <- function(mydata, vars) {
for (i in seq_along(vars)) {
if (!is.numeric(mydata[[vars[i]]])) {
mydata[[vars[i]]] <- as.numeric(as.character(mydata[[vars[i]]]))
warning(paste(vars[i], "is not numeric, forcing to numeric..."),
call. = FALSE)
}
}
return(mydata)
}
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
## (removed: stray website-embed text from a scraped copy of this file;
## it was not valid R and broke parsing)