#' Data Format
#' @import data.table
#' @import parallelMap
#' @export
ebDataFormat <- function(
x,
install_dates = NULL,
sites = NULL,
norm_data = NULL,
data_options = NULL,
temp_bins = NULL,
cpus = 1) {
defaults <- list(interval = 'hourly',
base_length = 365,
perf_length = 365,
blackout = 0,
base_min = 0,
perf_min = 0,
tbin_type = 'simple',
occupancy_lookup = FALSE)
data_options <- c(defaults[setdiff(names(defaults), names(data_options))], data_options)
if(!(data_options$interval %in% c('hourly', 'daily'))) stop('Improper interval')
meterDict <- setNames(unique(x$meterID), unique(x$meterID))
if(is.null(sites)) sites <- meterDict
cat(length(meterDict), 'total meters \n')
no_tbin <- setdiff(meterDict, names(temp_bins))
data_options$tbin_type = match.arg(data_options$tbin_type, c('simple', 'detailed', 'none'))
temp_bins <- c(setNames(rep(10, length(no_tbin)), no_tbin), temp_bins)
applyCall <- quote(f(meterDict, function(m) {
ebMeterFormat(dat = x[meterID == m, ],
inDate = install_dates[[m]],
ntbin = temp_bins[m],
data_options = data_options)
}))
applyCall[[1]] <- as.name('lapply')
if(cpus > 1){
parallelStartSocket(cpus)
parallelLibrary('ebase')
applyCall[[1]] <- as.name('parallelLapply')
}
dataList <- eval(applyCall)
if(cpus > 1) parallelStop(); dataList <- setNames(dataList, meterDict)
dataList <- dataList[sapply(dataList, function(x) !is.null(names(x)))]
meterDict = setNames(names(dataList), names(dataList))
cat(length(meterDict), 'with sufficient data \n')
sites <- sites[names(sites) %in% meterDict]
periods = names(dataList[[1]])
names(periods) = periods
out = list(
data = dataList,
norms = norm_data,
meterDict = meterDict,
siteDict = sites,
periods = periods)
setattr(out, "class", c('ebFormatted', 'list'))
out
}
#' Stack Formatted Data
#' @export
ebStack <- function(x, ...) UseMethod('ebStack')
#' Data Stack Method
#' @import data.table
#' @export
ebStack.ebFormatted = function(x, by = 'meter') {
if(by == 'meter') {
lapply(x$data, rbindlist)
} else if(by == 'period') {
lapply(x$periods, function(period) {
meters = setNames(names(x$data), names(x$data))
rbindlist(lapply(meters, function(meter) x$data[[meter]][[period]]))
})
}
}
#' List Formatted Data
#' @export
ebList <- function(x, ...) UseMethod('ebList')
#' Data List Method
#' @import data.table
#' @export
ebList.ebFormatted = function(x, top = 'period') {
if(top == 'meter') {
x$data
} else if(top == 'period') {
meters = setNames(names(x$data), names(x$data))
lapply(x$periods, function(period) {
lapply(meters, function(meter) x$data[[meter]][[period]])
})
}
}
#' Meter Format
#' @import data.table
#' @export
ebMeterFormat <- function(dat, inDate, ntbin, data_options) {
sec_day = 60 * 60 * 24
dat[, period:= 1]
if(!is.null(inDate)) {
dat[, period:=
(date >= min(inDate) - (data_options$blackout / 2 + data_options$base_length) * sec_day) +
(date >= min(inDate) - (data_options$blackout / 2) * sec_day) +
(date >= max(inDate) + (data_options$blackout / 2) * sec_day) +
(date >= max(inDate) + (data_options$blackout / 2 + data_options$perf_length) * sec_day)]
}
dat <- dat[period > 0 & period < 4]
pNames <- c('baseline', 'blackout', 'performance')
dat[, period:= as.factor(pNames[period])]
towDict <- get_towDict(data_options$interval)
if(data_options$interval == 'hourly') {
dat[, tow:= towDict[paste0(weekdays(date), hour(date))]]
} else if(data_options$interval == 'daily'){
dat[, tow:= towDict[paste0(weekdays(date))]]
}
dat[, month:= month(date)]
dat[, mm:= as.factor(month)]
tcuts <- c(-Inf, quantile(dat$temp, 1:ntbin / ntbin))
tcuts <- c(tcuts[-length(tcuts)], Inf)
if(data_options$tbin_type != 'none') {
dat <- get_tbins(dat, tcuts, data_options$tbin_type)
}
if(data_options$occupancy_lookup) {
ocDT = ebOccupancy(dat[period == 'baseline'])
dat = merge(dat, ocDT, by = c('tow', 'month'))
}
classStr <- list('hourly' = c('hourly'), 'daily' = c('daily', 'hourly'))[[data_options$interval]]
setattr(dat, "class", c(classStr, class(dat)))
if(uniqueN(dat[period == 'baseline', as.Date(date)]) < data_options$base_min |
uniqueN(dat[period == 'performance', as.Date(date)]) < data_options$perf_min) return(NA)
list(baseline = dat[period == 'baseline', ],
blackout = dat[period == 'blackout', ],
performance = dat[period == 'performance', ])
}
#' Baseline Model
#' @import data.table
#' @import mlr
#' @import parallelMap
#' @export
ebModel <- function(dat, method = c('regress', 'gboost', 'rforest', 'caltrack', 'hybrid'), model_options = NULL)
{
method <- match.arg(method)
model_defaults <- list(max_depth = 3,
nrounds = seq(200, 1400, 200),
ntree = 1:5 * 50,
early_stopping_rounds = 200,
eta = 0.1,
blocks = 2,
block_on_week = FALSE,
weights = NULL,
occupancy_lookup = FALSE,
ivars = NULL,
custom_lm = NULL)
model_options <- c(model_defaults[setdiff(names(model_defaults), names(model_options))], model_options)
modelCall <- quote(f(dat = dat, model_options = model_options))
modelCall[[1]] <- as.name(method)
cat('X')
eval(modelCall)
}
#' Predictions
#' @import data.table
#' @import mlr
#' @export
ebPredict = function(modelList, dataList) {
meterDict = dataList$meterDict
siteDict = dataList$siteDict
periods = dataList$periods
norms = dataList$norms
dataList = ebStack(dataList)
dataList = lapply(meterDict, function(meter) predict(modelList[[meter]], dataList[[meter]]))
dataList = lapply(dataList, split, by = 'period')
norm_predict = NULL
if(!is.null(norms)) {
norm_dat = lapply(meterDict, function(meter) ebForecast(modelList[[meter]], norms[[meter]]))
norm_predict = function(modelList) {
lapply(meterDict, function(meter) {
merge(ebForecast(modelList[[meter]], norms[[meter]])[, .(meterID = meter, date, use)],
norm_dat[[meter]][, .(date, pUse = use)])
})
}
}
out = list(
data = dataList,
site_dat = data.table(site = as.factor(siteDict), meterID = names(siteDict)),
norm_predict = norm_predict,
periods = periods)
setattr(out, 'class', c('prediction', 'ebFormatted', class(out)))
out
}
#' Summarize Prediction Data
#' @import data.table
#' @export
ebSummary <- function(x, ...) UseMethod('ebSummary')
#' Prediction and Savings Summary
#' @import data.table
#' @export
ebSummary.prediction = function(x, norm_modelList = NULL) {
meter_baseline = merge(
x$site_dat,
ebStack(x, 'period')[['baseline']],
by = 'meterID')
site_baseline = meter_baseline[, lapply(.SD, sum),
by = .(site, date),
.SDcols = c('use', 'pUse')]
meter_perf = merge(
x$site_dat,
ebStack(x, 'period')[['performance']])
site_perf = meter_perf[, lapply(.SD, sum),
by = .(site, date),
.SDcols = c('use', 'pUse')]
meter_metrics <- meter_baseline[, list(r2 = R2(use, pUse),
cvrmse = CVRMSE(use, pUse),
nmbe = NMBE(use, pUse),
baseline = sum(use)),
by = .(meterID)]
meter_savings <- meter_perf[, list(gross = sum(pUse - use),
var_gross = varCalc(use, pUse)),
by = .(meterID)]
site_metrics <- site_baseline[, list(r2 = R2(use, pUse),
cvrmse = CVRMSE(use, pUse),
nmbe = NMBE(use, pUse),
baseline = sum(use)),
by = .(site)]
site_savings <- site_perf[, list(gross = sum(pUse - use),
var_gross = varCalc(use, pUse)),
by = .(site)]
norms = list(meters = NULL, sites = NULL)
if(!is.null(norm_modelList)) {
norm_dat = rbindlist(x$norm_predict(norm_modelList))
norm_dat = merge(x$site_dat, norm_dat)
setattr(norm_dat, 'class', c('norm', class(norm_dat)))
norms = ebSummary(norm_dat)
}
return(list(meters = list(metrics = meter_metrics,
savings = meter_savings,
norms = norms$meters),
sites = list(metrics = site_metrics,
savings = site_savings,
norms = norms$sites)))
}
#' Normalized Savings Summary
#' @import data.table
#' @export
ebSummary.norm = function(x) {
meter_perf = x
site_perf = meter_perf[, lapply(.SD, sum),
by = .(site, date),
.SDcols = c('use', 'pUse')]
meter_savings <- meter_perf[, list(gross = sum(pUse - use),
var_gross = varCalc(use, pUse)),
by = .(meterID)]
site_savings <- site_perf[, list(gross = sum(pUse - use),
var_gross = varCalc(use, pUse)),
by = .(site)]
list(meters = meter_savings,
sites = site_savings)
}
#' R2 calculation
#' @export
R2 <- function(actual, predicted){
actual <- na.omit(actual); predicted <- na.omit(predicted)
round(1 - sum((actual - predicted)^2)/sum((actual - mean(actual))^2), 2)
}
#' CVRMSE calculation
#' @export
CVRMSE <- function(actual, predicted){
actual <- na.omit(actual); predicted <- na.omit(predicted)
round(100*sqrt(mean((actual - predicted)^2))/mean(actual), 0)
}
#' NMBE calculation
#' @export
NMBE <- function(actual, predicted){
actual <- na.omit(actual); predicted <- na.omit(predicted)
format(100*sum(predicted - actual)/sum(actual), digits = 1)
}
#' Variance calculation
#' @export
varCalc <- function(actual, predicted){
actual <- na.omit(actual); predicted <- na.omit(predicted)
length(actual) * (sum((actual - mean(actual))^2)/adjustedN(actual, length(actual)) +
sum((predicted - mean(predicted))^2)/adjustedN(predicted, length(predicted)) -
2 * cov(actual, predicted))
}
#' Serial-Corr. Correction
#' @export
adjustedN <- function(x, n){
corr <- acf(x, lag.max = 1, plot = FALSE)$acf[2]
n*(1 - corr)/(1 + corr)
}
#' Apply Outlier Weights
#' @import data.table
#' @export
ebOutlierWeights <- function(dat)
{
dat <- copy(dat)
weightFn <- function(z0, z1, p0, p1){
b <- (log(1 / p1 - 1) - log(1 / p0 - 1)) / (z1 - z0)
a <- log(1 / p0 - 1) - b * z0
function(z) 1 / (1 + exp(b * z + a))
}
wt <- weightFn(1, 3, .75, .001)
dat[, z:= abs((use - mean(use)) / sd(use)), by = .(tow)]
dat[, weight:= wt(z)]
dat[, z:= NULL]
}
#' Add Event
#' @import data.table
#' @export
ebAddEvent <- function(dataList, meter, nre_dates, event_name = 'nre')
{
nre_dates = as.POSIXct(nre_dates, tz = 'UTC')
for(period in c('baseline', 'blackout', 'performance')){
dat = copy(dataList[[period]][[meter]])
dat[, nre:= as.numeric(date >= min(nre_dates) & date <= max(nre_dates))]
setnames(dat, 'nre', event_name)
dataList[[period]][[meter]] = dat
}
dataList
}
#' Create Occupancy Lookup
#' @import data.table
#' @export
ebOccupancy <- function(dat)
{
mmDict <- setNames(as.character(unique(dat$month)), as.character(unique(dat$month)))
rbindlist(
lapply(mmDict, function(segment){
m_upper <- as.character(as.numeric(segment) + 1)
m_lower <- as.character(as.numeric(segment) - 1)
if(!(m_upper %in% mmDict)) m_upper <- as.character(min(as.numeric(mmDict)))
if(!(m_lower %in% mmDict)) m_lower <- as.character(max(as.numeric(mmDict)))
weightDict <- setNames(c(0, 1, 0), c(m_lower, segment, m_upper))
out = dat[, .(date,
use,
tow,
cdh = as.numeric(temp > 65) * (temp - 65),
hdh = as.numeric(temp < 50) * (50 - temp),
month,
weight = weightDict[as.character(month)])]
out[is.na(weight), weight:= 0]
out[, res:= lm('use ~ cdh + hdh', data = out, weights = weight)$residuals]
out = out[as.character(month) == segment,
.(month = as.numeric(segment),
occupied = mean(as.numeric(res > 0)) > 0.65),
by = .(tow)]
out
})
)
}
#' Plot Meter Data
#' @import data.table
#' @export
ebPlot <- function(x, ...) UseMethod('ebPlot')
#' Plot Predictions
#' @import data.table
#' @import dygraphs
#' @export
ebPlot.data.table <- function(x, compress = TRUE) {
dat <- copy(x)
setnames(dat, c('use', 'pUse'), c('Actual', 'Predicted'))
datesV <- c(
min(dat[period == 'baseline', date]),
max(dat[period == 'baseline', date]),
min(dat[period == 'performance', date]),
max(dat[period == 'performance', date]))
if(compress) dat <- dat[, lapply(.SD, sum),
.SDcols = c('Actual', 'Predicted'),
by = .(date = as.POSIXct(trunc.POSIXt(date, 'days', tz = 'UTC'), tz = 'UTC'))]
dygraph(dat[, .(date, Actual, Predicted)], ylab = 'kWh') %>%
dySeries("Actual", stepPlot = TRUE, fillGraph = TRUE, color = 'black') %>%
dySeries("Predicted", strokeWidth = 1, stepPlot = TRUE, color = '#4889ce') %>%
dyShading(from = datesV[3], to = datesV[4], color = "#CCEBD6") %>%
dyEvent(datesV[2]) %>%
dyEvent(datesV[3], 'Install Period', labelLoc = "top") %>%
dyLegend(width = 400)
}
#' Reduce size of linear model
#' @import data.table
#' @export
strip_lm = function(x) {
x$data <- NULL
x$y <- NULL
x$linear.predictors <- NULL
x$weights <- NULL
x$fitted.values <- NULL
x$model <- NULL
x$prior.weights <- NULL
x$residuals <- NULL
x$effects <- NULL
x$qr$qr <- NULL
attr(x$terms,".Environment") = c()
attr(x$formula,".Environment") = c()
x
}
#' Forecast Method
#' @import data.table
#' @export
ebForecast <- function(model, ...) UseMethod('ebForecast')
#' Temp Bins Helper Function
#' @import data.table
#' @export
get_tbins <- function(dat, tcuts, tbin_type){
ntbin <- length(tcuts) - 1
dat[, tbin:= as.factor(cut(temp, tcuts))]
if(tbin_type == 'detailed'){
tcuts <- unname(c(0, tcuts[-1]))
qtempList = lapply(1:ntbin, function(x) c(min = tcuts[x], max = tcuts[x + 1] - tcuts[x]))
dat[, paste0('tbin_', 1:ntbin):=
lapply(qtempList, function(q) pmin(as.numeric(temp > q['min']) * (temp - q['min']), q['max']))]
}
dat
}
#' TOW Dictionary Helper
#' @import data.table
#' @export
get_towDict <- function(x)
{
if(x == 'hourly') {
dict = setNames(
1:168,
paste0(
rep(c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'),
each = 24),
rep(0:23, 7)))
} else if(x == 'daily') {
dict = setNames(1:7, c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'))
}
dict
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.