R/rterm.R

Defines functions extractModel evalOne evaluate addMethod addTmy addWeather addData newTerm

Documented in addData addMethod addWeather evaluate extractModel newTerm

#' Initialize a new "term" - temperature energy regression model
#' 
#' Returns a new term object to populate with data, weather, and methods
#' 
#' @param name an optional name for the term. This will go in tables and graphs
#' @param address an optional address. If specified then the \code{\link{addWeather}} 
#'  function will automatically grab the closest weather station.
#' @param sqft an optional building size. If you specify a square feet, it will assume
#'  that you're dealing in total kWh and will convert that to EUI
#' 
#' @seealso \code{\link{addData}} \code{\link{addWeather}} \code{\link{addMethod}}
#' 
#' @return the new term object
#' 
#' @examples
#' # Show available datasets
#' newTerm("Ecotope Mothership", "4056 9th Ave NE Seattle, WA", 2600)
newTerm <- function(name = NULL, address = NULL, sqft = NULL) {
  term <- list()
  term$data <- NULL
  term$weather <- list()
  term$methods <- list()
  term$models <- list()
  class(term) <- "term"
  if(!is.null(name)) {
    attr(term, "name") <- name
  }
  if(!is.null(address)) {
    attr(term, "address") <- address
  }
  if(!is.null(sqft)) {
    attr(term, "sqft") <- sqft
  }
  return(term)
}


#' Add data to a "term" object
#' 
#' Adds a dataset & instructions to a term object, returns the updated object
#' 
#' @param term the term object to add data
#' @param data the dataset to add
#' @param formula an optional specification for energy variable and date variables,
#'  for example kwhd ~ dateStart + dateEnd.
#' @param interval an optional specification for interval data. "daily" or "monthly"
#' @param energyVar an optional manual specification of the energy variable of the
#'  dataset.
#' @param dateStartVar an optional manual specification of the start date variable.
#' @param dateEndVar an optional manual specification of the end date variable.
#' @param daysVar an optional specification of the days in cycle variable (mainly 
#'  for billing data). Note that you only need to specify any two of dateStartVar,
#'  dateEndVar, and daysVar if you do the manual specification.
#' @param daily whether or not the energy use is total across the interval or daily.
#'  Mostly this is daily by the time the analyst gets it, but if dealing with raw
#'  consumption specify daily = FALSE
#' 
#' @seealso \code{\link{newTerm}} \code{\link{addWeather}} \code{\link{addMethod}}
#' 
#' @return the term object with data added
#' 
#' @examples
#' # Ecotope Mothership data
#' data(ecotope)
#' mod <- newTerm("Ecotope Mothership")
#' mod <- addData(mod, ecotope, kwhd ~ dateStart + dateEnd)
#' mod <- addData(mod, ecotope, energyVar = "kwhd", dateStartVar = "dateStart", dateEndVar = "dateEnd")
addData <- function(term, data, formula = NULL, interval = NULL, energyVar = NULL, dateStartVar = NULL, dateEndVar = NULL, daysVar = NULL, daily = TRUE) {
  if(!inherits(term, "term")) {
    stop("Must add data to a term object. See help(addData)")
  }
  
  if(!is.null(term$data)) {
    warning("Data already found, will overwrite")
  }

  attr(term, "eui") <- FALSE  
  attr(term, "gas") <- FALSE
  isGas <- FALSE
  
  # First check for a formula
  if(!is.null(formula)) {
    # Need to parse the formula
    mf <- model.frame(formula, data)
    y <- model.response(mf)
    term$data <- data.frame(mf)
    eName <- tolower(names(term$data)[1])
    if(length(grep("gas", eName)) | length(grep("therm", eName)) | length(grep("thm", eName))) {
      isGas <- TRUE
      attr(term, "gas") <- TRUE
    } else if(length(grep("eui", eName, ignore.case = TRUE))) {
      attr(term, "eui") <- TRUE
    } else {
      isGas <- FALSE
    }
    names(term$data)[1] <- "energy"
    
    # We're going to coerce POSIX times to dates... note that 
    # none of this methodology works sub-daily anyway.
    term$data <- data.frame(lapply(term$data, function(x) {
      if(inherits(x, "POSIXt")) {
        as.Date(x)
      } else if(inherits(x, "factor") | inherits(x, "character")) {
        tmp <- lubridate::mdy(x)
        if(sum(!is.na(tmp)) == 0) {
          tmp <- lubridate::ymd(x)
          if(sum(!is.na(tmp)) == 0) {
            warning("Could not parse the dates. Set them as either 2015-05-20 of 5/20/2015")
          }
        }
        as.Date(tmp)
      } else {
        x
      }
    }))
    
    # Parse the classes of the variables to decide how to set up the model.
    varClasses <- lapply(term$data, class)
    
    # Case 1.. three variables, assume either start date + end date
    #  or end date + days of service
    if(ncol(term$data) == 3) {
      if(varClasses[2] == "Date" & varClasses[3] == "Date") {
        # Two dates mean a start date & an end date
        term$data$diffs <- as.numeric(difftime(term$data[, 2], term$data[, 3]))
        if(sum(term$data$diffs > 0) == 0) {
          names(term$data)[2:3] <- c("dateStart", "dateEnd")
        } else if(sum(term$data$diffs < 0) == 0) {
          names(term$data)[2:3] <- c("dateEnd", "dateStart")
        } else {
          stop("Interval end dates must all be greater than start dates")
        }
        term$data$diffs <- NULL
      } else if((varClasses[2] == "numeric" & varClasses[3] == "Date") | 
                  (varClasses[2] == "Date" & varClasses[3] == "numeric")) {
        dateVar <- which(varClasses[-1] == "Date")
        daysVar <- which(varClasses[-1] == "numeric")
        
        if(sum(term$data[, -1][, daysVar] > 0) == 0) {
          names(term$data)[-1][dateVar] <- "dateEnd"
        } else {
          names(term$data)[-1][dateVar] <- "dateEnd"
        }
        term$data[, -1][, daysVar] <- abs(term$data[, -1][, daysVar])
        names(term$data)[-1][daysVar] <- "days"
        
        
      } else {
        stop("Couldn't recognize date/time variables")
      }

    } else if(ncol(term$data) == 2) {
      if(!(inherits(term$data[, 2], "Date") & !inherits(term$data[, 2], "POSIXt"))) {
        stop("Right Hand Side must be either a Date or Time class")
      }
      if(interval == "monthly") {
        term$data$dateStart <- lubridate::floor_date(term$data[, 2], "month")
        term$data$dateEnd <- term$data$dateStart %m+% months(1)
        # term$data$days <- term$data$dateEnd - term$data$dateStart
      } else if(interval == "daily") {
        names(term$data)[2] <- "dateStart"
        term$data$dateEnd <- term$data$dateStart + lubridate::days(1)
      } else if(interval == "weekly") {
        names(term$data)[2] <- "dateStart"
        term$data$dateEnd <- term$data$dateStart + lubridate::days(7)
      } else if(varClasses[2] == "Date") {
        warning("One Date variable found, assuming this is the end date")
        names(term$data)[2] <- "dateEnd"
        term$data$days <- c(NA, as.numeric(diff(term$data[, 2])))
        term$data$days[1] <- mean(term$data$days, na.rm = TRUE)
        term$data$dateStart <- term$data$dateEnd - term$data$days
      }
    }
    
  } else if(!is.null(energyVar)) {
    # Else check for manually specified variables
    term$data <- data.frame("energy" = data[, energyVar])
    if(!is.null(dateStartVar)) {
      tmp <- as.Date(data[, dateStartVar])
      term$data$dateStart <- tmp
    }
    if(!is.null(dateEndVar)) {
      tmp <- as.Date(data[, dateEndVar])
      term$data$dateEnd <- tmp
    }
    if(!is.null(daysVar)) {
      tmp <- as.numeric(as.character(data[, daysVar]))
      term$data$days <- tmp
    }
  }
  
  
  # Can specify two of start date, end date, and days... fill in missing
  # if applicable
  ds <- is.null(term$data$dateStart)
  de <- is.null(term$data$dateEnd)
  da <- is.null(term$data$days)
  
  if(!ds & !de & da) {
    term$data$days <- as.numeric(difftime(term$data$dateEnd, term$data$dateStart, units = "days"))
  } else if(!ds & de & !da) {
    term$data$dateEnd <- term$data$dateStart + term$data$days
  } else if(ds & !de & !da) {
    term$data$dateStart <- term$data$dateEnd - term$data$days
  } else if(!ds & !de & !da) {
    # 
  } else  {
    stop("Error, must specify 2 of the following: start date, end date, days")
  }
  
  if(daily) {
    term$data$dailyEnergy <- term$data$energy
    term$data$energy <- term$data$dailyEnergy * term$data$days
  } else {
    term$data$dailyEnergy <- term$data$energy / term$data$days
    if(!is.null(attr(term, "sqft"))) {
      if(isGas) {
        term$data$dailyEnergy <- term$data$dailyEnergy * 100 / attr(term, "sqft") * 365
      } else {
        term$data$dailyEnergy <- term$data$dailyEnergy * 3.412 / attr(term, "sqft") * 365
      }
      
    }
  }
  
  
  
  term
}




#' Add weather to a "term" object
#' 
#' Adds a weather & instructions to a term object, returns the updated object. 
#' If you specified an address in the term construction with \code{\link{newTerm}} 
#' then this function will take care of everything automatically. Note that you 
#' can add multiple weather stations to the "term" object with repeated calls of
#' this function.
#' 
#' @param term the term object to add data
#' @param stationid optional GHCN ID as found from \code{\link{stationSearch}}. 
#' @param weather optional dataset if providing your own weather (rather than
#'  using the built-in integration with NOAA).
#' @param formula optional specification of time variable and temp variable if
#'  providing your own weather. Example temp ~ date. 
#' @param timeVar optional manual specification of date variable if providing your
#'  own weather data.
#' @param tempVar optional manual specification of temp variable if providing your
#'  own weather data.
#' @param name optional name for this particular weather. Example name = "Sea-Tac" 
#'  is way better than "SEATTLE-TACOMA INTERNATIONAL AIRPORT"
#' 
#' @seealso \code{\link{newTerm}} \code{\link{addData}} \code{\link{addMethod}}
#' 
#' @return the term object w/ weather added
#' 
#' @examples
#' # Add weather
#' stationSearch("Seattle, WA")
#' mod <- addWeather(mod, stationid = "GHCND:USW00024234")
addWeather <- function(term, weather = NULL, formula = NULL, stationid = NULL, timeVar = NULL, tempVar = NULL, name = NULL) {
  
  ind <- length(term$weather)
  # Check to see if this weather needs a name
  if(is.null(name)) {
    name <- paste0("weather", ind)
  }
  
  if(name %in% names(term$weather)) {
    stop(paste("Weather named", name, "already added to TERM"))
  }
  
  # If there are no inputs & an address was specified blindly take the first
  if(is.null(stationid) & is.null(weather) & !is.null(attr(term, "address"))) {
    address <- attr(term, "address")
    stations <- stationSearch(address)
    print("Using closest weather station to specified address")
    print(stations[1, ])
    stationid <- stations$id[1]
  }
  
  # Check if a stationid was entered for read.ghcn
  if(!is.null(stationid)) {
    if(!is.null(term$data)) {
      minDate <- min(term$data$dateStart)
      maxDate <- max(term$data$dateEnd)
      term$weather[[name]] <- read.ghcn(stationid, minDate, maxDate)
      attr(term$weather[[name]], "stationid") <- stationid
    } else {
      warning(paste("With no data, addWeather does not know how much weather data to query",
                    "See help(addData) or help(addWeather)"))
    }
  } else if(!is.null(weather)) {
    # Check if custom weather was provided
    if(!is.null(timeVar) & !is.null(tempVar)) {
      term$weather[[name]] <- data.frame("aveTemp" = weather[, tempVar])
      if(inherits(weather[, timeVar], "POSIXt")) {
        term$weather[[name]]$time <- weather[, timeVar]
        term$weather[[name]]$date <- as.Date(weather[, timeVar])
      } else if(inherits(weather[, timeVar], "Date")) {
        term$weather[[name]]$date <- weather[, timeVar]
      } else {
        stop("Weather data time variable should be either Date class or POSIX time class")
      }
    } else if(!is.null(formula)) {
      # Need to parse the formula
      mf <- model.frame(formula, weather)
      term$weather[[name]] <- data.frame(mf)
      if(!inherits(term$weather[[name]][, 1], "numeric")) {
        stop("Temperature variable must be numeric")
      }
      names(term$weather[[name]])[1] <- "aveTemp"
      if(inherits(term$weather[[name]][, 2], "POSIXt")) {
        names(term$weather[[name]])[2] <- "time"
        term$weather[[name]]$date <- as.Date(term$weather[[name]]$time)
      # } else if(inherits(term$weather[[name]][, 1], "Date")) {
      } else if(inherits(term$weather[[name]][, 2], "Date")) {
        names(term$weather[[name]])[2] <- "date"
      } else {
        stop("Time variable must be either POSIXt or Date")
      }
      # names(term$data)[1] <- "energy"
    } else if(!is.null(weather$date) & !is.null(weather$aveTemp)) {
      term$weather[[name]] <- weather  
    } else {
      stop(paste("When providing your own weather, must specify timeVar and tempVar",
           "See help(addWeather)"))
    }
  } else {
    stop("Must enter either a stationid, a weather dataset, or associate an address w/ the TERM")
  }
  

  # Interpolate missing values, if necessary
  if(!is.null(term$weather[[name]]$time)) {
    tvar <- "time"
  } else {
    tvar <- "date"
  }
  g <- splinefun(term$weather[[name]][, tvar], term$weather[[name]]$aveTemp, method = "natural")
  
  term$weather[[name]]$rawTemp <- term$weather[[name]]$aveTemp
  term$weather[[name]]$aveTemp <- g(term$weather[[name]][, tvar])
  term$weather[[name]]$missing <- FALSE
  missingTemps <- is.na(term$weather[[name]]$rawTemp)
  if(sum(missingTemps) > 0) {
    term$weather[[name]]$missing[missingTemps] <- TRUE
  }
  
  term
}


#' Add TMY weather to a "term" object
#' 
#' Associates TMY weather file(s) to a "term" object.
#' 
#' @param term the term object to add the method.
#' @param tmyFiles a vector of tmyFiles to associate with the term. Usually just one ex WASeattle3.tm2
#' 
#' @examples 
#' mod <- newTerm()
#' mod <- addTmy(mod, "ORPortland3.tm2")
#' 
addTmy <- function(term, tmyFiles) {
  term$tmy <- tmyData[tmyData$tmyFile %in% tmyFiles, ]
  term
}


#' Add a method to a "term" object
#' 
#' Adds an evaluation method to a "term" object. Current options are for a 
#' change point model, Variable Base Degree Day (VBDD or PRISM) model, or a
#' Bayesian change point model. Note that you can add multiple methods to the 
#' same "term" with repeated calls of this function.
#' 
#' For the bayesian "web" method, you can modify the prior through "Mean" and 
#' "Sd" suffixes to the parameter names. For example, for the "baseLoad" 
#' parameter you can specify a "baseLoadMean" and "baseLoadSd" for prior 
#' mean and standard deviation of the base load.
#' 
#' For the frequentist "changepoint" and "degreeday" methods, you can specify
#' whether to generate bootstrap standard errors with se = TRUE/FALSE & the 
#' number of bootstrap replicates with "nreps". In addition you can tweak the 
#' L1 penalty strength by specifying "lambda"
#' 
#' @param term the term object to add the method.
#' @param method the method name "changepoint", "degreeday", or "web"
#' @param name an optional name for this method
#' @param ... optional extra parameters for the specific methodology. 
#' 
#' @seealso \code{\link{newTerm}} \code{\link{addData}} \code{\link{addMethod}}
#' 
#' @return the term object w/ the method added
#' 
#' @examples
#' stationSearch("Seattle, WA")
#' mod <- addMethod(mod, "changepoint")
#' mod <- addMethod(mod, "degreeday")
#' mod <- addMethod(mod, "web", name = "normal")
#' mod <- addMethod(mod, "web", baseLoadMean = 100, name = "higher base")
#' mod <- addMethod(mod, "cp", se = FALSE, lambda = 20)
addMethod <- function(term, method, name = NULL, ...) {
  

  controls <- eval(substitute(alist(...)))
  
  # I need default options... store them here
  if(tolower(method) %in% c("change-point", "changepoint", "cp")) {
    method <- "cp"
    defaults <- list(heating = NULL, cooling = NULL, intercept = TRUE, se = TRUE, nreps = 200, parametric = FALSE, lambda = 12, selection = "LS")
  } else if(tolower(method) %in% c("degree-day", "degreeday", "dd")) {
    method <- "dd"
    defaults <- list(heating = NULL, cooling = NULL, intercept = TRUE, se = TRUE, nreps = 200, parametric = FALSE, lambda = 8, selection = "LS") 
  } else if(tolower(method) %in% c("web", "bayes", "bayesian")) {
    method <- "web"
    defaults <- list(heating = NULL, cooling = NULL, intercept = TRUE, selection = "L1", lambda = 10, 
                     baseLoadMean = 50, baseLoadSd = 35, 
                     heatingBaseMean = 55, heatingBaseSd = 10,
                     heatingSlopeMean = 15, heatingSlopeSd = 10,
                     coolingBaseMean = 75, coolingBaseSd = 15,
                     coolingSlopeMean = 15, coolingSlopeSd = 10) 
  } else {
    stop(paste("Unrecognized Method", method))
  }

  ind <- length(term$methods[grep(method, names(term$methods))])
  # Check to see if this weather needs a name
  if(is.null(name)) {
    if(ind) {
      name <- paste0(method, ind)
    } else {
      name <- method
    }
  } else {
    name <- make.names(paste(method, name))
  }
  
  if(name %in% names(term$methods)) {
    stop(paste("Method named", name, "already added to TERM"))
  }

  
  
  # Loop through the default controls, over-riding where necessary
  toUse <- lapply(names(defaults), function(x) {
    if(!is.null(controls[[x]])) {
      tmp <- controls[[x]]
    } else {
      tmp <- defaults[[x]]
    }
  })
  names(toUse) <- names(defaults)
  
  term$methods[[name]] <- toUse
  
  
  term
}


#' Evaluate a term
#' 
#' Evaluates a "term" that has had data, weather, and method(s) added
#' 
#' @param term the term object to evaluate.
#' 
#' @seealso \code{\link{newTerm}} \code{\link{addData}} 
#' \code{\link{addWeather}} \code{\link{addMethod}}
#' 
#' @return the evaluated term object
evaluate <- function(term) {
  
  # term <- linkWeatherToData(term)
  
  methodWeather <- expand.grid(seq_along(term$methods), seq_along(term$weather))
  methodWeather$name <- make.names(paste(names(term$methods)[methodWeather[, 1]],
                              names(term$weather)[methodWeather[, 2]]))
  
  models <- Map(function(method, weather) {
    evalOne(term, method, weather)
  }, methodWeather[, 1], methodWeather[, 2])
  names(models) <- methodWeather$name
  
  term$models <- models
  term
}

evalOne <- function(term, method, weather) {
  
  # Pull out the master data and the current weather, for linking...
  dset <- term$data
  weather <- term$weather[[weather]]
  
  # Link the requested weather to this data
  tmp <- linkOneToData(dset, weather)
  dset <- tmp$dset
  weather <- tmp$weather
  
  #lowerName <- tolower(names(term$methods)[method])
  if(length(grep("cp", names(term$methods)[method]))) {
    mod <- cplm(dset, weather, term$methods[[method]])
  } else if(length(grep("dd", names(term$methods[method])))) {
    mod <- ddlm(dset, weather, term$methods[[method]])
  } else if(length(grep("web", names(term$methods[method])))) {
    mod <- web(dset, weather, term$methods[[method]])
  } else {
    warning(paste("Unrecognized method", names(method), "skipping"))
  }
  
  if(!is.null(attr(term, "sqft")) | attr(term, "eui") == TRUE) {
    attr(mod, "sqft") <- attr(term, "sqft")
    eui <- TRUE
  } else {
    eui <- FALSE
  }
  
  if(!is.null(attr(weather, "stationid"))) {
    attr(mod, "stationid") <- attr(weather, "stationid")
  }
  
  if(!is.null(term$tmy)) {
    tmyFiles <- unique(term$tmy$tmyFile)
    tmyResults <- lapply(tmyFiles, function(tmyFile) {
      makeTmyPrediction(mod, tmyData[tmyData$tmyFile == tmyFile, ], names(term$methods)[method], eui)
    })
    mod$tmyResults <- do.call('rbind', lapply(seq_along(tmyResults), function(i) {
      tmp <- data.frame("tmyFile" = tmyFiles[i])
      tmp <- cbind(tmp, tmyResults[[i]]$tmyResults)
      tmp
    }))
    mod$tmyPredDsets <- do.call('rbind', lapply(seq_along(tmyResults), function(i) {
      tmp <- tmyResults[[i]]$tmyPredDset
      tmp$tmyFile <- tmyFiles[i]
      tmp
    }))
  }
  
  return(mod)
}

#' Extract an individual model from a fitted term
#' 
#' Pulls on individual model fit out of a term object. For example 
#' if you fitted both "degreeday" and "changepoint" you can pull out 
#' just the change point model with extractModel(mod, "cp")
#' 
#' @param term the term object to extract from.
#' @param name a regex matching the name of the model to extract
#' 
#' @seealso \code{\link{newTerm}} \code{\link{addData}} 
#' \code{\link{addWeather}} \code{\link{addMethod}} \code{\link{evaluate}}
#' 
#' @return the extracted model
extractModel <- function(term, name) {
  models <- names(term$models)
  toSelect <- grep(name, models)
  if(length(toSelect) > 1) {
    stop(paste("Ambiguous Model name", name))
  } else if(length(toSelect) == 0) {
    stop(paste("Model", name, "not found"))
  }
  toReturn <- term$models[[toSelect]]
  attr(toReturn, "gas") <- attr(term, "gas")
  attr(toReturn, "eui") <- attr(term, "eui")
  if(!is.null(attr(term, "sqft"))) {
    attr(toReturn, "sqft") <- attr(term, "sqft")
  }
  toReturn
}
EcotopeResearch/rterm documentation built on Oct. 17, 2022, 4:02 p.m.