tslm <- function(formula, data, subset, lambda=NULL, biasadj=FALSE, ...){
cl <- match.call()
if(!("formula" %in% class(formula))){
formula <- stats::as.formula(formula)
}
if(missing(data)){
mt <- try(terms(formula))
if(is.element("try-error", class(mt))){
stop("Cannot extract terms from formula, please provide data argument.")
}
}
else{
mt <- terms(formula, data=data)
}
## Categorise formula variables into time-series, functions, and data.
vars <- attr(mt,"variables")
#Check for time series variables
tsvar <- match(c("trend", "season"), as.character(vars), 0L)
#Check for functions (which should be evaluated later, in lm)
fnvar <- NULL
for(i in 2:length(vars)){
term <- vars[[i]]
if(!is.symbol(term)){
if(typeof(eval(term[[1]]))=="closure"){#If this term is a function (alike fourier)
fnvar <- c(fnvar, i)
}
}
}
## Fix formula's environment for correct `...` scoping.
attr(formula, ".Environment") <- environment()
if(sum(c(tsvar, fnvar))>0){
#Remove variables not needed in data (trend+season+functions)
rmvar <- c(tsvar, fnvar)
rmvar <- rmvar[rmvar!=attr(mt,"response")+1] #Never remove the reponse variable
if(any(rmvar!=0)){
vars <- vars[-rmvar]
}
}
## Grab any variables missing from data
if(!missing(data)){
#Check for any missing variables in data
vars <- vars[c(TRUE, !as.character(vars[-1])%in%colnames(data))]
dataname <- substitute(data)
}
if(!missing(data)){
data <- datamat(do.call(datamat, as.list(vars[-1]), envir = parent.frame()),data)
}
else{
data <- do.call(datamat, as.list(vars[-1]), envir = parent.frame())
}
## Set column name of univariate dataset
if(is.null(dim(data)) & length(data)!=0){
cn <- as.character(vars)[2]
} else{
cn <- colnames(data)
}
## Get time series attributes from the data
if(is.null(tsp(data))){
if((attr(mt,"response")+1)%in%fnvar){#Check unevaluated response variable
tspx <- tsp(eval(attr(mt,"variables")[[attr(mt,"response")+1]]))
}
tspx <- tsp(data[,1])#Check for complex ts data.frame
}
else{
tspx <- tsp(data)
}
if(is.null(tspx)){
stop("Not time series data, use lm()")
}
tsdat <- match(c("trend", "season"), cn, 0L)
## Create trend and season if missing from the data
if(tsdat[1]==0){#&tsvar[1]!=0){#If "trend" is not in data, but is in formula
trend <- 1:NROW(data)
cn <- c(cn,"trend")
data <- cbind(data,trend)
}
if(tsdat[2]==0){#&tsvar[2]!=0){#If "season" is not in data, but is in formula
if(tsvar[2]!=0 & tspx[3]==1){ # Nonseasonal data, and season requested
stop("Non-seasonal data cannot be modelled using a seasonal factor")
}
season <- as.factor(cycle(data[,1]))
cn <- c(cn,"season")
data <- cbind(data,season)
}
colnames(data) <- cn
## Subset the data according to subset argument
if(!missing(subset)){
if(!is.logical(subset))
stop("subset must be logical")
else if(NCOL(subset) > 1)
stop("subset must be a logical vector")
else if(NROW(subset) != NROW(data))
stop("Subset must be the same length as the number of rows in the dataset")
warning("Subset has been assumed contiguous")
timesx <- time(data[,1])[subset]
tspx <- recoverTSP(timesx)
if(tspx[3]==1 & tsdat[2]==0 & tsvar[2]!=0){
stop("Non-seasonal data cannot be modelled using a seasonal factor")
}
data <- data[subset,]#model.frame(formula,as.data.frame(data[subsetTF,]))
}
if(!is.null(lambda)){
data[,1] <- BoxCox(data[,1],lambda)
}
if(tsdat[2]==0&tsvar[2]!=0){
data$season <- factor(data$season) #fix for lost factor information, may not be needed?
}
## Fit the model and prepare model structure
fit <- lm(formula,data=data,na.action=na.exclude,...)
fit$residuals <- ts(residuals(fit))
fit$fitted.values <- ts(fitted(fit))
tsp(fit$residuals) <- tsp(fit$fitted.values) <- tsp(data[,1]) <- tspx
fit$call <- cl
fit$method <- "Linear regression model"
if(exists("dataname")){
fit$call$data <- dataname
}
if(!is.null(lambda)){
attr(lambda, "biasadj") <- biasadj
fit$lambda <- lambda
fit$fitted.values <- InvBoxCox(fit$fitted.values, lambda, biasadj, var(fit$residuals))
}
return(fit)
}
forecast.lm <- function(object, newdata, h=10, level=c(80,95), fan=FALSE, lambda=object$lambda, biasadj=NULL, ts=TRUE, ...)
{
if (fan)
level <- seq(51, 99, by = 3)
else
{
if (min(level) > 0 & max(level) < 1)
level <- 100 * level
else if (min(level) < 0 | max(level) > 99.99)
stop("Confidence limit out of range")
}
if(!is.null(object$data))
origdata <- object$data #no longer exists
else if(!is.null(object$model)){
origdata <- object$model
}
else if(!is.null(object$call$data)){
origdata <- try(object$data <- eval(object$call$data), silent = TRUE)
if (is.element("try-error", class(origdata)))
stop("Could not find data. Try training your model using tslm() or attach data directly to the object via object$data<-modeldata for some object<-lm(formula,modeldata).")
}
else
origdata <- as.data.frame(fitted(object) + residuals(object))
if(!is.element("data.frame", class(origdata)))
{
origdata <- as.data.frame(origdata)
if(!is.element("data.frame", class(origdata)))
stop("Could not find data. Try training your model using tslm() or attach data directly to the object via object$data<-modeldata for some object<-lm(formula,modeldata).")
}
# Check if the forecasts will be time series
if(ts & is.element("ts",class(origdata))){
tspx <- tsp(origdata)
timesx <- time(origdata)
}
else if(ts & is.element("ts",class(origdata[,1]))){
tspx <- tsp(origdata[,1])
timesx <- time(origdata[,1])
}
else if(ts & is.element("ts",class(fitted(object)))){
tspx <- tsp(fitted(object))
timesx <- time(fitted(object))
}
else
tspx <- NULL
# if(!is.null(object$call$subset))
# {
# j <- eval(object$call$subset)
# origdata <- origdata[j,]
# if(!is.null(tspx))
# {
# # Try to figure out times for subset. Assume they are contiguous.
# timesx <- timesx[j]
# tspx <- tsp(origdata) <- c(min(timesx),max(timesx),tspx[3])
# }
# }
# Add trend and seasonal to data frame
oldterms <- terms(object)
#Adjust terms for function variables and rename datamat colnames to match.
if(!missing(newdata))
{
reqvars <- as.character(attr(object$terms,"variables")[-1])[-attr(object$terms,"response")]
#Search for time series variables
tsvar <- match(c("trend", "season"), reqvars, 0L)
#Check if required variables are functions
fnvar <- sapply(reqvars, function(x) !(is.symbol(parse(text=x)[[1]]) || typeof(eval(parse(text=x)[[1]][[1]]))!="closure"))
if(!is.data.frame(newdata)){
newdata <- datamat(newdata)
colnames(newdata)[1] <- ifelse(sum(tsvar>0),reqvars[-tsvar][1],reqvars[1])
warning("newdata column names not specified, defaulting to first variable required.")
}
oldnewdata <- newdata
newvars <- make.names(colnames(newdata))
#Check if variables are missing
misvar <- match(make.names(reqvars), newvars, 0L)==0L
if (any(!misvar & !fnvar)){ #If any variables are not missing/functions, add them to data
tmpdata <- datamat(newdata[reqvars[!misvar]])
rm1 <- FALSE
}
else{
#Prefill the datamat
tmpdata <- datamat(1:NROW(newdata))
rm1 <- TRUE
}
#Remove trend and seasonality from required variables
if(sum(tsvar)>0){
reqvars <- reqvars[-tsvar]
fnvar <- fnvar[-tsvar]
misvar <- match(make.names(reqvars), newvars, 0L)==0L
}
if (any(misvar | fnvar)){ #If any variables are missing/functions
reqvars <- reqvars[misvar | fnvar] #They are required
fnvar <- fnvar[misvar | fnvar] #Update required function variables
for (i in reqvars){
subvars <- NULL
for(j in 1:length(object$coefficients)){
subvars[j] <- pmatch(i,names(object$coefficients)[j])
}
subvars <- !is.na(subvars)
subvars <- names(object$coefficients)[subvars]
#Detect if subvars if multivariate
if (length(subvars)>1){
#Extract prefix only
subvars <- substr(subvars, nchar(i)+1, 999L)
fsub <- match(make.names(subvars), newvars, 0L)
if (any(fsub == 0)){
#Check for misnamed columns
fsub <- grep(paste(make.names(subvars),collapse="|"), newvars)
}
if (all(fsub != 0) & length(fsub) == length(subvars)){
imat <- as.matrix(newdata[,fsub], ncol = length(fsub))
colnames(imat) <- subvars
tmpdata[[length(tmpdata)+1]] <- imat
}
else{
#Attempt to evaluate it as a function
subvars <- i
}
}
if(length(subvars)==1){ #Check if it is a function
if(fnvar[match(i, reqvars)]){#Pre-evaluate function from data
tmpdata[[length(tmpdata)+1]] <- eval(parse(text=subvars)[[1]], newdata)
}
}
names(tmpdata)[length(tmpdata)] <- paste0("solvedFN___",match(i, reqvars))
subvarloc <- match(i,lapply(attr(object$terms,"predvars"),deparse))
attr(object$terms,"predvars")[[subvarloc]] <- attr(object$terms,"variables")[[subvarloc]] <- parse(text=paste0("solvedFN___",match(i, reqvars)))[[1]]
}
}
if(rm1){
tmpdata[[1]] <- NULL
}
newdata <- tmpdata
h <- nrow(newdata)
}
if(!is.null(tspx))
{
# Always generate trend series
trend <- ifelse(is.null(origdata$trend), NCOL(origdata), max(origdata$trend)) + (1:h)
if(!missing(newdata)){
newdata <- cbind(newdata, trend)
}
else{
newdata <- datamat(trend)
}
# Always generate season series
x <- ts(1:h, start=tspx[2]+1/tspx[3], frequency=tspx[3])
season <- as.factor(cycle(x))
newdata <- cbind(newdata, season)
}
newdata <- as.data.frame(newdata)
if(!exists("oldnewdata")){
oldnewdata <- newdata
}
# If only one column, assume its name.
if(ncol(newdata)==1 & colnames(newdata)[1]=="newdata")
colnames(newdata) <- as.character(formula(object$model))[3]
# Check regressors included in newdata.
# Not working so removed for now.
#xreg <- attributes(terms(object$model))$term.labels
#if(any(!is.element(xreg,colnames(newdata))))
# stop("Predictor variables not included")
object$x <- getResponse(object)
#responsevar <- as.character(formula(object$model))[2]
#responsevar <- gsub("`","",responsevar)
#object$x <- model.frame(object$model)[,responsevar]
out <- list()
nl <- length(level)
for(i in 1:nl)
out[[i]] <- predict(object, newdata=newdata, se.fit=TRUE, interval="prediction", level=level[i]/100, ...)
if(nrow(newdata) != length(out[[1]]$fit[,1]))
stop("Variables not found in newdata")
object$terms <- oldterms
if(is.null(object$series)){ # Model produced via lm(), add series attribute
object$series <- deparse(attr(oldterms, "variables")[[1 + attr(oldterms, "response")]])
}
fcast <- list(model=object,mean=out[[1]]$fit[,1],lower=out[[1]]$fit[,2],upper=out[[1]]$fit[,3],
level=level,x=object$x,series=object$series)
fcast$method <- "Linear regression model"
fcast$newdata <- oldnewdata
fcast$residuals <- residuals(object)
fcast$fitted <- fitted(object)
if(nrow(origdata) != length(fcast$x)) # Give up on ts attributes as some data are missing
tspx <- NULL
if(length(fcast$x) != length(fcast$residuals))
tspx <- NULL
if(!is.null(tspx))
{
fcast$x <- ts(fcast$x)
fcast$residuals <- ts(fcast$residuals)
fcast$fitted <- ts(fcast$fitted)
tsp(fcast$x) <- tsp(fcast$residuals) <- tsp(fcast$fitted) <- tspx
}
if(nl > 1)
{
for(i in 2:nl)
{
fcast$lower <- cbind(fcast$lower,out[[i]]$fit[,2])
fcast$upper <- cbind(fcast$upper,out[[i]]$fit[,3])
}
}
if(!is.null(tspx))
{
fcast$mean <- ts(fcast$mean, start=tspx[2]+1/tspx[3],frequency=tspx[3])
fcast$upper <- ts(fcast$upper, start=tspx[2]+1/tspx[3],frequency=tspx[3])
fcast$lower <- ts(fcast$lower, start=tspx[2]+1/tspx[3],frequency=tspx[3])
}
if(!is.null(lambda))
{
fcast$x <- InvBoxCox(fcast$x,lambda)
fcast$mean <- InvBoxCox(fcast$mean,lambda, biasadj, fcast)
fcast$lower <- InvBoxCox(fcast$lower,lambda)
fcast$upper <- InvBoxCox(fcast$upper,lambda)
}
return(structure(fcast,class="forecast"))
}
# Compute cross-validation and information criteria from a linear model
CV <- function(obj)
{
if(!is.element("lm", class(obj)))
stop("This function is for objects of class lm")
n <- length(obj$residuals)
k <- extractAIC(obj)[1]-1 # number of predictors (constant removed)
aic <- extractAIC(obj)[2]+2 # add 2 for the variance estimate
aicc <- aic + 2*(k+2)*(k+3)/(n-k-3)
bic <- aic + (k+2)*(log(n)-2)
cv <- mean((residuals(obj)/(1-hatvalues(obj)))^2, na.rm=TRUE)
adjr2 <- summary(obj)$adj
out <- c(cv,aic,aicc,bic,adjr2)
names(out) <- c("CV","AIC","AICc","BIC","AdjR2")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.