R/tst.estimate.R

Defines functions tst.estimate

# Estimating tst.equations

# Helper functions for estimating parameters of equations to be used in a tst.model

# This is simply a wrapper fo nls, which deals with NA's and filters the data and deals with lags

# parameters are identified by a term in parenthesis followed by a number
# lags are identified by the term in parenthesis with a negative
# leads are identified by a term in parenthesis followed a plus
# Need to add R-square (follow Hendry and Juelies)

tst.estimate <- function(equation = NA, data = NA, start = NA, smpl = NA){
  
  if(is.na(equation)){
    stop("Must enter an equation")
  }
  
  if(is.na(data)){
    stop("Must enter data")
  }
  # Testing
  #equation <-  "dl_IDW_r = a0idw + a1idw*(log(IDW_r(-1))-rate_mo_r(-1)+(log(PIDW(-1))+log(CONH_r(-1)))) + (a2idw/4)*(rate_90(-1)-rate_90(-5))/100 + (a3idw/3)*log(HP_r(-1)/HP_r(-4))+a4idw*DUMQ400 + a5idw*DUMQ101 + a6idw*DUMQ201 + a7idw*DUMQ301 + dl_TREND_PROD(-1)+dl_TREND_AVEH(-1)+dl_TREND_WAP(-1)"
  
  # Add check for duplicate data
  
  if(!is.na(smpl)){
    
    data <- data[data$Date >= smpl,]
    
  }
  
  
  # Replace all lags and leads with words
  eq_string <- gsub("\\(-","_lag_",equation) 
  eq_string <- gsub("\\(\\+","_lead_",eq_string)
  
  eq_string_s <- trimws(str_split(eq_string,"[=+/*-]")[[1]])
  
  lags_leads <- eq_string_s[grepl("\\_lag_[0-9])|\\_lead_[0-9])",eq_string_s)]
  lags_leads <- gsub("\\)|log|exp|\\(","",lags_leads)
  
  params <- eq_string_s[grepl("[a]{1}[0-9]{1}[a-z]{1,10}", eq_string_s)]
  params <- gsub("\\)","",params)
  params <- gsub("\\(","",params)
  
  variables <- eq_string_s[!grepl("[a]{1}[0-9]{1}[a-z]{1,10}", eq_string_s)]
  variables <- gsub("_lag.*","",variables)
  variables <- gsub("\\)","",variables)
  variables <- gsub("\\(","",variables)
  variables <- gsub("log|exp","",variables)
  variables <- variables[grepl("[^0-9\\.]",variables)] 
  variables <- unique(variables)
  
  
  # Find the raw variable names
  
  # Filter data, create lags, create leads
  moddata <- data[,c("Date",variables)]
  

  # Add lags / leads (if applicable - not done yet)
  if(!is_empty(lags_leads)){
    
  
    for(i in 1:length(lags_leads)){
  
      
      a <- gsub("\\(","",lags_leads)
      
      v <- gsub("_lag.*","",a[i]) # Find variable to lag
      l <- as.numeric(gsub("[^0-9]","",
                           gsub(v,"",a[i]) ) )  # Find lag number
     
      moddata[[a[i]]] <- dplyr::lag(moddata[[v]],l) # Add lagged variable to data
    
      }
  
  }
  # Use 0 for starting valueparameters
  startpar <- list()
  for(i in 1:length(params)){
    
    startpar[[params[i]]] <-  1
    
  }
  
  eq_string <- gsub("\\(-","_lag_",equation)
  eq_string <- gsub("(?<=_[0-9])\\)","",eq_string, perl = TRUE) # Use lookbehind 
  
  eq_string <- gsub("=","~",eq_string)
  
  moddata <- moddata[complete.cases(moddata),]
  
  # Use nls to estimate equation
  nlsestimate <-  nls(formula = eq_string, data = moddata, start = startpar, nls.control( warnOnly = TRUE))
  
  fitplot <- tibble(Date = moddata$Date,
         Val = fitted.values(nlsestimate)+resid(nlsestimate),
         Fit =fitted.values(nlsestimate)
        ) %>% 
    ggplot()+
    geom_line(aes(Date,Val),colour = tst_colors[1])+
    geom_line(aes(Date,Fit), colour = tst_colors[2])+
    tst_theme()+
    ylab("")+
    xlab("")
  
  residplot <- tibble(Residuals = resid(nlsestimate),
                      Date = moddata$Date) %>% 
    ggplot()+
    geom_line(aes(Date,Residuals),colour = tst_colors[1])+
    tst_theme()+
    xlab("")
  
  fitplot <-  gridExtra::grid.arrange(fitplot,residplot, nrow = 2)
  
  estimate <- list(nls_output = nlsestimate,
                  equation = equation,
                  fitplot = fitplot                   
                  )
  
  print(broom::tidy(estimate$nls_output))
  
  return(estimate)
  
  
}
AdamElderfield/tst_package documentation built on Dec. 5, 2019, 2:08 a.m.