R/ModelHistory.R

Defines functions ModelHistory

Documented in ModelHistory

#' ModelHistory
#' 
#' This function takes an overall dataset, a model type option and returns unseen predictions for the past n days.
#' 
#' @param data The total dataset to be split on each day
#' @param model An option for the type of model, (1) "TREE" does CART and (2) "GLM" does LogReg
#' @param n The number of days back to calculate unseed predictions on
#' @param coln If set to TRUE returns line of columns per day, if FALSE simply all predictions in one
#' 
#' @export

ModelHistory <- function(data,model,n,coln){
  
  
  dates <- unique(data$Date)
  last <- length(dates)
  comb <- list()

  
  for(i in 1:n){
    
    latest <- dates[last-(i-1)]
    
    test <- data[which(data$Date==latest),]
    
    dex <- as.Date(data$Date,format="%m/%d/%y") < as.Date(latest,format="%m/%d/%y")
    
    train <- data[dex,]
    
    c5_model <- C50::C5.0(x = train[,c("ML","SP","MLTR","SPTR","PBR","SPTDR","MLTDR","H1A0")],
                          y = train$strWL,
                          control = C50::C5.0Control(minCases = 1))
    
    
    GLM_model <- glm(strWL~SP+SPTR*D0F1+H1A0+MLTR:H1A0+ML:MLTDR+PBR,data = train,family=binomial)
    
    if(model=="TREE"){
      
      trp <- stats::predict(c5_model,test,type='prob')
      trpc <- stats::predict(c5_model,test,type='class')
      
      trp <- data.frame(trp)
      colnames(trp) <- c("PrL","PrW")
      print("before DF")
      treeout <- data.frame(Date=test$Date,Team=test$Team,PrL=trp$PrL,PrW=trp$PrW,PrWL=trpc,TrWL=test$strWL)
      treeout[3:4] <- round(treeout[3:4],digits = 3)
      
      if(coln==FALSE){
        comb[[i]] <- treeout
        
      } else if(coln==TRUE){
        
        c <- colnames(treeout)
        out <- rbind(c,treeout)
        comb[[i]] <- out
        
      }
    
    } else if(model=="GLM"){
      
      z <- qnorm(.99)    
      preds <- stats::predict(GLM_model,test,type='response',se.fit=TRUE)
      
      lower <- preds$fit - z*preds$se.fit
      upper <- preds$fit + z*preds$se.fit
      
      glmout <- data.frame(Date=test$Date,Team=test$Team,Lo=lower,Avg=preds$fit,Hi=upper,PrWL=ifelse(preds$fit>.5,"1","0"),TrWL=test$strWL)
      glmout[3:5] <- round(glmout[3:5],digits = 3)
      
      
      
      if(coln==FALSE){
        
            comb[[i]] <- glmout  
            } else if(coln==TRUE){
              c <- colnames(glmout)
              out <- rbind(c,glmout)
              comb[[i]] <- out
              
              
            }
            
          }
  }

  all <- do.call(rbind,comb)
  
  return(all)
}
dennist2/MerlinV2 documentation built on Dec. 23, 2019, 6:36 p.m.