R/ModelHistory.R

Defines functions ModelHistory

Documented in ModelHistory

#' ModelHistory
#' 
#' This function takes an aggregate dataset, the type of model ("TREE","GLM") and the number of days you'd like to predict backwards
#' 
#' @param data The aggregate dataset to be repeatedly split and re-trained on each day.
#' @param model The type of model to run (a) "TREE" or (b) "GLM"
#' @param n The number of days back you'd like to predict for.
#' @param options (a) "NoTrial" returns OGstrWLtree, (b) "7Trials" returns strWLtree w/7 trials, (c) "sprWL" depending on model arguement either TREEsprWL or GLMsprWL, (d) "OGglm" returns ogGLM strWL.
#' 
#' 
#' @export


ModelHistory <- function(data,model,n,options){

  all_dates <- unique(data$Date)
  last <- length(all_dates)
  comb <- list()
  
  for(i in 1:n){
    lastD <- all_dates[last-(i-1)]
    
    test <- data[which(data$Date==lastD),]
    
    ind <- as.Date(data$Date,format="%m/%d/%y") < as.Date(lastD,format="%m/%d/%y")
    
    train <- data[ind,]
    
    
    if(options=="NoTrial"){
    c5_model <- C50::C5.0(x = train[,c("ML","SP","MLTR","SPTR","PBR","SPTDR","MLTDR","H1A0")],
                          y = train$strWL,
                          control = C50::C5.0Control(minCases = 1))
    } else if(options=="7Trials"){
      c5_model <- C50::C5.0(x = train[,c("ML","SP","MLTR","SPTR","PBR","SPTDR","MLTDR","H1A0")],
                            y = train$strWL,
                            trials = 7,
                            control = C50::C5.0Control(minCases = 1))
      
    } else if(options=="sprWL"){
      
      c5_model <- C50::C5.0(x = train[,c("ML","SP","MLTR","SPTR","PBR","SPTDR","MLTDR","H1A0")],
                            y = train$sprWL,
                            control = C50::C5.0Control(minCases = 1))
      GLM_model <- glm(sprWL~SP+SPTR*D0F1+H1A0+MLTR:D0F1+ML:SPTR+PBR,data = train,family=binomial)
    } else if(options=="OGglm"){
      GLM_model <- glm(strWL~SP+SPTR*D0F1+H1A0+MLTR:D0F1+ML:SPTR+PBR,data = train,family=binomial) 
      
    }
    
    
    
    
    
    
    
    if(model=="TREE"){
      
      trp <- stats::predict(c5_model,test,type='prob')
      trpc <- stats::predict(c5_model,test,type='class')
      
      trp <- data.frame(trp)
      colnames(trp) <- c("PrL","PrW")
      
      treeout <- data.frame(Date=test$Date,Team=test$Team,PrL=trp$PrL,PrW=trp$PrW,PrWL=trpc,TrWL=test$strWL)
      treeout[3:4] <- round(treeout[3:4],digits = 3)
      cols <- colnames(treeout)
      out <- rbind(cols,treeout)
      comb[[i]] <- out
      
    } else if(model=="GLM"){
      
      z <- qnorm(.99)    
      preds <- stats::predict(GLM_model,test,type='response',se.fit=TRUE)
      
      lower <- preds$fit - z*preds$se.fit
      upper <- preds$fit + z*preds$se.fit
      
            glmout <- data.frame(Date=test$Date,Team=test$Team,Lo=lower,Avg=preds$fit,Hi=upper,PrWL=ifelse(preds$fit>.5,"1","0"),TrWL=test$strWL)
            glmout[3:5] <- round(glmout[3:5],digits = 3)
            
            
            cols <- colnames(glmout)
            out <- rbind(cols,glmout)
            comb[[i]] <- out
        }
  }
  
  all <- do.call(rbind,comb)
  
  return(all)
}
dennist2/MerlinV1 documentation built on Dec. 11, 2019, 8:41 p.m.