R/logistic.dma.default.R

logistic.dma.default <-
    function(x, y, models.which, lambda = 0.99, alpha = 0.99, autotune = TRUE,
             initmodelprobs = NULL, initialsamp = NULL) {
        
        #K is number of candidate models
        #T is time
        #d is total number of covariates (including intercept)
        #inputs
        #########
        #x T by (d-1) matrix of observed covariates.  Note that a column of 1's is added
        #automatically for the intercept
        #y T vector of binary responses
        #models.which K by (d-1) matrix defining models.  A 1 indicates a covariate is included
        #in a particular model, a 0 that it is excluded.  Model averaging is done over all
        #models specified in models.which
        #lambda scalar forgetting factor within each model
        #alpha scalar forgetting factor for model averaging
        #autotune T/F indicating whether or not the automatic tuning procedure described in
        #McCormick et al. should be applied.  Default is TRUE.
        #initmodelprobs K vector of starting probabilities for model averaging.  If NULL (default),
        #then 1/K is used for each model.
        #initialsamp scalar indicating how many observations to use for generating initial
        #values.  If NULL (default), then 10 percent of the observations are used.

      #how much of the sample should be used to generate initial values?
      if(is.null(initialsamp)){initialsamp<-round(nrow(x)/10,0)}
      if(initialsamp >= nrow(x)) stop("Initial sample must be smaller than the number of observations (", nrow(x), ").")
        
      # take the initial sample as training data
      trainX <- x[1:initialsamp,, drop = FALSE]
      trainY <- y[1:initialsamp]
 
      # train the dma logistic model
      mod <- logdma.init(trainX, trainY, models.which)
      
      # take the rest as test data  
      st <- initialsamp+1
      en <- dim(x)[1]
      testX <- x[st:en,, drop = FALSE]
      testY <- y[st:en]
      
      # update the model
      mod <- logdma.update(mod, testX, testY, lambda = lambda, autotune = autotune)
      
      # dynamic model averaging
      mod <- logdma.average(mod, alpha = alpha, initmodelprobs = initmodelprobs)
      
      # remove intermediate components (these were only needed while updating the model)
      mod$varcov <- NULL
      mod$laplacemodel <- NULL

      class(mod)<-"logistic.dma"
      
      mod
}
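
## Illustrative usage sketch (not run; the data below are simulated for this
## example only).  Only the calling convention is taken from the function
## above; variable names here are arbitrary.
if (FALSE) {
    set.seed(1)
    n <- 200
    x <- matrix(rnorm(n * 2), n, 2)            # T by (d-1) covariate matrix
    p <- 1 / (1 + exp(-(0.5 + x[, 1])))        # true model uses covariate 1 only
    y <- rbinom(n, 1, p)
    # three candidate models: covariate 1, covariate 2, both
    models.which <- rbind(c(1, 0), c(0, 1), c(1, 1))
    fit <- logistic.dma.default(x, y, models.which, lambda = 0.99, alpha = 0.99)
    str(fit$pmp)                               # K by T posterior model probabilities
}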

logdma.init <- function(x, y, models.which) {
    # Prepare for estimation using training data
    #K is number of candidate models
    #T is time
    #d is total number of covariates (including intercept)
    #inputs
    #x T by (d-1) matrix of observed covariates (training data only).  Note that a column of 1's is added
    #automatically for the intercept
    #y T vector of binary responses (training data only)
    #models.which K by (d-1) matrix defining models.  A 1 indicates a covariate is included
    #in a particular model, a 0 that it is excluded.  Model averaging is done over all
    #models specified in models.which
    
    K<-nrow(models.which)
    nx <- nrow(x)
    
    #set up arrays with output for each candidate model
    baytah<-array(dim=c(K, nx, (ncol(x)+1)))
    varbaytah<-array(dim=c(K, nx, (ncol(x)+1)))
    yhatmodel<-array(dim=c(K, nx))
    varcovar<-array(dim=c(K, (ncol(x)+1),(ncol(x)+1)))
    
    #dynamic logistic regression for each candidate model
    #this section could also be done in parallel
    for(mm in 1:K){
        #data matrix for model mm
        xdat<-x[,c(which(models.which[mm,]==1)), drop=FALSE]
        xdat <- cbind(rep(1,dim(xdat)[1]),xdat)
        sel.rows <- c(1,1+which(models.which[mm,]==1))
        #generate initial values using glm
        init.temp <- dlogr.init(xdat, y)
        d <- length(init.temp$BetaHat)
        baytah[mm, , sel.rows] <- matrix(rep(init.temp$BetaHat, nx), nx, d, byrow=TRUE)
        varbaytah[mm, , sel.rows] <- matrix(rep(diag(init.temp$VarBetaHat), nx), nx, d, byrow=TRUE)
        varcovar[mm, sel.rows, sel.rows] <- init.temp$VarBetaHat # stores the initial variance-covariance matrix
        yhatmodel[mm, ] <- 1/(1 + exp( - xdat %*% init.temp$BetaHat))
    }
    return(list(x=x,y=y,models=models.which,
               varcov = varcovar,  
               theta=baytah,
               vartheta=varbaytah,
               yhatmodel=yhatmodel))
}
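
## Sketch of constructing the models.which input: expand.grid enumerates all
## 2^(d-1) covariate subsets for d-1 = 3 candidate covariates; dropping the
## first row removes the intercept-only model if it is not wanted.  trainX and
## trainY are placeholders for a warm-up sample as in the wrapper above.
if (FALSE) {
    models.which <- as.matrix(expand.grid(rep(list(0:1), 3)))[-1, ]
    colnames(models.which) <- NULL
    init.fit <- logdma.init(trainX, trainY, models.which)
    dim(init.fit$theta)    # K by nrow(trainX) by ncol(trainX)+1
}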



logdma.predict <- function(fit, newx){
    models.which <- fit$models
    if(is.null(dim(newx)))
        newx <- rbind(newx) # creates a matrix with one row
    
    newx <- cbind(rep(1,dim(newx)[1]), newx)
    num.mods <- dim(fit$theta)[1]
    last <- dim(fit$theta)[2]
    
    yhat <- matrix(0, nrow=nrow(newx), ncol=nrow(models.which))
    for(mm in 1:num.mods){
        sel.rows <- c(1,1+which(models.which[mm,]==1))
        xx <- newx[,sel.rows, drop = FALSE]
        BetaHat <- cbind(fit$theta[mm,last,sel.rows]) # creates a column from a vector
        yhat[,mm] <- dlogr.predict(xx,BetaHat)
    }
    return(yhat)
}
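
## Sketch: per-model predicted probabilities for new covariate rows, using the
## most recent coefficient estimates in fit$theta.  `fit` is assumed to come
## from the first example above; column mm of the result is candidate model mm.
if (FALSE) {
    x.new <- matrix(rnorm(5 * 2), 5, 2)
    p.by.model <- logdma.predict(fit, x.new)   # 5 by K matrix
}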



logdma.update <- function(fit, newx, newy, lambda = 0.99, autotune=TRUE){
    # Update fit for new observation(s)
    #autotune T/F indicates whether or not the automatic tuning procedure described in
    #McCormick et al. should be applied.  Default is TRUE.
    #lambda scalar tuning factor within models
    models.which <- fit$models
    if(is.null(dim(newx)))
        newx <- rbind(newx) # creates a matrix with one row
    x <- rbind(fit$x,newx)
    dimnames(x) <- NULL
    y <- c(fit$y, newy)
    
    newx <- cbind(1, newx)
    last <- dim(fit$theta)[2]
    
    big.len <- nrow(fit$x) + nrow(newx)
    K<-nrow(models.which)
    L <- ncol(fit$x)+1

    # Store fit values in the arrays
    len <- dim(fit$x)[1]
    update.index <- (len+1):big.len
    
    # set up arrays with output for each candidate model
    theta<-array(dim=c(K, big.len, L))
    vartheta<-array(dim=c(K, big.len, L))
    laplacemodel<-array(dim=c(K, big.len))
    yhatmodel<-array(dim=c(K, big.len))
    varcov<-array(dim=c(K, L, L))
    
    theta[,1:len,] <- fit$theta
    vartheta[,1:len,] <- fit$vartheta
    yhatmodel[,1:len] <- fit$yhatmodel
    
    if(is.null(fit$laplacemodel)){
        laplacemodel[,1:len]<-rep(0,len)
    }else{
        laplacemodel[,1:len]<-fit$laplacemodel
    }
    
    for(mm in 1:K){
        sel.rows <- c(1,1+which(models.which[mm,]==1))
        xx <- newx[,sel.rows, drop = FALSE]
        d <- length(sel.rows)
        #matrix of possible combinations of lambda
        tune.mat<- if(autotune==FALSE) matrix(rep(lambda,d),nrow=1,ncol=d) else tunemat.fn(lambda,1,d)

        BetaHat <- cbind(fit$theta[mm,last,sel.rows]) # creates a column from a vector
        varbetahat.tm1 <- fit$varcov[mm,sel.rows,sel.rows]

        # update
        for(i in 1:nrow(newx)) {
            x.t <- xx[i,]
            y.t <- newy[i]
            step.tmp <- dlogr.step(x.t,y.t,BetaHat,varbetahat.tm1,tune.mat)
            #compute fitted value
            yhatmodel[mm, update.index[i]] <- 1/(1 + exp(- x.t%*%step.tmp$betahat.t))
            # gather output
            theta[mm, update.index[i], sel.rows] <- BetaHat <- step.tmp$betahat.t
            vartheta[mm, update.index[i], sel.rows] <- diag(step.tmp$varbetahat.t)
            laplacemodel[mm, update.index[i]] <- step.tmp$laplace.t
            varbetahat.tm1 <- step.tmp$varbetahat.t
        }
        varcov[mm, sel.rows, sel.rows] <- step.tmp$varbetahat.t
    }
    est <- fit
    # update only items that were changed here
    for(item in c("x", "y", "lambda", "theta", "vartheta", "yhatmodel", "varcov", "laplacemodel"))
        est[[item]] <- get(item)
    
    return(est)
}
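
## Sketch of a streaming workflow: initialize on a warm-up window, then pass
## each new observation to logdma.update as it arrives (newx may also be a
## multi-row matrix, as in the wrapper above).  x, y and models.which are
## assumed as in the first example.
if (FALSE) {
    fit <- logdma.init(x[1:20, , drop = FALSE], y[1:20], models.which)
    for (t in 21:nrow(x))
        fit <- logdma.update(fit, x[t, , drop = FALSE], y[t],
                             lambda = 0.99, autotune = TRUE)
}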


logdma.average <- function(fit, alpha = 0.99, initmodelprobs=NULL){
    # inputs
    #alpha scalar forgetting factor for model averaging
    #initmodelprobs K vector of starting probabilities for model averaging.  If null (default),
    #then use 1/K for each model.
    
    laplacemodel <- fit$laplacemodel
    K <- nrow(fit$models)
    x <- fit$x
    
    if(any(is.na(laplacemodel)) || any(laplacemodel==Inf) || any(laplacemodel==-Inf))
        warning("At least one laplace approximation is not well behaved. This will likely lead to issues with posterior model probabilities. This is likely a computation issue")

    #Dynamic model averaging
    #set up arrays for posterior model probabilities 
    dimnames <- list(paste("model", 1:K, sep = "_"), paste("time", 1:nrow(x), sep = "_"))
    pi.t.t <- array(dim = c(K, nrow(x)), dimnames = dimnames)
    pi.t.tm1 <- array(dim = c(K, nrow(x)), dimnames = dimnames)
    omega.tk <- array(dim = c(K, nrow(x)), dimnames = dimnames)
    
    #initial probability estimates
    #default is uniform or user-specified
    if(is.null(initmodelprobs)){
        pi.t.t[,1]<-1/K
        pi.t.tm1[,1]<-1/K
        omega.tk[,1]<-1/K
    }else{
        pi.t.t[,1]<-initmodelprobs
        pi.t.tm1[,1]<-initmodelprobs
        omega.tk[,1]<-initmodelprobs
    }
    
    #initialize model forgetting factor, alpha
    alpha.vec<-rep(NA,nrow(x))
    alpha.vec[1]<-alpha
    #can also include the option of always forgetting just a little by 
    #setting alpha.noforget<1
    alpha.noforget=1           
    #iterate over each subsequent time point
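    #Two-step recursion: first flatten the previous posterior by the forgetting
    #factor, pi.t.tm1[k,t] = pi.t.t[k,t-1]^alpha / sum(pi.t.t[,t-1]^alpha), then
    #reweight each model by its Laplace-approximated predictive likelihood,
    #omega.tk[k,t] = pi.t.tm1[k,t] * exp(laplacemodel[k,t]), and renormalize.
    #The autotuning below compares the model-averaged predictive likelihood
    #under alpha and under alpha.noforget and keeps whichever is larger.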
    for(t in 2:length(alpha.vec)){
        if(t>2){rm(alpha.tmp)}
        pred.forget<-sum(exp(laplacemodel[,t])*((pi.t.t[,t-1]^alpha)/sum(pi.t.t[,t-1]^alpha)))
        pred.noforget<-sum(exp(laplacemodel[,t])*((pi.t.t[,t-1]^alpha.noforget)/sum(pi.t.t[,t-1]^alpha.noforget)))
        alpha.vec[t]<-ifelse(pred.forget>=pred.noforget,alpha,alpha.noforget)      
        alpha.tmp<-alpha.vec[t]    
        for(k in 1:K){
            pi.t.tm1[k,t]<-(pi.t.t[k,t-1]^alpha.tmp)/sum(pi.t.t[,t-1]^alpha.tmp)
            omega.tk[k,t]<-pi.t.tm1[k,t]*max(exp(laplacemodel[k,t]), .Machine$double.xmin)
            #omega.tk[k,t]<-exp(laplace.mat[k,t])
        }
        for(k in 1:K){
            pi.t.t[k,t]<-omega.tk[k,t]/sum(omega.tk[,t])
            #if a model probability is within rounding error of zero,
            #the alg tends to get "stuck" at that value, so move 
            #slightly away to avoid this issue
            pi.t.t[k,t]<-ifelse(pi.t.t[k,t]>0,pi.t.t[k,t],.001)
            pi.t.t[k,t]<-ifelse(pi.t.t[k,t]<1,pi.t.t[k,t],.999)
        }
    }
    yhatdma=colSums(fit$yhatmodel*pi.t.t)
    
    #outputs#
    #x time by (d-1) matrix of covariates
    #y time by 1 vector of binary responses
    #models.which K by (d-1) matrix of candidate models
    #lambda scalar, tuning factor within models
    #alpha scalar, forgetting factor for model averaging 
    #autotune T/F, indicator of whether or not to use autotuning algorithm
    #alpha.used time-length vector of alpha values used
    #theta K by time by d array of dynamic logistic regression estimates for each model
    #vartheta K by time by d array of dynamic logistic regression variances for each model
    #pmp K by time array of posterior model probabilities
    #yhatdma time-length vector of model-averaged predictions
    #yhatmodel K by time matrix of fitted values for each model
    
    #define outputs
    est <- fit
    est$yhatdma <- yhatdma
    est$pmp <- pi.t.t
    est$alpha <- alpha
    est$alpha.used <- alpha.vec
    est$fitted.values <- yhatdma
    est$residuals <- fit$y - yhatdma
    
    #define as class to allow other commands later
    class(est)<-"logistic.dma"
    
    return(est)
}
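
## Sketch of the modular pipeline, equivalent to logistic.dma.default above:
## initialize on a warm-up window, update on the remainder, then average.
## x, y and models.which are assumed as in the first example.
if (FALSE) {
    fit <- logdma.init(x[1:20, , drop = FALSE], y[1:20], models.which)
    fit <- logdma.update(fit, x[21:nrow(x), , drop = FALSE], y[21:nrow(x)],
                         lambda = 0.99, autotune = TRUE)
    fit <- logdma.average(fit, alpha = 0.99)
    fit$pmp[, ncol(fit$pmp)]   # posterior model probabilities at the final time
    fit$alpha.used             # forgetting factor actually applied at each time
}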
