R/predict.R

Defines functions predict.dalmatian

Documented in predict.dalmatian

##' Prediction method for dalmatian objects
##'
##' @param object Object of class \code{dalmatian} created by \code{dalmatian()}.
##' @param newdata data frame containing predictor values to predict response variables. Defaults to data in \code{object} if not supplied. (data.frame)
##' @param method Method to construct the fitted model. Either posterior mean (\code{"mean"}) or posterior mode (\code{"mode"}) (character)
##' @param population If TRUE then generate predictions at the population level rather than the individual level. (logical)
##' @param se if TRUE return the posterior standard deviation (logical)
##' @param ci returning credible intervals for predictions if TRUE (logical)
##' @param type  The type of prediction required (as in \code{predict()} for
##' models generated by \code{glm()}).  The default is on the scale of the
##' linear predictors; the alternative "response" is on the scale of the
##' response variable. E.g., if the link between the mean and its linear
##' predictor is the logit function then the default prediction for the mean
##' will be on the scale of the log-odds. If the link between the mean and
##' its linear predictor is the log function then the defaults prediction
##' will be on the scale of the log.
##' @param level vector of levels of credible intervals for predictions (numeric)
##' @param ... Ignored
##' @return predictions (list)
##' @export
predict.dalmatian <- function(object,
                              newdata=object$df,
                              method = "mean",
                              population = FALSE,
                              se = TRUE,
                              ci = TRUE,
                              type = c("link","response"),
                              level = c(0.50,0.95),
                              ...) {

  
  ## Choose scale of the response
  type <- match.arg(type)
  
	#########################
	## PART 1: WRONG CASES ##
	#########################
  
	# labels for FIXED effects in mean model and dispersion model
	mean.fixed.label <- all.vars(object$mean.model$fixed$formula)
	var.fixed.label <- all.vars(object$dispersion.model$fixed$formula)

	# labels for RANDOM effects in mean model and dispersion model
	if (!is.null(object$mean.model$random)) { # mean model
		mean.random.label <- all.vars(object$mean.model$random$formula)
	} else {
		mean.random.label <- NULL
	}

	if (!is.null(object$dispersion.model$random)) { # dispersion model
		var.random.label <- all.vars(object$dispersion.model$random$formula)
	} else {
		var.random.label <- NULL
	}

	# combine all variables names
	all.label <- c(mean.fixed.label, mean.random.label, var.fixed.label, var.random.label)
	all.label <- unique(all.label)

	### CHECK IF "newdata" INCLUDES ALL REQUIRED VARIABLES ###
  check.names <- all.label %in% names(newdata)
  
	if (all(check.names == TRUE) == FALSE) {
          stop("Newdata does not include all required variables. Check variable names in newdata.\n",
               "Missing variables: ",
               all.label[which(check.names == FALSE)])
	}

	### CHECK IF "method" is entered correctly
	if ((method != "mean") && (method != "mode")) {
		stop("method should be either 'mean' or 'mode'.")
	}

### CHECK IF "se" is entered correctly
  if (!is.logical(se)){
    stop("se should be a logical value: TRUE or FALSE.")
  }
  if(se & type == "response"){
    warning("Computation of posterior standard deviations is only implemented for predictions on the scale of the linear predictor (type = \"link\"). Standard deviations will not be computed.")

    se <- FALSE
  }

### CHECK IF "ci" is entered correctly
	if (!is.logical(ci)) {
		stop("ci should be a logical value: TRUE or FALSE.")
	}

	### CHECK IF "level" is entered correctly
	if (any(level < 0) || any(level > 1)) {
		stop("level)s) for credible intervals should be a real numbers between 0 and 1.")
	}

### CHECK IF "type" is entered correctly
  if(!type %in% c("link","response")){
    stop("type must either be link (prediction on the scale of the linear predictor) or response (prediction on the scale of the response.")
  }
  
### CHECK for random effects
  if(!population){
    for (ranName in unique(c(mean.random.label,var.random.label))) {
      ## Check if all levels in newdata exist in original data
      check.re <- all(levels(newdata[,ranName]) %in%
                      levels(object$df[,ranName]))
      
      if(check.re){
        ## Relabel levels of newdata to match original data
        newdata[,ranName] <- factor(newdata[,ranName],
                                    levels=levels(object$df[,ranName]))
      }
      else{
        stop(paste("The random effect",ranName,"contains levels",
                   "not present in the original data. This is ",
                   "not yet supported."))
      }
    }
  }
  
	####################################
	## PART 2: CREATE DESIGN MATRICES ##
	####################################

	# for fixed effects in mean and dispersion models
	mean.fixed.designMat <- model.matrix(object$mean.model$fixed$formula, newdata)
	var.fixed.designMat <- model.matrix(object$dispersion.model$fixed$formula, newdata)
  
	# for random effects in mean and dispersion models
	if (!is.null(object$mean.model$random)) { # mean model
          mean.random.designMat <- model.matrix(object$mean.model$random$formula,
                                                data = model.frame(object$mean.model$random$formula,
                                                                   data = newdata,
                                                                   na.action = "na.pass"))
          
		mean.random.designMat[is.na(mean.random.designMat)] <- 0
	} else {
		mean.random.designMat <- NULL
	}

	if (!is.null(object$dispersion.model$random)) { # dispersion model
          var.random.designMat <- model.matrix(object$dispersion.model$random$formula,
                                               data = model.frame(object$dispersion.model$random$formula,
                                                                  data = newdata,
                                                                  na.action = "na.pass"))
          
		var.random.designMat[is.na(var.random.designMat)] <- 0
	} else {
		var.random.designMat <- NULL
	}

	#######################################################
	## PART 3: RE-ARRANGE ESTIMATES CREATED BY dalmatian ##
	#######################################################
  
  ## combine all chains first
  all.chains <- do.call(rbind, object$coda)

  ## coefficients for FIXED effects in MEAN model
  pars <- paste(object$mean.model$fixed$name,
                colnames(mean.fixed.designMat), sep = ".")
  mean.fixed.coef <- all.chains[,pars]

  ## coefficients for RANDOM effects in MEAN model
  if (!is.null(mean.random.designMat)) {
    pars <- paste(object$mean.model$random$name,
                  colnames(mean.random.designMat), sep = ".")
    mean.random.coef <- all.chains[,pars]
  }

  ## coefficients for FIXED effects in DISPERSION model
  pars <- paste(object$dispersion.model$fixed$name,
                colnames(var.fixed.designMat), sep = ".")
  var.fixed.coef <- all.chains[,pars]
  
  ## ## DISPERSION PARAMETER for RANDOM effects in MEAN model
  ## if (!is.null(mean.random.designMat)) {
  ##   mean.disper <- all.chains[,cur.index]
  ## }

	# DISPERSION PARAMETER and coefficients for RANDOM effects in DISPERSION model
	if (!is.null(var.random.designMat)) {
          pars <- paste(object$dispersion.model$random$name,
                  colnames(var.random.designMat), sep = ".")
          var.random.coef <- all.chains[,pars]
	}
	########################################################################################
	## PART 4.2: PREDICTIONS FOR MEAN AND DISPERSION MODEL WITH MEAN (OR MODE) OF ESTIMATES ##
	########################################################################################

	### mean model predictions
	mean.pred <- mean.fixed.designMat %*% t(mean.fixed.coef)
	
	if (!is.null(mean.random.designMat)) {
	  
	  mean.random.pred <- mean.random.designMat %*% t(mean.random.coef)
	  mean.pred <- mean.pred + mean.random.pred
	  
	} 
	
	### dispersion model predictions
  var.pred <- var.fixed.designMat %*% t(var.fixed.coef)
	
	if (!is.null(var.random.designMat)) {
	  
	  var.random.pred <- var.random.designMat %*% t(var.random.coef)
	  var.pred <- var.pred + var.random.pred
	  
	} 
	
	if (method != "mean") { # if method == "mode"

		### get POSTERIOR MODES for predictions

		# mean model
		est.mean.pred <- apply(mean.pred, 1, function(vec) density(vec)$x[which.max(density(vec)$y)])

		## if (!is.null(mean.random.designMat)) {
		## 	est.mean.disper <- density(mean.disper)$x[which.max(density(mean.disper)$y)]
		## }

		# dispersion model
		est.var.pred <- apply(var.pred, 1, function(vec) density(vec)$x[which.max(density(vec)$y)])

		## if (!is.null(var.random.designMat)) {
		## 	est.var.disper <- density(var.disper)$x[which.max(density(var.disper)$y)]
		## }

	} else {

		### get POSTERIOR MEANS for predictions

		# mean model
		est.mean.pred <- apply(mean.pred, 1, function(vec) mean(vec))

		## if (!is.null(mean.random.designMat)) {
		## 	est.mean.disper <- mean(mean.disper)
		## }

		# dispersion model
		est.var.pred <- apply(var.pred, 1, function(vec) mean(vec))

		## if (!is.null(var.random.designMat)) {
		## 	est.var.disper <- mean(var.disper)
		## }

	}

	##################################################
	## PART 4.3: STANDARD ERRORS FOR PREDICTIONS ##
	##################################################

  if (se) {
    ## Compute posterior standard deviation

    ## Mean model
    se.mean.pred <- apply(mean.pred,1,sd)

    ## Dispersion model
    se.var.pred <- apply(var.pred,1,sd)
  }

        ##################################################
	## PART 4.4: CREDIBLE INTERVALS FOR PREDICTIONS ##
	##################################################

	if (ci) { # if ci == TRUE
          
### get CREDIBLE INTERVALS

                                        # mean model
		ci.mean.pred <- apply(mean.pred, 1, function(vec) quantile(vec, c( (1-level)/2, 1-(1 - level)/2 )))

		## if (!is.null(mean.random.designMat)) {
		## 	ci.mean.disper <- quantile(mean.disper, c( (1-level)/2, 1-(1 - level)/2 ))
		## }

		# dispersion model
		ci.var.pred <- apply(var.pred, 1, function(vec) quantile(vec, c( (1-level)/2, 1-(1 - level)/2 )))

		## if (!is.null(var.random.designMat)) {
		## 	ci.var.disper <- quantile(var.disper, c( (1-level)/2, 1-(1 - level)/2 ))
		## }
	}

	########################################
	## PART 5: CREATE A LIST TO BE RETURN ##
	########################################

	mean.pred <- data.frame(Predicted = est.mean.pred) # for mean model
	var.pred <- data.frame(Predicted = est.var.pred) # for dispersion model

  if (se){
    mean.pred$se <- se.mean.pred
    var.pred$se <- se.var.pred
  }
  
	if (ci) {
          mean.pred <- cbind(mean.pred, t(ci.mean.pred))

          var.pred <- cbind(var.pred, t(ci.var.pred))
	}

	########################################
	## PART 6: Back Transform             ##
	########################################

  if(type == "response"){
	if(!is.null(object$mean.model$link)){
	  mean.pred = switch(object$mean.model$link,
	                     "log"=exp(mean.pred),
	                     "logit"=(1+exp(-mean.pred))^-1,
	                     "sqrt"=mean.pred^2)
	}
	
	if(!is.null(object$dispersion.model$link)){
	  var.pred = switch(object$dispersion.model$link,
	                     "log"=exp(var.pred),
	                     "logit"=(1+exp(-var.pred))^-1,
	                     "sqrt"=var.pred^2)
	}
  }
	## Return output list
  list(mean=cbind(newdata,mean.pred),
       dispersion=cbind(newdata,var.pred))
}

Try the dalmatian package in your browser

Any scripts or data that you put into this service are public.

dalmatian documentation built on Nov. 23, 2021, 1:08 a.m.