#' Run partial proportional odds models for ordinal outcomes
#'
#' This function runs partial proportional odds models for ordinal outcomes.
#'
#' @param y.name A character vector specifying the name of the variable to be modeled.
#' @param in.data The input data object of type data frame or matrix.
#' @param prop.odds.formula An optional formula specifying the predictor variables assumed to have proportional odds across levels of y. At least one of prop.odds.formula and non.prop.odds.formula must be specified.
#' @param beta.prop.odds.start A vector of starting values for proportional odds betas. This should only be specified in conjunction with prop.odds.formula.
#' @param non.prop.odds.formula An optional formula specifying the predictor variables assumed not to have proportional odds across levels of y. At least one of prop.odds.formula and non.prop.odds.formula must be specified.
#' @param beta.non.prop.odds.start A matrix of starting values for non proportional odds betas. This should only be specified in conjunction with non.prop.odds.formula. Columns correspond to the j-1 bottom levels of the outcome variable y, rows correspond to variables.
#' @param method A character specifying the optimization method to be used by package optimx in maximizing the log likelihood. Defaults to BFGS.
#' @param int.vec.scale A tuning parameter used to adjust the starting values for the intercepts. Defaults to 5.
#' @param itnmax An optional scalar specifying the iteration limit used in maximizing the log likelihood. Defaults to the default optimx value for the given method.
#' @param seed A vector of length 2 specifying the seed used to generate starting values for model coefficients, if not user specified. Defaults to c(14, 15).
#'
#' @return A list of class partial.prop.odds
#' \item{y.name}{A character vector specifying the model outcome.}
#' \item{y.levels}{The ordered levels of the model outcome.}
#' \item{prop.odds.formula}{The formula used for the proportional odds betas.}
#' \item{non.prop.odds.formula}{The formula used for the non-proportional odds betas.}
#' \item{log.lik}{The log-likelihood of the fitted model.}
#' \item{conv.code}{The convergence code from optimx.}
#' \item{intercepts}{The fitted model intercepts}
#' \item{beta.hat.prop.odds}{A vector of the estimated proportional odds coefficients, if specified.}
#' \item{beta.hat.non.prop.odds}{A matrix of the estimated non-proportional odds coefficients, where the j-1 columns correspond to the j-1 bottom levels of y, and the rows are betas.}
#' \item{est.probs}{The fitted probabilities of each level of y for each subject. Rows are subjects, columns are levels of y.}
#'
#' @examples
#'
#' data(red_train)
#' starts <- coef(lm(quality ~ alcohol+ pH + volatile.acidity, data = red_train))
#' training.result <- partial.prop.odds.mod(y ="quality", in.data = red_train,
#' prop.odds.formula = ~ alcohol + pH,
#' beta.prop.odds.start = starts[2:3],
#' non.prop.odds.formula = ~ volatile.acidity,
#' beta.non.prop.odds.start = matrix(rep(starts[4], 5), nrow = 1),
#' method = "BFGS",
#' seed = c(14, 15), itnmax = 1000)
#'
#' @export
partial.prop.odds.mod <- function(y.name, in.data, prop.odds.formula = NULL, beta.prop.odds.start = NULL,
non.prop.odds.formula = NULL, beta.non.prop.odds.start = NULL,
method = "BFGS", int.vec.scale = 5, itnmax = NULL, seed = c(14, 15)){
#################################################################################3
#Arguments
#
# y.name: a character specifying the name of the y variable to be modeled.
# in.data: the input data object
# prop.odds.formula: an optional formula specifying the predictor variables assumed to have
# proportional odds across levels of y. At least one of prop.odds.formula
# and non.prop.odds.formula must be specified.
# beta.prop.odds.start: an optional vector of starting values for the non-proportional odds betas
# non.prop.odds.formula: an optional forumula specifying the predictor variables assumed to have
# non-proportional odds across levels of y. At least one of prop.odds.formula
# and non.prop.odds.formula must be specified.
# beta.non.prop.odds.start: an optional matrix of starting values for non-proportional odds betas
# method: The method used to maximize the log-likelihood (in package optimx). Defaults to "BFGS"
# int.vec.scale: tuning parameter used to adjust the starting values for the intercepts. Defaults to 5.
# seed: the seed used to generate the starting values for the iterative maximization procedure
#
#################################################################################
#make sure we have at least one of prop.odds.formula and non.prop.odds.formula specified
if (is.null(prop.odds.formula) & is.null(non.prop.odds.formula)){
stop("At least one of prop.odds.formula or non.prop.odds.formula must be specified.")
}
#get outcome vector
y <- in.data[ , y.name]
#get levels of y
y.levels <- sort(unique(y))
n.ylevels <- length(y.levels)
#get design matrix for proportional odds predictors
if (!is.null(prop.odds.formula)){
x.prop.odds <- model.matrix(prop.odds.formula, in.data)
#get rid of intercept, we will specify these separately
x.prop.odds <- x.prop.odds[ , ! colnames(x.prop.odds) == "(Intercept)", drop = F]
#also need starting values if not speciified
if (! is.null(beta.prop.odds.start)){
beta.prop.odds <- beta.prop.odds.start
} else {
#just using random uniform draws
n.prop.betas <- ncol(x.prop.odds)
set.seed(seed[1])
beta.prop.odds <- runif(n.prop.betas)
}
} else{
x.prop.odds <- NULL
beta.prop.odds <- NULL
}
#get design matrix for non-proportional odds predictors
if (!is.null(non.prop.odds.formula)){
x.non.prop.odds <- model.matrix(non.prop.odds.formula, in.data)
#get rid of intercept, we will specify these separately
x.non.prop.odds <- x.non.prop.odds[ , ! colnames(x.non.prop.odds) == "(Intercept)", drop = F]
n.non.prop.preds <- ncol(x.non.prop.odds)
#also need starting values if not specified
if (! is.null(beta.non.prop.odds.start)){
beta.non.prop.odds <- beta.non.prop.odds.start
} else {
#just using random uniform draws
non.prop.betas <- n.non.prop.preds*(n.ylevels - 1)
set.seed(seed[2])
beta.non.prop.odds <- matrix(runif(non.prop.betas), byrow = F, nrow = n.non.prop.preds)
}
} else{
x.non.prop.odds <- NULL
beta.non.prop.odds <- NULL
}
#starting value for intercepts
#make intercepts start proportional to the level of y
cat.probs <- as.vector(by(in.data, in.data$quality, function(x) nrow(x))) / nrow(in.data)
int.vector <- log(int.vec.scale*cumsum(cat.probs[1:(n.ylevels - 1)]))
#maxmize parameter estimates
optim.result <- maximize.partial.prop.odds.ll(y = y, y.levels = y.levels, in.data = in.data, int.vector = int.vector, method = method,
x.prop.odds = x.prop.odds, x.non.prop.odds = x.non.prop.odds, beta.prop.odds = beta.prop.odds,
beta.non.prop.odds = beta.non.prop.odds, itnmax = itnmax)
#pick out the appropriate peices of the output
intercepts <- unlist(optim.result[1:length(int.vector)])
ll <- optim.result$value
conv.code <- optim.result$convcode
if(conv.code != 0){
warning("log-likelihood maximization did not converge")
}
#start putting together results
results.list <- list(y.name = y.name, y.levels = y.levels, prop.odds.formula = prop.odds.formula,
non.prop.odds.formula = non.prop.odds.formula,
log.lik = ll, conv.code = conv.code, intercepts = intercepts)
#beta's for proportional odds predictors
if (! is.null(beta.prop.odds)){
beta.hat.prop.odds <- unlist(optim.result[(length(int.vector) + 1): (length(int.vector) + length(beta.prop.odds))])
results.list$beta.hat.prop.odds <- beta.hat.prop.odds
#also store result for calculating probabilites of different categories
xb.prop.odds <- x.prop.odds %*% beta.hat.prop.odds
}
#betas for non-proportional odds predictors
if (! is.null(beta.non.prop.odds)){
beta.hat.non.prop.odds <- unlist(optim.result[(length(int.vector) + length(beta.prop.odds)+1):
(length(int.vector) + length(beta.prop.odds) + length(beta.non.prop.odds))])
beta.hat.non.prop.odds.mat <- matrix(beta.hat.non.prop.odds, nrow = n.non.prop.preds, byrow = F)
results.list$beta.hat.non.prop.odds <- beta.hat.non.prop.odds.mat
#again store result for calculating estimated probabilities of each category
xb.non.prop.odds <- x.non.prop.odds %*% beta.hat.non.prop.odds.mat
}
#estimated probabilities for each category of the outcome
top.minus1.level <- length(y.levels) - 1
top.level <- length(y.levels)
if (! is.null(beta.prop.odds) & ! is.null(beta.non.prop.odds)){
top.level.prob <- 1 - plogis(intercepts[top.minus1.level] + xb.prop.odds + xb.non.prop.odds[ , top.minus1.level])
bottom.level.prob <- plogis(intercepts[1] + xb.prop.odds + xb.non.prop.odds[ , 1])
middle.levels <- sapply(2:top.minus1.level, function(mid.level){
plogis(intercepts[mid.level] + xb.prop.odds + xb.non.prop.odds[ , mid.level]) -
plogis(intercepts[mid.level - 1] + xb.prop.odds + xb.non.prop.odds[ , mid.level - 1])
})
} else if (! is.null(beta.prop.odds)){
top.level.prob <- 1 - plogis(intercepts[top.minus1.level] + xb.prop.odds)
bottom.level.prob <- plogis(intercepts[1] + xb.prop.odds)
middle.levels <- sapply(2:top.minus1.level, function(mid.level){
plogis(intercepts[mid.level] + xb.prop.odds) - plogis(intercepts[mid.level - 1] + xb.prop.odds)
})
} else if (! is.null(beta.non.prop.odds)){
top.level.prob <- 1 - plogis(intercepts[top.minus1.level] + xb.non.prop.odds[ , top.minus1.level])
bottom.level.prob <- plogis(intercepts[1] + xb.non.prop.odds[ , 1])
middle.levels <- sapply(2:top.minus1.level, function(mid.level){
plogis(intercepts[mid.level] + xb.non.prop.odds[ , mid.level]) -
plogis(intercepts[mid.level - 1] + xb.non.prop.odds[ , mid.level - 1])
})
}
probs <- cbind(bottom.level.prob, middle.levels, top.level.prob)
colnames(probs) <- y.levels
#check for negative probabilities
if (any(probs <= 0)){
stop("Model did not converge and has estimated negative or zero probabilities")
}
results.list$est.probs <- probs
class(results.list) <- "partial.prop.odds"
return(results.list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.