# Density of tobit model
dtobit <- function(y, xTbeta, sigma, left = -1, log = FALSE)
{
d <- y > left
if (log)
{
return(d * (dnorm((y - xTbeta)/sigma, log = TRUE) - log(sigma)) +
(1 - d) * pnorm((left - xTbeta)/sigma, log.p = TRUE))
} else
{
return((1/sigma * dnorm((y - xTbeta)/sigma))^d * pnorm((xTbeta - left)/sigma, lower.tail = FALSE)^(1-d))
}
}
# Log-likelihood of mixture of tobit
llMixtureTobit <- function(y, K, w_ik, lambda, xTbeta, sigma, n_i)
{
# A list of the K components of the log-likelihood
ll.components <- lapply(1:K, function(k){
sum(w_ik[[k]] * (log(lambda[k])/n_i +
dtobit(y = y, xTbeta = xTbeta[[k]],
sigma = sigma[k], log = TRUE)))
})
ll <- do.call(sum, ll.components)
return(ll)
}
prep.theta.optim <- function(beta, sigma, K)
{
theta.init <- lapply(1:K, function(k){
return(c(beta[[k]], sigma[k]))
})
return(theta.init)
}
recover.params <- function(theta, K)
{
beta <- lapply(1:K, function(k){
length.theta_k <- length(theta[[k]])
return(theta[[k]][-length.theta_k])
})
sigma <- lapply(1:K, function(k){
length.theta_k <- length(theta[[k]])
return(theta[[k]][length.theta_k])
})
sigma <- do.call(c, sigma)
return(list(beta = beta, sigma = sigma))
}
Q_k.tobit <- function(theta_k, y, X, delta_k, lambda.tplus1_k, n_i)
{
beta <- theta_k[-length(theta_k)]
sigma <- theta_k[length(theta_k)]
xTbeta <- X %*% beta
ll <- sum(delta_k * (log(lambda.tplus1_k)/n_i +
dtobit(y = y, xTbeta = xTbeta,
sigma = sigma, log = TRUE)))
return(ll)
}
Q_k.tobit <- cmpfun(Q_k.tobit)
EM.tobit <- function(y, start.beta, start.sigma, start.lambda, K, ll.prev, X, id.vec, n_i,
theta.lower = NULL, theta.upper = NULL, method = "L-BFGS-B", tol = 1e-5,
best.beta, best.sigma, best.lambda, best.delta = NULL, best.ll)
{
xTbeta <- lapply(start.beta, function(b){
return(X %*% b)
})
# Components of f(y | theta). That is, f(y | theta) = \sum_k^K[\lambda_k \times f_k(y | theta)]
f.y.theta.comps <- lapply(1:K, function(k){
f.y.theta_k <- data.table(aggregate(dtobit(y, xTbeta = xTbeta[[k]], sigma = start.sigma[[k]]),
by = list(id.vec), function(x){prod(x) * start.lambda[k]}))
names(f.y.theta_k) <- c("id", "f.y.theta_k")
merged <- merge(data.table("id" = id.vec), f.y.theta_k, by = "id")
return(merged$f.y.theta_k)
})
# add these vectors together element-wise to get a single vector of size nrow(X)
f.y.theta <- rowSums(do.call(cbind, f.y.theta.comps))
# Estimated probabilities of each observation belonging to group k
delta <- lapply(1:K, function(k){
f.y.theta.comps[[k]] / f.y.theta
})
deltasum <- rowSums(do.call(cbind, delta))
# Have to use all.equal because, amazingly, == does not recognize
# min(deltasum) as 1 even though it prints exactly 1.
# More here: https://www.reddit.com/r/rstats/comments/34p0bm/logical_operators_not_working_as_expected_r_says/
stopifnot(all.equal(min(deltasum), 1) && all.equal(max(deltasum), 1))
lambda.tplus1 <- sapply(delta, mean)
theta.init <- prep.theta.optim(beta = start.beta, sigma = start.sigma, K = K)
print("Trying beta...")
print(theta.init)
print("Previous lambda...")
print(start.lambda)
print(paste("Previous log-likelihood:", ll.prev))
# Create the objective function (the negative of the Q function)
# Doing this inside the EM function so that we don't need
# to pass parameters that don't change (X, K, delta, ...)
Js <- lapply(1:K, function(k){
J <- function(theta_k){-Q_k.tobit(theta_k = theta_k, y = y, X = X,
delta_k = delta[[k]],
lambda.tplus1_k = lambda.tplus1[k],
n_i = n_i)}
return(cmpfun(J))
})
# Use the first optimization routine in the vector provided by the variable "method"
current.method <- method[1]
# same thing for tolerance
current.tol <- tol[1]
print(paste("Using", current.method, "with a tolerance of", current.tol, sep = " "))
if(current.method == "Nelder-Mead")
{
optims <- lapply(1:K, function(k){
optim(theta.init[[k]], Js[[k]], method = "Nelder-Mead")
})
} else if(current.method == "L-BFGS-B" && !is.null(theta.lower) && !is.null(theta.upper))
{
optims <- lapply(1:K, function(k){
optim(theta.init[[k]], Js[[k]], method = "L-BFGS-B",
lower = theta.lower, upper = theta.upper)
})
} else
{
optims <- lapply(1:K, function(k){
optim(theta.init[[k]], Js[[k]], method = current.method)
})
}
optimal.params <- lapply(optims, function(o){
return(o$par)
})
optima <- lapply(optims, function(o){
return(o$value)
})
params <- recover.params(optimal.params, K = K)
beta.tplus1 <- params$beta
sigma.tplus1 <- params$sigma
xTbeta.tplus1 <- lapply(beta.tplus1, function(b){
return(X %*% b)
})
# Get predicted latent class for each observation
pred.latent.class <- max.col(do.call(cbind, delta))
# Create list of K vectors where the k'th vector is a vector of logicals
# indicating class membership to latent class k. This is required for the
# log-likelihood to be evaluated.
# w_ik <- lapply(1:K, function(k){
# return(pred.latent.class == k)
# })
ll <- -do.call(sum, optima)
if(ll < ll.prev)
{
warning("E-step did not increase log-likelihood!")
}
if(ll > best.ll)
{
best.ll <- ll
best.beta <- beta.tplus1
best.lambda <- lambda.tplus1
best.sigma <- sigma.tplus1
best.delta <- delta
}
print(paste("log-likelihood:", ll))
if(abs(ll/ll.prev - 1) < current.tol)
{
if(length(tol) == 1 && length(method) == 1)
{
print("converged")
return(list(beta = best.beta,
sigma = best.sigma,
lambda = best.lambda,
delta = best.delta,
ll = best.ll))
} else if (length(tol) > 1 && length(method) > 1)
{
tol <- tol[-1]
method <- method[-1]
} else
{
stop("Must provide tol and method of same length")
}
}
return(EM.tobit(y = y, start.beta = beta.tplus1, start.sigma = sigma.tplus1,
start.lambda = lambda.tplus1, K = K, id.vec = id.vec, n_i = n_i, ll.prev = ll, X = X,
method = method, theta.lower = theta.lower,
theta.upper = theta.upper, tol = tol, best.beta = best.beta, best.sigma = best.sigma,
best.lambda = best.lambda, best.delta = best.delta, best.ll = best.ll))
}
#' Perform maximum-likelihood estimation for a mixture of tobit regression models
#'
#' This function performs maximum-likelihood estimation via the E-M algorithm to obtain
#' estimates of regression coefficients in a mixture of tobit regression models.
#'
#' @importFrom stats model.matrix dnorm pnorm rnorm optim aggregate
#' @importFrom survival survreg Surv
#' @importFrom compiler cmpfun
#' @import data.table
#' @param formula a regression formula describing the relationship between the response and the covariates
#' @param data the data.frame containing the responses and covariates
#' @param K the number of mixtures (or latent classes)
#' @param start.beta a list of length K of starting values for each mixture's beta coefficients
#' @param start.sigma a vector of length K of starting values for each mixture's sigma value
#' @param start.lambda a vector of length K of starting values for the mixing proportions
#' @param id a string specifying the name of the column that identifies subjects
#' @param left a number specifying where left-censoring occurred
#' @param tol a vector of numbers specifying the tolerance(s) used to determine convergence. If length(tol) > 1, the methods used should be supplied as a vector to "method".
#' @param theta.lower a numeric vector of lower bounds for the theta parameters
#' @param theta.upper a numeric vector of upper bounds for the theta parameters
#' @param method a vector of strings specifying the optimization routine(s) to be used by optim. If length(method) > 1, optimization will be performed in succession with the tolerances provided to "tol"
#' @param beta.starting.sd a numeric for what std deviation to use for starting the EM with random betas
#' @return a list containing the following elements:\cr
#' \item{beta}{a list containing the estimated regression coefficients}
#' \item{sigma}{a vector containing the estimated values of sigma}
#' \item{lambda}{a vector containing the estimated mixing proportions}
#' \item{delta}{a list of length K containing the estimated class membership probabilities for each observation}
#' \item{ll}{the log-likelihood function evaluated at the MLE}
#' @export
mixturetobit <- function(formula, data, K = 2, start.beta = NULL,
start.sigma = NULL, start.lambda = NULL, id = NULL,
left = -1, tol = 1e-5, theta.lower = NULL,
theta.upper = NULL, method = "L-BFGS-B", beta.starting.sd = 0.5)
{
stopifnot(K >= 2)
stopifnot(length(method) == length(tol))
data <- data.table(data)
X <- model.matrix(formula, data = data)
d <- ncol(X) # dimension of beta
response.varname <- all.vars(formula)[1]
y <- data[[response.varname]]
if(is.null(start.beta) || is.null(start.sigma) || is.null(start.lambda))
{
print("Using naive tobit regression model to initialize optimization")
# create a list with K elements, each of which is a d-dimensional numeric vector
start.beta <- rep(list(numeric(d)), K)
naive.model <- survreg(Surv(y, y > left, type = "left") ~
X[, -1], dist = "gaussian")
naive.beta <- naive.model$coefficients
names(naive.beta) <- colnames(X)
start.beta <- lapply(start.beta, function(x){
return(rnorm(n = d, mean = naive.beta, sd = beta.starting.sd)) # Assign some random starting points
})
start.sigma <- rep(naive.model$scale, K) # Give same sigma to each group
start.lambda <- rep(1/K, K)
}
id.vec <- data[[id]]
n_i <- data[, n_i := .N, by = eval(id)]$n_i
MLE <- EM.tobit(y = y, start.beta = start.beta, start.sigma = start.sigma,
start.lambda = start.lambda, K = K, ll.prev = -Inf, X = X, id.vec = id.vec, n_i = n_i,
theta.lower = theta.lower, theta.upper = theta.upper, method = method, tol = tol,
best.beta = start.beta, best.sigma = start.sigma, best.lambda = start.lambda,
best.ll = -Inf)
return(MLE)
}
# Incorporate the following as an example later
#
# K=3
# formula <- tto ~ mo + sc + ua + pd + ad
# theta.lower <- c(rep(-Inf, 1 * 21), rep(1e-16, 1))
# theta.upper <- c(rep(Inf, 1 * 21), rep(Inf, 1))
# start.lambda <- rep(1/K, K)
#
# start.beta <- list(beta.tto.true[[1]] + 1, beta.tto.true[[2]], beta.tto.true[[3]])
# start.sigma <- sigma.tto + c(0.1, -0.1, 0)
#
# # start.beta <- beta.tto.true
# # start.sigma <- sigma.tto
# set.seed(75)
# system.time(MLE.KSO <- mixturetobit(formula, data = data, K = K, start.beta = start.beta,
# start.sigma = start.sigma, start.lambda = start.lambda, id = "SubjectID",
# theta.lower = theta.lower, theta.upper = theta.upper, method = "L-BFGS-B", tol = 1e-8))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.