Nothing
#' Check if a predictor is dichotomous, adopted from package \code{circGLM}
#'
#' @param x A character or numerical vector to be tested.
#'
#' @return A logical, \code{TRUE} if the \code{x} has dummy coding (0, 1),
#' \code{FALSE} otherwise.
#'
is.dichotomous <- function(x) {
n_unique_x <- length(unique(x))
if (n_unique_x == 2) {
if (all(x == 0 | x == 1)) {
return(TRUE)
} else {
warning("A predictor might be dichotomous but not 0|1.")
}
} else if (n_unique_x > 2 & n_unique_x < 8) {
warning(
paste(
"A predictor has between 3 and 7 unique values.",
"It might be categorical with multiple categories",
"but without dummy coding."
)
)
} else if (n_unique_x == 1) {
stop("A predictor had only a single unique value.")
}
FALSE
}
#' Fitting finite quantile mixture models
#'
#' The main function for running the finite quantile mixture model. The function returns a \code{qmix} object that can be further investigated using standard functions such as \code{plot}, \code{print}, and \code{coef}. The model can be passed using a \code{formula} as in \code{lm()}. Convergence diagnotics can be performed using either \code{print(object, "mcmc")} or \code{plot(object, "mcmc")}.
#'
#' @param formula An object of class "formula" (or one that can be
#' coerced to that class): a symbolic description of the model to be fitted.
#' @param data A data frame containing the variables in the model.
#' @param nmix The number of mixture components.
#' @param design Quantile specification. Options include "fixed" and "random". The default choice is "fixed" which requires quantile inputs from the user.
#' @param q The quantile value.
#' @param nsim The number of iterations.
#' @param burnin The number of burnin iterations.
#' @param thin Thinning parameter.
#' @param CIsize The size of posterior confidence interval.
#' @param nchain The number of parallel chains.
#' @param seeds Random seeds to replicate the results.
#' @param offset Offset values to enhance sampling stability. The default value is 1e-20.
#' @param inverse_distr If FALSE, the ALD will not be reversed. The default is FALSE.
#'
#' @return A \code{qmix} object. An object of class \code{qmix} contains the following elements
#'
#' \describe{
#'
#' \item{\code{Call}}{The matched call.}
#' \item{\code{formula}}{Symbolic representation of the model.}
#' \item{\code{nmix}}{Number of mixture components. If unspecified in the fixed-quantile specification, the value equals the number of quantiles specified. Otherwise, an error will be generated for the missing value.}
#' \item{\code{design}}{Options include "fixed" and "random" for fixed- and random-quantile specification.}
#' \item{\code{q}}{Quantiles in the fixed-quantile specification.}
#' \item{\code{nsim}}{Number of iterations.}
#' \item{\code{Burnin}}{Number of burnin iterations.}
#' \item{\code{thin}}{Thinning.}
#' \item{\code{seeds}}{Random seeds for reproducibility. The default is 12345.}
#' \item{\code{CIsize}}{Size of the posterior confidence interval.}
#' \item{\code{inverse_distr}}{Indicating whether ALD should be inversed.}
#' \item{\code{offset}}{Offset to enhance stability in estimation. The default value is 1e-20.}
#' \item{\code{data}}{Data used.}
#' \item{\code{x}}{Independent variables.}
#' \item{\code{y}}{Dependent variables.}
#' \item{\code{xnames}}{Names of the independent variables.}
#' \item{\code{stanfit}}{Output from stan.}
#' \item{\code{sampledf}}{Posterior samples.}
#' \item{\code{summaryout}}{Summary of the posterior samples.}
#' \item{\code{npars}}{Number of covariates.}
#' \item{\code{ulbs}}{Upper and lower bounds based on the specified confidence interval.}
#' \item{\code{means}}{Mean estimates.}
#' \item{\code{thetas}}{Estimated proportions of each mixture component.}
#' \item{\code{binarylogic}}{Indicating whether the data contain a binary dependent variable.}
#'
#' }
#'
#' @references
#' Lu, Xiao (2019). Beyond the Average: Conditional Hypothesis Testing with Quantile Mixture. Working Paper.
#'
#' @importFrom stats coef model.frame model.matrix quantile
#'
#' @export
#'
#' @examples
#'
#' # simulate a mixture of 2 ALDs
#' k <- 2
#' N <- 50
#' # true effects: -10 and 10 respectively for two mixture components
#' beta1 <- -10
#' beta2 <- 10
#' set.seed(34324)
#' x1 <- rnorm(N,0,1)
#' x2 <- rnorm(N,0,1)
#' xb1 <- x1*beta1
#' xb2 <- x2*beta2
#' y1 <- y2 <- NA
#' # quantiles at 0.1 and 0.9
#' p1 <- 0.1
#' p2 <- 0.9
#' for (i in 1:N){
#' y1[i] <- rald(1,mu = xb1[i],p = p1,sigma = 1)
#' y2[i] <- rald(1,mu = xb2[i],p = p2,sigma = 1)
#' }
#' y <- c(y1,y2)
#' x <- c(x1,x2)
#' dat <- as.data.frame(cbind(y,x))
#' # Estimate the model using both the fixed- and random-quantile specification
#' model <- qmix(y ~ x, data = dat, nmix = 2, design = "fixed", q = c(0.1, 0.9))
#' # Summary the results
#' coef(model)
#' print(model)
#' # check traceplots
#' plot(model)
#'
qmix <- function(formula,
data,
nmix = 3,
design = "fixed",
q = NULL,
nsim = 1000,
burnin = NULL,
thin = 1,
CIsize = .95,
nchain = 1,
seeds = 12345,
offset = 1e-20,
inverse_distr = FALSE) {
if (is.null(burnin))
burnin <- floor(nsim / 2)
if (burnin < 0)
stop("Burn-in must be non-negative.")
if (thin < 1)
stop("Thinning factor must be positive.")
if (CIsize <= 0)
stop("Confidence interval size 'CIsize' must be positive.")
if (CIsize > 1)
stop(paste0("Confidence interval size 'CIsize' ",
"can not be larger than 1."))
if (missing(formula) | missing(data)) {
stop(paste0("Formula and data should be given."))
}
if (missing(nmix) &
design == "random")
stop(
"The number of mixture components nmix needs to be specified in the random-quantile design."
)
if (nmix != length(q) & design == "fixed")
stop(
"The number of mixture components in q does not match the number of specified quantiles nmix."
)
if (nmix < 2)
stop("The number of mixture components must be larger than one.")
if (nmix > 3) {
warning(
"The number of specified quantiles nmix is larger than 3. Check convergence carefully using posterior samples from multiple chains!"
)
}
if (!all(q > 0 &
q < 1))
stop("The specified quantiles are out of range. The values must be in (0,1).")
f <- Formula::Formula(formula)
data <- model.frame(f, data)
y <- c(as.matrix(model.frame(f, data)[, 1]))
x <- model.matrix(f, data)
n_covariate <- dim(x)[2]
N <- length(y)
if (length(unique(y)) == 2 &
!is.dichotomous(y))
stop("The binary dependent variable must be coded with values in {0,1}.")
binarylogic = is.dichotomous(y)
if (binarylogic == FALSE) {
if (design == "fixed") {
stanmodel <- stanmodels$qmixcf
datlist <- list(
N = N,
Y = c(y),
D = n_covariate,
X = x,
k = nmix,
p = q
)
} else {
stanmodel <- stanmodels$qmixcr
datlist <- list(
N = N,
Y = c(y),
D = n_covariate,
X = x,
k = nmix
)
}
} else {
if (design == "fixed") {
if (inverse_distr == FALSE) {
stanmodel <- stanmodels$qmixbfv
datlist <- list(
N = N,
Y = c(y),
D = n_covariate,
X = x,
k = nmix,
p = q,
offset = offset
)
} else {
stanmodel <- stanmodels$qmixbf
datlist <- list(
N = N,
Y = c(y),
D = n_covariate,
X = x,
k = nmix,
p = q,
offset = offset
)
}
} else {
if (inverse_distr == FALSE) {
stanmodel <- stanmodels$qmixbrv
datlist <- list(
N = N,
Y = c(y),
D = n_covariate,
X = x,
k = nmix,
offset = offset
)
} else {
stanmodel <- stanmodels$qmixbr
datlist <- list(
N = N,
Y = c(y),
D = n_covariate,
X = x,
k = nmix,
offset = offset
)
}
}
}
if (binarylogic == TRUE & design == "fixed") {
pars <- c("beta", "theta")
} else if (binarylogic == FALSE & design == "fixed") {
pars <- c("beta", "theta", "sigma")
} else if (binarylogic == TRUE & design == "random") {
pars <- c("beta", "theta", "p")
} else {
pars <- c("beta", "theta", "sigma", "p")
}
stanout <- sampling(
stanmodel,
data = datlist,
pars = pars,
seed = seeds,
iter = nsim,
thin = thin,
warmup = burnin,
chains = nchain
)
summaryout <- rstan::summary(stanout)$summary
if (binarylogic == TRUE & design == "fixed") {
sampledf <- as.data.frame(stanout)[, 1:(n_covariate * nmix + nmix)]
} else if (binarylogic == FALSE & design == "fixed") {
sampledf <- as.data.frame(stanout)[, 1:(n_covariate * nmix + 2 * nmix)]
} else if (binarylogic == TRUE & design == "random") {
sampledf <- as.data.frame(stanout)[, 1:(n_covariate * nmix + 2 * nmix)]
} else {
sampledf <- as.data.frame(stanout)[, 1:(n_covariate * nmix + 3 * nmix)]
}
out <- list()
class(out) <- c("qmix", class(out))
out$Call <- match.call()
out$formula <- formula
out$nmix <- nmix
out$design <- design
out$q <- q
out$nsim <- nsim
out$burnin <- burnin
out$thin <- thin
out$seeds <- seeds
out$CIsize <- CIsize
out$inverse_distr <- inverse_distr
out$offset <- offset
out$data <- data
out$x <- x
out$y <- y
out$xnames <- colnames(x)
out$stanfit <- stanout
out$sampledf <- sampledf
out$summaryout <- summaryout
out$npars <- n_covariate
out$ulbs <-
apply(sampledf, 2, quantile, probs = c((1 - CIsize) / 2, 1 - (1 - CIsize) / 2))
out$means <- apply(sampledf, 2, mean)
out$thetas <-
summaryout[(n_covariate * nmix + 1):(n_covariate * nmix + nmix), 1]
out$binarylogic <- binarylogic
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.