R/gfilinreg.R In gfilinreg: Generalized Fiducial Inference for Low-Dimensional Robust Linear Regression

Documented in gfilinreg

#' @name gfilinreg
#' @rdname gfilinreg
#' @title Fiducial sampler for linear regression model
#' @description Weighted samples of the fiducial distribution of the
#'   parameters of a linear regression model with normal, Student, Cauchy, or
#'   logistic error terms.
#'
#' @param formula two-sided formula defining the model
#' @param data dataframe containing the data
#' @param distr the distribution of the error terms, \code{"normal"},
#'   \code{"student"}, \code{"cauchy"}, or \code{"logistic"}
#' @param df degrees of freedom of the Student distribution if
#'   \code{distr = "student"}
#' @param L number of subdivisions of each axis of the hypercube
#'   \code{(0,1)^(p+1)}
#' @param Kmax maximal number of combinations of indices to use
#' @param stopifbig logical, whether to stop if the algorithm requires huge
#'   matrices
#'
#' @return A \code{gfilinreg} object, list with the fiducial samples and the
#'   weights.
#'
#' @references Jan Hannig, Randy C.S. Lai, Thomas C.M. Lee.
#'   \emph{Computational issues of generalized fiducial inference}.
#'   Computational Statistics and Data Analysis 71 (2014), 849–858.
#'   <doi:10.1016/j.csda.2013.03.003>
#'
#' @examples set.seed(666L)
#' x <- c(1, 2, 3, 4)
#' y <- x + 3 * rcauchy(4L)
#' gfi <- gfilinreg(y ~ x, distr = "cauchy", L = 30L, nthreads = 2L)
#' gfiSummary(gfi)
#'
#' @importFrom EigenR Eigen_rank
#' @importFrom stats model.matrix as.formula
#' @importFrom lazyeval f_eval_lhs f_rhs
#' @importFrom data.table CJ as.data.table rbindlist
#' @importFrom parallel detectCores
#' @export
gfilinreg <- function(
formula, data = NULL, distr = "student", df = Inf, L = 30L, Kmax = 50L,
nthreads = parallel::detectCores(), stopifbig = TRUE
){
if(inSolaris()){
}
stopifnot(Kmax >= 2)
distr <- tolower(distr)
distr <- match.arg(
distr, c("normal", "gaussian", "student", "cauchy", "logistic")
)
if(distr == "student"){
if(df == Inf){
distr <- "normal"
}else if(df == 1){
distr <- "cauchy"
}
}
y <- f_eval_lhs(formula, data = data)
X <- model.matrix(formula, data = data)
betas <- colnames(X)
X <- unname(X)
n <- nrow(X)
p <- ncol(X)
if(Eigen_rank(X) < p){
stop("Design is not of full rank.")
}
q <- p + 1L
#
if(choose(n, q) < 20000L){
goodCombs <- goodCombinations(X)
nGoodCombs <- ncol(goodCombs)
if(Kmax >= nGoodCombs){
K <- nGoodCombs
combs <- goodCombs
}else{
combs <- goodCombs[, sample.int(nGoodCombs, Kmax)]
K <- Kmax
}
}else{
message(sprintf("Sampling %d combinations of indices...", Kmax))
combs <- sampleCombinations(X, Kmax)
message("Done.")
K <- Kmax
}
#
if(!enoughRAM(q*L^q*(1+K/2))){
stop("Insufficient free RAM.")
}
#
if(stopifbig && (K * q * L^q / 2 > 8e7)){
stop(
paste0(
"The algorithm needs to deal with two big matrices ",
sprintf(
"(the biggest with %g entries: %g GB). ",
K * q * L^q / 2, K * q * L^q / 2 * 8e-9
),
"Set the option stopifbig to FALSE if you want to proceed anyway."
)
)
}
M <- ceiling(L^q / 2L)
# centers of hypercubes (volume 1/L^p)
centers <- as.matrix(
do.call(
function(...){CJ(..., sorted = FALSE)},
rep(list(seq(0, 1, length.out = L+1L)[-1L] - 1/(2*L)), q)
)
)
# algorithm
XIs <- do.call(cbind, lapply(1L:K, function(k) X[combs[, k], , drop = FALSE]))
XmIs <-
do.call(cbind, lapply(1L:K, function(k) X[-combs[, k], , drop = FALSE]))
yIs <- apply(combs, 2L, function(I) y[I])
ymIs <- rbind(apply(combs, 2L, function(I) y[-I]))
if(distr %in% c("normal", "gaussian")){
cpp <- f_normal(
centers = t(centers),
XIs = XIs, XmIs = XmIs,
yIs = yIs, ymIs = ymIs,
K = K, p = p, M = M, n = n,
)
}else if(distr == "student"){
cpp <- f_student(
centers = t(centers),
XIs = XIs, XmIs = XmIs,
yIs = yIs, ymIs = ymIs,
K = K, p = p, M = M, n = n,
nu = df,
)
}else if(distr == "cauchy"){
cpp <- f_cauchy(
centers = t(centers),
XIs = XIs, XmIs = XmIs,
yIs = yIs, ymIs = ymIs,
K = K, p = p, M = M, n = n,
)
}else if(distr == "logistic"){
cpp <- f_logistic(
centers = t(centers),
XIs = XIs, XmIs = XmIs,
yIs = yIs, ymIs = ymIs,
K = K, p = p, M = M, n = n,
)
}
#
LOGWEIGHTS <- lapply(cpp, [[, "logWeights")
THETAS <- lapply(cpp, function(output){
as.data.table(colnames<-(output[["Theta"]], c(betas, "sigma")))
})
J <- exp(do.call(c, LOGWEIGHTS))
out <- list(
Theta = rbindlist(THETAS),
weight = J/sum(J)
)
attr(out, "distr") <- distr
attr(out, "df") <- df
rhs <- as.character(f_rhs(formula))
if(rhs[1L] == "+") rhs <- rhs[-1L]
attr(out, "formula") <- as.formula(
paste0("~ ", paste0(rhs, collapse = " + "))
)
class(out) <- "gfilinreg"
out
}


Try the gfilinreg package in your browser

Any scripts or data that you put into this service are public.

gfilinreg documentation built on March 17, 2021, 1:06 a.m.