#' Stacked species regression models, possibly fitted in parallel
#'
#' @param y A matrix of species responses
#' @param formula_X An object of class \code{formula} representing the relationship to the covariates to be fitted. There should be nothing to the left hand side of the "~" sign.
#' @param data Data frame of the covariates
#' @param family Either a single character vector, in which case all responses are assumed to be from this family, or a vector of character strings of the same length as the number of columns of y. Families as strings and not actual \code{family} class objects. This could be changed though if desired in the future e.g., for custom link functions. Currently, the following families are supported (hopefully properly!): "gaussian", "negative.binomial" (with quadratic mean-variance relationship), "poisson", "binomial" (with logit link), "tweedie", "Gamma" (with log link), "exponential", "beta" (with logit link), "ordinal" (cumulative logit model), "ztpoisson", "ztnegative.binomial", "zipoisson", "zinegative.binomial".
#' @param trial_size The trial size if any of the responses are binomial. Is either a single number or a matrix with the same dimension as y. If the latter, then all columns that do not correspond to binomial responses are ignored.
#' @param do_parallel Do the separate species model fits in parallel? Defaults to \code{TRUE}
#' @param ncores The number of cores to use if separate the species model fits are done in parallel. If \code{do_parallel = TRUE}, then it defaults to \code{detectCores() - 2}
#' @param trace Print information. This is not actually used currently
#' @section Details:
#' \code{stackedsdm} behaves somewhat like the \code{manyglm} or \code{manyany} function in the package \code{\link{mvabund}}, in the sense that it fits a separate regression to each species response i.e., column of \code{y}. The main difference is that different families can be permitted for each species, which thus allows for mixed responses types.
#' @return A object of class \code{stackedsdm} with the following components:
#' \code{call} The function call;
#' \code{fits} A list where the j-th element corresponds to the to the fitted model for species j i.e., the j-th column in \code{y};
#; \code{y, formula_X, data, family, trial_size} As per the arguments;
#' \code{linear_predictor} A matrix of the fitted linear predictors
#' \code{fitted} A matrix of the fitted values
#' @section Author(s):
#' Francis K.C. Hui <francis.hui@anu.edu.au>.
#' @import glm2
#' @import mgcv
#' @import mvabund
#' @importFrom betareg betareg
#' @import ordinal
#' @import compiler
#' @import doParallel
#' @import foreach
#' @import stats
#' @export
#' @examples
#' data(spider)
#' X <- spider$x
#' abund <- spider$abund
#'
#' # Example 1: Simple example
#' myfamily <- "negative.binomial"
#' # Example 1: Funkier example where Species are assumed to have different distributions
#' # Fit models including all covariates are linear terms, but exclude for bare sand
#' fit0 <- stackedsdm(abund, formula_X = ~. -bare.sand, data = X, family = myfamily, ncores = 2)
#'
#' # Example 2: Funkier example where Species are assumed to have different distributions
#' abund[,1:3] <- (abund[,1:3]>0)*1 # First three columns for presence absence
#' myfamily <- c(rep(c("binomial"), 3),
#' rep(c("negative.binomial"), (ncol(abund)-3)))
#' fit0 <- stackedsdm(abund, formula_X = ~ bare.sand, data = X, family = myfamily, ncores = 2)
stackedsdm <- function(y, formula_X= ~1, data=NULL, family="negative.binomial",
trial_size = 1, do_parallel = FALSE,
ncores = NULL, trace = FALSE) {
tol <- 1e-8
y <- as.matrix(y)
if(is.null(colnames(y)))
colnames(y) <- paste0("resp", 1:ncol(y))
if(is.null(rownames(y)))
rownames(y) <- paste0("units", 1:nrow(y))
num_units <- nrow(y)
num_spp <- ncol(y)
formula_X <- check_formula_X(formula_X, data = data)
family <- check_family(family = family, y = y)
if(is.matrix(trial_size)) {
if(nrow(trial_size) != num_units | ncol(trial_size) != num_spp)
stop("trial_size should either be a scalar, or a matrix of the same as y. If the latter, then columns which are not classed as having binomial responses are ignored.")
}
if(do_parallel) {
if(is.null(ncores))
registerDoParallel(cores = parallel::detectCores()-2)
if(!is.null(ncores))
registerDoParallel(cores = ncores)
}
##---------------
## Set up function to do SDM per species
##---------------
respfit_fn <- function(j) {
out_params <- list()
if(family[j] %in% c("gaussian", "poisson")) {
fit_init <- glm2(formula_X, family = family[j], data = data.frame(resp = y[,j], data))
out_params$coefficients <- fit_init$coefficients
if(family[j] == "gaussian")
out_params$dispparam <- summary(fit_init)$sigma^2
}
if(family[j] %in% c("Gamma", "exponential")) {
fit_init <- glm2(formula_X, family = Gamma(link = "log"), data = data.frame(resp = y[,j], data))
out_params$coefficients <- fit_init$coefficients
if(family[j] == "exponential") {
out_params$coefficients <- summary(fit_init, disperson = 1)$coefficients[,1]
}
}
if(family[j] %in% c("binomial")) {
if(length(trial_size) == 1)
cw_trial_size <- rep(trial_size, num_units)
if(is.matrix(trial_size))
cw_trial_size <- trial_size[,j]
formula_X <- update.formula(formula_X, cbind(resp, trial_size - resp) ~ .)
fit_init <- glm2(formula_X, family = "binomial", data = data.frame(resp = y[,j], trial_size = cw_trial_size, data))
out_params$coefficients <- fit_init$coefficients
}
if(family[j] %in% c("negative.binomial")) {
fit_init <- manyglm(formula_X, data = data.frame(resp = y[,j], data), family = "negative.binomial")
out_params$coefficients <- fit_init$coefficients
out_params$dispparam<- fit_init$phi
}
if(family[j] == "tweedie") {
fit_init <- gam(formula_X, data = data.frame(resp = y[,j], data), family = mgcv::tw(), method = "ML")
out_params$coefficients <- fit_init$coefficients
out_params$dispparam <- summary(fit_init)$dispersion
out_params$powerparam <- as.numeric(strsplit(strsplit(fit_init$family$family, "p=")[[1]][2], ")")[[1]])
}
if(family[j] == "beta") {
fit_init <- betareg(formula_X, data = data.frame(resp = y[,j], data), link = "logit")
out_params$coefficients <- fit_init$coefficients$mean
out_params$dispparam <- 1/fit_init$coefficients$precision
}
# if(family[j] == "ztpoisson") {
# fit_init <- countreg::zerotrunc(formula_X, data = data.frame(resp = y[,j], data), dist = "poisson")
# out_params$coefficients <- fit_init$coefficients
# }
# if(family[j] == "ztnegative.binomial") {
# fit_init <- countreg::zerotrunc(formula_X, data = data.frame(resp = y[,j], data), dist = "negbin")
# out_params$coefficients <- fit_init$coefficients
# out_params$dispparam <- 1/fit_init$theta
# }
# if(family[j] == "zipoisson") {
# fit_init <- countreg::zeroinfl(formula_X, data = data.frame(resp = y[,j], data), dist = "poisson", link = "logit")
# out_params$coefficients <- fit_init$coefficients$count
# out_params$ziintercept <- fit_init$coefficients$zero
# }
# if(family[j] == "zinegative.binomial") {
# fit_init <- countreg::zeroinfl(formula_X, data = data.frame(resp = y[,j], data), dist = "negbin", link = "logit")
# out_params$coefficients <- fit_init$coefficients$count
# out_params$ziintercept <- fit_init$coefficients$zero
# out_params$dispparam <- 1/fit_init$theta
# }
if(family[j] == "ordinal") {
fit_init <- clm(formula_X, data = data.frame(resp = ordered(y[,j]), data), link = "logit")
out_params$coefficients <- fit_init$beta
out_params$cutoffs <- fit_init$alpha
}
return(list(params = out_params, fit = fit_init))
}
respfit_cmpfn <- compiler::cmpfun(respfit_fn)
rm(respfit_fn)
j=NULL #need this to pass check()
if(do_parallel)
all_fits <- foreach(j = 1:num_spp) %dopar% respfit_cmpfn(j = j)
if(!do_parallel)
all_fits <- foreach(j = 1:num_spp) %do% respfit_cmpfn(j = j)
##-----------------
## Format output
##-----------------
out_allfits <- list(call = match.call(), fits = all_fits, y = y, formula_X = formula_X, data = data, family = family, trial_size = trial_size)
out_allfits$linear_predictor <- sapply(1:num_spp, function(x) {
if(family[x] != "ordinal")
s <- all_fits[[x]]$fit$linear
if(family[x] == "ordinal") {
s <- predict(all_fits[[x]]$fit, newdata = data, type = "linear.predictor")$eta1[,1]
s = pmax(s, binomial()$linkfun(tol)/2)
s = pmin(s, binomial()$linkfun(1-tol)/2)
}
return(s)
}
)
out_allfits$fitted <- sapply(all_fits, function(x) fitted(x$fit))
dimnames(out_allfits$fitted)=dimnames(y)
class(out_allfits) <- "stackedsdm"
rm(all_fits)
gc()
return(out_allfits)
}
#' @export
mgcv::ldTweedie
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.