speedglm <- function (formula, data, family = gaussian(), weights = NULL,
start = NULL, etastart = NULL, mustart = NULL, offset = NULL,
maxit = 25, k = 2, sparse = NULL, set.default = list(), trace = FALSE,
method = c("eigen", "Cholesky", "qr"),
model = FALSE, y = FALSE, fitted = FALSE, ...)
{
call <- match.call()
target <- y
M <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset",
"weights", "na.action", "etastart",
"mustart", "offset"), names(M), 0L)
M <- M[c(1L, m)]
M$drop.unused.levels <- TRUE
M[[1L]] <- quote(stats::model.frame)
M <- eval(M, parent.frame())
y <- M[[1]]
tf <- attr(M, "terms")
X <- model.matrix(tf, M)
offset <- model.offset(M)
intercept <- attributes(tf)$intercept
set <- list(sparselim = 0.9, camp = 0.01, eigendec = TRUE,
row.chunk = NULL, tol.solve = .Machine$double.eps, acc = 1e-08,
tol.values = 1e-07, tol.vectors = 1e-07, method = match.arg(method))
nmsC <- names(set)
set[(namc <- names(set.default))] <- set.default
if (length(noNms <- namc[!namc %in% nmsC]) > 0)
warning("unknown names in set.default: ", paste(noNms,
collapse = ", "))
rval <- speedglm.wfit(y = y, X = X, family = family, weights = weights,
start = start, etastart = etastart, mustart = mustart,
offset = offset, intercept = intercept, row.chunk = set$row.chunk,
maxit = maxit, k = k, acc = set$acc, sparselim = set$sparselim,
camp = set$camp, eigendec = set$eigendec, tol.solve = set$tol.solve,
sparse = sparse, tol.values = set$tol.values, trace = trace,
tol.vectors = set$tol.vectors, method = set$method)
rval$terms <- tf
rval$call <- call
rval$xlevels <- .getXlevels(tf, M)
class(rval) <- c("speedglm", "speedlm")
if (model)
rval$model <- M
if (fitted) {
if (missing(data))
data <- get_all_vars(M)
rval$linear.predictors <- predict.speedlm(rval, newdata = data)
}
if (target)
rval$y <- y
if ((rval$iter == maxit) & (!rval$convergence))
warning("Maximum number of iterations reached without convergence")
rval
}
speedglm.wfit <- function (y, X, intercept = TRUE, weights = NULL, row.chunk = NULL,
family = gaussian(), start = NULL, etastart = NULL, mustart = NULL,
offset = NULL, acc = 1e-08, maxit = 25, k = 2, sparselim = 0.9,
camp = 0.01, eigendec = TRUE, tol.values = 1e-07, tol.vectors = 1e-07,
tol.solve = .Machine$double.eps, sparse = NULL, method = c("eigen",
"Cholesky", "qr"), trace = FALSE, ...)
{
nobs <- NROW(y)
nvar <- ncol(X)
if (missing(y))
stop("Argument y is missing")
if (missing(X))
stop("Argument X is missing")
if (is.null(offset))
offset <- rep.int(0, nobs)
if (is.null(weights))
weights <- rep(1, nobs)
col.names <- dimnames(X)[[2]]
method <- match.arg(method)
fam <- family$family
link <- family$link
variance <- family$variance
dev.resids <- family$dev.resids
aic <- family$aic
linkinv <- family$linkinv
mu.eta <- family$mu.eta
if (is.null(sparse))
sparse <- is.sparse(X = X, sparselim, camp)
if (is.null(start)) {
if (is.null(mustart))
eval(family$initialize)
eta <- if (is.null(etastart))
family$linkfun(mustart)
else etastart
mu <- mustart
start <- rep(0, nvar)
}
else {
eta <- offset + as.vector(if (nvar == 1)
X * start
else {
if (sparse)
X %*% start
else tcrossprod(X, t(start))
})
mu <- linkinv(eta)
}
iter <- 0
dev <- sum(dev.resids(y, c(mu), weights))
tol <- 1
if ((fam == "gaussian") & (link == "identity"))
maxit <- 1
C_Cdqrls <- getNativeSymbolInfo("Cdqrls", PACKAGE = getLoadedDLLs()$stats)
while ((tol > acc) & (iter < maxit)) {
iter <- iter + 1
beta <- start
dev0 <- dev
varmu <- variance(mu)
mu.eta.val <- mu.eta(eta)
z <- (eta - offset) + (y - mu)/mu.eta.val
W <- (weights * mu.eta.val * mu.eta.val)/varmu
XTX <- cp(X, W, row.chunk, sparse)
XTz <- if (sparse)
t(X) %*% (W * z)
else t(crossprod((W * z), X))
if (iter == 1 & method != "qr") {
variable <- colnames(X)
ris <- if (eigendec)
control(XTX, , tol.values, tol.vectors, , method)
else list(rank = nvar, pivot = 1:nvar)
ok <- ris$pivot[1:ris$rank]
if (eigendec) {
XTX <- ris$XTX
X <- X[, ok]
XTz <- XTz[ok]
start <- start[ok]
}
beta <- start
}
if (method == "qr") {
ris <- .Call(C_Cdqrls, XTX, XTz, tol.values, FALSE)
start <- if (ris$rank < nvar)
ris$coefficients[ris$pivot]
else ris$coefficients
}
else {
start <- solve(XTX, XTz, tol = tol.solve)
}
eta <- if (sparse)
drop(X %*% start)
else drop(tcrossprod(X, t(start)))
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, c(mu), weights))
tol <- max(abs(dev0 - dev)/(abs(dev) + 0.1))
if (trace)
cat("iter", iter, "tol", tol, "\n")
}
wt <- sum(weights)
wtdmu <- if (intercept)
sum(weights * y)/wt
else linkinv(offset)
nulldev <- sum(dev.resids(y, c(wtdmu), weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(intercept)
rank <- ris$rank
dfr <- nobs - rank - sum(weights == 0)
aic.model <- aic(y, nobs, mu, weights, dev) + k * rank
ll.nuovo <- ll.speedglm(fam, aic.model, rank)
res <- (y - mu)/mu.eta(eta)
resdf <- n.ok - rank
RSS <- sum(W * res * res)
var_res <- RSS/dfr
dispersion <- if (fam %in% c("poisson", "binomial"))
1
else var_res
if (method == "qr") {
coefficients <- start
coefficients[coefficients == 0] = NA
ok <- ris$pivot[1:rank]
}
else {
coefficients <- rep(NA, nvar)
start <- as(start, "numeric")
coefficients[ok] <- start
}
names(coefficients) <- if (is.null(col.names) & (!is.null(coefficients))) {
if (intercept) {
if (length(coefficients) > 1)
c("(Intercept)", paste("V", 1:(length(coefficients) -
1), sep = ""))
else "(Intercept)"
}
else paste("V", 1:length(coefficients), sep = "")
}
else col.names
rval <- list(coefficients = coefficients, logLik = ll.nuovo,
iter = iter, tol = tol, family = family, link = link,
df = dfr, XTX = XTX, dispersion = dispersion, ok = ok,
rank = rank, RSS = RSS, method = method, aic = aic.model, offset=offset,
sparse = sparse, deviance = dev, nulldf = nulldf, nulldev = nulldev,
ngoodobs = n.ok, n = nobs, intercept = intercept, convergence = (!(tol >
acc)))
class(rval) <- "speedglm"
rval
}
shglm <- function(formula, datafun, family = gaussian(), weights.fo = NULL,
start = NULL, etastart = NULL, mustart = NULL, offset = NULL,
maxit = 25, k = 2, chunksize = 5000, sparse = NULL,trace=FALSE,
all.levels = FALSE, set.default = list(), ...){
if (!is.null(start)) stop("Sorry, code for argument start is not implemented yet")
if (!is.null(mustart)) stop("Sorry, code for argument mustart is not implemented yet")
if (!is.null(etastart)) stop("Sorry, code for argument etastart is not implemented yet")
dati <- datafun(reset = TRUE)
dati <- datafun(reset = FALSE)
call <- match.call()
tf <- terms(formula, data=dati)
M <- model.frame(tf, dati)
y <- M[[1]]
X <- model.matrix(tf, M)
offset <- model.offset(M)
set <- list(sparselim = 0.9, camp = 0.01, eigendec = TRUE,
row.chunk = NULL, tol.solve = .Machine$double.eps, acc = 1e-08,
tol.values = 1e-07, tol.vectors = 1e-07, method = "eigen")
nmsC <- names(set)
set[(namc <- names(set.default))] <- set.default
if (length(noNms <- namc[!namc %in% nmsC]) > 0)
warning("unknown names in set.default: ", paste(noNms,
collapse = ", "))
obj <- list()
obj$terms <- tf
fa <- NULL
if (!all.levels) {
fa <- which(attributes(attributes(M)$terms)$dataClasses=="factor")
if (length(fa)>0){
for (i in 1:length(fa)) {
eval(parse(text = paste("obj$levels$'", names(M)[fa[i]],
"'", "<-levels(M[,fa[i]])", sep = "")))
}
}
}
nomicol <- colnames(dati)
variable <- colnames(X)
nobs <- length(y)
weights <- if (is.null(weights.fo))
rep(1, nobs) else model.frame(weights.fo, dati)[[1]]
intercept <- attributes(tf)$intercept
nvar <- ncol(X)
if (is.null(offset))
offset <- rep.int(0, nobs)
if (is.null(sparse)) {
sp <- NULL
sparse <- is.sparse(X = X, set$sparselim, set$camp)
} else sp <- sparse
ok <- 1:ncol(X)
rank <- ncol(X)
dfr <- nobs - rank - sum(weights == 0)
fam <- family$family
link <- family$link
linkfun <- family$linkfun
variance <- family$variance
dev.resids <- family$dev.resids
linkinv <- family$linkinv
mu.eta <- family$mu.eta
iter <- 0
tol <- 1
dev <- nobs
weights.cum <- wt <- wy <- nulldev <- tot.obs <- zero.weights <- 0
block.cum <- nrow(X)
end.iter <- FALSE
while ((tol > set$acc) & (iter < maxit)) {
dev.prec <- dev
aic.model <- dev <- Wy <- RSS <- 0
iter <- iter + 1
XTX <- matrix(0, ncol(X), ncol(X))
XTz <- matrix(0, ncol(X), 1)
eof <- FALSE
iter2 <- 0
while (!eof) {
iter2 <- iter2 + 1
if (iter > 1) {
if (is.null(sp))
sparse <- is.sparse(X = X, set$sparselim, set$camp)
if (nvar == 1) {
eta <- (offset + as.vector(X * start))[1:length(y)]
} else {
eta <- (offset + if (sparse)
drop(X[, ok] %*% start) else drop(tcrossprod(X[, ok], t(start))))[1:length(y)]
}
mu <- linkinv(eta <- eta)
} else {
if (is.null(mustart)) eval(family$initialize)
if (is.null(start)) {
eta <- (if (is.null(etastart)) linkfun(mustart) else etastart)[1:length(y)]
mu <- mustart[1:length(y)]
} else {
if (nvar == 1) {
eta <- (offset + as.vector(X * start))[1:length(y)]
} else {
eta <- (offset + if (sparse)
drop(X %*% start) else as.vector(tcrossprod(X, t(start))))[1:length(y)]
}
mu <- linkinv(eta)
}
}
if (trace) cat("iter",iter,"elaborating chunk",iter2,"dim",dim(X),"\n")
dev <- dev + sum(dev.resids(y, c(mu), weights))
if (iter>1) aic.model <- aic.model + aic.shglm(fam,y,wt,mu,weights,dev.prec)
varmu <- variance(mu)
mu.eta.val <- mu.eta(eta)
z <- (eta - offset) + (y - mu)/mu.eta.val
W <- (weights * mu.eta.val * mu.eta.val)/varmu
if (iter2 == 1) {
XTX <- XTX + cp(X, W, set$row.chunk, sparse)
XTz <- XTz + if (sparse) t(X)%*%(W*z) else t(crossprod((W*z),X))
} else {
Ax <- XTX
XTX <- cp(X, W, set$row.chunk, sparse)
XTX[rownames(Ax),colnames(Ax)]<-XTX[rownames(Ax),colnames(Ax)]+Ax
Az <- XTz
XTz <- if (sparse) t(X)%*%(W*z) else t(crossprod((W*z), X))
XTz[rownames(Az), ] <- XTz[rownames(Az), ] + Az
}
res <- (y - mu)/mu.eta(eta)
RSS <- RSS + sum(W * res * res)
if (iter == 1) weights.cum <- weights.cum + sum(weights == 0)
if (is.null(dati <- datafun(reset = FALSE))) {
if (iter == 1) {
ris <- if (set$eigendec)
control(XTX,,set$tol.values,set$tol.vectors,out.B=FALSE,set$method) else
list(rank = nvar, pivot = 1:nvar)
ok <- ris$pivot[1:ris$rank]
}
if (is.null(start)) start <- rep(0, rank)
beta <- if (iter == 1) start[ok] else start
start <- solve(XTX[ok, ok], XTz[ok], tol = set$tol.solve)
if (trace) cat("coef",start,"tol",tol,"\n")
tol <- max(abs(dev.prec - dev)/(abs(dev) + 0.1))
if ((tol > set$acc) & (iter < maxit)) {
dati <- datafun(reset = TRUE)
dati <- datafun(reset = FALSE)
eof <- TRUE
} else break
}
colnames(dati) <- nomicol
M <- model.frame(tf, dati)
y <- M[[1]]
if ((!(all.levels))&(length(fa)>0)) {
flevels <- list()
j <- 0
for (i in 1:length(fa)) {
j <- j + 1
eval(parse(text = paste("flevels$'", names(M)[fa[i]],
"'", "<-levels(M[,fa[i]])", sep = "")))
a <- c(obj$levels[[j]][!(obj$levels[[j]] %in%
flevels[[j]])], flevels[[j]])
flevels[[j]] <- sort(a)
}
M <- model.frame(obj$terms, dati, xlev = flevels)
X <- model.matrix(obj$terms, M, xlev = flevels)
obj$levels <- flevels
} else {
X <- model.matrix(obj$terms, M)
flevels <- obj$levels
}
offset <- model.offset(M)
nobs <- length(y)
if (is.null(offset))
offset <- rep.int(0, nobs)
weights <- if (is.null(weights.fo))
rep(1, nobs) else model.frame(weights.fo, dati)[[1]]
if (iter == 1) {
tot.obs <- tot.obs + nobs
wt <- wt + sum(weights)
wy <- wy + crossprod(weights, y)
zero.weights <- zero.weights + sum(weights == 0)
block.cum <- block.cum + nrow(X)
}
if (iter == 2) {
wtdmu <- if (intercept) wy/wt else linkinv(offset)
nulldev <- nulldev + sum(dev.resids(y, c(wtdmu), weights))
}
}
}
datafun(reset = TRUE)
rank <- ris$rank
n.ok <- tot.obs - zero.weights
nulldf <- n.ok - as.integer(intercept)
aic.rest <- ifelse((fam %in% c("Gamma", "inverse.gaussian",
"gaussian")), 2, 0)
aic.model <- aic.model + k * rank + aic.rest
ll.nuovo <- ll.speedglm(fam, aic.model, rank)
resdf <- n.ok - rank
var_res <- RSS/resdf
dispersion <- if (fam %in% c("poisson", "binomial")) 1 else var_res
coefficients <- rep(NA, ncol(X))
start <- as(start, "numeric")
coefficients[ok] <- start
names(coefficients) <- colnames(X)
rval <- list(coefficients = coefficients, logLik = ll.nuovo,
iter = iter, tol = tol, family = family, link = link, df = resdf,
XTX = XTX[ok, ok], dispersion = dispersion, nok = ris$nok,
ok = ok, RSS = RSS, ncoll = ris$ncoll, sparse = sparse,
nulldev = nulldev, rank = rank, deviance = dev,
nulldf = nulldf, ngoodobs = n.ok, n = tot.obs, intercept = intercept,
aic = aic.model, convergence = (!(tol> set$acc)),method="eigen")
rval$tf <- tf
rval$call <- call
if ((rval$iter==maxit)&(!rval$convergence))
warning("Maximum number of iterations reached without convergence")
class(rval) <- "speedglm"
rval
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.