Nothing
#### All the functions in library FWD have been written by
#### KJELL KONIS (kkonis@statsci.com)
#### and
#### MARCO RIANI (mriani@unipr.it)
####
#### R porting by Luca Scrucca (luca@stat.unipg.it)
####
##---------------------------------------------------------------------------##
## file: fwdlm.q
###############################################################################
#### fwdlm = function which performs the fwd search in linear regression models
###############################################################################
"fwdlm" <-
function(formula, data, nsamp = "best", x = NULL, y = NULL, intercept = TRUE, na.action, trace = TRUE)
{
if (!missing(formula))
{
m <- call("model.frame", formula = formula)
if (!missing(data))
m$data <- data
if (!missing(na.action))
m$na.action <- na.action
m <- eval(m, parent.frame())
Terms <- attr(m, "terms")
xvars <- as.character(attr(Terms, "variables"))
if ((yvar <- attr(Terms, "response")) > 0)
xvars <- xvars[ - yvar]
if (length(xvars) > 0)
{
xlevels <- lapply(m[xvars], levels)
xlevels <- xlevels[!sapply(xlevels, is.null)]
if (length(xlevels) == 0)
xlevels <- NULL
}
else
xlevels <- NULL
y <- model.response(m, "numeric")
x <- model.matrix(Terms, m, NULL)
}
else
{
if (is.null(x) || is.null(y))
stop("No data supplied")
y <- as.vector(y)
x <- as.matrix(x)
if (intercept)
x <- cbind(1,x)
}
n <- nrow(x)
p <- ncol(x)
## We supress warnings generated by lmsreg.default, e.g.
## More than half of the subsamples were singular.
temp.warn <- options()$warn
options(warn = -1)
#KR lms <- lmsreg.default(x, y, nsamp, intercept = F, mve = F)
lms <- lmsreg(x, y, nsamp = nsamp, intercept = FALSE)
options(warn = temp.warn)
sor <- order(abs(lms$residuals))
coef.names <- names(lms$coefficients)
# included = all cases included at each step ##
included <- list()
# Unit = Unit(s) included in each step
Unit <- matrix(as.integer(NA), nrow = n-p+1, ncol = 5)
# Coefficients = estimated beta Coefficients
Coefficients <- matrix(as.double(NA), nrow = n - p + 1, ncol = p)
# tStatistics = t statistics
tStatistics <- matrix(as.double(NA), nrow = n - p, ncol = p)
# S2 = 1st col S^2 2nd col R^2
S2 <- matrix(as.double(NA), nrow = n - p + 1, ncol = 2)
## ModCookDist = modified Cook distance
ModCookDist <- matrix(as.double(NA), nrow = n - p, ncol = 5)
## MaxRes = max studentized residual
MaxRes <- matrix(as.double(NA), nrow = n - p, ncol = 1)
## Leverage = Leverage
Leverage <- matrix(as.double(NA), nrow = n, ncol = n - p + 1)
## CookDist = Forward Cook distance
CookDist <- matrix(as.double(NA), n - p, 1)
## MinDelRes = Minimum deletion residual
MinDelRes <- matrix(as.double(NA), n - p - 1, 1)
## Residuals = Residuals in each step
Residuals <- matrix(as.double(NA), n, n-p+1)
residuals <- lms$residuals
inibsb <- integer(0)
n1 <- 1:n
if (trace)
cat("Starting Forward Search.\n")
for (m in p:n)
{
if(trace && m %% 10 == 0)
cat(paste("fwdlm: finished ", m, " steps out of ", n, ".\n", sep = ""))
bsb <- sor[1:m]
included[[m-p+1]] <- sort(bsb) # added info
unit <- setdiff(bsb, inibsb)
if(length(unit) > 5)
Unit[m-p+1, ] <- unit[1:5]
else
Unit[m-p+1, 1:length(unit)] <- unit
inibsb <- bsb
xb <- x[bsb,]
yb <- y[bsb]
#KR zz <- try(lm.fit.qr(xb, yb, qr = T))
zz <- try(lm.fit(xb, yb, method="qr"))
if (!inherits(zz,"try-error") && (zz$qr$rank == p))
{
residuals <- y - x %*% zz$coefficients
if (!any(is.na(residuals)))
sor <- order(abs(residuals))
### store residuals ###
Residuals[ ,m-p+1] <- residuals
### store estimated coefficients ###
Coefficients[m-p+1, ] <- zz$coefficients
### compute leverage ###
h <- try(as.vector(qr.Q(zz$qr)^2 %*% array(1, c(p, 1))))
if (!inherits(h,"try-error"))
Leverage[bsb, m-p+1] <- h
### store s^2 ###
SSR <- sum(zz$residuals^2) #used to be num
sigma2 <- SSR/(m-p) # used to be s
sigma <- sqrt(sigma2)
S2[m-p+1, 1] <- sigma2
### store R^2 ###
S2[m-p+1, 2] <- 1-(SSR/sum((yb - mean(yb))^2))
## If mAm is invertable then we compute the t statistics and ##
## distances. ##
mAm <- t(xb) %*% xb
# mmx <- try(solve(mAm)) # does not work
if (det(mAm, tol=1e-20) == 0)
{ mmx <- "singlular matrix"
class(mmx) <- "error" }
else
mmx <- solve(mAm, tol=1e-20)
if (!inherits(mmx,"try-error") && (m > p))
{
### store t statistics ###
tStatistics[m-p, ] <- zz$coefficients/(sigma * sqrt(diag(mmx)))
### store max studentized residual ###
MaxRes[m-p, 1] <- max(abs(residuals[bsb])/(sigma*sqrt(1-h)))
### store approximate Cook's distance ###
bib <- (Coefficients[m-p+1, ] - Coefficients[m-p, ])
CookDist[m-p] <- bib %*% mAm %*% matrix(bib)/(p*sigma2)
### compute modified Cook's distance ###
if (length(unit))
{
for (i in 1:length(unit))
{
hi <- x[unit[i], , drop = FALSE] %*% mmx %*% t(x[unit[i], , drop=FALSE])
if (!is.na(S2[m-p, 1]))
ModCookDist[m-p, i] <- abs(residuals[unit[i]])/(1-hi)*
sqrt(((m-p)/p*hi)/S2[m-p, 1])
else
ModCookDist[m-p, i] <- 0
}
}
}
if (!inherits(mmx,"try-error") && (m < n))
{
# bsb.comp = complement of bsb #used to be ncl
bsb.comp <- setdiff(n1, bsb)
# find minimum deletion residual
ord <- numeric(length(bsb.comp))
for (i in 1:length(bsb.comp))
{ hii <- x[bsb.comp[i], , drop = FALSE] %*% mmx %*% t(x[bsb.comp[i], , drop = FALSE])
ord[i] <- residuals[bsb.comp[i]]^2 / (1+hii)
}
if (!is.na(S2[m-p+1, 1]) && S2[m-p+1, 1] > 0)
MinDelRes[m-p, 1] <- sqrt(min(abs(ord)) / S2[m-p+1, 1])
}
} #end of zz was fit if block
else
{
if (m == p)
Residuals[ ,m-p+1] <- y - x %*% lms$coef
else
Residuals[ ,m-p+1] <- Residuals[ ,m-p]
}
} #end of for loop
Residuals <- Residuals / sigma
#### Add fwdlm search index in each matrix
dimnames(Unit) <- list(paste("m=", p:n, sep=""), paste("Inclusion", 1:5))
names(included) <- paste("m=", p:n, sep = "")
dimnames(Coefficients) <- list(paste("m=", p:n, sep=""), coef.names)
dimnames(tStatistics) <- list(paste("m=", (p+1):n, sep=""), coef.names)
dimnames(Residuals) <- list(n1, paste("m=", p:n, sep=""))
dimnames(Leverage) <- list(n1, paste("m=", p:n, sep=""))
dimnames(CookDist) <- list((p+1):n, "Cook dist.")
dimnames(ModCookDist) <- list((p+1):n, paste("Inclusion", 1:5))
dimnames(MaxRes) <- list((p+1):n, "Max. res")
dimnames(S2) <- list(p:n, c("s^2","R^2"))
dimnames(MinDelRes) <- list((p+1):(n-1), "Min del. res.")
ans <- list(call = match.call(),
Residuals = Residuals,
Unit = Unit,
included = included,
Coefficients = Coefficients,
tStatistics = tStatistics,
CookDist = CookDist,
ModCookDist = ModCookDist,
Leverage = Leverage,
S2 = S2,
MaxRes = MaxRes,
MinDelRes = MinDelRes,
StartingModel = lms)
class(ans) <- "fwdlm"
ans
}
##---------------------------------------------------------------------------##
## file: fwdlm.methods.q
"summary.fwdlm" <- function(object, steps = "auto", ...)
{
n <- dim(object$Residuals)[1]
p <- dim(object$Coefficients)[2]
if (steps == "auto")
steps <- max(5, as.integer(0.05 * n))
else
if (steps == "all")
steps <- n-p #-1
if (steps + p > n)
stop("Too many steps")
steps <- steps - 1
if (any(!is.na(object$Unit[n-p-steps, 2])))
Unit <- object$Unit[(n-p-steps+1):(n-p+1), ]
else
Unit <- t(object$Unit[(n-p-steps+1):(n-p+1), 1])
int <- intersect(1:n, as.vector(object$Unit[(n - p - steps):(n - p), ]))
Leverage <- object$Leverage[int, (n - p - steps + 1):(n - p + 1)]
Residuals <- object$Residuals[int, (n - p - steps + 1):(n - p + 1)]
MaxRes <- object$MaxRes[(n - p - steps):(n - p)]
names(MaxRes) <- paste("m=", as.character((n - steps):n), sep = "")
MinDelRes <- object$MinDelRes[(n - p - steps - 1):(n - p - 1)]
names(MinDelRes) <- paste("m=", as.character((n - steps - 1):(n - 1)), sep = "")
Coefficients <- object$Coefficients[(n - p - steps + 1):(n - p + 1), ]
tStatistics <- object$tStatistics[(n - p - steps):(n - p), ]
CookDist <- object$CookDist[(n - p - steps):(n - p)]
names(CookDist) <- paste("m=", as.character((n - steps):(n)), sep = "")
ModCookDist <- object$ModCookDist[(n - p - steps):(n - p)]
names(ModCookDist) <- paste("m=", as.character((n - steps):(n)), sep = "")
S2 <- object$S2[(n - p - steps + 1):(n - p + 1), ]
dimnames(S2)[[1]] <- paste("m=", dimnames(S2)[[1]], sep = "")
smry <- list(call = object$call, Unit = Unit, Leverage = Leverage,
Residuals = Residuals, MaxRes = MaxRes,
MinDelRes = MinDelRes, Coefficients = Coefficients,
tStatistics = tStatistics, CookDist = CookDist,
ModCookDist = ModCookDist, S2 = S2)
class(smry) <- "summary.fwdlm"
smry
}
"print.summary.fwdlm" <- function(x, digits = 4, ...)
{
cat("Call:\n")
print(x$call)
steps <- dim(x$Unit)[2]
cat(paste("\nUnits included in the last", steps, "steps:\n"))
if(dim(x$Unit)[1] == 1)
dimnames(x$Unit)[1] <- "Unit"
print(x$Unit)
cat("\nResiduals:\n")
print(x$Residuals, digits = digits)
cat("\nLeverage:\n")
print(x$Leverage, digits = digits)
cat("\nMaximum Studentized Residuals:\n")
print(x$MaxRes, digits = digits)
cat("\nMinumum Deletion Residuals:\n")
print(x$MinDelRes, digits = digits)
cat("\nEstimated Coefficients:\n")
print(x$Coefficients, digits = digits)
cat("\nt Statistics:\n")
print(x$tStatistics, digits = digits)
cat("\nCook's Distance:\n")
print(x$CookDist, digits = digits)
cat("\nModified Cook's Distance:\n")
print(x$ModCookDist, digits = digits)
cat("\ns^2 and R^2:\n")
print(x$S2, digits = digits)
invisible(x)
}
"print.fwdlm" <- function(x, ...)
{
cat("Call:\n")
print(x$call)
n <- dim(x$Residuals)[1]
p <- dim(x$Coefficients)[2]
cat("\nLast 5 units included in the forward search:\n")
units <- x$Unit[(n-p-3):(n-p+1), 1, drop = FALSE]
dimnames(units)[[2]] <- ""
print(t(units))
cat("\nEstimated coefficients for 50% of the data and for the full model:\n")
print(x$Coefficients[c(ceiling(n/2) - p + 1, n - p + 1), ])
cat("\n")
invisible(x)
}
"plot.fwdlm" <-
function(x, which.plots = 1:10, squared = FALSE, scaled = TRUE, ylim = NULL, xlim = NULL, th.Res = 2, th.Lev = 0.25, sig.Tst = 2.58, labels.in.plot = TRUE, ...)
{
if(!is.numeric(which.plots) || max(which.plots) > 10 || min(which.plots) < 1)
stop("which.plots must be an integer vector containing only 1 to 10")
oldpar <- par()
# on.exit(par(oldpar))
if (length(which.plots)>1)
{ par(ask = TRUE)
on.exit(par(ask=oldpar$ask)) }
n <- dim(x$Residuals)[1]
p <- dim(x$Coefficients)[2]
## Plot Residuals ##
if (is.element(1, which.plots))
{
x1 <- x$Residuals
if (squared)
{ x1 <- x1^2
ylab <- "Squared Scaled Residuals" }
else
{ ylab <- "Scaled Residuals" }
if (is.null(ylim))
y.lim <- c(ifelse(squared, 0, min(x1, na.rm = TRUE)),
max(x1, na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p - 1, n + 1)
else
x.lim <- xlim
plot(p:n, x1[1, ],
type = "n",
ylim = y.lim,
xlim = x.lim,
xlab = "Subset Size",
ylab = ylab)
for (i in 1:nrow(x1))
lines(p:n, x1[i, ], lty = i)
### Write numbers for selected units which
### have residuals bigger than a certain threshold
ma <- apply(abs(x1), 1, max, na.rm = TRUE)
nam <- which(ma > th.Res)
if (lnam <- length(nam))
{
text(rep(p, lnam), x1[nam, 1], paste(nam, " ", sep = ""), adj = 1)
text(rep(n, lnam), x1[nam, ncol(x1)], paste(" ", nam, sep = ""), adj = 0)
}
}
## Plot Leverage ##
if (is.element(2, which.plots))
{
x1 <- x$Leverage
if (is.null(ylim))
y.lim <- c(0, 1)
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p - 1, n + 1)
else
x.lim <- xlim
plot(0.5, 0.5,
type = "n",
ylim = y.lim,
xlim = x.lim,
xlab = "Subset Size",
ylab = "Leverage")
for (i in 1:(nrow(x1)))
{ lines(p:n, x1[i, ], lty = i, col = i, lwd = 1) }
sy <- which(is.na(x1[,ncol(x1)-1]))
points(n, x1[sy, ncol(x1)], pch = 16)
ma <- apply(x1[, round(.667*ncol(x1)):ncol(x1)], 1, max, na.rm = TRUE)
nam <- which(ma > th.Lev)
if (length(nam))
{
text(rep(n, length(nam)), x1[nam, ncol(x1)],
paste(" ", nam, sep = ""), adj = 0)
## in the following lines you find the steps of the fwd search in ##
## which these units enter ##
xcoord <- apply(is.na(x1[nam, , drop = FALSE]), 1, sum) + 1
## exclude the unit included in the last step (if it is present in tee)
xcoord <- xcoord[xcoord < (n-p+1)]
if (length(xcoord))
{
## ycoord = y coordinate for labels
if (length(xcoord) > 1)
ycoord <- diag(x1[as.integer(names(xcoord)), xcoord])
else
ycoord <- x1[as.integer(names(xcoord)), xcoord]
text(xcoord+p, ycoord,
paste(names(xcoord), " ", sep = ""), adj = 1)
}
}
}
## Plot maximum studentized residual ##
if (is.element(3, which.plots))
{
x1 <- x$MaxRes[, 1]
if (is.null(ylim))
y.lim <- c(0, max(x1[is.finite(x1)], na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p, n + 1)
else
x.lim <- xlim
plot((p+1):n, x1,
type = "l",
xlab = "Subset Size",
ylab = "Maximum Studentized Residuals",
ylim = y.lim,
xlim = x.lim)
}
## Plot minimum deletion residuals ##
if (is.element(4, which.plots))
{
x1 <- x$MinDelRes
if (nrow(x1) > 3)
{ a <- p + 4
x1 <- x1[4:nrow(x1),] }
else
a <- p + 1
if (is.null(ylim))
y.lim <- c(0, max(x1, na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(a-1, n)
else
x.lim <- xlim
plot(a:(n-1), as.numeric(x1),
type = "l",
xlab = "Subset Size",
ylab = "Minimum Deletion Residuals",
ylim = y.lim,
xlim = x.lim)
}
## Plot Coefficients ##
if (is.element(5, which.plots))
{
x1 <- na.omit(x$Coefficients)
n.dropped <- n - p + 1 - dim(x1)[1]
if (scaled)
{ x1 <- apply(x1, 2, function(x) sign(mean(x))*x/mean(x))
ylab <- "Scaled Coefficient Estimates" }
else
{ ylab <- "Coefficient Estimates" }
if (is.null(ylim))
y.lim <- range(x1, na.rm = TRUE)
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p, ifelse(labels.in.plot, p+1.1*(n-p), n))
else
x.lim <- xlim
plot(0, 0,
type = "n",
xlab = "Subset Size",
ylab = ylab,
ylim = y.lim,
xlim = x.lim)
for (i in 1:ncol(x1))
lines((p + n.dropped):n, x1[, i], lty = i, lwd = 1)
if (labels.in.plot)
# text(rep(n,p), x1[dim(x1)[1],], labels = paste(" C", 0:(p-1), sep = ""), cex = 1.1, adj = 0)
# Mathematical annotation
for (i in 1:p)
{ text(rep(n,p), x1[dim(x1)[1],i],
substitute(hat(beta)[b], list(b=i-1)), pos=4) }
}
## Plot t statistics ##
if (is.element(6, which.plots))
{
x1 <- x$tStatistics
if (is.null(ylim))
{ y.lim <- as.integer(.667 * dim(x1)[1]):(dim(x1)[1])
y.lim <- range(x1[y.lim, , drop = FALSE], na.rm = TRUE)
y.lim <- max(abs(range(y.lim)))
y.lim <- c(-1*y.lim, y.lim)
}
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p, ifelse(labels.in.plot, p+1.1*(n-p), n))
else
x.lim <- xlim
# x1[x1 <= y.lim[1] | x1 >= y.lim [2]] <- NA
plot(0, 0,
type = "n",
xlab = "Subset Size",
ylab = "t Statistics",
ylim = y.lim,
xlim = x.lim)
for (i in 1:ncol(x1))
lines((p+1):n, x1[ ,i], lty = i, lwd = 1)
if (is.numeric(sig.Tst))
{ abline(h = sig.Tst, col = "lightgrey")
abline(h = -sig.Tst, col = "lightgrey") }
if (labels.in.plot)
# text(rep(n,p), x1[n-p,], labels = paste(" C", 0:(p-1), sep = ""), cex = 1.1, adj = 0)
# Mathematical annotation
for (i in 1:p)
{ text(rep(n,p), x1[dim(x1)[1],i],
substitute(hat(beta)[b], list(b=i-1)), pos=4) }
}
## Plot forward Cook distance ##
if (is.element(7, which.plots))
{
x1 <- x$CookDist
if (nrow(x1) > 3)
{ a <- p + 4
x1 <- x1[4:nrow(x1),] }
else
a <- p + 1
if (is.null(ylim))
y.lim <- c(0, max(as.numeric(x1), na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(a - 1, n+1)
else
x.lim <- xlim
plot(a:n, as.numeric(x1),
type = "l",
xlab = "Subset Size",
ylab = "Cook's Distance",
ylim = y.lim,
xlim = x.lim)
}
## Plot modified forward Cook's distance ##
if (is.element(8, which.plots))
{
x1 <- x$ModCookDist
if (nrow(x1) > 3)
{ a <- p+4
x1 <- x1[4:nrow(x1),] }
else
a <- p+1
if (is.null(ylim))
y.lim <- c(0, max(as.numeric(x1[,1]), na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(a - 1, n+1)
else
x.lim <- xlim
plot(a:n, as.numeric(x1[, 1]),
type = "l",
xlab = "Subset Size",
ylab = "Modified Cook's Distance",
ylim = y.lim,
xlim = x.lim)
}
## Plot S^2 ##
if (is.element(9, which.plots))
{
x1 <- x$S2
if (is.null(ylim))
y.lim <- c(0, max(x1[2:nrow(x1),1], na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p - 1, n + 1)
else
x.lim <- xlim
plot(p:n, x1[,1],
type = "l",
xlab = "Subset Size",
ylab = expression(S^2),
ylim = y.lim,
xlim = x.lim)
}
## Plot R^2 ##
if (is.element(10, which.plots))
{
x1 <- x$S2
if (is.null(ylim))
y.lim <- c(0, max(x1[2:nrow(x1),2], na.rm = TRUE))
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p - 1, n + 1)
else
x.lim <- xlim
plot(p:n, x1[,2],
type="l",
xlab = "Subset Size",
ylab = expression(R^2),
ylim = y.lim,
xlim = x.lim)
}
invisible(x)
}
##---------------------------------------------------------------------------##
## file: fwdsco.q
### fwdsco = fwd search for transformation of response in linear regression
### models
###############################################################################
#### fwdsco = function which computes the score test and the likelihood
#### in each step of the fwd search
###############################################################################
"fwdsco" <-
function(formula, data, nsamp = "best", lambda = c(-1, -0.5, 0, 0.5, 1), x = NULL, y = NULL, intercept = TRUE, na.action, trace = TRUE)
{
if (!missing(formula))
{
m <- call("model.frame", formula = formula)
if (!missing(data))
m$data <- substitute(data)
if (!missing(na.action))
m$na.action <- na.action
m <- eval(m, parent.frame())
Terms <- attr(m, "terms")
xvars <- as.character(attr(Terms, "variables"))
if ((yvar <- attr(Terms, "response")) > 0)
xvars <- xvars[-yvar]
if (length(xvars) > 0)
{
xlevels <- lapply(m[xvars], levels)
xlevels <- xlevels[!sapply(xlevels, is.null)]
if (length(xlevels) == 0)
xlevels <- NULL
}
else
xlevels <- NULL
y <- model.response(m, "numeric")
x <- model.matrix(Terms, m, contrasts.arg = NULL)
}
else
{
if (is.null(x) || is.null(y))
stop("No data supplied")
y <- as.vector(y)
x <- as.matrix(x)
if (intercept)
x <- cbind(1, x)
}
## Program has added a column of ones ##
n <- nrow(x)
p <- ncol(x)
n.lambda <- length(lambda)
total.iter <- (n-p+1)*n.lambda
n.iter <- 0
Likelihood <- matrix(0, nrow = n - p-1, ncol = n.lambda)
ScoreTest <- matrix(0, nrow = n - p-1, ncol = n.lambda)
acla <- as.character(lambda)
one.third <- abs(n.lambda - .3333333333) < .0000000001
acla[one.third] <- "1/3"
str.lam <- paste(rep("lambda = ", n.lambda), acla, sep="")
str.Sco <- paste("m=", (p+2):n, sep="")
Unit <- list()
for (j in 1:n.lambda) # in R we need to fill the elements of the list
Unit[[j]] <- matrix(as.integer(NA), nrow = n-p+1, ncol = 5)
for (j in 1:n.lambda)
{
Unit[[j]] <- matrix(as.integer(NA), nrow = n-p+1, ncol = 5)
## Transform all the units of the response var. ##
if (lambda[j] == 0)
yt <- log(y)
else
yt <- y^lambda[j]
## We supress warnings generated by lmsreg.default, e.g.
## More than half of the subsamples were singular.
temp.warn <- options()$warn
options(warn = -1)
#KR lms <- lmsreg.default(x, yt, nsamp, intercept = F, mve = F)
lms <- lmsreg(x, yt, nsamp = nsamp, intercept = FALSE)
options(warn = temp.warn)
sor <- order(abs(lms$residuals))
inibsb <- integer(0)
for (m in p:n)
{
bsb <- sor[1:m]
unit <- setdiff(bsb, inibsb)
if (length(unit) > 5)
Unit[[j]][m-p+1, ] <- unit[1:5]
else
Unit[[j]][m-p+1, 1:length(unit)] <- unit
inibsb <- bsb
xb <- x[bsb, ]
yb <- y[bsb]
yb.transf <- yt[bsb]
#KR zz <- try(lm.fit.qr(xb, yb.transf, qr=T))
zz <- try(lm.fit(xb, yb.transf, method="qr"))
if (!inherits(zz,"try-error") && (zz$qr$rank == p))
{
if (m > p+1)
{
# Score test and lik. on original data
test <- score.s(xb, yb, lambda[j])
# Store score and lik.
Likelihood[m-p-1, j] <- test$Likelihood
ScoreTest[m-p-1, j] <- test$Score
}
residuals <- abs(yt - x %*% zz$coefficients)
if (!any(is.na(residuals)))
sor <- order(abs(residuals))
}
if (trace)
{
n.iter <- n.iter + 1
if (n.iter %% 10 == 0)
cat(paste("fwdsco: finished ", n.iter, " steps out of ",
total.iter, ".\n", sep = ""))
}
} ### end of fwd search
} ### end of the loop for lambda[j]
if (trace)
cat("\n")
ScoreTest[1:2,][abs(ScoreTest[1:2,]) > 8] <- NA
Likelihood[is.infinite(Likelihood)] <- NA
names(Unit) <- str.lam
for (i in 1:n.lambda)
dimnames(Unit[[i]]) <- list(paste("m=", p:n, sep=""),
paste("Inclusion", 1:5))
Input <- list(n,p,lambda)
names(Input) <- c("n","p","lambda")
dimnames(ScoreTest) <- list(str.Sco, str.lam)
dimnames(Likelihood) <- list(str.Sco, str.lam)
transf <- list(call = match.call(),
Likelihood = Likelihood,
ScoreTest = ScoreTest,
Unit = Unit,
Input = Input,
x = x,
y = y)
class(transf) <- "fwdsco"
transf
}
##---------------------------------------------------------------------------##
## file: score.q
#############################################################
### score.s = function which computes the score test
#############################################################
"score.s" <- function(x, y, la, tol = 1e-20)
{
x <- as.matrix(x)
n <- nrow(x)
p <- ncol(x)
Score <- NA
Likelihood <- NA
# y <- y / mean(y)
# x <- apply(x, 2, function(u) u / mean(u))
if (n > p && min(y) > 1e-12)
{
xx <- t(x)%*% x
if (min(eigen(xx)$values) < tol)
warning("Matrix X is not full rank.")
else
{
A <- -x %*% solve(xx, tol=tol) %*% t(x)
diag(A) <- 1 + diag(A)
g <- exp(mean(log(y)))
if (la == 0)
{
z <- g*log(y)
w <- g*log(y)*(log(y)/2-log(g)) }
else {
z <- (y^la-1)/(la*g^(la-1))
w <- (y^la*log(y)-(y^la-1)*(1/la+log(g)))/(la*g^(la-1))
}
tz <- matrix(z,1,n)
tw <- matrix(w,1,n)
r <- tz%*%A%*%z
Aw <- A %*% w
zAw <- tz %*% Aw
wAw <- tw %*% Aw
#if (abs(wAw) > 1e-12)
Sz <- sqrt(r - zAw^2/wAw)
if (!is.na(Sz) && Sz > 0.0001)
Score <- -zAw * sqrt(n-p-1) / (Sz * sqrt(wAw))
else
Score <- NA
if (r > 0.0)
Likelihood <- -n * log(r/n)
else
Likelihood <- Inf
}
}
list(Score = Score, Likelihood = Likelihood)
}
##---------------------------------------------------------------------------##
## file: fwdsco.methods.q
"summary.fwdsco" <- function(object, steps = "auto", lambdaMLE = FALSE, ...)
{
n <- nrow(object$Unit[[1]])
if (steps == "auto")
steps <- max(5, 0.05 * n)
else
if (steps == "all")
steps <- dim(object$Likelihood)[1]
else
steps <- as.integer(steps)
steps <- steps - 1
Unit <- object$Unit
for (i in 1:length(object$Unit))
{
if (any(!is.na(Unit[[i]][(n-steps):n, 2])))
Unit[[i]] <- Unit[[i]][(n-steps):n, ]
else
Unit[[i]] <- Unit[[i]][(n-steps):n, 1]
}
n <- dim(object$Likelihood)[1]
Likelihood <- object$Likelihood[(n-steps):n, , drop = FALSE]
ScoreTest <- object$ScoreTest[(n-steps):n, , drop = FALSE]
smry <- list(call = object$call,
Unit = Unit,
Likelihood = Likelihood,
ScoreTest = ScoreTest,
Steps = steps + 1)
if (lambdaMLE)
smry$mle <- lambda.mle(object$x, object$y)
class(smry) <- "summary.fwdsco"
smry
}
"print.summary.fwdsco" <- function(x, digits = 4, ...)
{
cat("Call:\n")
print(x$call)
steps <- x$Steps
cat(paste("\nUnits included in the last", steps, "steps for different lambdas:\n"))
for (name in names(x$Unit))
{
cat(paste("\n", name, ":\n", sep = ""))
print(x$Unit[[name]])
}
cat("\nLog Likelihood:\n")
print(x$Likelihood, digits = digits)
cat("\nScore test statistic:\n")
print(x$ScoreTest, digits = digits)
if (!is.null(x$mle))
{
cat("\nMaximum Likelihood Estimate of Lambda in the Final Step:\n")
print(signif(x$mle, digits = digits))
}
}
"print.fwdsco" <- function(x, th.Sco=2.58, ...)
{
cat("Call:\n")
print(x$call)
all.lambda <- x$Input$lambda
cat("\nLambda:\n")
print(all.lambda)
lambda <- all.lambda[!apply(x$ScoreTest, 2, function(u, c) any(abs(u) > c, na.rm = TRUE), c = th.Sco)]
if (length(lambda) == 1)
{
cat(paste("\nThe score test statistic for the following value of lambda was\nless than", th.Sco, "during each step of the forward search.\n"))
}
else
if (length(lambda) > 1)
{
cat(paste("\nThe score test statistics for the following values of lambda were\nless than", th.Sco, "during each step of the forward search.\n"))
}
else
{
cat(paste("\nThe score test statistic for each value of lambda exceeded", th.Sco, "\nfor at least one subset during forward search.\n\n"))
}
if (length(lambda))
cat(lambda, "\n")
invisible(x)
}
"plot.fwdsco" <-
function(x, plot.Sco = TRUE, plot.Lik = FALSE, th.Sco = 2.58, plot.mle = TRUE, ylim = NULL, xlim = NULL, ...)
{
if (plot.Sco && plot.Lik)
{ ask <- par()$ask
par(ask = TRUE)
on.exit(par(ask=ask)) }
p <- x$Input$p
n <- x$Input$n
if (plot.Sco)
{
x1 <- x$ScoreTest
if (is.null(ylim))
{
y.lim <- as.integer(.5 * dim(x1)[1]):(dim(x1)[1])
y.lim <- range(x1[y.lim, , drop = FALSE], na.rm = TRUE)
y.lim <- max(abs(range(y.lim)))
y.lim <- c(-1*y.lim, y.lim)
}
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p+1, n + (n - p+1)*0.05)
else
x.lim <- xlim
plot(0, 0,
xlim = x.lim,
ylim = y.lim,
xlab = "Subset Size",
ylab = "Score Test Statistic",
type = "n")
for (i in 1:ncol(x1))
lines((p+2):n, x1[, i], lty = i, lwd = 1)
one.third <- abs(x$Input$la - .3333333333) < .0000000001
if (sum(!one.third))
text(n, (x1[nrow(x1),])[!one.third],
paste(" ", x$Input$la[!one.third], sep = ""),
adj = 0)
if (sum(one.third))
text(n, (x1[nrow(x1),])[one.third], " 1/3", adj=0)
if (th.Sco !=0)
{
if (th.Sco < y.lim[2] && th.Sco > y.lim[1])
abline(h = th.Sco, col = "lightgrey")
if (-th.Sco < y.lim[2] && -th.Sco > y.lim[1])
abline(h = -th.Sco, col = "lightgrey")
}
if (plot.mle)
{
points(n, 0, pch = 16)
text(n, 0, " MLE", adj=0)
}
}
if (plot.Lik)
{
x1 <- x$Likelihood
if (is.null(ylim))
y.lim <- range(x1, na.rm = TRUE)
else
y.lim <- ylim
if (is.null(xlim))
x.lim <- c(p+1, n+1)
else
x.lim <- xlim
plot(0, 0,
xlim = x.lim,
ylim = y.lim,
xlab = "Subset Size",
ylab = "Log Likelihood",
type = "n")
for (i in 1:ncol(x1))
lines((p+2):n, x1[, i], lty = i, lwd = 1)
one.third <- abs(x$Input$la - .3333333333) < .0000000001
if (sum(!one.third))
text(n, (x1[nrow(x1),])[!one.third],
paste(" ", x$Input$la[!one.third], sep = ""),
adj = 0)
if (sum(one.third))
text(n + 1, (x1[nrow(x1),])[one.third], " 1/3", adj = 0)
if (sum(!one.third))
text(p + 2, (x1[1,])[!one.third],
paste(x$Input$la[!one.third], " ", sep = ""),
adj = 1)
if (sum(one.third))
text(p + 2, (x1[1,])[one.third], "1/3 ", adj = 1)
}
invisible(x)
}
##---------------------------------------------------------------------------##
## file: lambdaMLE.q
lambda.mle <- function(x, y, init = c(-2, 2), tol = 0.0001)
{
lambda <- c(init[1], .61803 * init[1] + .38197 * init[2], init[2])
lik <- sapply(lambda, function(u, x, y)
{ score.s(x, y, u)$Lik},
x = x, y = y )
right.side <- TRUE
while (lambda[3] - lambda[1] > tol)
{
if (right.side)
{
a <- lambda[2] + .38197 * (lambda[3] - lambda[2])
lika <- score.s(x, y, a)$Lik
if (lik[2] > lika)
{
lambda[3] <- a
lik[3] <- lika
right.side <- FALSE
}
else
{
lambda <- c(lambda[2], a, lambda[3])
lik <- c(lik[2], lika, lik[3])
right.side <- TRUE
}
}
else
{
a <- lambda[1] + .61803 * (lambda[2] - lambda[1])
lika <- score.s(x, y, a)$Lik
if (lik[2] > lika)
{
lambda[1] <- a
lik[1] <- lika
right.side <- TRUE
}
else
{
lambda <- c(lambda[1], a, lambda[2])
lik <- c(lik[1], lika, lik[2])
right.side <- FALSE
}
}
}
lambda[lik == min(lik)][1]
}
##---------------------------------------------------------------------------##
## file: fwdglm.q
"fwdglm" <-
function(formula, family, data, weights, na.action, contrasts = NULL, bsb = NULL, balanced = TRUE, maxit = 50, epsilon = 1e-6, nsamp = 100, trace = TRUE)
{
call <- match.call()
if (is.character(family))
family <- get(family)
if (is.function(family))
family <- family()
if (is.null(family$family))
{ print(family)
stop("`family' not recognized")
}
if (missing(data))
data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
mf$family <- mf$bsb <- mf$balanced <- mf$maxit <- NULL
mf$epsilon <- mf$nsamp <- mf$trace <- mf$contrasts <- NULL
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
na.act <- attr(mf, "na.action")
xvars <- as.character(attr(mt, "variables"))[-1]
if ((yvar <- attr(mt, "response")) > 0)
xvars <- xvars[-yvar]
xlev <- if (length(xvars) > 0)
{ xlev <- lapply(mf[xvars], levels)
xlev[!sapply(xlev, is.null)]
}
x <- if (!is.empty.model(mt))
model.matrix(mt, mf, contrasts)
y <- model.response(mf, "numeric")
weights <- model.weights(mf)
offset <- model.offset(mf)
if (!length(weights))
weights <- rep(1, nrow(x))
else
if (any(weights < 0))
stop("negative weights not allowed")
if (is.null(offset)) offset <- 0
#KR family <- as.family(family)
n <- nrow(x)
p <- ncol(x)
xcolnames <- dimnames(x)[[2]]
# Unit = Unit(s) included in each step ##
Unit <- matrix(as.integer(NA), nrow = n-p+1, ncol = 5)
# included = all cases included at each step ##
included <- list()
# Coefficients = estimated beta coefficients ##
Coefficients <- matrix(as.double(NA), nrow = n-p+1, ncol = p)
# tStatistics = t statistics ##
tStatistics <- matrix(as.double(NA), nrow = n-p+1, ncol = p)
# lik = deviance + residual deviance + psuedo R2 + dispersion parameter ##
lik <- matrix(as.double(NA), nrow = n-p+1, ncol = 4)
## MaxRes = max deviance residual in bsb and mth deviance residual ##
MaxRes <- matrix(as.double(NA), nrow = n-p+1, ncol = 2)
## MinDelRes = minimum deviance residual out of bsb and ##
## (m+1)th deviance residual ##
MinDelRes <- matrix(as.double(NA), nrow = n-p, ncol = 2)
## CookDist = Forward Cook distance ##
CookDist <- matrix(as.double(NA), nrow = n-p, ncol = 1)
## ModCookDist = modified Cook's distance ##
ModCookDist <- matrix(as.double(NA), nrow = n-p, ncol = 5)
## ScoreTest = matrix to hold score test ##
ScoreTest <- matrix(as.double(NA), nrow = n - p, ncol = 1)
## Rglm = Residuals in each step ##
Rglm <- matrix(as.double(NA), nrow = n, ncol = n - p + 1)
## Leverage = Leverage ##
Leverage <- matrix(as.double(NA), nrow = n, ncol = n - p + 1)
## Weights = matrix of weights used for each step ##
Weights <- matrix(as.double(NA), nrow = n, ncol = n - p + 1)
inverse.fn <- family$linkinv
variance.fn <- family$variance
# in R family$dev.resids returns d_i^2 so Deviance=sum(d_i^2), and it lacks
# a further argument to get deviance residuals. Here I correct the function
# and also note the different order of the arguments:
deviance.fn <- function(mu, y, weights)
{ sign(y-mu) * sqrt(abs(family$dev.resids(y, mu, weights))) }
# Suppress warnings
user.warnings <- options()$warn
options(warn = -1)
on.exit(options(warn = user.warnings))
## For a balanced search we store the fraction of successes ##
binary <- (max(y) - min(y) == 1 && length(unique(y)) == 2)
if (balanced && !binary) balanced <- FALSE
if (balanced)
{ total.ones <- sum(y)
prop1 <- total.ones / length(y) }
if (is.null(bsb))
{
zz <- lmsglm(x, y, family = family, weights = weights,
offset = offset, n.samples = nsamp,
max.samples = ifelse(nsamp == "all", -1, 2*nsamp),
epsilon = epsilon, maxit = maxit, trace = trace)
initial.bsb <- bsb <- zz$bsb
dev.res <- abs(zz$dev.res)
}
else
{
initial.bsb <- bsb
if (any(offset))
obsb <- offset[bsb]
else
obsb <- 0
zz <- glm.fit(x = x[bsb,], y = y[bsb], weights = weights[bsb],
offset = obsb, family = family,
control = glm.control(epsilon = epsilon, maxit = maxit))
eta <- offset + x %*% matrix(zz$coefficients, ncol = 1)
mu <- inverse.fn(eta)
dev.res <- abs(deviance.fn(mu, y, weights))
}
if (length(dev.res) != n || all(is.na(dev.res)))
stop("fwdglm was not able to select an initial subset.")
# In quasi-non-identifiable models the glm.fit is very sensitive to
# the order of obs, so instead of using ...
# sor <- order(abs(dev.res))
# ... I use directly the best subset as found or provided
sor <- bsb
inibsb <- integer(0)
if (trace)
cat("\n\t*** Starting Forward Search ***\n\n")
for (m in p:n)
{
if (trace && m %% 10 == 0)
cat(paste("fwdglm: finished ", m, " steps out of ", n, ".\n", sep = ""))
## determin the subset for mth iteration ##
if (balanced)
{
## if the search is balanced then we try to keep the success/failure ##
## ratio as close as possible to that of the entire data ##
ones <- round(prop1*m)
if (ones == 0)
ones <- 1
else
if (ones == m)
ones <- m - 1
if (ones > total.ones)
ones <- total.ones
zeros <- m - ones
zeros <- sort(dev.res[y==0])[1:zeros]
zeros <- as.integer(names(zeros))
ones <- sort(dev.res[y==1])[1:ones]
ones <- as.integer(names(ones))
bsb <- c(zeros, ones)
}
else
if (binary && !balanced)
{
## for searches with binary response we make sure that there is at ##
## least one success and one failure in each subset ##
bsb <- sor[1:m]
n.response <- unique(y[bsb])
if (length(n.response) == 1)
{ missing.response <- ifelse(n.response == 0, 1, 0)
values <- sort(dev.res[y == missing.response])
missing.response <- which(values == min(values))[1]
bsb <- c(missing.response, bsb[-m])
}
}
else
bsb <- sor[1:m]
included[[m-p+1]] <- sort(bsb) # added info
## determin which units have entered the subset ##
unit <- setdiff(bsb, inibsb)
if (length(unit) > 5)
unit <- unit[1:5]
Unit[m-p+1, 1:length(unit)] <- unit
inibsb <- bsb # luca
## subset the data ##
xbsb <- x[bsb, , drop = FALSE]
ybsb <- y[bsb]
wbsb <- weights[bsb]
if (any(offset))
obsb <- offset[bsb]
else
obsb <- 0
zz <- try(glm.fit(xbsb, ybsb, weights = wbsb, family = family,
offset = obsb,
# null.dev = T, qr = T,
control = glm.control(epsilon = epsilon,
maxit = maxit)))
if (!inherits(zz,"try-error") &&
zz$qr$rank == p &&
!any(is.na(zz$coefficients)))
{
## If an offset and intercept is present, iterations are needed to ##
## compute the Null deviance; these are done here, unless the model ##
## is NULL, in which case the computations have been done already ##
if(any(offset) && attr(mt, "intercept"))
{
zz$null.deviance <-
if (length(mt))
glm.fit(xbsb[, "(Intercept)", drop = FALSE],
ybsb, wbsb, offset = obsb,
family = family,
control = glm.control(epsilon = epsilon,
maxit = maxit))$deviance
else zz$deviance
}
eta <- offset + x %*% matrix(zz$coefficients, ncol = 1)
mu <- inverse.fn(eta)
dev.res <- deviance.fn(mu, y, weights)
## store deviance residuals ##
Rglm[ ,m-p+1] <- dev.res
dev.res <- abs(dev.res)
## if there are no missing values in dev.res then update sor ##
if (!any(is.na(dev.res)))
sor <- order(dev.res)
## store the coefficients and the weights ##
Coefficients[m - p + 1, ] <- zz$coefficients
Weights[bsb, m - p + 1] <- zz$weights
## calculate the pearson residuals ##
V <- variance.fn(mu) / weights
pearson.res <- (y - mu) / sqrt(V)
## store the deviance, the residual deviance, the pseudo R2 and the ##
## dispersion parameter ##
lik[m-p+1, 1] <- zz$deviance
lik[m-p+1, 2] <- zz$null.deviance - zz$deviance
lik[m-p+1, 3] <- 1 - zz$deviance / zz$null.deviance
if (m > p)
lik[m-p+1, 4] <- sum(pearson.res[bsb]^2) / (m - p)
## calculate the dispersion ##
dispersion <- switch(casefold(family$family[1]),
binomial = 1,
poisson = 1,
lik[m - p + 1, 4])
## calculate t statistics ##
rinv <- try(backsolve(zz$R, diag(p)))
if (!inherits(rinv,"try-error") && m > p)
{
rowlen <- drop(((rinv^2.) %*% rep(1., p))^0.5)
tStatistics[m-p+1, ] <- zz$coef / rowlen %o% sqrt(dispersion)
}
## calculate leverage ##
xb <- xbsb * sqrt(zz$weights)
h <- try(diag(xb %*% solve(t(xb) %*% xb, tol=1e-20) %*% t(xb)))
if (!inherits(h,"try-error"))
Leverage[bsb, m - p + 1] <- h
if (m > p)
{
## compute the score test statistic ##
h <- scglm(xbsb, ybsb, family = family,
weights = zz$weights, offset = obsb,
beta = zz$coefficients, phi = dispersion)
if (h != "singular")
ScoreTest[m-p, 1] <- h
## compute Cook's distance ##
bdiff <- Coefficients[m-p+1, , drop = FALSE] -
Coefficients[m-p, , drop = FALSE]
Xt <- sqrt(zz$weights) * xbsb
CookDist[m-p, 1] <- bdiff %*% (t(Xt) %*% Xt) %*% t(bdiff) /
(p * dispersion)
## compute modified Cook's distance ##
h <- Leverage[unit, m-p+1]
h <- sqrt((m-p)/p) * sqrt((h/(1-h)^2) / dispersion) * dev.res[unit]
ModCookDist[m - p, 1:length(h)] <- h
}
}
else
{
Coefficients[m - p + 1, ] <- Coefficients[m - p, ]
lik[m - p + 1, 1] <- lik[m - p, 1]
tStatistics[m - p + 1, ] <- tStatistics[m - p, ]
Leverage[bsb, m - p + 1] <- Leverage[bsb, m - p + 1]
Rglm[ ,m - p + 1] <- Rglm[ , m - p]
if (m > p + 1)
{ ScoreTest[m - p, 1] <- ScoreTest[m - p - 1, 1]
CookDist[m - p, 1] <- CookDist[m - p - 1, 1] }
}
## compute the maximum (in bsb) and mth deviance ##
MaxRes[m - p + 1, 1] <- max(dev.res[bsb])
MaxRes[m - p + 1, 2] <- dev.res[sor[m]]
if (m < n)
{ bsb.comp <- setdiff(1:n, bsb)
## calculate the minimum (in complement of bsb) and (m+1)th deviance ##
MinDelRes[m - p + 1, 1] <- min(dev.res[bsb.comp])
MinDelRes[m - p + 1, 2] <- dev.res[sor[m + 1]]
}
}
dimnames(Unit) <- list(paste("m=", p:n, sep = ""), 1:5)
names(included) <- paste("m=", p:n, sep = "")
dimnames(Coefficients) <- list(paste("m=", p:n, sep = ""), xcolnames)
dimnames(tStatistics) <- list(paste("m=", p:n, sep = ""), xcolnames)
dimnames(lik) <- list(paste("m=", p:n, sep = ""), c("Deviance", "Residual Deviance", "Pseudo R2", "Dispersion"))
dimnames(MaxRes) <- list(paste("m=", p:n, sep = ""), c("max", "mth"))
dimnames(MinDelRes) <- list(paste("m=", p:(n - 1), sep = ""), c("min", "(m+1)th"))
dimnames(CookDist) <- list(paste("m=", (p + 1):n, sep = ""), "Cook's")
dimnames(ScoreTest) <- list(paste("m=", (p + 1):n, sep = ""), "Score Test")
dimnames(Rglm) <- list(1:n, paste("m=", p:n, sep = ""))
dimnames(Leverage) <- list(1:n, paste("m=", p:n, sep = ""))
dimnames(Weights) <- list(1:n, paste("m=", p:n, sep = ""))
dimnames(ModCookDist) <- list(paste("m=", (p + 1):n, sep = ""), 1:5)
ans <- list(call = match.call(),
Residuals = Rglm,
Unit = Unit,
included = included,
Coefficients = Coefficients,
tStatistics = tStatistics,
Leverage = Leverage,
MaxRes = MaxRes,
MinDelRes = MinDelRes,
ScoreTest = ScoreTest,
Likelihood = lik,
CookDist = CookDist,
ModCookDist = ModCookDist,
Weights = Weights,
inibsb = initial.bsb,
binary.response = binary)
#KR oldClass(ans) <- "fwdglm"
class(ans) <- "fwdglm"
ans
}
##---------------------------------------------------------------------------##
## file: lmsglm.q
"lmsglm" <-
function(x, y, family, weights, offset, n.samples = 100, max.samples = 200, epsilon = 1e-4, maxit = 50, trace = FALSE)
{
user.warnings <- options()$warn
user.error <- options()$show.error.messages
on.exit(options(warn = user.warnings, show.error.messages = user.error))
options(warn = -1, show.error.messages = FALSE)
n <- dim(x)[1]
p <- dim(x)[2]
#KR family <- as.family(family)
#R change:
if (missing(offset)) offset <- 0
if (missing(weights)) weights <- rep(1,n)
subset.median <- function(bsb, x, y, family, weights,
offset, epsilon, maxit)
{
ybsb <- y[bsb]
if (length(unique(ybsb)) == 1)
return(as.double(NA))
if (any(offset))
obsb <- offset[bsb]
else
obsb <- 0
fit <- try(glm.fit(x[bsb,], ybsb, family = family, weights = weights[bsb],
offset = obsb, # qr = T,
control = glm.control(epsilon = epsilon,
maxit = maxit)))
if (!inherits(fit,"try-error") && fit$qr$rank == dim(x)[2])
{
mu <- family$linkinv(x %*% matrix(fit$coefficients, ncol = 1) + offset)
dev.res <- family$dev.resids(y, mu, weights)
return(median(abs(dev.res[-bsb])))
}
as.double(NA)
}
if (n.samples == "all")
{
subsets <- t(fwd.combn(n, p))
medians <- apply(subsets, 1, subset.median,
x = x, y = y, family = family,
weights = weights, offset = offset,
epsilon = epsilon, maxit = maxit)
# medians <- na.omit(medians)
# good.samples <- length(medians)
good.samples <- length(na.omit(medians))
total.samples <- "all"
bsb <- subsets[which(medians == min(medians, na.rm = TRUE))[1],]
}
else
{
total.samples <- 0
good.samples <- 0
least.median <- Inf
best.bsb <- integer()
n1 <- 1:n
while (good.samples < n.samples && total.samples < max.samples)
{
bsb <- sample(n1, p)
med <- subset.median(bsb, x = x, y = y, family = family,
weights = weights, offset = offset,
epsilon = epsilon, maxit = maxit)
if (!is.na(med) && med > 1e-006)
{
good.samples <- good.samples + 1
if (med < least.median)
{ least.median <- med
best.bsb <- bsb }
}
total.samples <- total.samples + 1
if (trace && total.samples %% 10 == 0)
cat(paste("lmsglm: found ", good.samples,
" good subsets after trying ", total.samples,
".\n", sep = ""))
}
bsb <- best.bsb
}
if (!good.samples)
{ cat("There were no good subsets in lmsglm. \n")
stop() }
if (any(offset))
obsb <- offset[bsb]
else
obsb <- 0
fit <- glm.fit(x[bsb, ], y[bsb], family = family,
weights = weights[bsb], offset = obsb,
control = glm.control(epsilon = epsilon, maxit = maxit))
mu <- family$linkinv(x %*% matrix(fit$coefficients, ncol = 1) + offset)
dev.res <- family$dev.resids(y, mu, weights)
message <- paste("There were", good.samples, "usable subsets out of",
total.samples, "subsets tried.")
list(bsb = bsb, dev.res = dev.res, message = message, model = fit)
}
##---------------------------------------------------------------------------##
## file: fwdglm.methods.q
print.fwdglm <- function(x, ...)
{
cat("Call:\n")
print(x$call)
n <- dim(x$Residuals)[1]
p <- dim(x$Coefficients)[2]
cat("\nLast 5 units included in the forward search:\n")
units <- x$Unit[(n - p - 3):(n - p + 1), 1, drop = FALSE]
dimnames(units) <- list(dimnames(units)[[1]], "")
print(t(units))
cat("\nEstimated coefficients for 50% of the data and for the full model:\n")
p <- dim(x$Coefficients)[2]
n <- dim(x$Coefficients)[1] + p - 1
print(x$Coefficients[c(ceiling(n/2) - p + 1, n - p + 1), ])
cat("\n")
invisible(x)
}
summary.fwdglm <- function(object, steps = "auto", remove.perfect.fit = TRUE, ...)
{
n <- dim(object$Residuals)[1]
p <- dim(object$Coefficients)[2]
if (steps == "auto")
steps <- max(5, as.integer(.05*n), na.rm = TRUE)
else if (steps == "all")
steps <- n-p #-1
else
steps <- as.integer(steps)
if (remove.perfect.fit && object$binary.response)
{
idx <- max((1:nrow(object$Likelihood))[object$Likelihood[,1] < 1e-3],
na.rm = TRUE)
new.steps <- nrow(object$Likelihood) - idx
if (new.steps < steps)
steps <- new.steps
}
if (steps + p > n)
stop("Too many steps")
steps <- steps - 1
if (any(!is.na(object$Unit[(n-p-steps+1):(n-p), 2])))
Unit <- object$Unit[(n-p-steps+1):(n-p+1), , drop = FALSE]
else
Unit <- t(object$Unit[(n-p-steps+1):(n-p+1), 1, drop = FALSE])
int <- intersect(1:n, as.vector(object$Unit[(n - p - steps):(n - p), ]))
Leverage <- object$Leverage[int, (n - p - steps + 1):(n - p + 1),
drop = FALSE]
Residuals <- object$Residuals[int, (n - p - steps + 1):(n - p + 1),
drop = FALSE]
MaxRes <- object$MaxRes[(n - p - steps + 1):(n - p + 1), , drop = FALSE]
MinDelRes <- object$MinDelRes[(n - p - steps):(n - p), , drop = FALSE]
Coefficients <- object$Coefficients[(n - p - steps + 1):(n - p + 1), ,
drop = FALSE]
tStatistics <- object$tStatistics[(n - p - steps + 1):(n - p + 1), ,
drop = FALSE]
CookDist <- object$CookDist[(n - p - steps):(n - p), , drop = FALSE]
na.cols <- apply(object$ModCookDist[(n - p - steps):(n - p), ], 2,
function(u) all(is.na(u)))
ModCookDist <- object$ModCookDist[(n - p - steps):(n - p), !na.cols,
drop = FALSE]
if (dim(ModCookDist)[2] == 1)
dimnames(ModCookDist)[[2]] <- "Distance"
Likelihood <- object$Likelihood[(n - p - steps + 1):(n - p + 1), ,
drop = FALSE]
ScoreTest <- object$ScoreTest[(n - p - steps):(n - p), , drop = FALSE]
smry <- list(call = object$call,
Unit = Unit,
Leverage = Leverage,
Residuals = Residuals,
MaxRes = MaxRes,
MinDelRes = MinDelRes,
Coefficients = Coefficients,
tStatistics = tStatistics,
CookDist = CookDist,
ModCookDist = ModCookDist,
Likelihood = Likelihood,
ScoreTest = ScoreTest)
class(smry) <- "summary.fwdglm"
smry
}
print.summary.fwdglm <- function(x, digits = 4, ...)
{
cat("Call:\n")
print(x$call)
steps <- dim(x$Unit)[1]
cat(paste("\nUnits included in the last", steps, "steps:\n"))
if (dim(x$Unit)[1] == 1)
dimnames(x$Unit)[1] <- "Unit"
print(x$Unit)
if (!is.element ("squared", names(x$Call)))
cat("\nDeviances:\n")
else if (x$Call$squared)
cat("\nSquared Deviances:\n")
else
cat("\nDeviances:\n")
print(x$Residuals, digits = digits)
cat("\nLeverage:\n")
print(x$Leverage, digits = digits)
cat("\nMaximum Deviance and mth Deviance:\n")
print(x$MaxRes, digits = digits)
cat("\nMinumum Deviance and (m+1)th Deviance:\n")
print(x$MinDelRes, digits = digits)
cat("\nCoefficients:\n")
print(x$Coefficients, digits = digits)
cat("\nt Statistics:\n")
print(x$tStatistics, digits = digits)
cat("\nDiagnostics:\n")
print(x$Likelihood, digits = digits)
cat("\nScore Test:\n")
print(x$ScoreTest, digits = digits)
cat("\nCook's Distance:\n")
print(x$CookDist, digits = digits)
cat("\nModified Cook's Distance:\n")
print(x$ModCookDist, digits = digits)
invisible(x)
}
"plot.fwdglm" <-
function(x, which.plots = 1:11, squared = FALSE, scaled = FALSE, ylim = NULL, xlim = NULL, th.Res = 4, th.Lev = 0.25, sig.Tst = 2.58, sig.score = 1.96, plot.pf = FALSE, labels.in.plot = TRUE, ...)
{
if (!is.numeric(which.plots) || max(which.plots) > 11 || min(which.plots) < 1)
stop("which.plots must be an integer vector with values between 1 and 11")
oldpar <- par()
# on.exit(par(oldpar))
if (length(which.plots)>1)
{ par(ask = TRUE)
on.exit(par(ask=oldpar$ask)) }
p <- ncol(x$Coefficients)
n <- nrow(x$Residuals)
if (!plot.pf && x$binary.response)
pf.idx <- max((1:nrow(x$Likelihood))[x$Likelihood[,1] < 1e-3], na.rm = TRUE)
else
pf.idx <- FALSE
## Plot Deviance Residuals ##
if (is.element(1, which.plots))
{
x1 <- x$Residuals
if (pf.idx)
x1 <- x1[, (pf.idx+1):(n-p+1), drop = FALSE]
if (squared)
{ x1 <- x1^2
ylab <- "Squared Deviance Residuals" }
else
{ ylab <- "Deviance Residuals" }
if (is.null(ylim))
y.lim <- range(x1, na.rm = TRUE)
else
y.lim <- ylim
domain <- (n-dim(x1)[2]+1):n
jitter <- 0.025*diff(range(domain, na.rm = TRUE))
if (is.null(xlim))
x.lim <- c(min(domain, na.rm = TRUE) - jitter,
max(domain, na.rm = TRUE) + jitter)
else
x.lim <- xlim
plot(domain, x1[1, ],
type = "n",
ylim = y.lim,
xlim = x.lim,
xlab = "Subset Size",
ylab = ylab)
for (i in 1:nrow(x1))
lines(domain, x1[i, ], lty = i)
### Write numbers for selected units which
### have residuals bigger than th.Res
ma <- apply(abs(x1), 1, max, na.rm = TRUE)
nam <- which(ma > th.Res)
if (lnam <- length(nam))
{
text(rep(p, lnam), x1[nam, 1], paste(nam, " ", sep = ""), adj = 1)
text(rep(n, lnam), x1[nam, ncol(x1)], paste(" ", nam, sep = ""), adj = 0)
}
}
## Plot Leverage ##
if (is.element(2, which.plots))
{
x1 <- x$Leverage
if (pf.idx)
x1 <- x1[, (pf.idx+1):(n-p+1), drop = FALSE]
if (is.null(ylim))
y.lim <- c(0, 1)
else
y.lim <- ylim
domain <- (n-dim(x1)[2]+1):n
if (is.null(xlim))
x.lim <- c(min(domain)-1, max(domain)+ 1)
else
x.lim <- xlim
plot(domain, rep(0.5, length(domain)),
type = "n",
ylim = y.lim,
xlim = x.lim,
xlab = "Subset Size",
ylab = "Leverage")
for (i in 1:(nrow(x1)))
lines(domain, x1[i, ], lty = i, col = i, lwd = 1)
## Draw the last unit as a point ##
sy <- which(is.na(x1[, ncol(x1) - 1]))
points(n, x1[sy, ncol(x1)], pch = 16)
label.index <- which(x1[, dim(x1)[2]] > th.Lev)
if (n.points <- length(label.index))
{
label.ycoord <- x1[label.index, dim(x1)[2]]
text(rep(n, n.points), label.ycoord,
paste(" ", label.index, sep = ""), adj = 0)
### In the following lines you find the steps
### of the fwd search in which these units enter
x1 <- x1[label.index, , drop = FALSE]
entry.points <- (n-dim(x1)[2]+1):n
index.fun <- function(u, col.names)
col.names[min(which(!is.na(u)), na.rm = TRUE)]
entry.xcoord <- apply(x1, 1, index.fun, col.names = entry.points)
### Exclude the unit included in the last step (if it is present in tee)
not.last.unit <- entry.xcoord < n
if (all(not.last.unit))
{
entry.xcoord <- entry.xcoord[not.last.unit]
label.index <- label.index[not.last.unit]
entry.ycoord <- apply(x1[not.last.unit, , drop = FALSE], 1,
function(u) na.omit(u)[1])
text(entry.xcoord, entry.ycoord,
paste(label.index, " ", sep = ""), adj = 1)
}
}
}
## Plot maximum deviance residuals ##
if (is.element(3, which.plots))
{
x1 <- x$MaxRes
if (pf.idx)
x1 <- x1[(pf.idx+1):(n-p+1), , drop = FALSE]
if (is.null(ylim))
y.lim <- c(0, max(x1[is.finite(x1)], na.rm = TRUE))
else
{ y.lim <- ylim
# x1[x1 < min(y.lim, na.rm=T) | x1 > max(y.lim, na.rm=T)] <- NA
}
domain <- (n-dim(x1)[1]+1):n
if (is.null(xlim))
x.lim <- c(min(domain), max(domain)+1)
else
x.lim <- xlim
plot(domain, x1[,1],
xlab = "Subset Size",
ylab = "Max and mth Residual",
type = "l",
ylim = y.lim,
xlim = x.lim)
lines(domain, x1[,2], lty = 2, lwd = 1)
usr <- par()$usr
legend(usr[1] + 0.05*diff(usr[1:2]), usr[4] - 0.05*diff(usr[3:4]),
c("Max", "(m)th"), lty = c(1,2), lwd = 1)
}
## Plot minimum deviance residuals ##
if (is.element(4, which.plots))
{
x1 <- x$MinDelRes
if (pf.idx)
x1 <- x1[(pf.idx+1):(n-p), , drop = FALSE]
if (is.null(ylim))
y.lim <- c(0, max(x1[is.finite(x1)], na.rm = TRUE))
else
y.lim <- ylim
domain <- (n-dim(x1)[1]):(n-1)
if (is.null(xlim))
x.lim <- range(domain, na.rm = TRUE)
else
x.lim <- xlim
plot(domain, x1[ ,1],
xlab = "Subset Size",
ylab = "Min and (m+1)th Residual",
type = "l",
ylim = y.lim,
xlim = x.lim)
lines(domain, x1[,2], lty = 2)
usr <- par()$usr
legend(usr[1] + 0.05*diff(usr[1:2]), usr[4] - 0.05*diff(usr[3:4]),
c("Min", "(m+1)th"), lty = c(1,2), lwd = 1)
}
## Plot coefficients ##
if (is.element(5, which.plots))
{
x1 <- x$Coefficients
if (pf.idx)
x1 <- x1[(pf.idx+1):(n-p+1), , drop = FALSE]
if (scaled)
{ ssb <- round(.333*nrow(x1)):nrow(x1)
for (i in 1:ncol(x1))
x1[, i] <- x1[, i] / abs(mean(x1[ssb, i], na.rm = TRUE))
ylab <- "Scaled Coefficient Estimates" }
else
{ ylab <- "Coefficient Estimates" }
if (is.null(ylim))
y.lim <- range(x1, na.rm = TRUE)
else
y.lim <- ylim
domain <- (n-dim(x1)[1]+1):n
if (is.null(xlim))
{ x.lim <- range(domain, na.rm = TRUE)
if (labels.in.plot)
{ adj <- 0.05*diff(x.lim)
x.lim <- c(0, adj) + x.lim }
}
else
x.lim <- xlim
plot(0, 0,
type = "n",
xlab = "Subset Size",
ylab = ylab,
ylim = y.lim,
xlim = x.lim)
for (i in 1:ncol(x1))
lines(domain, x1[, i], lty = i, lwd = 1)
if (labels.in.plot)
# text(rep(n,p), x1[nrow(x1),], labels = paste(" C", 0:(p-1), sep = ""), cex = 1.1, adj = 0)
# Mathematical annotation
for (i in 1:p)
{ text(rep(n,p), x1[nrow(x1),i],
substitute(hat(beta)[b], list(b=i-1)), pos=4) }
}
## Plot t statistics ##
if (is.element(6, which.plots))
{
x1 <- x$tStatistics
if (pf.idx)
x1 <- x1[(pf.idx+1):(n-p+1), , drop = FALSE]
if (is.null(ylim))
{ y.lim <- round(.667 * dim(x1)[1]):(dim(x1)[1])
y.lim <- range(x1[y.lim, , drop = FALSE], na.rm = TRUE)
y.lim <- max(abs(range(y.lim)))
y.lim <- c(-1*y.lim, y.lim)
}
else
y.lim <- ylim
domain <- (n-dim(x1)[1]+1):n
if (is.null(xlim))
{ x.lim <- range(domain, na.rm = TRUE)
if (labels.in.plot)
{ adj <- 0.05*diff(x.lim)
x.lim <- c(0, adj) + x.lim }
}
else
x.lim <- xlim
plot(0, 0,
type = "n",
xlab = "Subset Size",
ylab = "t Statistics",
ylim = y.lim,
xlim = x.lim)
for (i in 1:ncol(x1))
lines(domain, x1[, i], lty = i, lwd = 1)
if (is.numeric(sig.Tst))
{ abline(h = sig.Tst, col = "lightgrey")
abline(h = -sig.Tst, col = "lightgrey") }
if (labels.in.plot)
# text(rep(n,p), x1[nrow(x1),], labels = paste(" C", 0:(p-1), sep = ""), cex = 1.1, adj = 0)
# Mathematical annotation
for (i in 1:p)
{ text(rep(n,p), x1[nrow(x1),i],
substitute(t[b], list(b=i-1)), pos=4) }
}
## Plot Likelihood matrix ##
if (is.element(7, which.plots))
{
x1 <- x$Likelihood
if (pf.idx)
x1 <- x1[(pf.idx+1):(n-p+1), , drop = FALSE]
domain <- (n-dim(x1)[1]+1):n
y.labels <- c("Deviance", "Deviance Explained",
"Pseudo R-squared", "Dispersion Parameter")
par(mfrow = c(2, 2), mar=c(5,4,1,2))
for (i in 1:4)
{ plot(domain, x1[, i],
xlab = "Subset Size",
ylab = y.labels[i],
type = "l")
subticks <- domain[which(domain%%5 == 0)]
axis(1, at = subticks, labels = rep("", length(subticks)))
}
par(mfrow = oldpar$mfrow, mar = oldpar$mar)
}
## Plot score test
if (is.element(8, which.plots))
{
x1 <- x$ScoreTest
if (pf.idx)
x1 <- x1[(pf.idx):(n-p), , drop = FALSE]
if (is.null(ylim))
{ y.lim <- range(x1, na.rm = TRUE)
y.lim <- max(c(abs(y.lim), 2.2), na.rm = TRUE)
y.lim <- c(-1*y.lim, y.lim)
}
else
y.lim <- ylim
domain <- (n-dim(x1)[1]+1):n
if (is.null(xlim))
x.lim <- range(domain, na.rm = TRUE)
else
x.lim <- xlim
plot(domain, x1,
xlab = "Subset Size",
ylab = "Goodness of Link Test",
type = "l",
ylim = y.lim,
xlim = x.lim)
subticks <- domain[which(domain%%5 == 0)]
axis(1, at = subticks, labels = rep("", length(subticks)))
if (is.numeric(sig.score))
{ abline(h = sig.score, col = "lightgrey")
abline(h = -sig.score, col = "lightgrey") }
}
## Plot Cook's distance
if (is.element(9, which.plots))
{
x1 <- x$CookDist
if (pf.idx)
x1 <- x1[(pf.idx):(n-p), , drop = FALSE]
if (is.null(ylim))
y.lim <- c(0, max(as.numeric(x1), na.rm = TRUE))
else
y.lim <- ylim
domain <- (n-dim(x1)[1]+1):n
if (is.null(xlim))
x.lim <- range(domain, na.rm = TRUE)
else
x.lim <- xlim
plot(domain, x1,
xlim = x.lim,
ylim = y.lim,
xlab = "Subset Size",
ylab = "Cook's Distance",
type = "l")
}
# Plot modified Cook's distance
if (is.element(10, which.plots))
{
x1 <- x$ModCookDist
if (pf.idx)
x1 <- x1[(pf.idx):(n-p), , drop = FALSE]
if (is.null(ylim))
y.lim <- c(0, max(as.numeric(x1), na.rm = TRUE))
else
y.lim <- ylim
domain <- (n-dim(x1)[1]+1):n
if (is.null(xlim))
x.lim <- range(domain, na.rm = TRUE)
else
x.lim <- xlim
plot(domain, x1[,1],
xlim = x.lim,
ylim = y.lim,
xlab = "Subset Size",
ylab = "Modified Cook's Distance",
type = "n")
for (i in 1:5)
lines(domain, x1[,i], lty=i)
}
## Plot the weights for each step ##
if (is.element(11, which.plots))
{
x1 <- x$Weights
if (pf.idx)
x1 <- x1[ , (pf.idx+1):(n-p+1), drop = FALSE]
if (is.null(ylim))
y.lim <- range(x1, na.rm = TRUE)
else
y.lim <- ylim
domain <- (n-dim(x1)[2]+1):n
if (is.null(xlim))
{ x.lim <- range(domain, na.rm = TRUE)
x.lim[2] <- x.lim[2] + 0.05*diff(x.lim) }
else
x.lim <- xlim
plot(domain, x1[1, ],
type = "n",
xlab = "Subset Size",
ylab = "Weights",
ylim = y.lim,
xlim = x.lim)
for (i in 1:(nrow(x1)))
lines(domain, x1[i, ], lty = i, col = i, lwd = 1)
text(n, x1[,ncol(x1)], paste(" ", dimnames(x1)[[1]], sep = ""), adj = 0)
}
invisible(x)
}
##---------------------------------------------------------------------------##
## file: scglm.q
"scglm" <-
function(x, y, family, weights, beta, phi=1, offset)
{
n <- dim(x)[1]
p <- dim(x)[2]
if (missing(weights)) weights <- rep(1,n)
weights <- sqrt(weights)
if (missing(offset)) offset <- 0
if (missing(beta))
stop("coefficients estimates must be provided")
user.error <- options()$show.error.messages
on.exit(options(show.error.messages = user.error))
options(show.error.messages = FALSE)
#KR inverse.fn <- family$inverse
inverse.fn <- family$linkinv
variance.fn <- family$variance
#KR dm.fn <- family$deriv
dm.fn <- function(eta) 1/family$mu.eta(eta)
eta <- offset + x %*% matrix(beta, ncol = 1)
#KR zz <- eta + (y - inverse.fn(eta)) * dm.fn(inverse.fn(eta)) - offset
zz <- eta + (y - inverse.fn(eta)) * dm.fn(eta) - offset
Xb <- weights * cbind(x, eta^2)
CXb <- t(Xb) %*% Xb
Cb <- try(solve(CXb, tol=1e-20))
if (inherits(Cb,"try-error"))
return("singular")
b.add <- Cb %*% t(Xb) %*% (zz * weights)
se <- sqrt(phi * Cb[p + 1, p + 1])
b.add[p + 1] / se
}
##---------------------------------------------------------------------------##
## file: chasalow.q
"fwd.combn" <-
function(x, m, fun = NULL, simplify = TRUE, ...)
{
# Renamed by Kjell Konis for inclusion in the Forward Library 11/2002
#
# DATE WRITTEN: 14 April 1994 LAST REVISED: 10 July 1995
# AUTHOR: Scott Chasalow
#
# DESCRIPTION:
# Generate all combinations of the elements of x taken m at a time.
# If x is a positive integer, returns all combinations
# of the elements of seq(x) taken m at a time.
# If argument "fun" is not null, applies a function given
# by the argument to each point. If simplify is FALSE, returns
# a list; else returns a vector or an array. "..." are passed
# unchanged to function given by argument fun, if any.
# REFERENCE:
# Nijenhuis, A. and Wilf, H.S. (1978) Combinatorial Algorithms for
# Computers and Calculators. NY: Academic Press.
# EXAMPLES:
# > combn(letters[1:4], 2)
# > combn(10, 5, min) # minimum value in each combination
# Different way of encoding points:
# > combn(c(1,1,1,1,2,2,2,3,3,4), 3, tabulate, nbins = 4)
# Compute support points and (scaled) probabilities for a
# Multivariate-Hypergeometric(n = 3, N = c(4,3,2,1)) p.f.:
# > table.mat(t(combn(c(1,1,1,1,2,2,2,3,3,4), 3, tabulate,nbins=4)))
#
if(length(m) > 1) {
warning(paste("Argument m has", length(m),
"elements: only the first used"))
m <- m[1]
}
if(m < 0)
stop("m < 0")
if(m == 0)
return(if(simplify) vector(mode(x), 0) else list())
if(is.numeric(x) && length(x) == 1 && x > 0 && trunc(x) == x)
x <- seq(x)
n <- length(x)
if(n < m)
stop("n < m")
e <- 0
h <- m
a <- 1:m
nofun <- is.null(fun)
count <- fwd.nCm(n, m, 0.10000000000000002)
out <- vector("list", count)
out[[1]] <- if(nofun) x[a] else fun(x[a], ...)
if(simplify) {
dim.use <- NULL
if(nofun) {
if(count > 1)
dim.use <- c(m, count)
}
else {
out1 <- out[[1]]
d <- dim(out1)
if(count > 1) {
if(length(d) > 1)
dim.use <- c(d, count)
else if(length(out1) > 1)
dim.use <- c(length(out1), count)
}
else if(length(d) > 1)
dim.use <- d
}
}
i <- 2
nmmp1 <- n - m + 1
mp1 <- m + 1
while(a[1] != nmmp1) {
if(e < n - h) {
h <- 1
e <- a[m]
j <- 1
}
else {
h <- h + 1
e <- a[mp1 - h]
j <- 1:h
}
a[m - h + j] <- e + j
out[[i]] <- if(nofun) x[a] else fun(x[a], ...)
i <- i + 1
}
if(simplify) {
if(is.null(dim.use))
out <- unlist(out)
else out <- array(unlist(out), dim.use)
}
out
}
"fwd.nCm" <-
function(n, m, tol = 9.9999999999999984e-009)
{
# Renamed by Kjell Konis for inclusion in the Forward Library 11/2002
#
# DATE WRITTEN: 7 June 1995 LAST REVISED: 10 July 1995
# AUTHOR: Scott Chasalow
#
# DESCRIPTION:
# Compute the binomial coefficient ("n choose m"), where n is any
# real number and m is any integer. Arguments n and m may be vectors;
# they will be replicated as necessary to have the same length.
#
# Argument tol controls rounding of results to integers. If the
# difference between a value and its nearest integer is less than tol,
# the value returned will be rounded to its nearest integer. To turn
# off rounding, use tol = 0. Values of tol greater than the default
# should be used only with great caution, unless you are certain only
# integer values should be returned.
#
# REFERENCE:
# Feller (1968) An Introduction to Probability Theory and Its
# Applications, Volume I, 3rd Edition, pp 50, 63.
#
len <- max(length(n), length(m))
out <- numeric(len)
n <- rep(n, length = len)
m <- rep(m, length = len)
mint <- (trunc(m) == m)
out[!mint] <- NA
out[m == 0] <- 1 # out[mint & (m < 0 | (m > 0 & n == 0))] <- 0
whichm <- (mint & m > 0)
whichn <- (n < 0)
which <- (whichm & whichn)
if(any(which)) {
nnow <- n[which]
mnow <- m[which]
out[which] <- ((-1)^mnow) * Recall(mnow - nnow - 1, mnow)
}
whichn <- (n > 0)
nint <- (trunc(n) == n)
which <- (whichm & whichn & !nint & n < m)
if(any(which)) {
nnow <- n[which]
mnow <- m[which]
foo <- function(j, nn, mm)
{
n <- nn[j]
m <- mm[j]
iseq <- seq(n - m + 1, n)
negs <- sum(iseq < 0)
((-1)^negs) * exp(sum(log(abs(iseq))) - lgamma(m + 1))
}
out[which] <- unlist(lapply(seq(along = nnow), foo, nn = nnow,
mm = mnow))
}
which <- (whichm & whichn & n >= m)
nnow <- n[which]
mnow <- m[which]
out[which] <- exp(lgamma(nnow + 1) - lgamma(mnow + 1) - lgamma(nnow -
mnow + 1))
nna <- !is.na(out)
outnow <- out[nna]
rout <- round(outnow)
smalldif <- abs(rout - outnow) < tol
outnow[smalldif] <- rout[smalldif]
out[nna] <- outnow
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.