Nothing
#main author: Kévin Allan Sales Rodrigues
#' Forward Search in Linear L1 Models
#'
#' This function applies the forward search approach to robust analysis in linear L1 models. This function is based on function \code{fwdlm} of package 'forward'.
#'
#' @param formula a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right.
#' @param data a data.frame in which to interpret the variables named in the formula, or in the subset and the weights argument. If this is missing, then the variables in the formula should be on the search list. This may also be a single number to handle some special cases – see below for details.
#' @param nsamp the initial subset for the forward search in linear regression is found by fitting the regression model with the R function \code{\link{lmsreg}}. This argument allows to control how many subsets are used in the Least Median of Squares regression. The choices are: the number of samples or "best" (the default) or "exact" or "sample". For details see \code{\link{lmsreg}}.
#' @param intercept logical for the inclusion of the intercept (if no formula is provided).
#' @param trace logical, if TRUE a message is printed for every ten iterations completed during the forward search.
#' @param subset an optional vector specifying a subset of observations to be used in the fitting process.
#' @param weights vector of observation weights; if supplied, the algorithm fits to minimize the sum of the weights multiplied into the absolute residuals. The length of weights must be the same as the number of observations. The weights must be nonnegative and it is strongly recommended that they be strictly positive, since zero weights are ambiguous.
#' @param na.action a function to filter missing data. This is applied to the model.frame after any subset argument has been used. The default (with na.fail) is to create an error if any missing values are found. A possible alternative is na.omit, which deletes observations that contain one or more missing values.
#' @param method the algorithmic method used to compute the fit. There are several options: "br", "fn", "pfn", "sfn", "fnc", "conquer", "pfnb", "qfnb", "ppro" and "lasso". See \code{\link{rq}} for more details.
#' @param model if TRUE then the model frame is returned. This is essential if one wants to call summary subsequently.
#' @param contrasts a list giving contrasts for some or all of the factors default = NULL appearing in the model formula. The elements of the list should have the same name as the variable and should be either a contrast matrix (specifically, any full-rank matrix with as many rows as there are levels in the factor), or else a function to compute such a matrix given the number of levels.
#' @param ... additional arguments for the fitting routines (see \code{\link{rq.fit.br}} and \code{\link{rq.fit.fnb}}, etc. and the functions they call).
#'
#' @returns A fitted forward search in linear L1 regression model object.
#'
#' @importFrom MASS lmsreg
#'
#' @export
#'
#' @references Atkinson, A.C. and Riani, M. (2000). \emph{Robust Diagnostic Regression Analysis}. New York: Springer.
#'
#'
#' @examples
#' \donttest{
#' # applies the forward search approach to robust analysis in a linear L1 model
#' mod = forwardSearch_regL1(Concentration ~ Age, data = bile)
#' }
forwardSearch_regL1 = function(formula, data, nsamp = "best",
intercept = TRUE, trace = TRUE,
subset, weights, na.action, method = "br", #argumentos padrão
model = TRUE, contrasts = NULL, ...)
{
tau = 0.5 # to L1 regression
if (!missing(formula))
{
m <- call("model.frame", formula = formula)
if (!missing(data))
m$data <- data
if (!missing(na.action))
m$na.action <- na.action
m <- eval(m, parent.frame())
Terms <- attr(m, "terms")
xvars <- as.character(attr(Terms, "variables"))
if ((yvar <- attr(Terms, "response")) > 0)
xvars <- xvars[ - yvar]
if (length(xvars) > 0)
{
xlevels <- lapply(m[xvars], levels)
xlevels <- xlevels[!sapply(xlevels, is.null)]
if (length(xlevels) == 0)
xlevels <- NULL
}
else
xlevels <- NULL
y <- model.response(m, "numeric")
x <- model.matrix(Terms, m, NULL)
}
else
{
if (is.null(x) || is.null(y))
stop("No data supplied")
y <- as.vector(y)
x <- as.matrix(x)
if (intercept)
x <- cbind(1,x)
}
n = nrow(x)
p = ncol(x)
## We supress warnings generated by lmsreg.default, e.g.
## More than half of the subsamples were singular.
temp.warn <- options()$warn
options(warn = -1)
lms <- MASS::lmsreg(x, y, nsamp = nsamp, intercept = FALSE)
options(warn = temp.warn)
sor <- order(abs(lms$residuals))
coef.names <- names(lms$coefficients)
# included = all cases included at each step ##
included <- list()
# Unit = Unit(s) included in each step
Unit <- matrix(as.integer(NA), nrow = n-p+1, ncol = 5)
# Coefficients = estimated beta Coefficients
Coefficients <- matrix(as.double(NA), nrow = n - p + 1, ncol = p)
# tStatistics = t statistics
tStatistics <- matrix(as.double(NA), nrow = n - p, ncol = p)
# MAE - Mean Absolute Error
MAE <- matrix(as.double(NA), nrow = n - p + 1, ncol = 1)
# R2
# R2 <- matrix(as.double(NA), nrow = n - p + 1, ncol = 1)
## MinDelRes = Minimum deletion residual
MinDelRes <- matrix(as.double(NA), n - p - 1, 1)
## Residuals = Residuals in each step
Residuals <- matrix(as.double(NA), n, n-p+1)
residuals <- lms$residuals
inibsb <- integer(0)
n1 <- 1:n
if (trace)
message("Starting Forward Search.\n")
for (m in p:n)
{
if(trace && m %% 10 == 0)
message(paste("Forward Search: finished ", m, " steps out of ", n, ".\n", sep = ""))
bsb <- sor[1:m]
included[[m-p+1]] <- sort(bsb) # added info
unit <- setdiff(bsb, inibsb)
if(length(unit) > 5)
Unit[m-p+1, ] <- unit[1:5]
else
Unit[m-p+1, 1:length(unit)] <- unit
inibsb <- bsb
xb <- x[bsb,]
yb <- y[bsb]
xb_without_ones = as.matrix(xb)[,-1] # removing ones column
zz <- try(regL1(yb ~ xb_without_ones))
if (!inherits(zz,"try-error") )# && (zz$qr$rank == p))
{
residuals <- y - x %*% zz$coefficients
if (!any(is.na(residuals)))
sor <- order(abs(residuals)) # get order of minimum residuals
### store residuals ###
Residuals[ ,m-p+1] <- residuals
### store estimated coefficients ###
Coefficients[m-p+1, ] <- zz$coefficients
### store MAE ### MAE - Mean Absolute Error
mae = zz$MAE
MAE[m-p+1] <- mae # 'mae' is temp MAE
### store R^2 ### -> trocar por R2 da regL1
# R2[m-p+1] <- mae # 1-(SSR/sum((yb - mean(yb))^2))
## If mAm is invertable then we compute the t statistics and ##
## distances. ##
mAm <- t(xb) %*% xb
# mmx <- try(solve(mAm)) # does not work
if (det(mAm, tol=1e-20) == 0)
{ mmx <- "singular matrix"
class(mmx) <- "error" }
else
mmx <- solve(mAm, tol=1e-20)
if (!inherits(mmx,"try-error") && (m > p))
{
### store t statistics ### extracted from summary
# trying get beta's estimates
beta_try = try(
suppressWarnings({
as.numeric(summary(zz, se = 'iid')$coefficients[,3])
})
, silent = TRUE)
if(inherits(beta_try, "try-error")){
beta_try = rep(0,p)
}
tStatistics[m-p, ] <- beta_try
}
if (!inherits(mmx,"try-error") && (m < n))
{
# bsb.comp = o que não está em bsb
# bsb.comp = complement of bsb #used to be ncl
bsb.comp <- setdiff(n1, bsb)
# find minimum deletion residual
ord <- numeric(length(bsb.comp))
#hii is leverage!
# tenho que refazer considerando o reajuste L1
for (i in 1:length(bsb.comp))
{
#hii <- x[bsb.comp[i], , drop = FALSE] %*% mmx %*% t(x[bsb.comp[i], , drop = FALSE])
#ord[i] <- residuals[bsb.comp[i]]^2 / (1+hii)
ord[i] <- residuals[bsb.comp[i]]
}
#somente dividi o resíduo absoluto pelo MAE ()
if (!is.na(MAE[m-p+1]) && MAE[m-p+1] > 0)
MinDelRes[m-p, 1] <- sqrt(min(abs(ord)) / MAE[m-p+1])
}
} #end of zz was fit if block
else
{
if (m == p)
Residuals[ ,m-p+1] <- y - x %*% lms$coef
else
Residuals[ ,m-p+1] <- Residuals[ ,m-p]
}
} #end of for loop
Residuals <- Residuals / mae
#### names
dimnames(Unit) <- list(paste("m=", p:n, sep=""), paste("Inclusion", 1:5))
names(included) <- paste("m=", p:n, sep = "")
dimnames(Coefficients) <- list(paste("m=", p:n, sep=""), coef.names)
dimnames(tStatistics) <- list(paste("m=", (p+1):n, sep=""), coef.names)
dimnames(Residuals) <- list(n1, paste("m=", p:n, sep=""))
#dimnames(R2) <- list(p:n, c("R^2"))
dimnames(MinDelRes) <- list((p+1):(n-1), "Min del. res.")
#excluding lines of zeros in tStatistics # NULL values
soma_abs <- rowSums(abs(tStatistics))
tStatistics <- tStatistics[soma_abs != 0, ]
number_excluded_lines = sum(soma_abs == 0)
ans <- list(call = match.call(),
Residuals = Residuals,
Unit = Unit,
included = included,
Coefficients = Coefficients,
tStatistics = tStatistics,
MAE = MAE,
#R2 = R2,
MinDelRes = MinDelRes,
StartingModel = lms)
class(ans) <- "forwardSearch_regL1"
ans
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.