Nothing
## Called from ./lmrob_simulation.Rnw
## ~~~~~~~~~~~~~~~~~~~~~
###########################################################################
## 1. simulation helper functions
###########################################################################
f.estname <- function(est = 'lmrob')
## Purpose: translate between 'estname' and actual function name,
## defaults to 'lmrob'
## f.lmRob is just a wrapper for lmRob, since there are some
## problems with the weight and weights arguments
## ----------------------------------------------------------------------
## Arguments: est: name of estimator
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 6 Oct 2009, 13:36
switch(est,
lm.rbase = 'lmrob', lm.robust = 'f.lmRob', rlm = 'rlm', lm = 'lm',
est)
f.errname <- function(err, prefix = 'r')
## Purpose: translate between natural name of distribution and
## R (r,p,q,d)-name
## ----------------------------------------------------------------------
## Arguments: err: name of distribution
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 6 Oct 2009, 13:36
paste(prefix,
switch(err,normal="norm", t="t", cauchy="cauchy",cnormal="cnorm",
err),sep = '')
f.requires.envir <- function(estname)
## Purpose: returns indicator on whether estname requires envir argument
## ----------------------------------------------------------------------
## Arguments: estname: name of estimating function
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 7 Oct 2009, 09:34
switch(estname,
f.lmrob.local = TRUE,
FALSE)
f..paste..list <- function(lst)
if (length(lst) == 0) return("") else
paste(names(lst),lst,sep='=',collapse=', ')
f..split..str <- function(str) {
litems <- strsplit(str,', ')
lst <- lapply(litems, function(str) strsplit(str,'='))
rlst <- list()
for (llst in lst) {
lv <- vector()
for (litem in llst) lv[litem[1]] <- litem[2]
rlst <- c(rlst, list(lv))
}
rlst
}
f.list2str <- function(lst, idx)
## Purpose: convert a list into a string that identifies the
## function and parameter configuration
## ----------------------------------------------------------------------
## Arguments: lst: list or list of lists
## idx: only take the elements in idx
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 7 Oct 2009, 10:03
f..paste..list(if(missing(idx)) unlist(lst) else unlist(lst)[idx])
f.as.numeric <- function(val)
{
## Purpose: convert value to numeric if possible
## ----------------------------------------------------------------------
## Arguments: vec: value to convert
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 26 Oct 2009, 12:10
r <- suppressWarnings(as.numeric(val))
if (is.na(r)) {
## is character, try to convert to TRUE and FALSE
return(switch(casefold(val),
"true" = TRUE,
"false" = FALSE,
val))
} else return(r)
}
f.as.numeric.vectorized <- function(val) sapply(val, f.as.numeric)
f.as.integer <- function(val)
{
## Purpose: convert value to numeric if possible
## ----------------------------------------------------------------------
## Arguments: vec: value to convert
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 26 Oct 2009, 12:10
r <- suppressWarnings(as.integer(val))
if (is.na(r)) {
## is character, try to convert to TRUE and FALSE
return(switch(casefold(val),
"true" = TRUE,
"false" = FALSE,
val))
} else return(r)
}
f.str2list <- function(str, splitchar = '\\.')
{
## Purpose: inverse of f.list2str
## ----------------------------------------------------------------------
## Arguments: str: string or list of strings produced with f.list2str
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 14:20
## split input string or strings into a list of vectors
lst <- f..split..str(as.character(str))
rlst <- list()
## walk list
for (lv in lst) {
lrlst <- list()
## for each element of the vector
for (ln in names(lv)) {
## split
lnames <- strsplit(ln, splitchar)[[1]]
## set either directly
if (length(lnames) == 1) lrlst[ln] <- f.as.numeric(lv[ln])
## or, if it contains a dot, as a sublist
else {
if (is.null(lrlst[[lnames[1]]])) lrlst[[lnames[1]]] <- list()
lrlst[[lnames[1]]][paste(lnames[-1],collapse='.')] <- f.as.numeric(lv[ln])
}
}
rlst <- c(rlst, list(lrlst))
}
rlst
}
f.round.numeric <- function(num, digits = 0) { ## round only numeric values in list
idx <- sapply(num, is.numeric)
ret <- num
ret[idx] <- lapply(num[idx],round,digits=digits)
ret
}
f.errs2str <- function(errs)
{
## Purpose: convert list of errors into pretty strings
## ----------------------------------------------------------------------
## Arguments: errs: estlist element errs
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 14:51
rv <- vector()
for (lerr in errs) {
rv <- c(rv,
switch(lerr$err,
normal = paste("N(",lerr$args$mean,",",
lerr$args$sd,")", sep=""),
set =,
t = paste("t",lerr$args$df,sep=""),
paste(lerr$err,"(",paste(f.round.numeric(lerr$args,2),
collapse=","),")",sep="")))
}
rv
}
f.procedures2str <- function(procs)
{
## Purpose: convert procedures element in estlist to pretty data.frame
## ----------------------------------------------------------------------
## Arguments: proc: estlist element procedures
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 14:57
rdf <- rep(" ",7)
for (lproc in procs) {
method <- if(is.null(lproc$args$method))
switch(lproc$estname,
lm = 'lsq',
"SM") else lproc$args$method
cov <- switch(lproc$estname, ## lm.robust, rlm, lmrob: set default arguments
lm.robust = list(cov = 'Default',
cov.corrfact = 'empirical',
cov.xwx = TRUE,
cov.resid = 'trick',
cov.hubercorr = TRUE,
cov.dfcorr = 1),
rlm = list(cov = 'Default',
cov.corrfact = 'empirical',
cov.xwx = FALSE,
cov.resid = 'final',
cov.hubercorr = TRUE,
cov.dfcorr = 1),
## lmrob = list(cov = 'f.avar1', ## method .vcov.MM equals f.avar1
## cov.resid = 'final'),
lmrob = do.call('lmrob.control', ## get default arguments from lmrob.control
lproc$args)[c('cov', 'cov.corrfact', 'cov.xwx',
'cov.resid', 'cov.hubercorr', 'cov.dfcorr')],
if (is.null(lproc$args)) list(cov = 'Default') else lproc$args)
if (is.null(lproc$args$psi)) {
psi <- switch(lproc$estname,
rlm =,
lmrob = 'bisquare',
lm.robust = {
if (is.null(lproc$args$weight)) {
if (is.null(lproc$args$weight2)) 'optimal'
else lproc$args$weight2
} else lproc$args$weight[2] },
"NA")
} else {
psi <- lproc$args$psi
## test if tuning.psi is the default one
if (!is.null(lproc$args$tuning.psi) &&
isTRUE(all.equal(lproc$args$tuning.psi, .Mpsi.tuning.default(psi))))
psi <- paste(psi, lproc$args$tuning.psi)
}
D.type <- switch(lproc$estname,
lmrob.u =,
lmrob = if (is.null(lproc$args$method) ||
lproc$args$method %in% c('SM', 'MM')) 'S' else 'D',
lmrob.mar = if (is.null(lproc$args$type)) 'qE' else lproc$args$type,
rlm = 'rlm',
lm.robust = 'rob',
lm = 'lm',
'NA')
rdf <- rbind(rdf,c(lproc$estname, method, f.args2str(lproc$args),
cov$cov, f.cov2str(cov), psi, D.type))
}
colnames(rdf) <- c("Function", "Method", "Tuning", "Cov", "Cov.Tuning", "Psi", "D.type")
if (NROW(rdf) == 2) t(rdf[-1,]) else rdf[-1,]
}
f.chop <- function(str,l=1)
## Purpose: chop string by l characters
## ----------------------------------------------------------------------
## Arguments: str: string to chop
## l: number of characters to chop
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 15:19
substr(str,1,nchar(str)-l)
fMpsi2str <- function(psi)
{
## Purpose: make pretty M.psi and D.chi, etc.
## ----------------------------------------------------------------------
## Arguments: M.psi: M.psi argument
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 15:28
if (is.null(psi)) psi
else if (psi == "tukeyPsi1" || psi == "tukeyChi") "bisquare"
else if (grepl("Psi1$",psi)) f.chop(psi,4)
else if (grepl("Chi$",psi)) f.chop(psi,3)
else psi
}
f.c.psi2str <- function(c.psi)
{
## Purpose: make pretty tuning.psi and D.tuning.chi, etc.
## ----------------------------------------------------------------------
## Arguments: c.psi: tuning.psi argument
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 15:34
if (is.null(c.psi)) return(NULL)
round(as.numeric(c.psi),2)
}
f.args2str <- function(args)
{
## Purpose: convert args element in procedures element of estlist
## to a pretty string
## ----------------------------------------------------------------------
## Arguments: args: args element in procedures element of estlist
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 15:11
lst <- list()
lst$psi <- if (!is.null(args$weight)) args$weight[2]
else if (!is.null(args$weight2)) args$weight2
else args$psi
lst$c.psi <- if (!is.null(args$efficiency))
round(f.eff2c.psi(args$efficiency, lst$psi),2)
else f.c.psi2str(args$tuning.psi)
if (!is.null(args$method) && grepl("D",args$method)) {
lst$D <- args$D.type # possibly NULL
lst$tau <- args$tau
}
f..paste..list(lst)
}
f.cov2str <- function(args)
{
## Purpose: convert cov part in args element in procedures element of
## estlist to a pretty string
## ----------------------------------------------------------------------
## Arguments: args: args element in procedures element of estlist
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 15:39
lst <- list()
if (!is.null(args$cov) && !args$cov %in% c('Default','f.avarwh'))
lst$cov <- sub('^f\\.', '', args$cov)
else {
lst$hc <- args$cov.hubercorr
lst$dfc <- args$cov.dfcorr
lst$r <- args$cov.resid
lst$rtau <- args$cov.corrfact
lst$xwx <- args$cov.xwx
}
## convert logical to numeric
lst <- lapply(lst, function(x) if (is.logical(x)) as.numeric(x) else x)
f..paste..list(lst)
}
f.procstr2id <- function(procstrs, fact = TRUE)
{
## Purpose: create short identifiers of procstrs
## ----------------------------------------------------------------------
## Arguments: procstrs: vector of procstrs
## fact: convert to factor or not
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 3 Nov 2009, 08:58
lst0 <- f.str2list(procstrs)
r <- sapply(lst0, function(x) {
paste(c(x$estname,
if (is.null(x$args$method)) NULL else x$args$method,
substr(c(x$args$psi,x$args$weight2, x$args$weight[2]), 1, 3)),
collapse = '.')
})
if (fact) ru <- unique(r)
if (fact) factor(r, levels = ru, labels = ru) else r
}
f.splitstrs <- function(strs, split = '_', ...)
{
## Purpose: split vector of strings by split and convert the list into
## a data.frame with columns type and id
## ----------------------------------------------------------------------
## Arguments: strs: vector of strings
## split: character vector to use for splitting
## ...: arguments to strsplit, see ?strsplit
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 19 Oct 2009, 08:46
lstr <- strsplit(strs, split, ...)
ldf <- t(as.data.frame(lstr))
rownames(ldf) <- NULL
as.data.frame(ldf, stringsAsFactors = FALSE)
}
f.abind <- function(arr1,arr2, along = ndim)
{
## Purpose: like abind, but less powerful
## ----------------------------------------------------------------------
## Arguments: arr1, arr2: arrays to bind
## along: dimension along to bind to,
## defaults to last dimension
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 20 Oct 2009, 11:33
## if along =! last dimension: permutate array
ndim <- length(dim(arr1))
if (along != ndim) {
arr1 <- aperm(arr1, perm = c((1:ndim)[-along],along))
arr2 <- aperm(arr2, perm = c((1:ndim)[-along],along))
}
ldmn1 <- dimnames(arr1)
ldmn2 <- dimnames(arr2)
ld1 <- dim(arr1)
ld2 <- dim(arr2)
if (length(ld1) != length(ld2))
stop('f.abind: Dimensions must be identical')
if (!identical(ldmn1[-ndim],ldmn2[-ndim]))
stop('f.abind: Dimnames other than in the along dimension must match exactly')
if (any(ldmn1[[ndim]] %in% ldmn2[[ndim]]))
stop('f.abind: Dimnames in along dimension must be unique')
ldmn3 <- ldmn1
ldmn3[[ndim]] <- c(ldmn1[[ndim]], ldmn2[[ndim]])
ld3 <- ld1
ld3[ndim] <- ld1[ndim] + ld2[ndim]
## build array
arr3 <- array(c(arr1, arr2), dim = ld3, dimnames = ldmn3)
## permutate dimensions back
if (along != ndim) {
lperm <- 1:ndim
lperm[along] <- ndim
lperm[(along+1):ndim] <- along:(ndim-1)
arr3 <- aperm(arr3, perm = lperm)
}
arr3
}
f.abind.3 <- function(...) f.abind(..., along = 3)
f.rename.level <- function(factor, from, to) {
## Purpose: rename level in a factor
## ----------------------------------------------------------------------
## Arguments: factor: factor variable
## from: level to be changed
## to: value
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 18 Aug 2010, 14:45
levels(factor)[levels(factor) == from] <- to
factor
}
###########################################################################
## 2. main simulation functions
###########################################################################
f.sim <- function(estlist,
.combine = 'f.abind',
.combine.2 = 'f.abind.3', ## hack for foreach
silent = TRUE)
{
## Purpose: perform simulation according to estlist entry ec
## ----------------------------------------------------------------------
## Arguments: ec: estlist, list consisting of:
## - design: data frame of design
## - nrep: number of repetitions
## - errs: list of error distributions including arguments
## - err: name of error distribution
## - args: list of arguments (to be passed to do.call()
## - procedures: list of parameter configurations and
## procedures to call
## - estname: name of estimation procedure
## - args: arguments that define the call
## silent: silent argument to try
## ----------------------------------------------------------------------
## Author: Werner Stahel / Manuel Koller, Date: 21 Aug 2008, 07:55
## get designs
ldd <- estlist$design
use.intercept <- if(is.null(estlist$use.intercept)) TRUE
else estlist$use.intercept
nobs <- NROW(ldd)
npar <- NCOL(ldd) + use.intercept
nrep <- estlist$nrep
nlerrs <- nobs*nrep
## initialize:
lestlist <- estlist
## 'evaluate' estlist$procedure list
lprocs <- c()
for (i in seq_len(estlist$procedures)) {
## generate lprocstr (identification string)
lprocs[i] <- estlist[['procedures']][[i]][['lprocstr']] <-
f.list2str(estlist[['procedures']][[i]])
}
## find all error distributions
lerrs <- unique(sapply(lestlist$errs, f.list2str))
## walk estlist$output to create output column names vector
## store result into lnames, it is used in f.sim.process
lnames <- c()
for (i in seq_len(estlist$output)) {
llnames <- estlist[['output']][[i]][['lnames']] <-
eval(estlist[['output']][[i]][['names']])
lnames <- c(lnames, llnames)
}
## get different psi functions
lpsifuns <- unlist(unique(lt <- sapply(estlist$procedures, function(x) x$args$psi)))
## get entries without psi argument
lrest <- sapply(lt, is.null)
if (sum(lrest) > 0) lpsifuns <- c(lpsifuns, '__rest__')
## Walk error distributions
res <- foreach(lerrlst = estlist$errs, .combine = .combine) %:%
foreach(lpsifun = lpsifuns, .combine = .combine.2) %dopar% {
## filter for psi functions
lidx <- if (lpsifun == '__rest__') lrest else
unlist(sapply(estlist$procedures,
function(x) !is.null(x$args$psi) && x$args$psi == lpsifun))
cat(f.errs2str(list(lerrlst)), lpsifun, " ")
## get function name and parameters
lerrfun <- f.errname(lerrlst$err)
lerrpar <- lerrlst$args
lerrstr <- f.list2str(lerrlst)
## --- initialize array
lres <- array(NA, dim=c(nrep, ## data dimension
length(lnames), ## output type dimension
sum(lidx), ## estimation functions and arguments dimension
1), ## error distributions dimension
dimnames = list(Data = NULL,
Type = lnames, Procstr = lprocs[lidx], Errstr = lerrstr))
## set seed
set.seed(estlist$seed)
## generate errors: seperately for each repetition
lerrs <- c(sapply(1:nrep, function(x) do.call(lerrfun, c(n = nobs, lerrpar))))
## if estlist$design has an attribute 'gen'
## then this function gen will generate designs
## and takes arguments: n, p, rep
## and returns the designs in a list
if (is.function(attr(ldd, 'gen'))) {
ldds <- attr(ldd, 'gen')(nobs, npar - use.intercept, nrep, lerrlst)
}
## Walk repetitions
for (lrep in 1:nrep) {
if (lrep%%100 == 0) cat(" ", lrep)
lerr <- lerrs[(1:nobs)+(lrep-1)*nobs]
if (exists('ldds')) {
ldd <- ldds[[lrep]]
## f.sim.reset.envirs()
}
## Walk estimator configurations
for (lproc in estlist$procedures[lidx]) {
## call estimating procedure
lrr <- tryCatch(do.call(f.estname(lproc$estname),
c(if(use.intercept)
list(lerr ~ . , data = ldd) else
list(lerr ~ . - 1, data = ldd), lproc$args)),
error=function(e)e)
ERR <- inherits(lrr, 'error')
if (ERR && !silent) {
print(lproc$lprocstr)
print(lrr)
}
if (!silent && !converged(lrr)) {
print(lproc$lprocstr)
browser() ## <<<
}
## check class: if procedure failed:
if (ERR) next
## check convergence of estimator
if (!converged(lrr)) next
## process output
for (lov in estlist$output) {
llnames <- lov$lnames
ret <- tryCatch(lres[lrep,llnames,lproc$lprocstr,lerrstr] <- eval(lov$fun),
error= function(e)e)
if (!silent && inherits(ret, 'error')) {
cat('Error', dQuote(ret$message), 'in repetition',lrep,
'\n for:',llnames,'procstr:',lproc$lprocstr,'\n')
browser() ## <<<
print(lov$fun)
print(try(eval(lov$fun)))
}
}
}
}
## print debug information if requested
if (!silent) str(lres)
lres
}
## restore original order of lprocs
res <- res[,,match(lprocs, dimnames(res)[[3]]),,drop=FALSE]
## set attributes
attr(res, 'estlist') <- lestlist
cat("\n")
res
}
###########################################################################
## build estlist
###########################################################################
f.combine <- function(..., keep.list = FALSE) {
## Purpose: creates a list of all combinations of elements given as
## arguments, similar to expand.grid.
## Arguments can be named.
## If an argument is a list, then its elements are considered
## as fixed objects that should not be recombined.
## if keep.list = TRUE, these elements are combined
## as a list with argument.
## ----------------------------------------------------------------------
## Arguments: collection of lists or vectors with argument names
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 7 Oct 2009, 11:13
## convert arguments into a big list
args <- list(...)
## if more than two arguments, call recursively
if (length(args) > 2)
lst <- do.call("f.combine", c(args[-1], list(keep.list=keep.list)))
else {
## if just two arguments, create list of second argument
## if this is a list, then there's nothing to do
if (!keep.list && is.list(args[[2]])) lst <- args[[2]]
## else convert to a list of one-elements lists with proper name
else {
lst <- list()
for (lelem in args[[2]]) {
llst <- list(lelem)
if (!is.null(names(args)[2])) names(llst)[1] <- names(args)[2]
lst <- c(lst, list(llst))
}
}
}
## ok, now we can add the first element to all elements of lst
lst2 <- list()
if (keep.list && is.list(args[[1]])) args[[1]] <- lapply(args[[1]], list)
for (lelem in args[[1]]) {
for (relem in lst) {
llst <- c(lelem, relem)
if (nchar(names(llst)[1]) == 0 && nchar(names(args)[1])>0)
names(llst)[1] <- names(args)[1]
lst2 <- c(lst2, list(llst))
}
}
lst2
}
## some fragments to build estlist
## errors
.errs.normal.1 <- list(err = 'normal',
args = list(mean = 0, sd = 1))
.errs.normal.2 <- list(err = 'normal',
args = list(mean = 0, sd = 2))
.errs.t.13 <- list(err = 't',
args = list(df = 13))
.errs.t.11 <- list(err = 't',
args = list(df = 11))
.errs.t.10 <- list(err = 't',
args = list(df = 10))
.errs.t.9 <- list(err = 't',
args = list(df = 9))
.errs.t.8 <- list(err = 't',
args = list(df = 8))
.errs.t.7 <- list(err = 't',
args = list(df = 7))
.errs.t.5 <- list(err = 't',
args = list(df = 5))
.errs.t.3 <- list(err = 't',
args = list(df = 3))
.errs.t.1 <- list(err = 't',
args = list(df = 1))
## skewed t distribution
.errs.skt.Inf.2 <- list(err = 'cskt',
args = list(df = Inf, gamma = 2))
.errs.skt.5.2 <- list(err = 'cskt',
args = list(df = 5, gamma = 2))
## log normal distribution
.errs.lnrm <- list(err = 'lnorm',
args = list(meanlog = 0, sdlog = 0.6936944))
## laplace distribution
.errs.laplace <- list(err = 'laplace',
args = list(location = 0, scale = 1/sqrt(2)))
## contaminated normal
.errs.cnorm..1.0.10 <- list(err = 'cnorm',
args = list(epsilon = 0.1, meanc = 0, sdc = sqrt(10)))
.errs.cnorm..1.4.1 <- list(err = 'cnorm',
args = list(epsilon = 0.1, meanc = 4, sdc = 1))
.errs.test <- list(.errs.normal.1
,.errs.t.5
,.errs.t.3
,.errs.t.1
)
## arguments
.args.final <- f.combine(psi = c('optimal', 'bisquare', 'lqq', 'hampel'),
seed = 0,
max.it = 500,
k.max = 2000,
c(list(list(method = 'MM', cov = '.vcov.avar1')),
list(list(method = 'MM', cov = '.vcov.w',
start = 'lrr')),
f.combine(method = c('SMD', 'SMDM'),
cov = '.vcov.w',
start = 'lrr')))
## use fixInNamespace("lmrob.fit", "robustbase")
## insert:
## N = {
## tmp <- lmrob..M..fit(x = x/init$tau, y = y/init$tau, obj =
## init)
## tmp$qr <- NULL
## tmp
## },
## .args.final <- f.combine(psi = c('optimal', 'bisquare', 'ggw', 'lqq'),
## seed = 0,
## max.it = 500,
## k.max = 2000,
## c(list(list(method = "SMDM", cov = '.vcov.w')),
## list(list(method = "SMDN", cov = '.vcov.w',
## start = 'lrr'))))
## standard for lmRob
.args.bisquare.lmRob.0 <- list(## initial.alg = 'random',
efficiency = 0.95
,weight = c('bisquare', 'bisquare'),
trace = FALSE
)
.args.optimal.lmRob.0 <- list(## initial.alg = 'random',
efficiency = 0.95
,weight = c('optimal', 'optimal'),
trace = FALSE)
.procedures.final <- c(list(list(estname = 'lm')),
f.combine(estname = 'lmrob.u', args = .args.final,
keep.list = TRUE),
f.combine(estname = 'lmrob.mar',
args = f.combine(psi = 'bisquare',
seed = 0, max.it = 500, k.max = 2000,
cov = '.vcov.w', type = c('qT', 'qE')),
keep.list = TRUE),
f.combine(estname = 'lm.robust',
args = list(.args.bisquare.lmRob.0,
.args.optimal.lmRob.0), keep.list = TRUE))
## output
.output.sigma <- list(sigma = list(
names = quote("sigma"),
fun = quote(sigma(lrr))))
.output.beta <- list(beta = list(
names = quote(paste('beta',1:npar,sep='_')),
fun = quote(coef(lrr))))
.output.se <- list(se = list(
names = quote(paste('se',1:npar,sep='_')),
fun = quote(sqrt(diag(covariance.matrix(lrr))))))
.output.sumw <- list(sumw = list(
names = quote("sumw"),
fun = quote(sum(robustness.weights(lrr)))))
.output.nnz <- list(nnz = list(
names = quote("nnz"),
fun = quote(sum(robustness.weights(lrr) < 1e-3))))
###########################################################################
## simulation results processing functions
###########################################################################
## use apply to aggregate data
## use matplot(t(result)) to plot aggregated data
f.apply <- function(res, items = dimnames(res)[[2]],
FUN, ..., swap = FALSE)
{
## Purpose: similar to apply, return data not as matrix, but
## as data.frame
## ----------------------------------------------------------------------
## Arguments: res: simulation results array
## items: items to use in apply
## FUN: function to apply
## ...: additional arguments to FUN
## swap: if TRUE: swap first two columns
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 8 Oct 2009, 13:39
## aggregate data
lz <- apply(res[,items,,,drop=FALSE], 2:4, FUN, ...)
## if return object has four dimensions (multidim output of FUN)
## rotate first three dimensions
if (length(dim(lz)) == 4 && swap) aperm(lz, perm=c(2,1,3,4)) else lz
}
f.dimnames2df <- function(arr, dm = dimnames(arr),
page = TRUE, err.on.same.page = TRUE,
value.col = ndim - 2,
procstr.col = ndim - 1,
errstr.col = ndim,
procstr.id = TRUE,
split = '_')
{
## Purpose: create data frame from dimnames:
## len_1 .. len_100, cpr_1 .. cpr_100
## will yield a data frame with column id from 1 .. 100
## column type with cpr and len and columns procstr and errstr
## It is assumed, that the max number (100) is the same for all
## output value types
## ----------------------------------------------------------------------
## Arguments: arr: 3 or more dim array (optional)
## dm: dimnames to be used
## page: add a column page to simplify plots
## err.on.same.page: whether all errs should be on the same
## page
## value.col: index of value column (set to NULL for none)
## the values in this column are split name_id
## and put into two columns in the data frame
## procstr.col: index of procedure column
## (both: or NULL for not to be converted)
## errstr.col: index of error string column
## procstr.id: create procstr id
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 19 Oct 2009, 08:41
if (!is.list(dm)) stop('f.dimnames2df: dm must be a list')
## remove 'NULL' dimensions
dm <- dm[!sapply(dm,is.null)]
ndim <- length(dm)
if (ndim == 0) stop('f.dimnames2df: dimnames all null')
ldims <- sapply(dm, length)
## split and convert types into data.frame
if (!is.null(value.col)) {
ldf <- f.splitstrs(dm[[value.col]], split = split)
lid <- NCOL(ldf) == 2
if (lid) lids <- unique(as.numeric(ldf[,2])) ## convert ids into numeric
## we do not need to repeat over different types of values, only ids
ldims[value.col] <- ldims[value.col] / length(unique(ldf[,1]))
}
## merge into one large data.frame: for each distribution
rdf <- list()
for (ld in 1:ndim) {
lname <- if (is.null(lname <- names(dm)[ld])) length(rdf)+1 else lname
ltimes <- if (ld == ndim) 1 else prod(ldims[(ld+1):ndim])
leach <- if (ld == 1) 1 else prod(ldims[1:(ld-1)])
if (!is.null(value.col) && ld == value.col) {
if (lid) rdf[[paste(lname,'Id')]] <-
rep(lids,times=ltimes,each=leach) ## value ids
## no else: the values will be added in the a2df procedures
} else if (!is.null(procstr.col) && ld == procstr.col) {
## convert procstrs to data.frame with pretty names
lprdf <- data.frame(f.procedures2str(f.str2list(dm[[ld]])),
Procstr = factor(dm[[ld]], levels = dm[[ld]],
labels = dm[[ld]]))
if (procstr.id) lprdf$PId <- f.procstr2id(dm[[ld]])
## repeat
lprdf <- if (ltimes == 1 && leach == 1)
lprdf else apply(lprdf,2,rep,times=ltimes,each=leach)
lprdf <- as.data.frame(lprdf, stringsAsFactors=FALSE)
## convert all into nice factors (with the original ordering)
for (lk in colnames(lprdf)) {
luniq <- unique(lprdf[[lk]])
lprdf[[lk]] <- factor(lprdf[[lk]], levels = luniq, labels = luniq)
}
rdf <- c(rdf, lprdf)
} else if (!is.null(errstr.col) && ld == errstr.col) {
## convert errstrs to data.frame with pretty names
ledf <- f.errs2str(f.str2list(dm[[ld]]))
## repeat and convert to factor with correct ordering
rdf[[lname]] <- factor(rep(dm[[ld]],times=ltimes,each=leach),
levels = dm[[ld]], labels = dm[[ld]])
rdf[['Error']] <- factor(rep(ledf,times=ltimes,each=leach),
levels = ledf, labels = ledf)
} else {
## no conversion necessary
rdf[[lname]] <- rep(dm[[ld]],times=ltimes,each=leach)
}
}
## add page argument
if (page && !is.null(procstr.col)) {
ltpf <- if (!is.null(errstr.col) && !err.on.same.page)
interaction(rdf[['Procstr']],rdf[['Error']])
else interaction(rdf[['Procstr']])
rdf[['Page']] <- as.numeric(factor(ltpf, unique(ltpf)))
}
rdf <- as.data.frame(rdf)
if (!is.null(value.col))
attr(rdf, 'Types') <- unique(ldf[,1])
rdf
}
f.a2df.2 <- function(arr, dm = dimnames(arr), err.on.same.page = FALSE, ...)
{
## Purpose: convert arr to data.frame
## uses f.dimnames2df and adds a column to contain the values
## if ndim == 4 and dimnames NULL: assumes first dimension is
## data dimension which is ignored by f.dimnames2df
## add counter
## ----------------------------------------------------------------------
## Arguments: arr: array to convert
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 23 Oct 2009, 12:29
## ndim == 2 ??
ndim <- length(dim(arr))
## if ndim == 4: check if dimnames of dim 1 are NULL
if (ndim == 4 && is.null(dm[[1]]))
dm[[1]] <- 1:dim(arr)[1]
rdf <- f.dimnames2df(dm=dm, ...)
## just add values for all 'Types', possibly including Type.ID
if (ndim > 2)
for (lvt in attr(rdf, 'Types')) {
llvt <- if (is.null(rdf$Type.Id)) lvt else paste(lvt,unique(rdf$Type.Id),sep='_')
rdf[[lvt]] <- as.vector(switch(ndim,
stop('wrong number of dimensions'), ## 1
arr, ## 2
arr[llvt,,], ## 3
arr[,llvt,,])) ## 4
}
else
rdf$values <- as.vector(arr)
rdf
}
f.dimnames2pc.df <- function(arr, dm = dimnames(arr),
npcs = NCOL(estlist$design.predict), ...)
{
## Purpose: create data frame to be used in plotting of pc components
## calls f.dimnames2df and adds an additional column for
## identifying the principal components
## ----------------------------------------------------------------------
## Arguments: arr, dm: see f.dimnames.df
## npcs: number of principal components
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 23 Oct 2009, 11:51
if (missing(npcs) && !is.null(attr(estlist$design.predict, 'npcs')))
npcs <- attr(estlist$design.predict, 'npcs')
## convert into data.frame
rdf <- f.dimnames2df(dm = dm, ...)
## calculate number of points per principal component
npts <- (length(unique(rdf$Type.Id)) - 1) / npcs
## add new column pc
rdf$PC <- 1
if (npcs > 1)
for (li in 2:npcs) {
lids <- (1:npts + npts*(li-1) + 1)
rdf$PC[rdf$Type.Id %in% lids] <- li ## fixme: center is not repeated
}
rdf$PC <- factor(rdf$PC, levels = 1:npcs, labels = paste('PC',1:npcs,sep=' '))
rdf
}
f.a2pc.df <- function(arr, ...)
{
## Purpose: convert arr to data.frame
## uses f.dimnames2pc.df and adds a column to contain the values
## ----------------------------------------------------------------------
## Arguments: arr: array to convert
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 23 Oct 2009, 12:29
## convert dimnames
rdf <- f.dimnames2pc.df(arr, err.on.same.page = FALSE,...)
## add values
for (lvt in attr(rdf, 'Types'))
rdf[[lvt]] <- as.vector(arr[paste(lvt,unique(rdf$Type.Id),sep='_'),,])
## repeat values: only PC_1 has center value, add it for other PCs
## build index
idx <- 1:NROW(rdf)
rpc <- as.character(rdf$PC)
for (lerr in levels(rdf$Error)) {
for (lprc in levels(rdf$Procstr)) {
for (lpc in levels(rdf$PC)) {
if (lpc == 'PC 1') next
## get first entry of this PC
lmin <- min(which(rdf$Error == lerr & rdf$Procstr == lprc & rdf$PC == lpc))
## where is this in idx?
lwm <- min(which(lmin == idx))
## get first entry of PC_1
lmin1 <- min(which(rdf$Error == lerr & rdf$Procstr == lprc & rdf$PC == 'PC 1'))
## update idx
idx <- c(idx[1:(lwm-1)], lmin1, idx[lwm:length(idx)])
## update PC column of result
rpc <- c(rpc[1:(lwm-1)], lpc, rpc[lwm:length(rpc)])
}
}
}
## repeat centers
rdf <- rdf[idx,]
## update PC column
rdf$PC <- factor(rpc)
## return
rdf
}
f.calculate <- function(expr,arr,dimname = as.character(expr))
{
## Purpose: calculate formula and return as conformable array
## ----------------------------------------------------------------------
## Arguments: expr: expression to calculate (string is also ok)
## arr: array (from f.sim)
## dimname: name of the calculated dimension
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 9 Oct 2009, 10:15
if (!is.expression(expr)) expr <- as.expression(expr)
lnams <- dimnames(arr)[[2]]
lst <- list()
for (lnam in lnams)
expr <- gsub(paste(lnam,'\\b',sep=''),
paste("arr[,",lnam,",,,drop=FALSE]",sep='"'), expr)
r <- eval(parse(text = expr))
dimnames(r)[[2]] <- dimname
r
## maybe use abind to merge the two arrays?
}
f.calculate.many <- function(expr, arr, dimname = dims, dims)
{
## Purpose: calculate formula and abind into array
## supply expr as string with # symbols to be replaced
## dimname can also contain # symbols
## ----------------------------------------------------------------------
## Arguments: same as f.calculate and
## dims: vector of items to replace # with
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 14 Oct 2009, 10:11
for (i in seq_len(dims)) {
lexpr <- gsub("#",dims[i],expr)
ldimname <-
if (length(dimname) > 1) dimname[i] else gsub("#",dims[i],dimname)
if (i == 1)
rarr <- f.calculate(lexpr,arr,ldimname)
else
rarr <- abind(rarr, f.calculate(lexpr,arr,ldimname), along=2)
}
rarr
}
f.errs <- function(estlist, err, rep, gen = NULL, nobs, npar)
{
## Purpose: generate and return errors of specified repetition
## or, if missing, all errors as a matrix
## ----------------------------------------------------------------------
## Arguments: estlist: estlist
## err: error distribution (estlist$errs[1] for example)
## rep: desired repetition (optional)
## gen: function to generate designs (optional)
## nobs: nr. rows, npap: nr. predictors (both optional)
## ---------------------------------------------------------------------
## Author: Manuel Koller, Date: 13 Oct 2009, 11:21
nobs <- NROW(estlist$design)
nrep <- estlist$nrep
nlerrs <- nobs*nrep
npred <- NROW(estlist$design.predict)
## get function name and parameters
lerrfun <- f.errname(err$err)
lerrpar <- err$args
lerrstr <- f.list2str(err)
## set seed
set.seed(estlist$seed)
## generate errors: seperately for each repetition
lerrs <- c(sapply(1:nrep, function(x) do.call(lerrfun, c(n = nobs, lerrpar))))
## lerrs <- do.call(lerrfun, c(n = nlerrs, lerrpar))
## to get to the same seed state as f.sim(.default)
## generate also the additional errors
## calculate additional number of errors
for (i in seq_len(estlist$output)) {
if (!is.null(estlist[['output']][[i]][['nlerrs']]))
nlerrs <- nlerrs + eval(estlist[['output']][[i]][['nlerrs']])
}
if (length(lerrs) < nlerrs)
nowhere <- do.call(lerrfun, c(n = nlerrs - length(lerrs), lerrpar))
## generate designs
if (!is.null(gen) && is.function(gen)) {
ldds <- gen(nobs, npar, nrep, err)
}
## return errors
ret <- if (!missing(rep)) lerrs[1:nobs+(rep-1)*nobs] else matrix(lerrs, nobs)
if (exists('ldds')) attr(ret, 'designs') <- if (!missing(rep)) ldds[[i]] else ldds
ret
}
f.selection <- function(procstrs = dimnames(r.test)[[3]],
what = c('estname', 'args.method', 'args.psi', 'args.tuning.psi',
'args.type', 'args.weight2', 'args.efficiency'),
restr = '')
{
## Purpose: get selection of results: first one of the specified estimates
## ----------------------------------------------------------------------
## Arguments: procstrs: what is the selection
## what: named vector to use in grep
## restr: do not select estimators with procstr
## that match this regexp parameters
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 2 Nov 2009, 09:06
## match restrictions
lrestr <- -(lall <- 1:length(procstrs)) ## no restrictions
if (!missing(restr)) {
lrestr <- grep(restr, procstrs)
if (length(lrestr) == 0) lrestr <- -lall
procstrs <- procstrs[-lrestr]
}
## procstr2list, but do not split into sublists
lproclst <- f.str2list(procstrs, splitchar='_____')
## helper function: select only items that occur what
tfun <- function(x) x[what]
lproclst <- lapply(lproclst, tfun)
## convert back to string
lprocstr <- sapply(lproclst, f.list2str)
## get all unique combinations and the first positions
lidx <- match(unique(lprocstr), lprocstr)
r <- procstrs[lidx]
attr(r, 'idx') <- lall[-lrestr][lidx]
r
}
f.get.current.dimnames <- function(i,dn,margin)
{
## Purpose: get current dimnames in the margins of array
## we're applying on
## ----------------------------------------------------------------------
## Arguments: i: counter
## dn: dimnames
## margin: margin argument to apply
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 16 Apr 2010, 10:44
## pos <- integer(0)
lcdn <- character(0)
for (lm in margin) {
## get length of current margin
llen <- length(dn[[lm]])
## i modulo llen gives the current position in this dimension
lpos <- (if (i > 0) i-1 else 0) %% llen + 1
## update pos
## pos <- c(pos, lpos)
## update lcdn
lcdn <- c(lcdn, dn[[lm]][lpos])
## update i: subtract lpos and divide by llen
i <- (i - lpos) / llen + 1
}
lcdn
}
f.n <- Vectorize(function(design)
{
## Purpose: get n obs of design
## ----------------------------------------------------------------------
## Arguments: design: design to get n of
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 19 Apr 2010, 11:19
NROW(get(design))
})
f.p <- Vectorize(function(design)
{
## Purpose: get p par of design
## ----------------------------------------------------------------------
## Arguments: design: design to get p of
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 19 Apr 2010, 11:19
NCOL(get(design)) + 1
})
f.which.min <- function(x, nr = 1) {
## Purpose: get the indices of the minimal nr of observations
## ----------------------------------------------------------------------
## Arguments: x: vector of values
## nr: number of indices to return
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 4 May 2010, 12:18
match(sort(x)[1:nr], x)
}
f.which.max <- function(x, nr = 1) f.which.min(-x, nr)
## f.get.scale <- function(procstr, proclst = f.str2list(procstr))
## {
## ## Purpose: get scale estimate used for procstrs
## ## ----------------------------------------------------------------------
## ## Arguments: procstr: procstrs (dimnames(r.test)[[3]]) as output by
## ## f.list2str()
## ## proclst: list of procedures, as in estlist$procedures
## ## ----------------------------------------------------------------------
## ## Author: Manuel Koller, Date: 9 Sep 2010, 13:52
## ret <- list()
## for (lproc in proclst) {
## if (lproc$estname == 'lm') {
## ## least squares
## ret <- c(ret, list(list(fun='f.lsq')))
## } else {
## ## default (S-scale):
## fun <- 'lmrob.mscale'
## lidx <- names(lproc$args)[na.omit(match(c('psi', 'tuning.chi', 'seed'),
## names(lproc$args)))]
## if (!is.null(lproc$args$method) &&
## substr(lproc$args$method,1,3) == 'SMD') {
## ## D-scale
## fun <- 'lmrob.dscale'
## lidx <- names(lproc$args)[na.omit(match(c('psi', 'tuning.psi'),
## names(lproc$args)))]
## } else if (lproc$estname == 'lmrob.mar' ### continue here
## ret <- c(ret, list(list(fun=fun, args=lproc$args[lidx])))
## }
## })
###########################################################################
## functions related to prediction
###########################################################################
f.prediction.points <- function(design, type = c('pc', 'grid'),
length.out = 4*NCOL(design), f = 0.5,
direction = +1, max.pc = 5)
{
## Purpose: generate prediction points for design
## generate four points along the second principal component
## in the center, 2 intermediate distances and long distance
## (from the center)
## ----------------------------------------------------------------------
## Arguments: design: design matrix
## type: type of prediction points: grid / principal components
## length.out: approximate number of prediction points
## f: extend range by f (like extendrange())
## direction: +1 or -1: which direction to go from the center
## max.pc: maximum number of principal components to use
## ----------------------------------------------------------------------
## Author: Manuel Koller, Date: 9 Oct 2009, 16:48
## match type argument
type = match.arg(type)
## get ranges
lrange <- apply(design, 2, range)
## extend range by f
lrange <- data.frame(apply(lrange, 2, extendrange, f = f))
switch(type,
pc = {
## calculate robust covariance matrix
rob <- covMcd(design)
## and use it to calculate the principal components
rpc <- princomp(covmat = rob$cov)
## get corner with maximum distance from rob$center
lidx <- apply(abs(lrange - rob$center),2,which.max)
lcr <- diag(as.matrix(lrange[lidx,]))
## create grid points:
rdf <- rob$center
## for each principal component
for (id in 1:min(NCOL(rpc$loadings),max.pc)) {
## calculate factor to reach each boundary
lfct <- (lcr - rob$center) / rpc$loadings[,id]
## calculate distances to boundaries and take the minimal one
lmin <- which.min(sapply(lfct, function(x) sum((rpc$loadings[,id] * x)^2)))
## create sequence of multiplicands
lmult <- seq(0,lfct[lmin], length.out=length.out/NCOL(rpc$loadings))
rdf <- rbind(rdf, rep(rob$center,each=length(lmult)-1) +
direction*lmult[-1] %*% t(rpc$loadings[,id]))
}
},
grid = {
## generate sequences for every dimension
lval <- as.data.frame(apply(lrange, 2L, f.seq,
length.out = round(length.out^(1/NCOL(design))) ))
## return if 1 dimension, otherwise create all combinations
rdf <- if (NCOL(design) > 1)
t(as.data.frame(do.call('f.combine', lval))) else lval
})
rdf <- as.data.frame(rdf)
rownames(rdf) <- NULL
colnames(rdf) <- colnames(design)
if (type == 'pc') attr(rdf, 'npcs') <- id
rdf
}
## ## plot with
## require(rgl)
## plot3d(design)
## points3d(f.prediction.points(design), col = 2)
## d.data <- data.frame(y = rnorm(10), x = 1:10)
## pred <- f.prediction.points(d.data[,-1,drop=FALSE])
## obj <- f.lmrob.local(y ~ x, d.data)
## f.predict(obj, pred, interval = 'prediction')
## as.vector(t(cbind(rnorm(4), f.predict(obj, pred, interval = 'prediction'))))
## estlist for prediction:
## start with .output.test
## we only need sigma
.output.prediction <- c(.output.sigma,.output.beta,.output.se,.output.sumw,.output.nnz)
.output.prediction$predict <-
list(names = quote({
npred <- NROW(estlist$design.predict)
paste(c('fit', 'lwr', 'upr', 'se.fit', 'cpr'),
rep(1:npred,each = 5), sep = '_')}),
fun = quote({
lpr <- f.predict(lrr, estlist$design.predict, interval = 'prediction',
se.fit = TRUE) ##, df = 16)
lpr <- cbind(lpr$fit, lpr$se.fit)
lqf <- f.errname(lerrlst$err, 'p')
lcpr <- do.call(lqf, c(list(lpr[,'upr']), lerrpar)) -
do.call(lqf, c(list(lpr[,'lwr']), lerrpar))
as.vector(t(cbind(lpr,lcpr)))}))
.estlist.prediction <- list(design = dd,
nrep = 200,
errs = .errs.test,
seed = 0,
procedures = .procedures.final,
design.predict = f.prediction.points(dd),
output = .output.prediction,
use.intercept = TRUE)
## predict confidence intervals instead of prediction intervals
.estlist.confint <- .estlist.prediction
.estlist.confint$output$predict$fun <-
parse(text=gsub('prediction', 'confidence', deparse(.output.prediction$predict$fun)))
###########################################################################
## Generate designs - function
###########################################################################
f.gen <- function(n, p, rep, err) {
## get function name and parameters
lerrfun <- f.errname(err$err)
lerrpar <- err$args
## generate random predictors
ret <- lapply(1:rep, function(...)
data.frame(matrix(do.call(lerrfun, c(n = n*p, lerrpar)), n, p)))
attr(ret[[1]], 'gen') <- f.gen
ret
}
.output.sigmaE <- list(sigmaE = list(
names = quote("sigmaE"),
fun = quote({
## estimate scale using current scale estimate.
## this amounts to recalculating the estimate
## with just an intercept
llargs <- lproc$args
llestname <- lproc$estname
## save time and just calculate S-estimate and no covariance matrix
if (grepl('^lmrob', llestname)) {
llestname <- 'lmrob'
llargs$cov <- 'none'
llargs$envir <- NULL ## drop envir argument
if (llargs$method %in% c('MM', 'SM')) llargs$method <- 'S'
if (grepl('M$', llargs$method))
llargs$method <- f.chop(llargs$method)
} else if (lproc$estname == 'lm.robust') {
llargs$estim <- 'Initial'
}
llrr <- tryCatch(do.call(f.estname(lproc$estname),
c(list(lerr ~ 1), llargs)),
error = function(e)e)
## check class: if procedure failed: class == 'try-error'
if (inherits(llrr, 'error')) NA
## check convergence of estimator
else if (lproc$estname != 'lm.robust' && !converged(llrr)) NA
else sigma(llrr)
})))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.