Nothing
## New version of get.biom using aux functions, because get.biom was
## becoming too big to also include new things like the lasso
## stability path.
get.biom <- function(X, Y, fmethod = "all",
type = c("stab", "HC", "coef"),
ncomp = 2, biom.opt = biom.options(),
scale.p = "auto", ...)
{
## allow for older fmethods pclda and plsda
if ("plsda" %in% fmethod) {
fmethod[fmethod == "plsda"] <- "pls"
warning("fmethod 'plsda' is obsolete - please use 'pls'")
}
if ("pclda" %in% fmethod) {
fmethod[fmethod == "pclda"] <- "pcr"
warning("fmethod 'pclda' is obsolete - please use 'pcr'")
}
## Which biomarker selection methods should we consider?
fmethod <- match.arg(fmethod, c("all", biom.opt$fmethods),
several.ok = TRUE)
if ("all" %in% fmethod)
fmethod <- biom.opt$fmethods
multiv <- fmethod[!(fmethod %in% biom.opt$univ.methods)]
nmultiv <- length(multiv)
univ <- fmethod[(fmethod %in% biom.opt$univ.methods)]
nuniv <- length(univ)
fmethod <- c(univ, multiv) # do univariate methods first
nncomp <- rep(c(1, length(ncomp)), c(nuniv, nmultiv))
type <- match.arg(type)
if (type == "HC") fmethod <- fmethod[fmethod != "lasso"]
fname <- paste(fmethod, ifelse(type == "stab", "stab", "coef"), sep = ".")
## every list element consists of one fmethod, with one or more
## sublists corresponding to different settings
result <- vector(length(fmethod), mode = "list")
names(result) <- fmethod
if (is.factor(Y)) {
Y <- factor(Y) ## get rid of extra levels
} else {
if (length(table(Y)) == 2) {
warning("Y has only two values: assuming discrimination!")
Y <- factor(Y)
}
}
## Get settings from the biom.opt argument, mostly for stability-based BS
if (type == "stab") {
oob.size <- biom.opt$oob.size
oob.fraction <- biom.opt$oob.fraction
min.present <- biom.opt$min.present
if (is.factor(Y)) { ## classification
if (nlevels(Y) > 2)
stop("Only binary classification implemented")
smallest.class.fraction <- min(table(Y) / length(Y))
## for equal class sizes this is .5
if (is.null(oob.size))
oob.size <- round(smallest.class.fraction * oob.fraction * length(Y))
segments <- get.segments(Y, oob.size = oob.size,
max.seg = biom.opt$max.seg)
} else { ## we assume regression
if (is.null(oob.size))
oob.size <- round(oob.fraction * length(Y))
segments <- get.segments(1:length(Y), 1:length(Y),
oob.size = oob.size,
max.seg = biom.opt$max.seg)
}
variable.fraction <- biom.opt$variable.fraction
if (variable.fraction < 1) { # use different subsets of variables
nvar <- round(variable.fraction * ncol(X))
variables <- sapply(1:ncol(segments),
function(i) sample(ncol(X), nvar))
nvars <- table(variables)
if (length(nvars) < ncol(X))
stop(paste(c("Too few variables in resampling scheme:,\ntry",
"with a larger variable.fraction or use",
"more segments.")))
} else {
variables <- matrix(1:ncol(X), nrow = ncol(X), ncol = ncol(segments))
nvars <- rep(biom.opt$max.seg, ncol(X))
}
} else {
variables <- NULL
if (type == "HC") {
nset <- biom.opt$nset
HCalpha <- biom.opt$HCalpha
}
}
## Compared to earlier versions: treat HC separately because of the
## expensive evaluation of null distributions
## Temporary solution - not pretty though - is to do HC in the same
## way only for the univariate methods and to treat the multivariate
## methods separately. Take care that with future versions this may
## have to be revised.
for (m in seq(along = fmethod)) {
## Here the real work is done: call the modelling functions
huhn.models <- do.call(fname[m],
list(X = X, Y = Y,
segments = segments,
ncomp = ncomp,
scale.p = scale.p,
variables = variables, ...))
## huhn.models is always a matrix, possibly with one column
switch(type, ## extract relevant info
coef = {
## relevant info: coefficients
woppa <- huhn.models
},
stab = {
## relevant info:
## those coefficients occuring more often than the
## threshold min.present
orderfun <- function(xx) {
selection <- which(xx > min.present)
sel.order <- order(xx[selection], decreasing = TRUE)
list(biom.indices = selection[sel.order],
fraction.selected = xx)
}
woppa <- lapply(1:dim(huhn.models)[2],
function(i, x) orderfun(x[,i]),
huhn.models)
},
HC = {
## relevant info:
## pvals, and the ones selected by the HC criterion
if (m <= nuniv) {
huhn.pvals <-
apply(huhn.models, 2,
function(x) 2*(1 - pt(abs(x), nrow(X) - 2)))
woppa <-
lapply(1:ncol(huhn.models),
function(i)
list(biom.indices =
HCthresh(huhn.pvals[,i], alpha = HCalpha,
plotit = FALSE),
pvals = huhn.pvals[,i]))
} else { # just return something, real calcs later
woppa <- lapply(1:ncol(huhn.models),
function(i)
list(biom.indices = NULL,
pvals = huhn.models[,i]))
}
})
if (type == "coef") { ## result is a matrix, possibly with 1 column
colnames(woppa) <- switch(fmethod[m],
studentt = ,
shrinkt = NULL, # was:fmethod[m]
pls = ,
vip = ,
pcr = ncomp,
lasso =
round(as.numeric(colnames(huhn.models)), 4))
} else { ## result is a list
names(woppa) <- switch(fmethod[m],
studentt = ,
shrinkt = NULL, # was:fmethod[m],
pls = ,
vip = ,
pcr = ncomp,
lasso =
round(as.numeric(colnames(huhn.models)), 4))
}
result[[m]] <- woppa
}
if (type == "HC" & nmultiv > 0) {
## Possible PCR, PLS and VIP calculations for HC are done here
which.pcr <- which(substr(names(result), 1, 5) == "pcr")
if (length(which.pcr) > 0) {
huhn.models <- pval.pcr(X, Y, ncomp, scale.p, nset)
result[[which.pcr]] <-
lapply(1:ncol(huhn.models),
function(i)
list(biom.indices =
HCthresh(huhn.models[,i], alpha = HCalpha,
plotit = FALSE),
pvals = huhn.models[,i]))
}
which.pls <- which(substr(names(result), 1, 5) == "pls")
which.vip <- which(substr(names(result), 1, 3) == "vip")
if (length(which.pls) > 0 | length(which.vip > 0)) {
if (length(which.pls) > 0) {
if (length(which.vip > 0)) {
smethod <- "both"
} else {
smethod <- "pls"
}
} else {
smethod <- "vip"
}
## next statement takes some time...
huhn.models <- pval.plsvip(X, Y, ncomp, scale.p, nset, smethod)
if (length(which.pls) > 0) {
result[[which.pls]] <-
lapply(1:dim(huhn.models)[2],
function(i)
list(biom.indices =
HCthresh(huhn.models[, i, "pls"], alpha = HCalpha,
plotit = FALSE),
pvals = huhn.models[, i, "pls"]))
}
}
if (length(which.vip) > 0) {
result[[which.vip]] <-
lapply(1:dim(huhn.models)[2],
function(i)
list(biom.indices =
HCthresh(huhn.models[, i, "vip"], alpha = HCalpha,
plotit = FALSE),
pvals = huhn.models[, i, "vip"]))
}
}
if("lasso" %in% fmethod) {
info.lst <- list(call = match.call(),
type = type, fmethod = fmethod,
nvar = ncol(X),
lasso = biom.options()$lasso)
} else {
info.lst <- list(call = match.call(),
type = type, fmethod = fmethod,
nvar = ncol(X))
}
result2 <- c(result, list(info = info.lst))
class(result2) <- "BMark"
result2
}
print.BMark <- function(x, ...) {
type <- x$info$type
switch(type,
coef = cat("Result of coefficient-based biomarker selection using ",
length(x)-1, " modelling method",
ifelse(length(x) > 2, "s", ""), ".\n", sep = ""),
HC = cat("Result of HC-based biomarker selection using ",
length(x)-1, " modelling method",
ifelse(length(x) > 2, "s", ""), ".\n", sep = ""),
cat("Result of stability-based biomarker selection using ",
length(x)-1, " modelling method",
ifelse(length(x) > 2, "s", ""), ".\n", sep = ""))
}
summary.BMark <- function(object, ...) {
type <- object$info$type
nslots <- length(object)
infoslot <- which(names(object) == "info")
switch(type,
coef = {
nsett <- sapply(object[-infoslot], ncol)
names(nsett) <- names(object)[-infoslot]
cat("Result of coefficient-based biomarker selection using ",
nslots-1, " modelling method",
ifelse(length(object) > 2, "s", ""), ".\n", sep = "")
cat("Number of different settings for each method:\n")
print(nsett)
cat("\nTotal number of variables in the X matrix:",
object[[infoslot]]$nvar, "\n")
},
{
nsett <- sapply(object[-infoslot], length)
names(nsett) <- names(object)[-infoslot]
typestr <- ifelse(type == "HC", "HC-based", "stability-based")
cat("Result of ", typestr, " biomarker selection using ",
nslots-1, " modelling method",
ifelse(length(object) > 2, "s", ""), ".\n", sep = "")
cat("Number of different settings for each method:\n")
print(nsett)
cat("\nTotal number of variables in the X matrix:",
object[[infoslot]]$nvar, "\n")
cat("Number of variables selected:\n")
nsel <- sapply(object[-infoslot],
function(xx)
sapply(xx, function(yy) length(yy$biom.indices)))
print(nsel)
}
)
}
## returns "coefficients", which are only real coefficients when type
## == "coef", otherwise they are either stabilities or p values (for
## stability selection and HC, respectively)
coef.BMark <- function(object, ...) {
huhn <- object[(names(object) != "info")]
switch(object$info$type,
coef = {
huhn
## lapply(huhn,
## function(x) x)
},
HC = {
lapply(huhn,
function(x)
lapply(x, function(xx) xx$pvals))
},
stab = {
lapply(huhn,
function(x)
lapply(x, function(xx) xx$fraction.selected))
})
}
selection <- function(object, ...) {
huhn <- object[(names(object) != "info")]
if (object$info$type == "coef") {
stop("no selection made when type == 'coef'")
} else {
lapply(huhn,
function(x) lapply(x, function(xx) xx$biom.indices))
}
}
## utility function to plot the lasso trace as a function of
## lambda. Can be used either for coefficients, or for the stability
## trace.
traceplot <- function(object, ...) {
if (length(lasso.idx <- which(names(object) == "lasso")) == 0)
stop("No lasso results present")
switch(object$info$type,
coef = {
cfs <- coef(object)[[lasso.idx]]
lambdas <- as.numeric(colnames(cfs))
matplot(lambdas, t(cfs), type = "l",
ylab = "Coefficient size", xlab = expression(lambda),
main = "Lasso/elastic net coefficient trace", ...)
mtext(bquote(alpha == .(object$info$lasso$alpha)), line = .25)
},
stab = {
cfs <- do.call(cbind,
lapply(object[[lasso.idx]],
function(x) x$fraction.selected))
lambdas <- names(object[[lasso.idx]])
matplot(lambdas, t(cfs), type = "l",
ylab = expression(Pi), xlab = expression(lambda),
main = "Lasso/elastic net stability trace", ...)
abline(h = biom.options()$min.present, col = 4, lty = 3)
mtext(bquote(alpha == .(object$info$lasso$alpha)), line = .25)
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.