#' Elastic net classification panel
#'
#' build an elastic net classification panel
#' @param X nxp matrix - training dataset
#' @param Y categorical variables
#' @param alpha = 1 (lasso), alpha = 0 (ridge), 0 < alpha < 1 (elastic net penalty)
#' @param lambda = strength of elastic net penalty
#' @param family "binomial" or "multinomial"
#' @param X.test nxp matrix - test dataset
#' @param filter = "none" or "p.value"
#' @param topranked = 50 (top number of features to select and build a classifier)
#' @export
enet = function (X, Y, alpha, lambda = NULL, family, X.test = NULL,
Y.test = NULL, filter = "p.value", topranked = 50, keepVar = NULL){
library(glmnet)
library(limma)
library(pROC)
library(OptimalCutpoints)
## Step 1: optional univariate pre-filter using limma moderated t-tests;
## keeps the `topranked` features with smallest (BH-adjusted) p-values.
if (filter == "none") {
X1 <- X
}
if (filter == "p.value") {
design <- model.matrix(~Y)
fit <- eBayes(lmFit(t(X), design))
top <- topTable(fit, coef = 2, adjust.method = "BH",
n = nrow(fit))
X1 <- X[, rownames(top)[1:topranked]]
}
## Step 2: variables in `keepVar` are appended unpenalized
## (penalty.factor = 0 exempts them from shrinkage so they are always kept).
if(is.null(keepVar)){
penalty.factor <- rep(1, ncol(X1))
X2 <- X1
} else {
X1 <- X1[, setdiff(colnames(X1), keepVar)]
X2 <- as.matrix(cbind(X1, X[, keepVar]))
colnames(X2) <- c(colnames(X1), keepVar)
penalty.factor <- c(rep(1, ncol(X1)), rep(0, length(keepVar)))
}
## Step 3: fit the elastic net; when lambda is not supplied, tune it by CV.
## BUG FIX: cv.glmnet now receives the same alpha and penalty.factor as the
## final fit (previously it silently tuned lambda at the default alpha = 1
## with uniform penalties, which is inconsistent with the returned model).
if (family == "binomial") {
fit <- glmnet(X2, Y, family = "binomial", alpha = alpha, penalty.factor=penalty.factor)
if (is.null(lambda)) {
cv.fit <- cv.glmnet(X2, Y, family = "binomial", alpha = alpha,
penalty.factor = penalty.factor)
lambda <- cv.fit$lambda.min
}
Coefficients <- coef(fit, s = lambda)
Active.Index <- which(Coefficients[, 1] != 0)
Active.Coefficients <- Coefficients[Active.Index, ]
enet.panel <- names(Active.Coefficients)[-1]  # drop the intercept
enet.panel.length <- length(enet.panel)
}
if (family == "multinomial") {
fit <- glmnet(X2, Y, family = "multinomial", alpha = alpha,
type.multinomial = "grouped", penalty.factor=penalty.factor)
if (is.null(lambda)) {
cv.fit <- cv.glmnet(X2, Y, family = "multinomial", alpha = alpha,
penalty.factor = penalty.factor)
lambda <- cv.fit$lambda.min
}
Coefficients <- coef(fit, s = lambda)
## grouped multinomial: the same features are active for every class,
## so inspecting the first class's coefficient column suffices.
Active.Index <- which(Coefficients[[1]][, 1] != 0)
Active.Coefficients <- Coefficients[[1]][Active.Index,
]
enet.panel <- names(Active.Coefficients)[-1]
enet.panel.length <- length(enet.panel)
}
## Step 4: evaluate on the test set when provided.
if (!is.null(X.test)) {
library(pROC)
library(OptimalCutpoints)
## BUG FIX: subset X.test to the training columns for BOTH predictions;
## previously the class prediction used the full X.test, which breaks
## when filter/keepVar reduced the training columns.
X.test1 <- X.test[, colnames(X2), drop = FALSE]
probs <- predict(fit, newx = X.test1, s = lambda, type = "response")
predictResponse <- unlist(predict(fit, newx = X.test1,
s = lambda, type = "class"))
if (family == "binomial") {
perfTest <- amritr::tperformance(weights = as.numeric(as.matrix(probs)),
trueLabels = Y.test)
} else {
## Confusion matrix: rows = predicted, cols = true; zeroing the
## diagonal leaves the misclassified counts per true class.
mat <- table(factor(as.character(predictResponse),
levels = levels(Y.test)), Y.test)
mat2 <- mat
diag(mat2) <- 0
classError <- colSums(mat2)/colSums(mat)   # per-class error rate
er <- sum(mat2)/sum(mat)                   # overall error rate
ber <- mean(classError)                    # balanced error rate
perfTest <- c(classError, er, ber)
names(perfTest) <- c(names(classError), "ER", "BER")
}
} else {
perfTest <- predictResponse <- probs <- NA
}
return(list(X = X, Y = Y, fit = fit, enet.panel = enet.panel,
lambda = lambda, alpha = alpha, family = family, probs = probs,
Active.Coefficients = Active.Coefficients, perfTest = perfTest,
predictResponse = predictResponse, filter = filter, topranked = topranked, keepVar=keepVar))
}
#' internal function (enet cross-validation)
#'
#' Estimate test error of elastic net panel
#' @param X nxp matrix - training dataset
#' @param Y categorical variables
#' @param M = # of folds
#' @param folds list of length M specifying which samples are in which fold
#' @param family "binomial" or "multinomial"
#' @param filter = "none" or "p.value"
#' @param topranked = 50 (top number of features to select and build a classifier)
#' @export
enetCV = function (X, Y, alpha, lambda, M, folds, progressBar, family,
filter, topranked, keepVar) {
library(pROC)
library(limma)
library(glmnet)
library(OptimalCutpoints)
probs <- predictResponseList <- enet.panel <- list()
if (progressBar == TRUE)
pb <- txtProgressBar(style = 3)
for (i in 1:M) {
if (progressBar == TRUE)
setTxtProgressBar(pb, i/M)
## Hold out fold i; refit the whole pipeline (filter + enet) on the rest.
omit = folds[[i]]
X.train = X[-omit, , drop = FALSE]
Y.train = Y[-omit]
if (filter == "none") {
X.train1 <- X.train
}
if (filter == "p.value") {
## limma moderated t-test filter, recomputed inside each fold so the
## feature selection does not leak information from the test fold.
design <- model.matrix(~Y.train)
fit <- eBayes(lmFit(t(X.train), design))
top <- topTable(fit, coef = 2, adjust.method = "BH",
n = nrow(fit))
X.train1 <- X.train[, rownames(top)[1:topranked],
drop = FALSE]
}
if(is.null(keepVar)){
penalty.factor <- rep(1, ncol(X.train1))
X.train2 <- X.train1
} else {
## keepVar columns are appended unpenalized (penalty.factor = 0).
X.train1 <- X.train1[, setdiff(colnames(X.train1), keepVar)]
X.train2 <- as.matrix(cbind(X.train1, X.train[, keepVar]))
colnames(X.train2) <- c(colnames(X.train1), keepVar)
penalty.factor <- c(rep(1, ncol(X.train1)), rep(0, length(keepVar)))
}
X.test1 = X[omit, colnames(X.train2), drop = FALSE]
## BUG FIX: use a fold-local lambda. Previously the function argument
## `lambda` was overwritten, so when lambda = NULL the first fold's
## lambda.min was silently reused for every subsequent fold.
## BUG FIX: cv.glmnet now uses the same alpha and penalty.factor as the
## final fit (previously it tuned lambda at the default alpha = 1).
if (family == "binomial") {
fit <- glmnet(X.train2, Y.train, family = "binomial", alpha = alpha, penalty.factor=penalty.factor)
if (is.null(lambda)) {
cv.fit <- cv.glmnet(X.train2, Y.train, family = "binomial",
alpha = alpha, penalty.factor = penalty.factor)
lambda.fold <- cv.fit$lambda.min
}
else {
lambda.fold <- lambda
}
Coefficients <- coef(fit, s = lambda.fold)
Active.Index <- which(Coefficients[, 1] != 0)
Active.Coefficients <- Coefficients[Active.Index,
]
enet.panel[[i]] <- names(Active.Coefficients)[-1]  # drop intercept
}
if (family == "multinomial") {
fit <- glmnet(X.train2, Y.train, family = "multinomial",
alpha = alpha, type.multinomial = "grouped", penalty.factor=penalty.factor)
if (is.null(lambda)) {
cv.fit <- cv.glmnet(X.train2, Y.train, family = "multinomial",
alpha = alpha, penalty.factor = penalty.factor)
lambda.fold <- cv.fit$lambda.min
}
else {
lambda.fold <- lambda
}
Coefficients <- coef(fit, s = lambda.fold)
## grouped multinomial: active set is shared across classes, so the
## first class's coefficients identify the panel.
Active.Index <- which(Coefficients[[1]][, 1] != 0)
Active.Coefficients <- Coefficients[[1]][Active.Index,
]
enet.panel[[i]] <- names(Active.Coefficients)[-1]
}
probs[[i]] <- predict(fit, newx = X.test1, s = lambda.fold,
type = "response")
predictResponseList[[i]] <- predict(fit, newx = X.test1,
s = lambda.fold, type = "class")
}
predictResponse <- unlist(predictResponseList)
if (family == "binomial") {
probs <- unlist(probs)
trueLabels = Y[unlist(folds)]
library(pROC)
library(OptimalCutpoints)
perf <- amritr::tperformance(weights = probs, trueLabels = trueLabels)
}
else {
trueLabels = Y[unlist(folds)]
## BUG FIX: build the confusion matrix with predicted labels as rows and
## true labels as columns (matching enet()), so colSums yields the
## per-true-class error rate. The previous orientation (true in rows)
## made colSums compute per-PREDICTED-class error instead.
mat <- table(factor(predictResponse, levels(Y)), factor(trueLabels,
levels(Y)))
mat2 <- mat
diag(mat2) <- 0
classError <- colSums(mat2)/colSums(mat)   # per-class error rate
er <- sum(mat2)/sum(mat)                   # overall error rate
ber <- mean(classError)                    # balanced error rate
perf <- c(classError, er, ber)
names(perf) <- c(names(classError), "ER", "BER")
}
return(list(probs = probs, trueLabels = trueLabels, perf = perf,
enet.panel = enet.panel, predictResponse = predictResponse))
}
#' cross-validation function for elastic net panel
#'
#' Estimate test error of elastic net panel
#' @param X nxp matrix - training dataset
#' @param Y categorical variables
#' @param alpha = 1 (lasso), alpha = 0 (ridge), 0 < alpha < 1 (elastic net penalty)
#' @param family "binomial" or "multinomial"
#' @param lambda = strength of elastic net penalty
#' @param M = # of folds
#' @param iter = number of times to repeat cross-validation
#' @param threads - number of cpus (each cross-validation scheme performed on a separate node)
#' @param progressBar - show progressbar (TRUE/FALSE)
#' @export
perf.enet = function (object, validation = c("Mfold", "loo"), M = 5, iter = 10,
threads = 4, progressBar = TRUE) {
library(dplyr)
library(tidyr)
library(caret)     # BUG FIX: createFolds() comes from caret, which was never loaded
library(parallel)  # prefer library() over require() for hard dependencies
## BUG FIX: resolve the choices vector to a single value; with the default,
## `validation == "Mfold"` compared a length-2 vector, which errors on
## modern R and relied on first-element matching before that.
validation <- match.arg(validation)
## Unpack the fitted enet object so the CV refits use identical settings.
X = object$X
Y = object$Y
n = nrow(X)
alpha = object$alpha
family = object$family
lambda = object$lambda
filter = object$filter
topranked = object$topranked
keepVar = object$keepVar
if (validation == "Mfold") {
## One stratified fold assignment per repeat; each repeat is scored on
## its own worker node.
folds <- lapply(1:iter, function(i) createFolds(Y, k = M))
cl <- parallel::makeCluster(mc <- getOption("cl.cores",
threads))
parallel::clusterExport(cl, varlist = c("enetCV", "enet",
"X", "Y", "alpha", "lambda", "M", "folds", "progressBar",
"family", "filter", "topranked", "keepVar"), envir = environment())
cv <- parallel::parLapply(cl, folds, function(foldsi,
X, Y, alpha, lambda, M, progressBar, family, filter,
topranked, keepVar) {
enetCV(X = X, Y = Y, alpha = alpha, lambda = lambda,
M = M, folds = foldsi, progressBar = progressBar,
family = family, filter = filter, topranked = topranked,
keepVar=keepVar)
}, X, Y, alpha, lambda, M, progressBar, family, filter,
topranked, keepVar) %>% amritr::zip_nPure()
parallel::stopCluster(cl)
## Mean and SD of each error statistic across the `iter` repeats.
perf <- do.call(rbind, cv$perf) %>% as.data.frame %>%
gather(ErrName, Err) %>% dplyr::group_by(ErrName) %>%
dplyr::summarise(Mean = mean(Err), SD = sd(Err))
}
else {
## Leave-one-out: n singleton folds, a single CV run, no SD available.
folds = split(1:n, rep(1:n, length = n))
M = n
cv <- enetCV(X, Y, alpha, lambda, M, folds, progressBar,
family, filter, topranked, keepVar)
perf <- data.frame(Mean = cv$perf) %>% mutate(ErrName = rownames(.))
perf$SD <- NA
}
result = list()
result$folds = folds
result$probs = cv$probs
result$trueLabels = cv$trueLabels
result$panels = cv$enet.panel
result$perf = perf
result$meth = "enet.mthd"
class(result) = c("perf", "enet.mthd")
return(invisible(result))
}
# NOTE(review): the two lines below are web-scraping residue (an rdrr.io embed
# footer), not R code; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.