R/mt_fs.R

#' =======================================================================
#' Feature frequency and stability of feature ranking.
#' History:
#'   01-02-2007: commence
#'   04-02-2007: feature ranking frequency and overlap rate
#'   10-02-2007: add the stability
#'   09-07-2007: output all frequency
#' Arguments:
#'   x           - a matrix or data frame of feature order
#'   rank.cutoff - top feature order cut-off
#'   freq.cutoff - feature frequency cut-off
#' References:
#' 1.) Chad A Davis, et al., (2006), Reliable gene signatures for microarray
#' classification: assessment of stability and performance. Bioinformatics,
#' vol.22, no.19, pages 2356 - 2363.
#' 2.) Michiels, S.; Koscielny, S. & Hill, C. Prediction of cancer outcome
#' with microarrays: a multiple random validation strategy. Lancet, 365,
#' 488-492
feat.freq <- function(x, rank.cutoff = 50, freq.cutoff = 0.5) {
  x <- as.matrix(x)
  x <- x[1:rank.cutoff, , drop = F]
  tmp <- table(x)
  tmp <- sort(tmp, decreasing = T) / ncol(x)
  fre <- as.vector(tmp) #' strip off a character 'x'
  names(fre) <- names(tmp)
  #' overlap.rate <- sum(fre==1)/rank.cutoff

  res <- list(
    freq.all = fre,
    freq = fre[fre >= freq.cutoff],
    rank.cutoff = rank.cutoff,
    freq.cutoff = freq.cutoff,
    stability = sum(fre) / length(fre)
    #' overlap.fs   = if (overlap.rate) names(fre[fre==1]) else NULL,
  )
  return(res)
}
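
#' Usage (a minimal sketch; 'ord' is a hypothetical matrix whose columns
#' are feature orders from, e.g., resampling runs):
#'   ord <- replicate(5, sample(paste0("V", 1:100)))
#'   res <- feat.freq(ord, rank.cutoff = 20, freq.cutoff = 0.5)
#'   res$freq      #' features present in at least 50% of the top-20 lists
#'   res$stability #' mean frequency over all features seen in the top 20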

#' ========================================================================
#' lwc-16-02-2007: Calculate the consensus of feature selection by different
#' methods
#' Internal function.
#' Arguments:
#'  freq - a list consisting of feature frequency more than a threshold
feat.cons <- function(freq, disp = TRUE) {
  #' lwc-18-02-2007: If only retrieve the names, the following code line
  #'                 is enough.
  #' fs.names <- unique(unlist(lapply(freq, names)))

  fs.names <- lapply(freq, function(x) names(x))
  fs.names <- do.call("c", fs.names)
  fs.names <- sort(table(fs.names), decreasing = T)

  mat <- matrix(
    data = NA, nrow = length(fs.names), ncol = length(freq),
    dimnames = list(names(fs.names), names(freq))
  )

  fs.tab <- sapply(names(freq), function(x) {
    dname <- names(freq[[x]])
    if (disp) mat[dname, x] <- freq[[x]] else mat[dname, x] <- 1
    #' mat[dname, x] <- ifelse(disp, freq[[x]], 1) #' does not work
    return(mat[, x])
  })

  #' print(fs.tab, na.print="")
  return(fs.tab)
}

#' ========================================================================
#' wll-04-07-2007: Multiple feature selectors with resampling procedures.
#'                 Will give rank table based on the statistics and feature
#'                 consensus table.
#' wll-12-12-2007: add output of fs.freq and R doc
#' wll-29-04-2008: Wrapper function for multiple feature selection with or
#'                 without re-sampling.
#' wll-29-05-2008: Add no-resampling code;
#' wll-21-10-2009: Integrate re-sampling and no-resampling
#' lwc-15-02-2010: drop fs.tab.
#' lwc-25-02-2010: minor changes
#' Arguments:
#'   x         - data matrix or data frame
#'   y         - class labels (factor)
#'   method    - a set of feature selections methods
#'   pars      - validation control parameters.
#'   is.resam  - Boolean indicator of re-sampling.
feat.mfs <- function(x, y, method, pars = valipars(), is.resam = TRUE,
                     ...) {
  res <- lapply(method, function(m) {
    cat("\nFeature Selector = :", m, "\n")
    flush.console()
    if (is.resam) {
      feat.rank.re(x, y, method = m, pars = pars, ...)
    } else {
      #'    model <- if (is.function(m)) m
      #'             else if (is.character(m)) get(m)
      #'             else eval(m)
      model <- get(m)
      model(x, y, ...)
    }
  })
  names(res) <- method

  fs.rank <- sapply(res, function(x) x$fs.rank)
  fs.order <- sapply(res, function(x) x$fs.order)
  fs.order <- as.data.frame(fs.order, stringsAsFactors = F)

  fs.stats <- if (is.resam) {
    sapply(res, function(x) x$fs.stats)
  } else {
    sapply(res, function(x) x$stats)
  }

  feat.res <- list(
    fs.order = fs.order, fs.rank = fs.rank, fs.stats = fs.stats,
    all = res
  )

  return(feat.res)
}
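
#' Usage (a minimal sketch on iris; any data with a factor class will do):
#'   data(iris)
#'   x <- iris[, 1:4]
#'   y <- iris[, 5]
#'   pars <- valipars(sampling = "cv", niter = 2, nreps = 4)
#'   res <- feat.mfs(x, y, method = c("fs.anova", "fs.kruskal"), pars = pars)
#'   res$fs.rank #' rank table, one column per selector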

#' ======================================================================
#' lwc-25-02-2010: Calculate frequency and consensus from results of
#'   'feat.mfs' with 'is.resam = TRUE'.
#' wll-05-12-2015: Should give an error checking here.
feat.mfs.stab <- function(fs.res, rank.cutoff = 20, freq.cutoff = 0.5) {
  order.list <- lapply(fs.res$all, function(x) x$order.list)

  freq.all <- lapply(order.list, function(x) {
    feat.freq(x, rank.cutoff = rank.cutoff, freq.cutoff = freq.cutoff)
  })

  fs.freq <- lapply(freq.all, function(x) x$freq)
  fs.subs <- lapply(freq.all, function(x) names(x$freq))
  fs.stab <- lapply(freq.all, function(x) x$stability)
  fs.cons <- feat.cons(fs.freq)
  #' print(fs.cons,digits=2,na.print="")

  fs <- list(
    fs.freq = fs.freq, fs.subs = fs.subs,
    fs.stab = fs.stab, fs.cons = fs.cons
  )
  return(fs)
}
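
#' Usage (a minimal sketch following the 'feat.mfs' usage above; only
#' meaningful when 'feat.mfs' was run with 'is.resam = TRUE'):
#'   stab <- feat.mfs.stab(res, rank.cutoff = 3)
#'   stab$fs.stab #' stability score per selector
#'   print(stab$fs.cons, digits = 2, na.print = "")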

#' =======================================================================
#' lwc-03-09-2010: Plot the stats values of multiple feature selection.
#' lwc-06-09-2010: add fs.tab
#' wll-05-12-2015: should use this function in data analysis
#' Note:
#' Arguments:
#'  fs.stats  - Stats value of features
#'  cumu.plot - A logical value indicating the cumulative scores should be
#'              plotted.
feat.mfs.stats <- function(fs.stats, cumu.plot = FALSE, main = "Stats Plot",
                           ylab = "Values", xlab = "Index of variable", ...) {

  fs.stats <- as.matrix(fs.stats)
  nam <- colnames(fs.stats)
  fs.stats <- lapply(nam, function(x) { #'  x = nam[1]
    val <- fs.stats[, x] #' if stats is data frame, no names for val.
    val <- sort(val, decreasing = T, na.last = T)
  })
  names(fs.stats) <- nam

  #' Get features table based on stats
  fs.tab <- lapply(fs.stats, function(x) {
    list(fs = names(x), val = x)
  })
  #' fs.tab <- list2df(un.list(fs.tab))

  #' Note-09-03-2010: If you use cumulative scores, you can easily calculate
  #' the numbers, such as fix at 80%.
  if (cumu.plot) {
    st <- lapply(fs.stats, function(x) cumsum(x / sum(x, na.rm = T)))
  } else {
    st <- fs.stats
  }

  #' reshape data for plotting
  st <- do.call(cbind, st)
  st <- cbind(st, idx = 1:nrow(st))
  st <- data.frame(st, stringsAsFactors = F)

  #' wl-06-11-2021, Sat: use base R function to get long format
  st_long <- with(st, data.frame(idx = st$idx, stack(st, select = -idx)))
  # st_long <- data.frame(idx = st$idx,stack(st,select = -idx))
  st_long <- st_long[c("idx", "ind", "values")]
  names(st_long) <- c("idx", "variable", "value")

  #' st_l <- reshape::melt(st, id = "idx")
  st.p <- xyplot(value ~ idx | variable,
    data = st_long,
    as.table = T, type = c("g", "l"),
    scales = list(cex = .75, relation = "free"),
    par.strip.text = list(cex = 0.65),
    ylab = ylab, xlab = xlab, main = main, ...
  )

  res <- list(stats.tab = fs.tab, stats.long = st_long, stats.p = st.p)
  return(res)
}

#' =======================================================================
#' wll-06-11-2008: Use Borda count to get the final feature order
#' Note: Previous name is fs.agg
feat.agg <- function(fs.rank.list) {
  fs.score <- apply(fs.rank.list, 1, sum)
  fs.order <- order(fs.score, decreasing = F) #' order from best to worst.
  fs.rank <- order(fs.order, decreasing = F) #' feature rank score.
  names(fs.rank) <- rownames(fs.rank.list)
  temp <- names(fs.rank[fs.order])
  if (!is.null(temp)) fs.order <- temp
  return(list(fs.order = fs.order, fs.rank = fs.rank))
}
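
#' A minimal worked example: the Borda score is the row sum of the ranks,
#' and the smallest total wins.
#'   fs.rank.list <- cbind(r1 = c(a = 1, b = 2, c = 3),
#'                         r2 = c(a = 2, b = 1, c = 3))
#'   feat.agg(fs.rank.list)$fs.order #' "a" "b" "c": a and b tie ahead of c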

#' ======================================================================
#' wll-20-03-2007: resampling-based feature ranking/selection
#' wll-23-07-2008: some changes in loop handling
feat.rank.re <- function(x, y, method, pars = valipars(), tr.idx = NULL,
                         ...) {
  #' validity checking
  if (missing(x) || missing(y)) {
    stop("data set or class are missing")
  }
  if (length(dim(x)) != 2) {
    stop("'x' must be a matrix or data frame")
  }
  y <- as.factor(y) #' some classifiers need a factor, such as SVM.

  if (nrow(x) != length(y)) stop("x and y don't match.")
  if (length(unique(y)) < 2) {
    stop("Classification needs at least two classes.")
  }
  if (any(is.na(x)) || any(is.na(y))) {
    stop("NA is not permitted in data set or class labels.")
  }

  n <- nrow(x) #' number of samples
  p <- ncol(x) #' size of feature

  #' construct index of train data
  if (is.null(tr.idx)) {
    if (pars$sampling == "cv" && pars$nreps > n) {
      pars$sampling <- "loocv"
      pars$niter <- 1
    }
    if (pars$sampling == "cv" && pars$nreps < 2) {
      stop("Number of fold (nreps) for cv must greater than 1")
    }
    tr.idx <- trainind(y, pars = pars)
  } else {
    pars$sampling <- c("user")
  }
  pars$niter <- length(tr.idx)
  pars$nreps <- length(tr.idx[[1]])

  #' feature selection with re-sampling
  cat("Iter (", pars$niter, "):", sep = "")
  res.all <- lapply(1:pars$niter, function(i) {
    cat(" ", i, sep = "")
    flush.console()
    train.ind <- tr.idx[[i]]
    res <- lapply(1:pars$nreps, function(j) {
      x.tr <- x[train.ind[[j]], , drop = F]
      y.tr <- y[train.ind[[j]]]
      do.call(method, c(list(x = x.tr, y = y.tr), list(...)))
    })
    names(res) <- paste("Reps", 1:pars$nreps, sep = "_")
    res
  })
  cat("\n")
  names(res.all) <- paste("Iter", 1:pars$niter, sep = "_")

  rank.list <-
    lapply(res.all, function(x) as.data.frame(sapply(x, function(y) y$fs.rank)))

  order.list <-
    lapply(res.all, function(x) as.data.frame(sapply(x, function(y) y$fs.order)))
  stats.list <-
    lapply(res.all, function(x) as.data.frame(sapply(x, function(y) y$stats)))
  rank.list <- do.call("cbind", rank.list)
  order.list <- do.call("cbind", order.list)
  stats.list <- do.call("cbind", stats.list)

  fs.stats <- apply(stats.list, 1, mean)

  #' Use Borda count to get the final feature order
  fs.score <- apply(rank.list, 1, sum)
  fs.order <- order(fs.score, decreasing = F) #' feature order from best to worst.
  fs.rank <- order(fs.order, decreasing = F) #' feature rank score.
  names(fs.rank) <- rownames(rank.list)
  temp <- names(fs.rank[fs.order])
  if (!is.null(temp)) {
    fs.order <- noquote(temp)
  } #' lwc-16-02-2010: Should we remove noquote?

  res <- list(
    method = method,
    fs.order = fs.order, #' feature order
    fs.rank = fs.rank, #' feature rank
    fs.stats = fs.stats, #' means of stats
    rank.list = rank.list, #' full feature rank list
    order.list = order.list, #' full feature order list
    pars = pars, #' resampling parameters
    tr.idx = tr.idx, #' index of training samples.
    all = res.all
  ) #' all results of re-sampling
  return(res)
}
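
#' Usage (a minimal sketch; two classes so that 'fs.snr' applies):
#'   x <- iris[1:100, 1:4]
#'   y <- factor(iris[1:100, 5])
#'   pars <- valipars(sampling = "boot", niter = 2, nreps = 5)
#'   res <- feat.rank.re(x, y, method = "fs.snr", pars = pars)
#'   res$fs.order #' Borda-aggregated order over all resampling runs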

#' =======================================================================
#' wll-31-10-2007: feature selection using VIP of PLS.
#' NOTE: This function supports multiple responses, Y (i.e. a dummy matrix
#' for discrimination). The Mahalanobis distance of the VIP values is used
#' as the statistic for feature selection (see the formula note after the
#' references).
#' References:
#' 1. Svante Wold et. al., PLS-regression: a basic tool of chemometrics.
#'    Chemometrics and Intelligent Laboratory Systems 58(2001), 109-130.
#' 2. Sarah J. Dixon, et.al., Pattern recognition of gas chromatography mass
#'    spectrometry of human volatiles in sweat to distinguish the sex of
#'    subjects and determine potential discriminatory marker peaks.
#'    Chemometrics and Intelligent Laboratory Systems 87(2007), 161-172.
#' 3. Chong, Il-Gyo and Jun, Chi-Hyuck, Performance of some variable
#'    selection methods when multicollinearity is present, Chemometrics and
#'    Intelligent Laboratory Systems 78(2005), 103-112.
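#' Up to notation (cf. reference 3), the VIP of variable j over A components
#' is
#'   VIP_j = sqrt( p * sum_a[ SS_a * (w_ja / ||w_a||)^2 ] / sum_a SS_a ),
#' where SS_a is the Y-variance explained by component a and w_a is its
#' weight vector; the code below builds SS, the normalised squared weights
#' W, and their product SSW accordingly.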
fs.plsvip <- function(x, y, ncomp = 10, ...) {
  if (!is.data.frame(x)) x <- as.data.frame(x)
  if (length(y) != nrow(x)) {
    stop("x and y is not consistent.")
  }

  val <- plsc(x, y, pls = "oscorespls", ncomp = ncomp)
  #' NOTE: Only NIPALS supports VIP values.
  pls <- val$pls.out

  #' calculate SS
  y.lo <- (unclass(pls$Yloadings))^2
  x.sc <- colSums(pls$scores^2)
  x.sc <- matrix(x.sc, nrow = nrow(y.lo), ncol = ncol(y.lo), byrow = T)
  SS <- y.lo * x.sc #' not matrix product %*%.

  #' calculate normalised squared weight
  W <- (unclass(pls$loading.weights))^2
  sumW <- colSums(W)
  sumW <- matrix(sumW, nrow = nrow(W), ncol = ncol(W), byrow = T)
  W <- W / sumW

  SSW <- W %*% t(SS)

  sumSS <- apply(SS, 1, sum)
  sumSS <- matrix(sumSS, nrow = nrow(SSW), ncol = ncol(SSW), byrow = T)

  vip <- sqrt(nrow(SSW) * (SSW / sumSS))
  #' vip <- rowSums(abs(vip))

  #' Mahalanobis distances
  val <- sqrt(mahalanobis(vip, colMeans(vip), cov(vip), inverted = T))

  #' feature rank and feature order
  fs.order <- order(val, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(val)
  nam <- names(val[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = val)
  return(res)
}

#' =======================================================================
#' wll-31-10-2007: feature selection using VIP of PLS.
#' NOTE: This function supports multiple responses, Y (i.e. a dummy matrix
#'       for discrimination). The final VIP is the mean of the absolute
#'       VIP values.
fs.plsvip.1 <- function(x, y, ncomp = 10, ...) {
  if (!is.data.frame(x)) x <- as.data.frame(x)
  if (length(y) != nrow(x)) {
    stop("x and y is not consistent.")
  }

  val <- plsc(x, y, pls = "oscorespls", ncomp = ncomp)
  #' NOTE: Only NIPALS supports VIP values.
  pls <- val$pls.out

  #' calculate SS
  y.lo <- (unclass(pls$Yloadings))^2
  x.sc <- colSums(pls$scores^2)
  x.sc <- matrix(x.sc, nrow = nrow(y.lo), ncol = ncol(y.lo), byrow = T)
  SS <- y.lo * x.sc #' not matrix product %*%.

  #' calculate normalised squared weight
  W <- (unclass(pls$loading.weights))^2
  sumW <- colSums(W)
  sumW <- matrix(sumW, nrow = nrow(W), ncol = ncol(W), byrow = T)
  W <- W / sumW

  SSW <- W %*% t(SS)

  sumSS <- apply(SS, 1, sum)
  sumSS <- matrix(sumSS, nrow = nrow(SSW), ncol = ncol(SSW), byrow = T)

  vip <- sqrt(nrow(SSW) * (SSW / sumSS))

  val <- rowMeans(abs(vip))

  #' Mahalanobis distances
  #' val <- sqrt(mahalanobis(vip, colMeans(vip), cov(vip), inverted=T))

  #' feature rank and feature order
  fs.order <- order(val, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(val)
  nam <- names(val[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = val)
  return(res)
}

#' =======================================================================
#' wll-29-10-2007: feature selection using VIP of PLS.
#' NOTE: To calculate VIP, two conditions need to be satisfied:
#'       1.) the PLS algorithm is NIPALS;
#'       2.) Y must be a single vector, not a matrix of multiple responses.
#'       Hence for classification, coding Y as a single vector is not
#'       efficient, especially for the multi-class problem. For a two-class
#'       problem, coding as 0/1 or 1/-1 may be OK for a single y vector.
fs.plsvip.2 <- function(x, y, ncomp = 10, ...) {
  if (!is.data.frame(x)) x <- as.data.frame(x)
  if (length(y) != nrow(x)) {
    stop("x and y is not consistent.")
  }

  #' convert to numeric, especially for factor.
  y <- as.numeric(y)
  #' NOTE: need to consider a nice way to convert

  pls <- oscorespls.fit(as.matrix(x), y, ncomp = ncomp)
  #' NOTE: Only NIPALS supports VIP values.

  #' VIP values (taken from http://mevik.net/work/software/pls.html)
  SS <- c(pls$Yloadings)^2 * colSums(pls$scores^2)
  Wnorm2 <- colSums(pls$loading.weights^2)
  SSW <- sweep(pls$loading.weights^2, 2, SS / Wnorm2, "*")

  val <- sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS))
  val <- val[ncomp, ] #' extract VIP values for ncomp components

  #' feature rank and feature order
  fs.order <- order(val, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(val)
  nam <- names(val[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = val)
  return(res)
}

#' =========================================================================
#' wll-29-10-2007: feature selection using regression coefficient of PLS.
#' wll-19-11-2015: add 'pls:::' in the front of 'coef.mvr' since it 'hides'
#'   in the new version of 'pls'
#' NOTE: I tried to use robust estimation of the centre and covariance
#'       matrix by cov.rob in package MASS, but a collinearity problem
#'       seems to appear. Therefore the simple Mahalanobis distance is used.
#' NOTES: 1.) Mahalanobis distance and leverage are often used to detect
#'       outliers especially in the development of linear regression models.
#'       A point that has a greater Mahalanobis distance from the rest of
#'       the sample population of points is said to have higher leverage
#'       since it has a greater influence on the slope or coefficients of
#'       the regression equation. (From
#'       http://en.wikipedia.org/wiki/Mahalanobis_distance)
fs.pls <- function(x, y, pls = "simpls", ncomp = 10, ...) {
  if (!is.data.frame(x)) x <- as.data.frame(x)
  if (length(y) != nrow(x)) {
    stop("x and y is not consistent.")
  }

  val <- plsc(x, y, pls = pls, ncomp = ncomp, ...)
  #' wl-08-11-2021, Mon: Use this one
  coe <- drop(coef(val$pls.out, ncomp = val$ncomp))
  # coe <- drop(pls:::coef.mvr(val$pls.out, ncomp = val$ncomp))


  #' lwc-14-06-2010: After running plsc, ncomp may change; hence ncomp here
  #'                 uses val$ncomp.

  #' Mahalanobis distances
  val <- sqrt(mahalanobis(coe, colMeans(coe), cov(coe), inverted = T))
  #' val <- sapply(as.data.frame(t(coe)), function(x) sqrt(sum(x^2)))
  #' val <- sapply(as.data.frame(t(coe)), function(x) sum(abs(x)))

  #' feature rank and feature order
  fs.order <- order(val, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(val)
  nam <- names(val[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = val)
  return(res)
}

#' ======================================================================
#' wll-30-10-2007: feature selection using loadings of PCA.
#' lwc-22-09-2011: a bug fixed.
#' NOTE: 1. To check the eigenvalues, use screeplot().
#'       2. If it is combined with supervised methods such as fs.rf and
#'       fs.anova as input methods for feat.mfs, 'thres' should be given
#'       explicitly in case it conflicts with 'y'. e.g.,
#'       fs.m <- c("fs.anova", "fs.rf", "fs.pca")
#'       feat.mfs(dat, cls, method = fs.m, is.resam = F, thres = 0.8)
fs.pca <- function(x, thres = 0.8, ...) {
  x <- as.matrix(x)

  obj <- prcomp(x, ...)
  vars <- obj$sdev^2
  vars <- vars / sum(vars) #' Proportion of Variance
  cumvars <- cumsum(vars) #' Cumulative Proportion
  names(cumvars) <- colnames(obj$rotation)

  id <- which(cumvars >= thres)[1]
  if (id == 1) id <- 2 #' lwc-22-09-2011: bug fix, keep at least two PCs
  lo <- obj$rotation[, 1:id] #' selected loadings

  #' Mahalanobis distances
  #' rob <- cov.rob(lo, method="mve")    #' c("mve", "mcd", "classical")
  #' val <- sqrt(mahalanobis(lo, rob$center, rob$cov,tol = 1e-7))

  val <- sqrt(mahalanobis(lo, colMeans(lo), cov(lo), inverted = T))
  #' val <- sapply(as.data.frame(t(lo)), function(x) sqrt(sum(x^2)))
  #' val <- sapply(as.data.frame(t(lo)), function(x) sum(abs(x)))

  #' feature rank and feature order
  fs.order <- order(val, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(val)
  nam <- names(val[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = val)
  return(res)
}
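
#' Usage (a minimal sketch; 'fs.pca' is unsupervised, so no class label is
#' needed and 'thres' controls how many components are kept):
#'   res <- fs.pca(iris[, 1:4], thres = 0.95, scale. = TRUE)
#'   res$fs.order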

#' =======================================================================
#' lwc-12-04-2007: feature selection using the ratio of between-group
#'                 to within-group sums of squares (BW).
#' References: Dudoit S, Fridlyand J, Speed T.P. Comparison of
#'   discrimination methods for the classification of tumors using gene
#'   expression data. J Amer Statist Assoc 2002, 97:7.
#' NOTE: The BW ratio for multi-class classification is closely related to
#'       the F-ratio statistic of one-way ANOVA; F is the BW ratio rescaled
#'       by its degrees of freedom.
fs.bw <- function(x, y, ...) {
  if (!is.data.frame(x)) x <- as.data.frame(x)
  if (length(y) != nrow(x)) {
    stop("x and y is not consistent.")
  }

  bw <- sapply(x, function(z) {
    #' z <- x[,1]      #' for debug
    mn.all <- mean(z)
    mn.grp <- tapply(z, y, mean)
    tmp.1 <- tmp.2 <- 0
    for (i in 1:length(z)) {
      cls <- y[i] #' which class
      tmp.1 <- tmp.1 + (mn.grp[[cls]] - mn.all)^2
      tmp.2 <- tmp.2 + (z[i] - mn.grp[[cls]])^2
    }
    tmp.1 / tmp.2
  })

  fs.order <- order(bw, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(bw)
  nam <- names(bw[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = bw)
  return(res)
}
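
#' A vectorized sketch equivalent to the loop above, shown for clarity only
#' (given x and y as in fs.bw):
#'   bw1 <- sapply(as.data.frame(x), function(z) {
#'     mn <- tapply(z, y, mean)[y]             #' per-sample class mean
#'     sum((mn - mean(z))^2) / sum((z - mn)^2) #' between SS / within SS
#'   })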

#' =========================================================================
#' lwc-06-04-07: Feature selection using RELIEF
#' wll-06-04-07: According to the original algorithm, a random sample should
#' be drawn at each step, but then the result of each run would differ. So
#' the idea is changed here: every sample is used to update the weights.
#' wll-15-10-07: 1. Extend to ReliefF, whose main idea is that there are
#'               k (k >= 1, default 10) nearest hits/misses and all the
#'               hits and misses are averaged. 2. Add a user-defined number
#'               of instances to sample; by default all instances are used.
#' References:
#' 1.) KIRA, K. and RENDEL, L. (1992). The Feature Selection Problem :
#' Traditional Methods and a new algorithm. Proc. Tenth National Conference
#' on Artificial Intelligence, MIT Press, 129-134.
#' 2.) KONONENKO, I., SIMEC, E., and ROBNIK-SIKONJA, M. (1997). Overcoming
#' the myopia of induction learning algorithms with RELIEFF. Applied
#' Intelligence Vol7, 1, 39-55.
#' 3.) Igor Kononenko, Estimating Attributes: Analysis and Extensions of
#' RELIEF, European Conference on Machine Learning, Ed. Francesco Bergadano
#' and Luc De Raedt, 1994, 171-182, Springer
#' 4.) MARKO ROBNIK-SIKONJA and IGOR KONONENKO, Theoretical and Empirical
#' Analysis of ReliefF and RReliefF, Machine Learning, 53, 23-69, 2003
fs.relief <- function(x, y, m = NULL, k = 10, ...) {

  #' Find the nearest neighbors from a matrix
  nearest <- function(x, mat, k = 10) {
    #' Euclidean distance
    dis <- sapply(as.data.frame(t(mat)), function(y) sqrt(sum((x - y)^2)))
    k <- min(k, length(dis)) #' wll-21-03-2008: fix a bug spotted by Ian Scott.
    #' ind  <- which.min(dis)
    ind <- sort.list(dis)[1:k]

    return(mat[ind, , drop = F])
  }

  if (!is.matrix(x)) x <- as.matrix(x)
  if (!is.factor(y)) y <- as.factor(y)
  if (length(y) != nrow(x)) {
    stop("x and y is not consistent.")
  }

  n <- nrow(x)
  p <- ncol(x)
  gp <- levels(y)
  prio <- table(y) / n #' Computing the prior

  #' Calculating the range of each feature. range = Max - Min
  rng <- sapply(as.data.frame(x), function(x) diff(range(x)))

  if (is.null(m)) {
    m <- n
  } else {
    m <- min(m, n)
  }
  idx <- sample(1:n, m, replace = F)

  weight <- rep(0, p)
  for (i in idx) {
    #' split x by group
    dat <- split.data.frame(x[-i, , drop = F], y[-i])

    #' find nearest neighbours
    near <- lapply(dat, function(z) nearest(x[i, ], z, k = k))

    hit <- gp[gp == y[i]]
    miss <- gp[gp != y[i]]

    delta <- rep(0, p)
    for (j in 1:p) {
      diff.hit <- -mean(abs(x[i, ][j] - near[[hit]][, j, drop = T]))
      diff.miss <- lapply(miss, function(z) {
        prio[z] * mean(abs(x[i, ][j] - near[[z]][, j, drop = T]))
      })
      diff.miss <- do.call("sum", diff.miss)
      diff.miss <- diff.miss / (1 - prio[hit])
      delta[j] <- (1 / m) * ((diff.hit + diff.miss) / rng[j])
    }
    #' update weight
    weight <- weight + delta
  }

  names(weight) <- colnames(x)
  fs.order <- order(weight, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(weight)
  nam <- names(weight[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = weight)
  return(res)
}
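
#' Usage (a minimal sketch; 'm = NULL' uses every instance, while results
#' for m < n depend on the random sample drawn):
#'   set.seed(100)
#'   res <- fs.relief(iris[, 1:4], iris[, 5], m = 30, k = 5)
#'   res$stats #' RELIEF weights, larger is better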

#' ========================================================================
#' RFE-SVM feature selection
#' History:
#'   lwc-02-11-2006:
#'   lwc-15-11-2006: fix fs.len as "power2"
#'   lwc-12-01-2007: re-write
#'   lwc-17-01-2007: sequence of number of features in decreasing order
#'   lwc-18-01-2008: the dots argument has a problem, but I think it is a
#'                   problem of svm, not of dots, so ... is stripped off
#'                   here. Does svm treat ... and list(...) differently?
fs.rfe <- function(x, y, fs.len = "power2", ...) {
  #' lwc-16-01-2007: avoid multiple actual arguments in calling svm.
  #' dots <- list(...)
  #' if(hasArg(kernel)) dots$kernel <- NULL

  #' LWC-24-10-2006: Calculates the primal variables w which are stored in
  #' warray
  wts.svm <- function(x) {
    #' warray[k,l,] is the weight vector for the binary problem of class k
    #' against class l
    ncl <- length(x$labels)
    classk <- rep(1:ncl, x$nSV)
    p <- dim(x$SV)[2]

    #' array of the weight vectors
    warray <- array(0, dim = c(ncl, ncl, p))
    #' loop to use the coefs
    for (s in 1:dim(x$SV)[1]) {
      for (co in 1:(ncl - 1)) {
        #' find the two class problem
        k <- classk[s]
        l <- ((1:ncl)[-k])[co]
        warray[k, l, ] <- warray[k, l, ] + x$coefs[s, co] * x$SV[s, ]
        warray[l, k, ] <- warray[l, k, ] + x$coefs[s, co] * x$SV[s, ]
      }
    }
    #' return twice the sum of the absolute value of primal variables w
    wts <- apply(abs(warray), 3, sum)
    return(wts)
  }

  y <- as.factor(y) #' 31-03-2007: must be a factor for classification.
  p <- ncol(x)
  fs.order <- seq(1, p)

  #' get feature lengths for SVM-RFE computation.
  len <- get.fs.len(p, fs.len = fs.len)
  len <- sort(len, decreasing = T) #' must be decreasing order for SVM-RFE

  nlen <- length(len)
  for (i in 1:nlen) {
    #' extract index of feature for this length.
    sel <- fs.order[1:len[i]]
    #' call SVM with linear kernel.
    model <- svm(x[, sel, drop = F], y, kernel = "linear")
    #' model <- svm(x[,sel,drop=F], y, kernel = "linear",dots)
    #' calculate the weights
    wts <- wts.svm(model)
    #' sort the feature based on the weights
    ord <- order(wts, decreasing = TRUE)
    #' update the feature set
    fs.order[1:len[i]] <- sel[ord]
  }

  fs.rank <- order(fs.order)

  names(fs.rank) <- colnames(x)
  nam <- colnames(x)[fs.order]
  if (!is.null(nam)) fs.order <- nam

  #' wll-05-07-2007: add stats for consistency with other methods.
  #'                 No other purpose.
  stats <- length(fs.rank) - fs.rank

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = stats)
  return(res)
}
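
#' Usage (a minimal sketch; 'svm' is assumed to come from package e1071, as
#' elsewhere in this package):
#'   x <- iris[1:100, 1:4]
#'   y <- factor(iris[1:100, 5])
#'   res <- fs.rfe(x, y, fs.len = "full")
#'   res$fs.order #' features eliminated one at a time, best first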

#' ========================================================================
#' SNR for feature selection
#' wll-20-03-2007: SNR is only for two-class classification.
#' wll-29-10-2008: is similar to Fisher criterion rate: (m1-m2)^2/(v1+v2)
fs.snr <- function(x, y, ...) {
  y <- as.factor(y)
  if (length(levels(y)) != 2) {
    stop("'y' must have two classes")
  }

  g.mn <- sapply(data.frame(x), function(x) tapply(x, y, mean))
  g.sd <- sapply(data.frame(x), function(x) tapply(x, y, sd))
  snr <- abs(g.mn[1, ] - g.mn[2, ]) / (g.sd[1, ] + g.sd[2, ])

  fs.order <- order(abs(snr), decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(snr)
  nam <- names(snr[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = abs(snr))
  return(res)
}

#' ========================================================================
#' AUC for feature selection
#' wll-20-03-2007:  AUC is only for two-class classification.
fs.auc <- function(x, y, ...) {
  y <- as.factor(y)
  if (length(levels(y)) != 2) {
    stop("'y' must have two classes")
  }

  levels(y) <- c(0, 1) #' recode levels as 0 and 1
  y <- as.numeric(levels(y))[as.integer(y)]

  auc <- sapply(as.data.frame(x), function(x) {
    y <- y[order(x, decreasing = TRUE)]
    tmp <- cumsum(y) / sum(y)
    mean(tmp[y == 0])
  })

  #' library(limma)
  #' auc  <- sapply(as.data.frame(x),function(z) auROC(y,z))
  #' library(verification)
  #' auc  <- sapply(as.data.frame(x),function(z) roc.area(y,z)$A)
  #' library(ROCR)
  #' auc  <- sapply(as.data.frame(x),function(z)
  #'           as.numeric(performance(prediction(z,y),measure="auc")@y.values))

  auc[auc < 0.5] <- 1 - auc[auc < 0.5]

  fs.order <- order(abs(auc), decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(auc)
  nam <- names(auc[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = abs(auc))
  return(res)
}
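
#' A sketch of the equivalent Mann-Whitney form of the AUC, which agrees
#' with the cumulative-sum estimate above when there are no ties ('auc.mw'
#' is only illustrative; y01 is a 0/1 vector):
#'   auc.mw <- function(z, y01) {
#'     n1 <- sum(y01 == 1); n0 <- sum(y01 == 0)
#'     (sum(rank(z)[y01 == 1]) - n1 * (n1 + 1) / 2) / (n1 * n0)
#'   }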

#' ========================================================================
#' randomForest for feature selection
#' lwc-20-03-2007: Commence
#' wll-08-12-2015: Use scaled importance scores.
#' Note-08-12-2015:
#'  1.) If there are a large number of zeros in the importance scores,
#'      which in turn leads to ties in the rank list, the resampling
#'      results based on rank aggregation are not reasonable.
#'  2.) Random Forest is a stochastic method, which leads to different
#'      results for different runs even if the random seed has been set by
#'      set.seed().
#'  3.) The application of 'fs.rf' should therefore be limited.
fs.rf <- function(x, y, ...) {
  tmp <- randomForest(x, y, importance = T, ...)
  meas <- tmp$importance[, ncol(tmp$importance) - 1]
  meas[meas <= 0] <- 0
  #' Or use the following two lines
  if (F) {
    meas <- importance(tmp, type = 1, scale = TRUE)
    meas <- meas[, 1]
  }

  fs.order <- order(meas, decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(meas)
  nam <- names(meas[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = meas)
  return(res)
}

#' ========================================================================
#' wll-29-10-2008: Another version of RF for feature selection based on
#'                 successively eliminating the least important variables.
fs.rf.1 <- function(x, y, fs.len = "power2", ...) {
  y <- as.factor(y)
  p <- ncol(x)
  fs.order <- seq(1, p) #' initialisation

  #' get feature lengths
  len <- get.fs.len(p, fs.len = fs.len)
  len <- sort(len, decreasing = T) #' must be decreasing order

  nlen <- length(len)
  for (i in 1:nlen) {
    #' extract index of feature for this length.
    sel <- fs.order[1:len[i]]
    #' call randomForest
    rf <- randomForest(x[, sel, drop = F], y,
      importance = T,
      keep.forest = FALSE, ...
    )
    imp <- importance(rf, type = 1, scale = TRUE)
    imp <- imp[, 1]
    #' sort the feature based on the scaled important scores
    ord <- order(imp, decreasing = T, na.last = T)
    #' update the feature set
    fs.order[1:len[i]] <- sel[ord]
  }

  fs.rank <- order(fs.order)
  names(fs.rank) <- colnames(x)
  nam <- colnames(x)[fs.order]
  if (!is.null(nam)) fs.order <- nam

  #' Add stats for consistency with other methods. No other purpose.
  stats <- length(fs.rank) - fs.rank
  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = stats)
  return(res)
}

#' =========================================================================
#' Welch test for feature selection
#' lwc-05-01-2007: commence
#' lwc-16-03-2007: minor change. Note: do not use sort with na.last = T
#' lwc-19-05-2008: Change oneway.test to t.test with dot arguments, so this
#'                 method supports the paired t-test (Welch); also strip off
#'                 noquote.
#' wll-17-06-2008: change data.frame to as.data.frame so that data frame
#'                 names are not changed (e.g., when the names of a data
#'                 frame are numeric-like characters).
fs.welch <- function(x, y, ...) {
  tmp <- sapply(as.data.frame(x), function(x) {
    tmp <- t.test(x ~ y, var.equal = F, ...)
    #' tmp <- oneway.test(x ~ y,var.equal=F)
    c(tmp$statistic, tmp$p.value)
  })

  stats <- tmp[1, ]
  pval <- tmp[2, ]

  fs.order <- order(abs(stats), decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(stats)
  nam <- names(stats[fs.order])
  if (!is.null(nam)) fs.order <- nam #'  fs.order <- noquote(nam)

  res <- list(
    fs.order = fs.order, fs.rank = fs.rank, stats = abs(stats),
    pval = pval
  )
  return(res)
}

#' =========================================================================
#' wll-19-05-2008: Welch test for feature selection
#'   NOTE: This function selects features based on the p-values rather than
#'   the absolute values of the statistics. It also supports passing
#'   additional arguments, such as paired or unpaired test, and the
#'   alternative hypothesis.
fs.welch.1 <- function(x, y, ...) {
  tmp <- sapply(as.data.frame(x), function(x) {
    tmp <- t.test(x ~ y, var.equal = F, ...)
    c(tmp$statistic, tmp$p.value)
  })

  stats <- tmp[1, ]
  pval <- tmp[2, ]

  fs.order <- order(pval, decreasing = F, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(pval)
  nam <- names(pval[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(
    fs.order = fs.order, fs.rank = fs.rank, pval = pval,
    stats = stats
  )
  return(res)
}

#' =========================================================================
#' Wilcoxon test for feature selection
#' lwc-21-06-2010: commence
#' Note: No doc and not export
fs.wilcox <- function(x, y, ...) {
  tmp <- sapply(as.data.frame(x), function(x) {
    tmp <- wilcox.test(x ~ y, ...)
    c(tmp$statistic, tmp$p.value)
  })

  stats <- tmp[1, ]
  pval <- tmp[2, ]

  fs.order <- order(abs(stats), decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(stats)
  nam <- names(stats[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(fs.order = fs.order, fs.rank = fs.rank, stats = abs(stats),
              pval = pval)
  return(res)
}

#' =========================================================================
#' ANOVA for feature selection
#' lwc-05-01-2007: commence
#' lwc-16-03-2007: minor change
fs.anova <- function(x, y, ...) {
  tmp <- sapply(as.data.frame(x), function(x) {
    tmp <- oneway.test(x ~ y, var.equal = T)
    c(tmp$statistic, tmp$p.value)
  })

  stats <- tmp[1, ]
  pval <- tmp[2, ]

  fs.order <- order(abs(stats), decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(stats)
  nam <- names(stats[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(
    fs.order = fs.order, fs.rank = fs.rank, stats = abs(stats),
    pval = pval
  )
  return(res)
}

#' =========================================================================
#' Kruskal-Wallis test for feature selection
#' lwc-05-01-2007: commence
#' lwc-16-03-2007: minor change
fs.kruskal <- function(x, y, ...) {
  tmp <- sapply(as.data.frame(x), function(x) {
    tmp <- kruskal.test(x ~ y)
    c(tmp$statistic, tmp$p.value)
  })

  stats <- tmp[1, ]
  pval <- tmp[2, ]

  fs.order <- order(abs(stats), decreasing = T, na.last = T)
  fs.rank <- order(fs.order)

  names(fs.rank) <- names(stats)
  nam <- names(stats[fs.order])
  if (!is.null(nam)) fs.order <- nam

  res <- list(
    fs.order = fs.order, fs.rank = fs.rank, stats = abs(stats),
    pval = pval
  )
  return(res)
}

#' =========================================================================
#' Feature ranking validation by error estimation.
#' History:
#'   08-11-2006: commence
#'   09-11-2006: output different errors.
#'   26-11-2006: Borda count for selecting final feature order
#'   10-01-2007: Add user-defined data partitioning

#'   12-10-2007: Add fs.order as an argument. This allows the user to
#'               estimate error using a feature order calculated elsewhere.
#'               Actually this function replaces rankvali (deprecated).
frankvali.default <- function(dat, cl, cl.method = "svm",
                              fs.method = "fs.auc", fs.order = NULL,
                              fs.len = "power2", pars = valipars(),
                              tr.idx = NULL, ...) {
  #' validity checking
  if (missing(dat) || missing(cl)) {
    stop("data set or class are missing")
  }
  if (length(dim(dat)) != 2) {
    stop("'dat' must be a matrix or data frame")
  }
  if (!is.factor(cl)) {
    stop("cl must be a factor.")
  }
  if (nrow(dat) != length(cl)) stop("dat and cl don't match.")
  if (length(unique(cl)) < 2) {
    stop("Classification needs at least two classes.")
  }
  if (any(is.na(dat)) || any(is.na(cl))) {
    stop("NA is not permitted in data set or class labels.")
  }

  dat <- as.matrix(dat)
  rownames(dat) <- NULL #' strip off the row names

  n <- nrow(dat) #' number of samples
  p <- ncol(dat) #' size of feature

  len <- get.fs.len(p, fs.len = fs.len)
  nlen <- length(len)

  #' construct index of train data
  if (is.null(tr.idx)) {
    if (pars$sampling == "cv" && pars$nreps > n) {
      pars$sampling <- "loocv"
    }
    tr.idx <- trainind(cl, pars = pars)
  }
  pars$niter <- length(tr.idx)
  pars$nreps <- length(tr.idx[[1]])

  err.all <- list()
  fs.list <- list()

  for (i in 1:pars$niter) {
    train.ind <- tr.idx[[i]]
    res <- list()
    #' generic loop for loocv, cv, scv, random and bootstrap.
    for (j in 1:length(train.ind)) {
      dat.tr <- dat[train.ind[[j]], , drop = F]
      cl.tr <- cl[train.ind[[j]]]
      dat.te <- dat[-train.ind[[j]], , drop = F]
      cl.te <- cl[-train.ind[[j]]]
      #' Error estimation of feature selection with fs.order or with rank
      #' method.
      res[[j]] <- frank.err(dat.tr, cl.tr, dat.te, cl.te,
        cl.method = cl.method,
        fs.method = fs.method, fs.order = fs.order,
        fs.len = fs.len, ...
      )
    } #' end of j
    #' feature ranking list 
    fs.list[[i]] <- sapply(res, function(x) cbind(x$fs.rank))
    rownames(fs.list[[i]]) <- colnames(dat)

    #' error estimation
    err.all[[i]] <- t(sapply(res, function(x) cbind(x$error)))
    colnames(err.all[[i]]) <- len
    #' colnames(err.all[[i]]) <- paste("Len_", len, sep="")
  } #' End of i

  names(err.all) <- paste("Iter_", seq(1, pars$niter), sep = "")
  names(fs.list) <- paste("Iter_", seq(1, pars$niter), sep = "")

  err.iter <- t(sapply(err.all, function(x) apply(x, 2, mean)))
  err.avg <- apply(err.iter, 2, mean)
  #' or try
  #' err.mat <- do.call("rbind", err.all)
  #' err.avg <- apply(err.mat, 2, mean)

  if (is.null(fs.order)) {
    #' final feature ranking
    #' Use Borda count to get the final feature order
    fs.mat <- do.call("cbind", fs.list)
    fs.score <- apply(fs.mat, 1, sum)
    fs.order <- order(fs.score, decreasing = F) #' fs order from best to worst.
    fs.rank <- order(fs.order, decreasing = F) #' fs rank score.
    names(fs.rank) <- rownames(fs.mat)
    temp <- names(fs.rank[fs.order])
    if (!is.null(temp)) {
      fs.order <- noquote(temp)
    }
  } else {
    fs.rank <- order2rank(fs.order)
    fs.method <- "User defined"
  }

  ret <- list(
    fs.method = fs.method,
    cl.method = cl.method,
    fs.len = len, #' computational levels
    err.avg = err.avg, #' average error
    err.iter = err.iter, #' error matrix on each iteration
    err.all = err.all, #' all error matrix
    fs.order = fs.order, #' final feature order
    fs.rank = fs.rank, #' final feature rank
    sampling = switch(pars$sampling,
      "cv"     = "cross validation",
      "loocv"  = "leave-one-out cross-validation",
      "boot"   = "bootstrap",
      "random" = "randomised validation (holdout)"
    ),
    niter = pars$niter, #' number of iteration
    nreps = pars$nreps
  )

  if (is.null(fs.order)) {
    ret$fs.list <- fs.list #' feature list of all computation
  }
  class(ret) <- "frankvali"
  return(ret)
}
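
#' Usage (a minimal sketch):
#'   x <- iris[1:100, 1:4]
#'   y <- factor(iris[1:100, 5])
#'   pars <- valipars(sampling = "cv", niter = 2, nreps = 4)
#'   res <- frankvali(x, y, cl.method = "svm", fs.method = "fs.auc",
#'                    fs.len = "power2", pars = pars)
#'   res          #' print.frankvali
#'   boxplot(res) #' error rate at each feature-set size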

#' ========================================================================
frankvali <- function(dat, ...) UseMethod("frankvali")

#' ========================================================================
#' lwc-12-11-2006:
frankvali.formula <- function(formula, data = NULL, ..., subset,
                              na.action = na.omit) {
  call <- match.call()
  if (!inherits(formula, "formula")) {
    stop("method is only for formula objects")
  }
  m <- match.call(expand.dots = FALSE)
  if (identical(class(eval.parent(m$data)), "matrix")) {
    m$data <- as.data.frame(eval.parent(m$data))
  }
  m$... <- NULL
  m[[1]] <- as.name("model.frame")
  m$na.action <- na.action
  m <- eval(m, parent.frame())
  Terms <- attr(m, "terms")
  attr(Terms, "intercept") <- 0
  x <- model.matrix(Terms, m)
  y <- model.extract(m, "response")
  attr(x, "na.action") <- attr(y, "na.action") <- attr(m, "na.action")

  ret <- frankvali.default(x, y, ..., na.action = na.action)

  ret$call <- call
  ret$call[[1]] <- as.name("frankvali")
  ret$terms <- Terms
  if (!is.null(attr(m, "na.action"))) {
    ret$na.action <- attr(m, "na.action")
  }
  class(ret) <- c("frankvali.formula", class(ret))
  return(ret)
}

#' =======================================================================
#' lwc-13-11-2006: boxplot the error rate on each iteration or computation.
boxplot.frankvali <- function(x, ...) {
  col <- "lightgray"
  xlab <- "Numbers of Feature"
  ylab <- "Error Rate"
  ylim <- c(0, 1.0)

  if (x$niter > 1) {
    main <- "Error rate on each iteration"
    #' tmp <- data.frame(x$err.iter)     #' why does data.frame change names?
    tmp <- as.data.frame(x$err.iter) #' wll-17-06-2008: Is it OK?
    colnames(tmp) <- colnames(x$err.iter)
  } else {
    main <- "Error rate on each computation"
    tmp <- as.data.frame(x$err.all) #' tmp <- data.frame(x$err.all)
    colnames(tmp) <- colnames(x$err.all)
  }

  boxplot(tmp, main = main, col = col, xlab = xlab, ylab = ylab, ylim = ylim)
}

#' ========================================================================
print.frankvali <- function(x, digits = 3, ...) {
  cat("\nFeature selection method:\t\t", x$fs.method)
  cat("\nClassification method:\t\t", x$cl.method)
  cat("\nSampling:\t\t", x$sampling)
  cat("\nNo. of iteration.:\t", x$niter)
  cat("\nNo. of replications:\t", x$nreps)

  cat("\nFeature numbers:\t", x$fs.len)
  cat("\nAverage error:\t\t", round(x$err.avg, digits))

  cat(
    "\nFeature order (top 10):\t",
    x$fs.order[1:min(10, length(x$fs.order))]
  ) #' best to worst
  cat("\n")
  invisible(x)
}

#' ========================================================================
summary.frankvali <- function(object, ...) {
  structure(object, class = "summary.frankvali")
}

#' ========================================================================
print.summary.frankvali <- function(x, ...) {
  print.frankvali(x)
  cat("\nError of each iteration:\n")
  print(x$err.iter)
  #' cat("\nError of all computation:\n")
  #' print(x$err.all)
  if (!is.null(x$fs.list)) {
    cat("\nFeature ranking List:\n")
    print(x$fs.list)
  }
}

#' =========================================================================
#' Error estimation of feature ranking on a single data set.
#' History:
#'   08-11-2006: commence
#'   10-11-2006: generalise cl.method.
#'   10-01-2007: minor modification
#'   24-09-2007: Generalise fs.method
#'   12-10-2007: Add fs.order as an argument.
frank.err <- function(dat.tr, cl.tr, dat.te, cl.te, cl.method = "svm",
                      fs.method = "fs.auc", fs.order = NULL,
                      fs.len = "power2", ...) {
  if (missing(dat.tr) || missing(cl.tr) || missing(dat.te) || missing(cl.te)) {
    stop(" training and test data missing")
  }

  #' feature ranking
  if (is.null(fs.order)) {
    tmp <- do.call(fs.method, c(list(x = dat.tr, y = cl.tr), list(...)))
    fs.order <- tmp$fs.order
    fs.rank <- tmp$fs.rank
  } else {
    fs.rank <- order2rank(fs.order)
    fs.method <- "User defined"
  }

  #' generate feature length for error estimation
  p <- ncol(dat.tr)
  len <- get.fs.len(p, fs.len = fs.len)
  nlen <- length(len)

  #' error estimation
  error <- numeric(length = nlen)
  names(error) <- len
  #' names(error) <- paste("Len_", len, sep="")
  for (i in 1:nlen) {
    #' feature selection
    sel <- fs.order[1:len[i]]
    error[i] <- classifier(dat.tr[, sel, drop = F], cl.tr,
      dat.te[, sel, drop = F], cl.te,
      method = cl.method, ...
    )$err
  }

  ret <- list(
    cl.method = cl.method, fs.len = len, error = error,
    fs.method = fs.method, fs.order = fs.order, fs.rank = fs.rank
  )

  return(ret)
}

#' =========================================================================
#' wll-29-04-2008: Wrapper function for validation of feature selection by
#'                 classification.
#' wll-29-10-2008: Give a logical value to validate all fs or not.
#' lwc-07-10-2011: use get.fs.len again but remove the last one.
#' Note: 1. A similar but more complete function is frankvali.
#'       2. cl.method should arguably be renamed as method.
fs.cl <- function(dat, cl, fs.order = colnames(dat), fs.len = 1:ncol(dat),
                  cl.method = "svm", pars = valipars(), all.fs = FALSE, ...) {
  len <- get.fs.len(ncol(dat), fs.len = fs.len)
  if (!all.fs) {
    len <- len[1:(length(len) - 1)] #' remove the last one
  }
  nlen <- length(len)

  res <- sapply(1:nlen, function(i) {
    id <- fs.order[1:len[i]] #' extract index of selected features
    #' cat("\n--Feature Length = :",i,"\n"); flush.console()
    res <- aam.cl(dat[, id, drop = F], cl, method = cl.method, pars = pars, ...)
  })
  res <- t(res)
  rownames(res) <- len
  res
}

#' =========================================================================
#' lwc-27-06-2011: Wrapper function for validation of feature selection by
#'                 classification.
#' Note: This function evaluates all features given by the user, either as
#' individual features or as aggregated features.
fs.cl.1 <- function(dat, cl, fs.order = colnames(dat), cl.method = "svm",
                    pars = valipars(), agg_f = FALSE, ...) {
  len <- length(fs.order)
  if (agg_f) {
    res <- sapply(1:len, function(i) {
      id <- fs.order[1:i] #' aggregation of features
      res <- aam.cl(dat[, id, drop = F], cl, method = cl.method, pars = pars, ...)
    })
  } else {
    res <- sapply(1:len, function(i) {
      id <- fs.order[i] #' individual feature
      res <- aam.cl(dat[, id, drop = F], cl, method = cl.method, pars = pars, ...)
    })
  }
  res <- t(res)
  rownames(res) <- 1:len
  res
}

#' =========================================================================
#' lwc-27-06-2011: Wrapper function for validation of feature selection by
#'                 classification.
#' lwc-19-05-2012: replace aam.cl with accest in order to get more results.
#' lwc-22-10-2012: To get aam, call perf.aam.
#' lwc-21-01-2014: To get other outcomes such as SE and CI, extra code is
#' needed; refer to frankvali.
#' Usages:
#'  data(abr1)
#'  dat <- abr1$pos
#'  x <- preproc(dat[, 110:500], method = "log10")
#'  y <- factor(abr1$fact$class)
#'  dat <- dat.sel(x, y, choices = c("1", "2"))
#'  x.1 <- dat[[1]]$dat
#'  y.1 <- dat[[1]]$cls
#'  pars <- valipars(sampling = "cv", niter = 4, nreps = 4)
#'
#'  #' multi-classes
#'  fs <- fs.rf(x, y)
#'  ord <- fs$fs.order[1:50]
#'  res <- fs.cl.2(x, y,
#'    fs.order = ord, cl.method = "svm", pars = pars,
#'    agg_f = TRUE
#'  )
#'  perf.aam(res)
#'
#'  #' two-classes
#'  fs <- fs.rf(x.1, y.1)
#'  ord <- fs$fs.order[1:50]
#'  res.1 <- fs.cl.2(x.1, y.1,
#'    fs.order = ord, cl.method = "svm", pars = pars,
#'    agg_f = TRUE
#'  )
#'  perf.aam(res.1)
#'
fs.cl.2 <- function(dat, cl, fs.order = colnames(dat), cl.method = "svm",
                    pars = valipars(), agg_f = FALSE, ...) {
  len <- length(fs.order)
  if (agg_f) {
    res <- lapply(1:len, function(i) {
      id <- fs.order[1:i] #' aggregation of features
      res <- accest(dat[, id, drop = F], cl,
        method = cl.method,
        pars = pars, ...
      )
      #' res <- aam.cl(dat[,id, drop=F],cl, method=cl.method, pars=pars,...)
    })
  } else {
    res <- lapply(1:len, function(i) {
      id <- fs.order[i] #' individual feature
      res <- accest(dat[, id, drop = F], cl,
        method = cl.method,
        pars = pars, ...
      )
      #' res <- aam.cl(dat[,id, drop=F],cl, method=cl.method, pars=pars,...)
    })
  }
  #' res <- t(res)
  #' rownames(res) <- 1:len
  names(res) <- 1:len
  res
}

#' =======================================================================
#' lwc-21-01-2014: get averages of acc, auc and mar from the outcome of fs.cl.2
perf.aam <- function(res) {
  tmp <- sapply(res, function(x) { #'    x = res[[1]]
    acc <- x$acc
    auc <- ifelse(!is.null(x$auc), x$auc, NA)
    mar <- ifelse(!is.null(x$mar), x$mar, NA)
    res <- c(acc = acc, auc = auc, mar = mar)
  })
  return(t(tmp))
}

#' =======================================================================
#' lwc-24-05-2012: Wrapper function for perf.aam.
perf <- function(res) {
  lapply(res, perf.aam)
}

#' ======================================================================
#' Generate a sequence of feature number
#' History:
#'  25-10-2006: commence
#'  31-10-2006: add user defined sequence
#'  15-11-2006: fix a bug in returning a decreasing vector
#' Usages:
#' get.fs.len(10,fs.len=c(1,5,3,11.2,7.8,23,1,0))
#' get.fs.len(200,fs.len="half")
get.fs.len <- function(p, fs.len = c("power2")) {
  if (!is.character(fs.len)) {
    fs.len <- as.vector(fs.len)
    fs.len <- as.integer(fs.len)
    fs.len <- fs.len[fs.len <= p & fs.len > 0]
    fs.len <- c(p, fs.len)
    x <- unique(fs.len)
  } else {
    fs.len <- match.arg(fs.len, c("full", "half", "power2"))

    if (fs.len == "full") {
      x <- seq(p, 1)
    } else if (fs.len == "half") {
      x <- tmp <- p
      while (tmp > 1) {
        tmp <- trunc(tmp / 2)
        x <- c(x, tmp)
      }
    } else {
      n <- ceiling(log2(p))
      x <- 2^(n:0)
      x[1] <- p
    }
  }

  #' x <- sort(x,decreasing = T) #' must be decreasing order for SVM-RFE
  x <- sort(x, decreasing = F)
  return(x)
}
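
#' Expected results for the usages above, derived from the code:
#'   get.fs.len(10, fs.len = c(1, 5, 3, 11.2, 7.8, 23, 1, 0)) #' 1 3 5 7 10
#'   get.fs.len(200, fs.len = "half") #' 1 3 6 12 25 50 100 200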

#' ======================================================================
#' lwc-02-02-2007: convert feature rank to feature order
#' NOTE: The vector of fs.rank should have variable names.
#' Usages:
#'  load("D:\R_lwc\data\R-W2-GC\31_01_2007\class_rfrankvali_auto.RData")
#'  fs.rank.list <- do.call("cbind",fs$rfe$fs.list)
#'  tmp     <- mt:::rank2order(fs.rank.list[,1])
#'  fs.order.list <- sapply(1:ncol(fs.rank.list),
#'                          function(x) mt:::rank2order(fs.rank.list[,x]))
#' Internal function.
rank2order <- function(fs.rank) {
  fs.order <- order(fs.rank)
  tmp <- names(fs.rank[fs.order])
  if (!is.null(tmp)) fs.order <- tmp

  return(fs.order)
}

#' =======================================================================
#' wll-12-03-2007: convert feature order to feature rank
#' Internal function.
order2rank <- function(fs.order) {
  fs.rank <- order(fs.order)
  names(fs.rank) <- fs.order[fs.rank]
  return(fs.rank)
}
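
#' A tiny demonstration of the rank/order duality (assuming a named rank
#' vector):
#'   r <- c(b = 2, a = 1, c = 3) #' 'a' is the best-ranked feature
#'   rank2order(r)               #' "a" "b" "c"
#'   order2rank(rank2order(r))   #' a = 1, b = 2, c = 3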

#'  1) feat.freq
#'  2) feat.cons
#'  3) feat.mfs
#'  4) feat.mfs.stab
#'  5) feat.mfs.stats
#'  6) feat.agg
#'  7) feat.rank.re
#'  8) fs.plsvip
#'  9) fs.plsvip.1
#' 10) fs.plsvip.2
#' 11) fs.pls
#' 12) fs.pca
#' 13) fs.bw
#' 14) fs.relief
#' 15) fs.rfe
#' 16) fs.snr
#' 17) fs.auc
#' 18) fs.rf
#' 19) fs.rf.1
#' 20) fs.welch
#' 21) fs.welch.1
#' 22) fs.wilcox
#' 23) fs.anova
#' 24) fs.kruskal
#' 25) frankvali.default
#' 26) frankvali
#' 27) frankvali.formula
#' 28) boxplot.frankvali
#' 29) print.frankvali
#' 30) summary.frankvali
#' 31) print.summary.frankvali
#' 32) frank.err
#' 33) fs.cl
#' 34) fs.cl.1
#' 35) fs.cl.2
#' 36) perf.aam
#' 37) perf
#' 38) get.fs.len
#' 39) rank2order
#' 40) order2rank
