#' Density-Convoluted Support Vector Machine
#'
#' Fits the density-convoluted support vector machine (DCSVM) through kernel density convolutions.
#'
#' @param x A numeric matrix with \eqn{N} rows and \eqn{p} columns representing predictors. Each row corresponds to an observation, and each column corresponds to a variable.
#' @param y A numeric vector of length \eqn{N} representing binary responses. Elements must be either -1 or 1.
#' @param nlambda Number of \code{lambda} values in the sequence. Default is 100.
#' @param lambda.factor Ratio of the smallest to the largest \code{lambda} in the sequence: \code{lambda.factor} = \code{min(lambda)} / \code{max(lambda)}. The default is 0.0001 if \eqn{N \ge p} and 0.01 if \eqn{N < p}. Ignored if a \code{lambda} sequence is supplied.
#' @param lambda An optional user-specified sequence of \code{lambda} values. If \code{lambda = NULL} (default), the sequence is computed based on \code{nlambda} and \code{lambda.factor}. The program automatically sorts user-defined \code{lambda} sequences in decreasing order.
#' @param lam2 Users may tune \eqn{\lambda_2}, which controls the L2 regularization strength. Default is 0 (lasso).
#' @param kern Type of kernel method for smoothing. Options are \code{"gaussian"}, \code{"uniform"}, and \code{"epanechnikov"}. Default is \code{"epanechnikov"}.
#' @param hval The bandwidth parameter for kernel smoothing. Default is 1.
#' @param pf A numeric vector of length \eqn{p} representing the L1 penalty weights for each coefficient. A common choice is \eqn{(\beta + 1/n)^{-1}}, where \eqn{n} is the sample size and \eqn{\beta} is obtained from L1 DCSVM or enet DCSVM. Default is 1 for all predictors; see the examples below for an adaptive-weight illustration.
#' @param pf2 A numeric vector of length \eqn{p} representing the L2 penalty weights for each coefficient. A value of 0 indicates no L2 shrinkage. Default is 1 for all predictors.
#' @param exclude Indices of predictors to exclude from the model. Equivalent to assigning an infinite penalty factor. Default is none.
#' @param dfmax Maximum number of nonzero coefficients allowed in the model. Default is \eqn{p + 1}. Useful for large \eqn{p} when a partial path is acceptable.
#' @param pmax Maximum number of variables allowed to ever be nonzero during the computation. Default is \code{min(dfmax * 1.2, p)}.
#' @param standardize Logical indicating whether predictors should be standardized to unit variance. Default is \code{TRUE}. Note that predictors are always centered.
#' @param eps Convergence threshold. The algorithm stops when \eqn{4\max_j(\beta_j^{new} - \beta_j^{old})^2} is less than \code{eps}. Default is \code{1e-8}.
#' @param maxit Maximum number of iterations allowed. Default is \code{1e6}. Consider increasing \code{maxit} if the algorithm does not converge.
#' @param istrong Logical indicating whether to use the strong rule for faster computation. Default is \code{TRUE}.
#'
#' @return An object of class \code{dcsvm} containing the following components:
#' \item{b0}{Intercept values for each \code{lambda}.}
#' \item{beta}{Sparse matrix of coefficients for each \code{lambda}. Use \code{as.matrix()} to convert.}
#' \item{df}{Number of nonzero coefficients for each \code{lambda}.}
#' \item{dim}{Dimensions of the coefficient matrix.}
#' \item{lambda}{Sequence of \code{lambda} values used.}
#' \item{npasses}{Total number of iterations across all \code{lambda} values.}
#' \item{jerr}{Warnings and errors. 0 if no errors.}
#' \item{call}{The matched call.}
#'
#' @seealso \code{print.dcsvm}, \code{predict.dcsvm}, \code{coef.dcsvm}, \code{plot.dcsvm}, and \code{cv.dcsvm}.
#'
#' @examples
#' # Load the data
#' data(colon)
#' # Fit the elastic-net penalized DCSVM with lambda2 set to 1
#' fit <- dcsvm(colon$x, colon$y, lam2 = 1)
#' print(fit)
#' # Coefficients at some lambda value
#' c1 <- coef(fit, s = 0.005)
#' # Make predictions
#' predict(fit, newx = colon$x[1:10, ], s = c(0.01, 0.005))
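#' # Additional illustrative calls (sketches based on the argument descriptions
#' # above, not part of the original examples): a Gaussian kernel with a smaller
#' # bandwidth, a user-supplied lambda sequence with excluded predictors, and
#' # adaptive L1 weights in the spirit of the `pf` description (absolute initial
#' # coefficients keep the weights positive).
#' fit_gauss <- dcsvm(colon$x, colon$y, kern = "gaussian", hval = 0.5)
#' fit_sub <- dcsvm(colon$x, colon$y, lambda = c(0.1, 0.01, 0.001), exclude = 1:2)
#' b_init <- as.matrix(fit$beta)[, 10]
#' fit_adapt <- dcsvm(colon$x, colon$y, pf = 1 / (abs(b_init) + 1 / nrow(colon$x)))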
#'
#' @useDynLib dcsvm
#' @export
dcsvm = function(x, y, nlambda=100, lambda.factor=ifelse(nobs < nvars, 0.01, 1e-04),
                 lambda=NULL, lam2=0, kern=c("gaussian", "uniform", "epanechnikov"),
                 hval=1, pf=rep(1, nvars), pf2=rep(1, nvars),
                 exclude, dfmax=nvars + 1, pmax=min(dfmax * 1.2, nvars), standardize=TRUE,
                 eps=1e-08, maxit=1e+06, istrong=TRUE) {
  ####################################################################
  this.call = match.call()
  y = drop(y)
  x = as.matrix(x)
  np = dim(x)
  nobs = as.integer(np[1])
  nvars = as.integer(np[2])
  vnames = colnames(x)
  if (is.null(vnames))
    vnames = paste0("V", seq(nvars))
  if (length(y) != nobs)
    stop("x and y have different numbers of observations.")
  if (missing(kern)) kern = "epanechnikov" else kern = match.arg(kern)
  if (!match(kern, c("epanechnikov", "gaussian"), FALSE)) {
    warning("Only 'epanechnikov' and 'gaussian' are available for kern; 'epanechnikov' used.")
    kern = "epanechnikov"
  }
  ####################################################################
  # parameter setup
  alpha = NULL  # users can change this to tune the elastic net via alpha
  if (!is.null(alpha)) {
    alpha = as.double(alpha)
    if (alpha <= 0 || alpha > 1)
      stop("alpha must satisfy 0 < alpha <= 1.")
    if (!is.null(lam2))
      warning("lam2 has no effect when alpha is specified.")
    lam2 = -1.0
  } else {
    if (!is.null(lam2)) {
      if (lam2 < 0) stop("lam2 must be non-negative.")
      alpha = -1.0
    } else {
      alpha = 1.0  # default: lasso
    }
  }
  maxit = as.integer(maxit)
  isd = as.integer(standardize)
  eps = as.double(eps)
  dfmax = as.integer(dfmax)
  pmax = as.integer(pmax)
  if (!missing(exclude)) {
    jd = match(exclude, seq(nvars), 0)
    if (!all(jd > 0))
      stop("Some excluded variables are out of range.")
    jd = as.integer(c(length(jd), jd))
  } else jd = as.integer(0)
  ####################################################################
  # lambda setup
  nlam = as.integer(nlambda)
  if (is.null(lambda)) {
    if (lambda.factor >= 1)
      stop("lambda.factor should be less than 1.")
    flmin = as.double(lambda.factor)
    ulam = double(1)
  } else {
    # flmin = 1 if the user supplies lambda
    flmin = as.double(1)
    if (any(lambda < 0))
      stop("The values of lambda should be non-negative.")
    ulam = as.double(rev(sort(lambda)))
    nlam = as.integer(length(lambda))
  }
  pfncol = NCOL(pf)
  pf = matrix(as.double(pf), ncol=pfncol)
  if (NROW(pf) != nvars) {
    stop("The size of the L1 penalty factor must match the number of input variables.")
  } else {
    if (pfncol != nlambda && NCOL(pf) != 1)
      stop("pf should be a matrix with 1 or nlambda columns.")
  }
  if (length(pf2) != nvars)
    stop("The size of the L2 penalty factor must match the number of input variables.")
  pf2 = as.double(pf2)
  ####################################################################
  fit = switch(kern,
    gaussian = dcsvm.gauss(x, y, alpha, lam2, hval, nlam, flmin, ulam, isd,
                           eps, dfmax, pmax, jd, pfncol, pf, pf2, maxit,
                           istrong, nobs, nvars, vnames),
    uniform = dcsvm.unif(x, y, alpha, lam2, hval, nlam, flmin, ulam, isd,
                         eps, dfmax, pmax, jd, pfncol, pf, pf2, maxit,
                         istrong, nobs, nvars, vnames),
    epanechnikov = dcsvm.epane(x, y, alpha, lam2, hval, nlam, flmin, ulam, isd,
                               eps, dfmax, pmax, jd, pfncol, pf, pf2, maxit,
                               istrong, nobs, nvars, vnames))
  if (is.null(lambda))
    fit$lambda = lamfix(fit$lambda)
  fit$kern = kern
  fit$hval = hval
  fit$lam2 = lam2
  fit$call = this.call
  ####################################################################
  class(fit) = c("dcsvm", class(fit))
  fit
}
dcsvm.gauss = function(x, y, alpha, lam2, hval, nlam, flmin, ulam, isd, eps,
                       dfmax, pmax, jd, pfncol, pf, pf2, maxit, istrong, nobs, nvars, vnames) {
  ####################################################################
  # recode the two classes of y as -1/+1
  y = as.factor(y)
  if (nlevels(y) != 2)
    stop("y must take exactly two distinct values.")
  y = c(-1, 1)[as.numeric(y)]
  # call the Fortran core; outputs: nalam = number of lambdas actually fit,
  # b0 = intercepts, beta/ibeta/nbeta = compressed nonzero coefficients and
  # their indices, alam = lambda values, npass = total passes, jerr = error flag
  fit = .Fortran("dcsvm_gauss", alpha, lam2, hval, nobs, nvars,
                 as.double(x), as.double(y), jd, pfncol, pf, pf2, dfmax,
                 pmax, nlam, flmin, ulam, eps, isd, maxit,
                 istrong=as.integer(istrong),
                 nalam=integer(1), b0=double(nlam),
                 beta=double(pmax * nlam), ibeta=integer(pmax),
                 nbeta=integer(nlam), alam=double(nlam),
                 npass=integer(1), jerr=integer(1))
  #################################################################
  # unpack the compressed Fortran output into the path object
  outlist = getoutput(fit, maxit, pmax, nvars, vnames)
  outlist = c(outlist, list(npasses = fit$npass, jerr = fit$jerr))
  class(outlist) = c("gaussian")
  outlist
}
dcsvm.unif = function(x, y, alpha, lam2, hval, nlam, flmin, ulam, isd, eps,
                      dfmax, pmax, jd, pfncol, pf, pf2, maxit, istrong, nobs, nvars, vnames) {
  ####################################################################
  # recode the two classes of y as -1/+1
  y = as.factor(y)
  if (nlevels(y) != 2)
    stop("y must take exactly two distinct values.")
  y = c(-1, 1)[as.numeric(y)]
  # call the Fortran core (outputs as in dcsvm.gauss)
  fit = .Fortran("dcsvm_unif", alpha, lam2, hval, nobs, nvars,
                 as.double(x), as.double(y), jd, pfncol, pf, pf2, dfmax,
                 pmax, nlam, flmin, ulam, eps, isd, maxit,
                 istrong=as.integer(istrong),
                 nalam=integer(1), b0=double(nlam),
                 beta=double(pmax * nlam), ibeta=integer(pmax),
                 nbeta=integer(nlam), alam=double(nlam),
                 npass=integer(1), jerr=integer(1))
  #################################################################
  # unpack the compressed Fortran output into the path object
  outlist = getoutput(fit, maxit, pmax, nvars, vnames)
  outlist = c(outlist, list(npasses = fit$npass, jerr = fit$jerr))
  class(outlist) = c("uniform")
  outlist
}
dcsvm.epane = function(x, y, alpha, lam2, hval, nlam, flmin, ulam, isd, eps,
                       dfmax, pmax, jd, pfncol, pf, pf2, maxit, istrong, nobs, nvars, vnames) {
  ####################################################################
  # recode the two classes of y as -1/+1
  y = as.factor(y)
  if (nlevels(y) != 2)
    stop("y must take exactly two distinct values.")
  y = c(-1, 1)[as.numeric(y)]
  # call the Fortran core (outputs as in dcsvm.gauss)
  fit = .Fortran("dcsvm_epane", alpha, lam2, hval, nobs, nvars,
                 as.double(x), as.double(y), jd, pfncol, pf, pf2, dfmax,
                 pmax, nlam, flmin, ulam, eps, isd, maxit,
                 istrong=as.integer(istrong),
                 nalam=integer(1), b0=double(nlam),
                 beta=double(pmax * nlam), ibeta=integer(pmax),
                 nbeta=integer(nlam), alam=double(nlam),
                 npass=integer(1), jerr=integer(1))
  #################################################################
  # unpack the compressed Fortran output into the path object
  outlist = getoutput(fit, maxit, pmax, nvars, vnames)
  outlist = c(outlist, list(npasses = fit$npass, jerr = fit$jerr))
  class(outlist) = c("epanechnikov")
  outlist
}