Minimizes 1 - (p)AUC plus a penalty.
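A rough sketch of the objective (not the package's exact formula; the precise surrogate loss, pair weighting, and penalty are defined in the package and its references): writing eta_k = f(x_k) for the fitted score, s for the slope of the ramp/sigmoid approximation, and lambda for the penalty scale,

\min_{f} \;\; \lambda\, P(f) \;+\; \frac{1}{n_1 n_0} \sum_{i:\, y_i = 1} \; \sum_{j:\, y_j = 0} \ell_s\!\left(\eta_i - \eta_j\right),

where \ell_s is a ramp- or sigmoid-type surrogate for the indicator 1\{\eta_i \le \eta_j\}, so the double sum approximates 1 - AUC (or 1 - pAUC when restricted to a subregion), and P(f) is the penalty (e.g. a squared coefficient norm for the linear kernel).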
rauc(formula, dat, s = 1, lambda = 1, kernel = "linear", para = NULL,
     start.method = "rlogit", eta0.init = NULL, beta.init = NULL,
     eta.diff.init = NULL, maxit = 50, tol = 1e-5,
     minQuad.control = control.minQuad(),
     init.alpha.from.previous = TRUE, mem.efficient = TRUE,
     ret.vcov = FALSE, garbage.collection = TRUE, verbose = FALSE, ...)
formula: formula, e.g. y~x1+x2

dat: data frame

s: absolute value of the slope, defaults to 1. Note: the pair (s, lambda) is redundant, so this argument may be removed.

lambda: scale parameter in front of the penalty function, defaults to 1

kernel: see getK for more details

para: see getK for more details

start.method: a string, used when kernel is linear. If "rlogit", a robust logistic fit is used as beta.init; if "1", a vector of ones is used as beta.init; if "0", a vector of zeros is used as beta.init.

eta0.init: a vector of the same length as the number of rows in dat

beta.init: a vector of initial values for the linear kernel, of length equal to the number of covariates (without intercept)

eta.diff.init: a vector of the same length as the number of rows in dat

maxit: maximum number of iterations in the DCA algorithm

tol: absolute tolerance in RAUC if the kernel is not linear; relative tolerance in the coefficients if the kernel is linear

minQuad.control: control parameters passed to minQuad; see control.minQuad

init.alpha.from.previous: defaults to TRUE; if TRUE, then after the first iteration the alpha estimate from the previous DCA iteration is used as the starting alpha passed to minQuad

mem.efficient: if TRUE, the smaller matrix 'K' is used in computations instead of 'Q'; defaults to TRUE

ret.vcov: logical, whether to return an estimate of the covariance matrix of 'beta' for normal or logistic sigmoid functions

garbage.collection: logical, whether to call gc() between iterations

verbose: prints information at each iteration, defaults to FALSE

...: for debugging purposes only
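A minimal sketch of supplying explicit starting values for the linear kernel (sim.dat.1 is taken from the Examples below; the starting vector c(0, 0) is an arbitrary illustration, assumed to take the place of the default "rlogit" start):

dat <- sim.dat.1(n = 200, seed = 1)
## two covariates (x1, x2), so beta.init has length 2 (no intercept)
fit <- rauc(y ~ x1 + x2, dat, kernel = "linear",
            beta.init = c(0, 0), maxit = 5, verbose = TRUE)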
A list with the following elements:
convergence: 0 if converged, 1 if the maximum number of iterations was reached

value: value of the objective function

iterations: number of iterations until convergence or until 'maxit' was reached
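As a sketch of how these elements might be inspected after a fit (reusing the simulated data from the Examples below; the specific call is illustrative only):

dat <- sim.dat.1(n = 200, seed = 1)
fit <- rauc(y ~ x1 + x2, dat, kernel = "linear", maxit = 2)
if (fit$convergence != 0) warning("maxit reached before the DCA loop converged")
fit$value       # objective value at the final iterate
fit$iterations  # number of DCA iterations performed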
Shuxin Yin
Youyi Fong youyifong@gmail.com
Krisztian Sebestyen ksebestyen@gmail.com
## Not run:
# options(path.svml = 'D:/downloaded_scientific_programs/svmlight')
# options(path.svml ='~/bin/svmlight')
###########################################################
# a linear example
dat = sim.dat.1(n=200,seed=1)
# convergence takes a long time; to pass CRAN checks, maxit is set to a small value here
fit1 = rauc (y~x1+x2, dat, lambda=2, kernel="linear", maxit=2)
#fit2 = rauc.linear (y~x1+x2, dat, lambda=2, verbose=TRUE)
#aux2=fit2$X %*% fit2$coefficients
#all(fit1$linear.combination-aux2<1e-2)
fit1$train.auc # 0.7206015
fit3 = rauc (y~x1+x2, dat, lambda=2, kernel="rbf", para=1, verbose=TRUE)
fit3$train.auc # 0.7773434
fit4 = svml (y~x1+x2, dat, kernel="r", fitted=FALSE, cost=1e4)
fast.auc(predict(fit4, dat)$posterior[,1], dat$y) # 0.7921805
tune.svml(y~x1+x2, dat, kernel="r")
# 1 10 100 1000 10000 1e+05
#0.7027569 0.7254135 0.7517794 0.7653133 0.7921805 0.6674687
# glm-derived score for comparison
fit.glm=glm(y~x1+x2, dat, family="binomial")
fast.auc(fit1$X %*% fit.glm$coef[-1], fit1$y) #
# add outliers
dat = sim.dat.1(n=200,seed=1, add.outliers=TRUE)
fit3 = rauc (y~x1+x2, dat, lambda=2, kernel="rbf", para=1, verbose=TRUE)
fit3$train.auc # 0.7066667
fit4 = svml (y~x1+x2, dat, kernel="r", fitted=FALSE, cost=1e4)
fast.auc(predict(fit4, dat)$posterior[,1], dat$y) # 0.6910101
tune.svml(y~x1+x2, dat, kernel="r")
# 1 10 100 1000 10000 1e+05
#0.6485859 0.6705051 0.6722222 0.6767677 0.6910101 0.5007071
###########################################################
# a nonlinear example
dat=skin.orange (n=100,seed=1,noise=FALSE)
dim(dat)
# nonlinear kernel fit
fit1 = rauc (y~x1+x2+x3+x4, dat, lambda=2, kernel="rbf", para=1, verbose=TRUE)
# glm fit
fit.glm=glm(y~x1+x2+x3+x4, dat, family="binomial")
# linear kernel fit
fit2 = rauc (y~x1+x2+x3+x4, dat, lambda=2, kernel="linear", start.method = "rlogit", verbose=TRUE)
# training data prediction
fast.auc(fit1$linear.combination, fit1$y)
fast.auc(fit1$X %*% fit.glm$coef[-1], fit1$y)
fast.auc(fit2$linear.combination, fit2$y)
# test data prediction
newdata=skin.orange (n=1000,seed=2,noise=FALSE)
fast.auc(predict(fit1, newdata), newdata$y)
fast.auc(as.matrix(subset(newdata, select=c(x1,x2,x3,x4))) %*% fit.glm$coef[-1], newdata$y)
fast.auc(predict(fit2, newdata), newdata$y)
###### IMPROVEMENTS ####################################################
## rank = 2 problem
dat = sim.dat.1(n=300,seed=1,add.outliers = TRUE,std.dev = 1.0);fm = y~x1+x2
## linear kernel and random working set selection - low rank (2) problem
## set the initial alpha (passed to minQuad at each iteration of the DCA loop)
## to the estimate from the previous dca() iteration
## size of working set is automatically set
set.seed(100)
fit.lin = rauc (fm, dat,lambda=.1,kernel="linear",
verbose=TRUE,maxit = 100,tol = 1e-5,
init.alpha.from.previous = TRUE,mem.efficient = TRUE,
minQuad.control = control.minQuad(
verbose = 1,maxit = 1e6,tol = 1e-4,
method = "tron",
working.set= "rv2wg")
)
## 'rbf' kernel and random working set selection
## low rank mapped to a possibly infinite-rank problem, so try a larger working set 'q'
## size of working set is set to q = 100
set.seed(100)
fit.rbf = rauc (fm, dat,lambda=.1,kernel="rbf",para = 1, verbose=TRUE,maxit = 100,tol = 1e-5,
init.alpha.from.previous = TRUE,mem.efficient = TRUE,
minQuad.control = control.minQuad(
verbose = 1,maxit = 1e6,tol = 1e-4,
q = 100,
method = "tron",
working.set= "rv2wg")
)
## End(Not run)