Nothing
#' Fit a pliable lasso model over a path of regularization values
#' @param x n by p matrix of predictors
#' @param z n by nz matrix of modifying variables. The elements of z may represent quantitative or categorical variables, or a mixture of the two.
#' Categorical varables should be coded by 0-1 dummy variables: for a k-level variable, one can use either k or k-1 dummy variables.
#' @param y n-vector of responses. The x and z variables are centered in the function. We recommmend that x and z also be standardized before the call
#' via x<-scale(x,T,T)/sqrt(nrow(x)-1); z<-scale(z,T,T)/sqrt(nrow(z)-1)
#' @param lambda decreasing sequence of lam values desired. Default NULL. If NULL, computed in function
#' @param nlambda number of lambda values desired (default 50).
#' @param family response type- either "gaussian", "binomial". In the binomial case, y should be 0s and 1s.
#' family "cox" (Prop Hazards model for right-censored data) will be implemented if there is popular demand
#' @param lambda.min.ratio the smallest value for lambda , as a fraction of
#' lambda.max, the (data derived) entry value (i.e. the smallest value for which all coefficients are zero).
#' Default is 0.001 if n>p, and 0.01 if n< p.
#' Along with "screen" (see below), this parameter is a main parameter
#' controlling the runtime of the algorithm, With a smaller lambda.min.ratio, more interactions will typically be entered.
#' If the algorithm stalls or stops near the end of the path, trying increasing lambda.min.ratio
#' On the other hand, if a more complex fit is desired, or the cross-validation curve from cv.pliable is
#' still going down at the end of the path,
#' consider decreasing lambda.min.ratio
#' @param alpha mixing parameter- default 0.5
#' @param w n-vector of observation wts (Default: all ones). Not currently used with Cox family.
#' @param thr convergence threshold for estimation of beta and joint estimation of (beta,theta).
#' Default 1e-5
#' @param penalty.factor vector of penalty factors, one for each X predictors. Default: a vector of ones
#' @param tt initial factor for Generalized grad algorithm for joint estimation of main effects and
#' interaction parameters- default 0.1/mean(x^2)
#' @param maxit maximum number of iterations in loop for one lambda. Default 10000
#' @param mxthit maximum number of theta solution iterations. Default 1000
#' @param mxkbt maximum number of backtracking iterations. Default 100
#' @param mxth maximum internal theta storage. Default 1000
#' @param kpmax maximum dimension of main effect storage. Default 100000
#' @param kthmax maximum dimension of interaction storage. Default 100000
#' @param verbose Should information be printed along the way? Default FALSE
#' @param maxinter Maximum number of interactions allowed in the model. Default 500
#' @param zlinear Should an (unregularized) linear term for z be included in the model? Default TRUE.
#' @param screen Screening quantile for features: for example a value screen=0.7 means that we keep only those features with univariate t-stats above the 0.7 quantile of all features.
#' Default is NULL, meaning that all features are kept
#'
#'
#' @details
#' This function fits a pliable lasso model over a sequence of lambda (regularization) values.
#' The inputs are a feature matrix x, response vector y
#' and a matrix of modifying variables z. The pliable lasso is a generalization of the lasso in which the weights multiplying the features x
#' are allowed to vary as a function of a set of modifying variables z. The model has the form:
#'
#' yhat=beta0+ z\%*\% betaz+ rowSums( x[,j]*beta[j] + x[,j]*z\%*\% theta[j,])
#'
#' The parameters are beta0 (scalar), betaz (nz-vector), beta (p-vector) and theta (p by nz).
#' The (convex) optimization problem is
#'
#' minimize_(beta0,betaz,beta,theta) (1/(2n))* sum_i ((y_i-yhat_i)^2) +(1-alpha)*lambda* sum_j (||(beta_j,theta_j)||_2 +||theta_j)|| +alpha*lambda* sum_j||theta_j||_1
#'
#' A blockwise coordinate descent procedure is employed
#' for the optimization.
#'
#' @return A trained pliable object with the following components:
#'
#' param: values of lambda used
#'
#' beta: matrix of estimated beta coefficients ncol(x) by length(lambda).
#'
#' theta: array of estimated theta coefficients ncol(x) by ncol(z) by length(lambda).
#'
#' betaz: estimated (main effect) coefficients for z ncol(z) by length(lambda).
#'
#' xbar: rowMeans of x
#'
#' zbar: rowMeans of z
#'
#' w: observation weights used
#'
#' args: list of arguments in the original call
#'
#' nulldev: null deviance for model
#'
#' dev.ratio: deviance/null.dev for each model in path
#'
#' nbeta: number of nonzero betas for each model in path
#'
#' nbeta.with.int: number of betas with some nonzero theta values, for each model in path
#'
#' ntheta: number of nonzero theta for each model in path
#'
#' df: rough degrees of freedom for each model in path (=nbeta)
#'
#' istor: for internal use
#'
#' fstor: for internal use
#'
#' thstor: for internal use
#'
#' jerr: for internal use
#'
#' call: the call made to pliable
#'
#'
#'
#'
#' @examples
#' # Train a pliable lasso model- Gaussian case
#' # Generate some data
#' set.seed(9334)
#' n = 20; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' mx=colMeans(x)
#' sx=sqrt(apply(x,2,var))
#' x=scale(x,mx,sx)
#' z =matrix(rnorm(n*nz),n,nz)
#' mz=colMeans(z)
#' sz=sqrt(apply(z,2,var))
#' z=scale(z,mz,sz)
#' y =4*x[,1] +5*x[,1]*z[,3]+ 3*rnorm(n)
#' # Fit model
#' fit = pliable(x,z,y)
#' fit #examine results
#' plot(fit) #plot path
#'
#' #Estimate main tuning parameter lambda by cross-validation
#' #NOT RUN
#' # cvfit=cv.pliable(fit,x,z,y,nfolds=5) # returns lambda.min and lambda.1se,
#' # the minimizing and 1se rules
#' # plot(cvfit)
#'
#' # Predict using the fitted model
#' #ntest=500
#' #xtest = matrix(rnorm(ntest*p),ntest,p)
#' #xtest=scale(xtest,mx,sx)
#' #ztest =matrix(rnorm(ntest*nz),ntest,nz)
#' #ztest=scale(ztest,mz,sz)
#' #ytest = 4*xtest[,1] +5*xtest[,1]*ztest[,3]+ 3*rnorm(ntest)
#' #pred= predict(fit,xtest,ztest,lambda=cvfit$lambda.min)
#' #plot(ytest,pred)
#'
#' #Binary outcome example
#' n = 20 ; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' mx=colMeans(x)
#' sx=sqrt(apply(x,2,var))
#' x=scale(x,mx,sx)
#' z =matrix(rnorm(n*nz),n,nz)
#' mz=colMeans(z)
#' sz=sqrt(apply(z,2,var))
#' z=scale(z,mz,sz)
#' y =4*x[,1] +5*x[,1]*z[,3]+ 3*rnorm(n)
#' y=1*(y>0)
#' fit = pliable(x,z,y,family="binomial")
#'
#'
#'
#' # Example where z is not observed in the test set, but predicted
#' # from a supervised learning algorithm
#'
#'
#' n = 20; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' mx=colMeans(x)
#' sx=sqrt(apply(x,2,var))
#' x=scale(x,mx,sx)
#' z =matrix(rnorm(n*nz),n,nz)
#' mz=colMeans(z)
#' sz=sqrt(apply(z,2,var))
#' z=scale(z,mz,sz)
#' y =4*x[,1] +5*x[,1]*z[,3]+ 3*rnorm(n)
#'
#' fit = pliable(x,z,y)
#' # predict z from x; here we use glmnet, but any other supervised method can be used
#'
#'
#' zfit=cv.glmnet(x,z,family="mgaussian")
#'
#' # Predict using the fitted model
# # NOT RUN
#' # ntest=100
#' #xtest =matrix(rnorm(ntest*nz),ntest,p)
#' #xtest=scale(xtest,mx,sx)
#' #ztest =predict(zfit,xtest,s=cvfit$lambda.min)[,,1]
#' #ytest = 4*xtest[,1] +5*xtest[,1]*ztest[,3]+ 3*rnorm(ntest)
#'
#' #pred= predict(fit,xtest,ztest)
#' #plot(ytest,pred[,27]) # Just used 27th path member, for illustration; see cv.pliable for
#' # details of how to cross-validate in this setting
#'
#'
#'
#' @seealso
#' cv.pliable, predict.pliable, plot.pliable, plot.cv.pliable
#'
#' @export
#'
#'
pliable <- function(x,z,y,lambda=NULL,nlambda=50,alpha=.5,family=c("gaussian","binomial"),
lambda.min.ratio=NULL,
w=NULL,thr=1e-5,tt=NULL,penalty.factor=rep(1,ncol(x)), maxit=10000,mxthit=100,mxkbt=100,
mxth=1000,
kpmax=100000,kthmax=100000,verbose=FALSE, maxinter=500, zlinear=TRUE, screen=NULL){
this.call = match.call()
# debug=F
# if(debug){
# lambda=NULL
# nlambda=50
# alpha=.5
# lambda.min.ratio=NULL
# w=rep(1,length(y))
# thr=1e-5
# tt=NULL
# maxit=10000
# mxthit=100
# mxkbt=100
# mxth=1000
# kpmax=100000
# kthmax=100000
# verbose=TRUE
# maxinter=500
# zlinear=TRUE
# screen=NULL
# }
SMALL=1e-8
family=match.arg(family)
if(family=="cox" & !is.null(w)) cat("wts ignored for Cox family",fill=T)
if(is.null(w)) w=rep(1,length(y))
if(family=="gaussian") errfun=errfun.gaussian
if(family=="binomial") errfun=errfun.binomial
if(family=="cox") errfun=function(yhat,coxinfo) {devc(no,coxinfo$kq, coxinfo$ddq, yhat, coxinfo$iriskq,status)}
if(!is.matrix(x)) x=matrix(x,ncol=1)
if(!is.matrix(z)) z=matrix(z,ncol=1)
if(family=="cox"){
# order data by y
o=order(y-.0001*status)
x= x[o,]
z=z[o,]
y=y[o]
status=status[o]
w=w[o]
}
# browser()
coxinfo=NULL
if(family=="cox") coxinfo=initcx(x,y,status)
vz=apply(z,2,var)
if(any(vz< SMALL)){
mess=which(vz<SMALL)[1];for(i in which(vz<SMALL)[-1] ){
mess=paste(mess,",",i,sep="")
}
# browser()
stop(c("Columns ", mess, " of z are constant"))
}
xbar=colMeans(x)
zbar=colMeans(z)
x=scale(x,xbar,F)
z=scale(z,zbar,F)
orig.x=x
if(!is.null(screen)){
ni.original=ncol(x)
if(family=="gaussian") ttv=quantitative.func(t(x),y)$tt
if(family=="binomial") ttv=ttest.func(t(x),y)$tt
kee=which(abs(ttv)>= quantile(abs(ttv),screen))
x=x[,kee]
}
# browser()
no=nrow(x)
ni=ncol(x)
nz=ncol(z)
nt=ni+nz
y=as.vector(y)
if(is.null(lambda.min.ratio)){
if( (family=="gaussian" | family=="cox") & no>ni) lambda.min.ratio=.001
if( (family=="gaussian" | family=="cox") & no<ni) lambda.min.ratio=.01
if(family=="binomial" & no>ni) lambda.min.ratio=.001
if(family=="binomial" & no<ni) lambda.min.ratio=.01
}
## error checking of input args
if (alpha > 1) {
warning("alpha >1; set to 1")
alpha = 1
}
if(any(penalty.factor<0)) stop("Penalty factors should be ge 0")
if(any(penalty.factor>0)) penalty.factor=ni*penalty.factor/sum(penalty.factor)
if (alpha < 0) {
warning("alpha<0; set to 0")
alpha = 0
}
if(!is.null(lambda)) nlambda=length(lambda)
if(!is.null(lambda)) {if(any(lambda<0)) stop("lambda must be non-negative")}
if(family=="binomial" & (sum(y==0)+sum(y==1))!=no) stop("y must be 0s and 1s for binomial family")
if(family=="cox"){
if( is.null(status)) stop("with family='cox' need to provide status argument")
if( any(y<=0)) stop("outcome y must be non-negative with family='cox'")
if((sum(status==1)+sum(status==0))!=length(y)) stop("status variable must be 1 or 0")
}
###
if(is.null(lambda)){
r=y
if(zlinear) {
r=lsfit(z,y)$res
if(sum(r^2)< 0.0001) cat("linear model in z overfit; consider rerunning with zlinear=F",fill=T)
}
if(family=="gaussian" | family=="binomial") lammax=max(abs(t(x)%*%r)/length(r))/(1-alpha)
if(family=="cox") {
junk2=calcz(x,coxinfo$kq,rep(0,no),status,coxinfo$iriskq,coxinfo$ddq,rep(0,ncol(x)))
ll <- as.vector(junk2$wz)
l <- as.vector(junk2$grad)
r = - l/ll
if(zlinear){
wt=ll;wt[wt<0]=0
zcoef=lsfit(z,r,intercept=F,wt=wt)$coef
}
lammax=max(abs(t(x) %*% (-ll * r))/no)/(1-alpha)
}
lambda= exp(seq(log(lammax),log(lammax*lambda.min.ratio),length=nlambda))
}
yhat=rep(0,no)
ybar=0
if(family=="gaussian"){
ybar=mean(y)
y=y-ybar
}
nlambda=length(lambda)
xx=cbind(x,z)
ulam=lambda
if(is.null(tt)) tt=.1/mean(x^2)
if(verbose) {cat(c("initial value of backtrack parameter tt=",tt),fill=T);cat("",fill=T)}
iverbose=1*verbose
kbos=iverbose
linenter=1*zlinear
mlam=0
mode(xx)="double"
mode(y)="double"
mode(w)="double"
mode(alpha)="double"
mode(nlambda)="integer"
mode(ulam)="double"
mode(kbos)="integer"
mode(maxinter)="integer"
mode(linenter)="integer"
mode(thr)="double"
mode(penalty.factor)="double"
mode(tt)="double"
mode(maxit)="integer"
mode(mxthit)="integer"
mode(mxkbt)="integer"
mode(mxth)="integer"
mode(kpmax)="integer"
mode(kthmax)="integer"
mode(mlam)="integer"
# if(!is.loaded("pliable")) dyn.load("/Users/tibs/dropbox/PAPERS/weightree2/jerry/pliable2.so")
# if(family=="binomial" & !is.loaded("logpliable")) dyn.load("/Users/tibs/dropbox/PAPERS/weightree2/jerry/logpliable.so")
if(family=="gaussian"){
out=.Fortran("pliable",
no,ni,nz,xx,y,w,alpha,nlambda,ulam,kbos,maxinter,linenter,thr,penalty.factor,maxit,tt,mxthit, mxkbt,mxth,kpmax,kthmax,mlam,
a0=double(nlambda),
lp=integer(2*nlambda),
istor=integer(2*kpmax),
fstor=double(kpmax),
thstor=double(kpmax),
jerr=integer(1),
PACKAGE="pliable"
)
}
if(family=="binomial"){
out=.Fortran("logpliable",
no,ni,nz,xx,y,w,alpha,nlambda,ulam,kbos,maxinter,linenter,thr,penalty.factor,maxit,tt,mxthit, mxkbt,mxth,kpmax,kthmax,mlam,
a0=double(nlambda),
lp=integer(2*nlambda),
istor=integer(2*kpmax),
fstor=double(kpmax),
thstor=double(kpmax),
jerr=integer(1),
PACKAGE="pliable"
)
}
if(family=="cox"){
mode(status)="integer"
out=.Fortran("coxpliable",
no,ni,nz,xx,y,status,w,alpha,nlambda,ulam,kbos,maxinter,linenter,
thr,maxit,tt,mxthit, mxkbt,mxth,kpmax,kthmax,mlam,
a0=double(nlambda),
lp=integer(2*nlambda),
istor=integer(2*kpmax),
fstor=double(kpmax),
thstor=double(kpmax),
jerr=integer(1),
PACKAGE="pliable"
)
}
out[1:17]=NULL #remove blank stuff for return
#
if(out$jerr<0){
cat(c("jerr=",out$jerr),fill=T)
cat(c("max iters exceeded, jerr=",out$jerr),fill=T)
}
if(out$jerr>0){
cat(c("jerr=",out$jerr),fill=T)
stop(c("Storage error in pliable, jerr=",out$jerr))
}
out$istor=matrix(out$istor,nrow=2)
out$lp=matrix(out$lp,nrow=2)
out$lambda=lambda
out$args=list(nlambda=nlambda,alpha=alpha,lambda.min.ratio=lambda.min.ratio,w=w,thr=thr,penalty.factor=penalty.factor,tt=tt,maxit=maxit,mxthit=mxthit,mxkbt=mxkbt,mxth=mxth,kpmax=kpmax,kthmax=kthmax,verbose=verbose,screen=screen)
beta=matrix(0,nrow=ncol(x),ncol=nlambda)
betaz=matrix(0,nrow=ncol(z),ncol=nlambda)
theta=array(0,c(ncol(x),ncol(z),nlambda))
nz.actual=nz*(zlinear)
for(k in 1:nlambda){
# cat(k)
# if(k==18) browser()
if(ncol(out$lp)>0 ){
if(out$lp[1,k]>0){
junk=modsoln(out,ni,nz,k)
#browser()
nb=length(junk$iv)-nz.actual
kv=junk$kv
if(nb>0) beta[junk$iv[1:nb],k]=junk$av[1:nb]
zlist=(nb+1):length(junk$iv)
if(zlinear){
iz=junk$iv[zlist]-ncol(x)
betaz[iz,k]=junk$av[zlist]
}
# kv = number of non-zero coefficients
# iv(kv) = identies of non-zero coefficients
# av(kv) = corresponding non-zero values
# it(kv) = interaction pointer for each non-zero coefficient:
# it(j)=0 => no interaction for iv(j) coefficient
# it(j)>0 => interaction coefficients (thetas) in th(1:nz,it(j))
# kz = number of interacting coefficient sets for this solution
# th(nz,kz) = all interaction coefficient sets for this solution
th=t(matrix(junk$th,nrow=nz))
# iv=rev(rev(junk$iv)[-(1:nz)]) #remove last nz components corr to z vars
# it=rev(rev(junk$it)[-(1:nz)])
inds=junk$iv[junk$it>0]
# inds=inds[inds<=ni]
if(length(inds)>0) theta[inds, ,k]=th[1:length(inds),]
}}}
out$family=family
out$beta=beta
out$df=colSums(beta!=0)
out$theta=theta
out$betaz=betaz
out$xbar=xbar
out$zbar=zbar
out$ybar=ybar
out$w=w
if(!is.null(screen)){
tbeta=matrix(0,ni.original,ncol(out$beta))
tbeta[kee,]=out$beta
out$beta=tbeta
ttheta=array(0,c(ni.original,dim(out$theta)[2],dim(out$theta)[3]))
ttheta[kee,,]=out$theta
out$theta=ttheta
}
# have to uncenter x and z below, to make it work with predict function
if(family=="gaussian") {
pred=predict.pliable(out,scale(orig.x,-xbar,FALSE), scale(z,-zbar,FALSE),type="response")
dev=colMeans( errfun(y+ybar,pred,w))
out$nulldev=mean(errfun(y+ybar,ybar,w))
}
if( family=="binomial") {
pred=predict.pliable(out,scale(orig.x,-xbar,FALSE), scale(z,-zbar,FALSE),type="response")
dev=colMeans( errfun(y,pred,w))
out$nulldev=mean(errfun(y,mean(y),w))
}
if(family=="cox"){
pred=predict.pliable(out,scale(orig.x,-xbar,FALSE), scale(z,-zbar,FALSE),type="link")
dev=apply(pred,2,errfun,coxinfo)
out$nulldev=errfun(rep(0,length(y)),coxinfo)
}
# dev=dev[!is.na(dev)]
# out$nulldev=mean( (y-mean(y))^2)
out$dev.ratio= dev/out$nulldev
out$dev.ratio=out$dev.ratio[!is.na(out$dev.ratio)]
oo=rev(colSums(abs(out$beta!=0))) # find zeros at end of path and remove them
o2=which(cumsum(oo)==0)
if(length(o2)>0){
ooo=max(o2)
ooo=max(ooo,0)
nonzero=1:(length(out$dev.ratio)-ooo)
out$dev.ratio=out$dev.ratio[nonzero]
out$beta=out$beta[,nonzero]
out$theta=out$theta[,,nonzero]
out$betaz=out$betaz[,nonzero]
out$lambda=out$lambda[nonzero]
}
if(any(diff(out$dev.ratio)>.01)) cat(c("Caution: deviance increasing along path; rerun with verbose=TRUE to
see the value of backtrack parameter tt, and then try rerunning pliable with tt= half its current value of ",tt))
out$nbeta=colSums(out$beta!=0)
out$nbeta.with.int=apply(apply(out$theta!=0,c(1,3),sum),2,sum)
out$ntheta=apply(out$theta!=0,c(3),sum)
out$coxinfo=coxinfo
out$pred=pred
out$call=this.call
class(out)="pliable"
return(out)
}
tn=function(x) sqrt(sum(x*x))
modsoln=function(out,ni,nz,k){
# browser()
istor=out$istor
fstor=out$fstor
thstor=out$thstor
lpp=out$lp[,k]
mode(lpp)="integer"
mode(istor)="integer"
mode(fstor)="double"
mode(nz)="integer"
mode(thstor)="double"
mxth=out$args$mxth
out2=.Fortran("modsoln",
nz,lpp,istor,fstor,thstor,kv=integer(1) ,iv=integer(500000),av=double(500000),it=integer(500000),kz=integer(1),
th=double(nz*mxth))
#make sure dims of iv,av,it ar big enough!
#if(k==25) browser()
iv=out2$iv[1:out2$kv]
av=out2$av[1:out2$kv]
it=out2$it[1:out2$kv]
th=out2$th
out=list(kv=out2$kv,iv=iv,av=av,it=it,kz=out2$kz,th=th)
return(out)
}
#' Compute predicted values from a fitted pliable object
#' Make predictions from a fitted pliable lasso model
#' @param object object returned from a call to pliable
#' @param x n by p matrix of predictors
#' @param z n by nz matrix of modifying variables. These may be observed or the predictions from a supervised learning
#' algorithm that predicts z from test features x and possibly other features. See example below
#'
#' @param type Returns either the fitted values with type="link" or "response" or the parameter estimates when type="coefficients" . Type "link" gives the linear
#' predictors for binomial, or
#' cox models; for gaussian models it gives the fitted
#' values. Type "response" gives the fitted probabilities for
#' binomial,
#' and the fitted relative-risk for cox; for gaussian
#' type "response" is equivalent to type "link".
#' @param lambda values of lambda at whcih predictions are desired. If NULL (default), the path of lambda values from the fitted model.
#' are used. If lambda is not NULL, the predictions are made at the closest values to lambda in the lambda path from the fitted model;
#' @param verbose Should information should be printed along the way? Default FALSE.
#' @param ... Further arguments (not used)
#' @return predicted values
#'
#' @examples
#' # Train a pliable lasso model
#' n = 20; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' z =matrix(rnorm(n*nz),n,nz)
#' y = x[,1] +x[,1]*z[,3]+ rnorm(n)
#' fit = pliable(x,z,y)
#' # plot coefficient profiles, indicating z-interactions with a "x" symbol
#' plot(fit)
#'
#'
#' # Predict using the fitted model
#' ntest=500
#' xtest = matrix(rnorm(ntest*p),ntest,p)
#' ztest =matrix(rnorm(ntest*nz),ntest,nz)
#'
#' pred= predict(fit,xtest,ztest)
#'
#' #Example where z is not observed in the test set, but predicted from a supervised
#' # learning method
#'
#' library(glmnet)
#' n = 20; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' z =matrix(rnorm(n*nz),n,nz)
#' y = x[,1] +x[,1]*z[,3]+ rnorm(n)
#'fit = pliable(x,z,y)
#' # predict z from x; here we use glmnet, but any other supervised learning method
#' # could be used
#' zfit=glmnet(x,z,family="mgaussian")
#'
#' # Predict using the fitted model
#' ntest=500
#' xtest = matrix(rnorm(ntest*p),ntest,p)
#' ztest =predict(zfit,xtest,s=zfit$lambda.min)[,,20]
#'
#' pred= predict(fit,xtest,ztest)
#'
#' @export
predict.pliable <- function(object,x,z, type=c("link","response", "coefficients"), lambda=NULL, verbose=FALSE, ...){
# note- current version uses closest lambda to the values used in the fitted model;
# in the future, linear interpolation or exact recomputation should be used
type = match.arg(type)
lambda.arg=lambda
if(is.null(lambda.arg))
{ lambda=object$lambda;isel=1:length(lambda)}
if(!is.null(lambda.arg)){
isel=as.numeric(knn1(matrix(object$lambda,ncol=1),matrix(lambda.arg,ncol=1),1:length(object$lambda)))
}
if(type=="link" | type=="response"){
if(!is.matrix(z)) z=matrix(z,ncol=1)
yh=matrix(NA,nrow(x),length(isel))
ii=0
for(m in isel){
ii=ii+1
if(verbose) cat(c("m=",m),fill=T)
if(ncol(object$lp)>0){
yh[,ii]=predict.pliable1(object,x,z,m,type=type)
}
}
return(yh)
}
if(type=="coefficients"){
beta=object$beta[,isel]
theta=object$theta[,,isel]
betaz=object$betaz[,isel]
return(list(betaz=betaz,beta=beta,theta=theta))
}
}
predict.pliable1=function(object,x,z,m,type=c("link","response", "coefficients")){
type = match.arg(type)
ni=ncol(x)
nz=ncol(z)
n=nrow(x)
# cat(colMeans(x),fill=T)
xc=scale(x,object$xbar,F)
zc=scale(z,object$zbar,F)
yhat=object$ybar+object$a0[m]+ zc%*%object$betaz[,m]+xc%*%object$beta[,m] #note that object$ybar is zero for binomial
# yhat=yhat+fit$zbar*fit$betaz[,m]+ sum(fit$xbar*fit$beta[,m])
ind=which(rowSums(object$theta[,,m,drop=F]!=0)>0)
for(j in ind){
xz=matrix(xc[,j],nrow(z),ncol(z))*zc
yhat=yhat+xz%*%object$theta[j,,m]
# yhat=yhat+object$xbar[j]*z%*%object$theta[j,,m]
# yhat=yhat+x[,j]*sum(object$zbar*object$theta[j,,m])
# yhat=yhat-object$xbar[j]*sum(object$zbar*object$theta[j,,m])
}
if(type=="response" & object$family=="binomial") yhat=1/(1+exp(-yhat))
if(type=="response" & object$family=="cox") yhat=exp(yhat)
return(yhat)
}
pliableMulti=function(x,z,y,lambda=NULL,nlambda=50,family="binomial",alpha=.5,lambda.min.ratio=NULL,w=rep(1,length(y)),thr=1e-5,tt=NULL, maxit=1000,mxthit=100,mxkbt=100,
mxth=1000,
kpmax=100000,kthmax=100000,verbose=TRUE, maxinter=1000, zlinear=TRUE){
nclass=length(table(y))
fit=vector("list",nclass)
for(k in 1:nclass){
yy=1*(y==k)
if(k==1) {lambda=lambda; nlambda=nlambda}
if(k>1) {lambda=fit[[k-1]]$lambda; nlambda=NULL}
fit[[k]]=pliable(x,z,yy,family=family,lambda=lambda,nlambda=nlambda,alpha=alpha,lambda.min.ratio=lambda.min.ratio,w=w,
thr=thr,tt=tt, maxit=maxit,mxthit=mxthit,mxkbt=mxkbt, mxth=mxth,
kpmax=kpmax,kthmax=kthmax,verbose=verbose, maxinter=maxinter, zlinear=zlinear)
}
class(fit)="pliableMulti"
return(fit)
}
predict.pliableMulti=
function(fit,x,z, type=c("response", "coefficients"), lambda=NULL, verbose=FALSE){
# note- current version uses closest lambda to the values used in the fitted model;
# in the future, linear interpolation or exact recomputation should be used
type = match.arg(type)
nclass=length(fit)
out=vector("list",nclass)
for(k in 1:nclass){
out[[k]]=predict.pliable(fit[[k]],x,z,type=type,lambda=lambda,verbose=verbose)
}
if(type=="coefficients") return(out)
if(type=="response"){
yhat=array(NA,c(nrow(out[[k]]),ncol(out[[k]]),nclass))
for(k in 1:nclass){
yhat[,,k]=out[[k]]
}
return(yhat)
}}
critjerry=function(bb,x,z,y,k,alpha=.5){
twonorm=function(x){ sqrt(sum(x*x))}
n=length(y)
p=ncol(x)
nz=ncol(z)
lam=bb$lam[k]
beta2=bb$beta[,k]
yhat=z%*%bb$betaz[,k]+x%*%beta2
theta=bb$theta[,,k]
if(!is.matrix(theta)) theta=matrix(theta,ncol=1)
for(j in 1:p){
xz=matrix(x[,j],nrow(z),ncol(z))*z
yhat=yhat+xz%*%theta[j,]
}
pen=0
for(j in 1:p){
pen1=twonorm(theta[j,])+twonorm(c(beta2[j],theta[j,]))
pen=pen+(1-alpha)*lam*pen1+alpha*lam*sum(abs(theta[j,]))
}
out=(1/(2*n))*sum((y-yhat)^2) +pen
return(list(out=out,pen=pen))
}
#' Carries out cross-validation for a pliable lasso model over a path of regularization values
#' @param fit object returned by the pliable function
#' @param x n by p matrix of predictors
#' @param z n by nz matrix of modifying variables. Note that z may be observed or the predictions from a supervised learning
#' algorithm that predicts z from validation set features x. IMPORTANT: in the latter case, the z matrix must be pre-computed before
#' calling cv.pliable, using the same CV folds used by cv.pliable. See the example below
#' @param y n-vector of responses. All variables are centered in the function.
#' @param nfolds number of cross-validation folds
#' @param foldid vector with values in 1:K, indicating folds for K-fold CV. Default NULL
#' @param keep Should pre-validated fits be returned? Default FALSE
#' @param type.measure Error measure for cross-validation: "deviance" (mse) for gaussian family, "deviance" or "class" (misclassification error) for
#' binomial family
#' @param verbose Should information be printed along the way? Default FALSE
#' @return predicted values
#' #'
#' @examples \donttest{
#' # Train and cross-validate a pliable lasso model- Gaussian case
#' n = 20 ; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' mx=colMeans(x)
#' sx=sqrt(apply(x,2,var))
#' x=scale(x,mx,sx)
#' z =matrix(rnorm(n*nz),n,nz)
#' mz=colMeans(z)
#' sz=sqrt(apply(z,2,var))
#' z=scale(z,mz,sz)
#' y =4*x[,1] +5*x[,1]*z[,3]+ 3*rnorm(n)
#fit the model
#' fit = pliable(x,z,y)
#'
#' cvfit=cv.pliable(fit,x,z,y,nfolds=5)
#' plot(cvfit)
#'
#' # Example of categorical z with 4 levels
#' n = 20; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' mx=colMeans(x)
#' sx=sqrt(apply(x,2,var))
#' x=scale(x,mx,sx)
#' z =sample(1:4,size=n,replace=T)
#' zi=model.matrix(~as.factor(z)-1)
#' y = x[,1] +x[,1]*zi[,3]-2*x[,1]*zi[,4]+rnorm(n)
#'fit = pliable(x,zi,y)
#'
#' # Train and cross-validate a pliable lasso model- Binomial case
#' n = 20; p = 3 ;nz=3
#' x = matrix(rnorm(n*p), n, p)
#' mx=colMeans(x)
#' sx=sqrt(apply(x,2,var))
#' x=scale(x,mx,sx)
#' z =matrix(rnorm(n*nz),n,nz)
#' mz=colMeans(z)
#' sz=sqrt(apply(z,2,var))
#' z=scale(z,mz,sz)
#' y =4*x[,1] +5*x[,1]*z[,3]+ 3*rnorm(n)
#' y= 1*(y>0)
#' fit = pliable(x,z,y)
#'
#' cvfit=cv.pliable(fit,x,z,y,type.measure="class", nfolds=5)
#' plot(cvfit)
#'
#'
#' # Example where z is not observed in the test set,
#' # but predicted from a supervised learning algorithm
#' # NOT RUN
#'
#'#library(glmnet)
#'# n = 20 ; p = 4 ;nz=3
#'# x = matrix(rnorm(n*p), n, p)
#'# mx=colMeans(x)
#'# sx=sqrt(apply(x,2,var))
#'# x=scale(x,mx,sx)
#'# z =matrix(rnorm(n*nz),n,nz)
#'# mz=colMeans(z)
#'# sz=sqrt(apply(z,2,var))
#'# z=scale(z,mz,sz)
#' #y =4*x[,1] +5*x[,1]*z[,3]+ 3*rnorm(n)
#' #fit = pliable(x,z,y)
#'
#' #nfolds=5
#' # set.seed(3321)
#' #foldid = sample(rep(seq(nfolds), length = n))
#'
#' # predict z from x; here we use glmnet, but any other supervised learning procedure
#' # could be used
#' #zhat=matrix(NA,n,ncol(z))
#' #for(ii in 1:nfolds){
#' # zfit=cv.glmnet(x[foldid!=ii,],z[foldid!=ii,],family="mgaussian")
#' # zhat[foldid==ii,]=predict(zfit,x[foldid==ii,],s=zfit$lambda.min)
#' #}
#'
#' #NOTE that the same foldid vector must be passed to cv.pliable
#' #cvfit=cv.pliable(fit,x,zhat,y,foldid=foldid)
#' #plot(cvfit)
#' #}
#'
#' @export
cv.pliable <-
function(fit,x,z,y,nfolds=10,foldid=NULL,keep=F,type.measure=c("deviance","class"),verbose=TRUE){
debug=F
status=NULL # Cox model not yet implemented
if(debug){
nfolds=10
foldid=NULL
keep=F
type.measure=c("deviance","class")
verbose=TRUE
}
#put data in time order- need to do this, even though pliable does it too
if(fit$family=="cox") {
o=order(y-.0001*status)
x= x[o,]
z=z[o,]
y=y[o]
status=status[o]
}
type.measure=match.arg(type.measure)
if(fit$family=="gaussian") errfun=errfun.gaussian
if(fit$family=="binomial" & type.measure=="deviance" ) errfun=errfun.binomial
if(fit$family=="binomial" & type.measure=="class" ) errfun=function(y,yhat,w) 1*(y!=yhat)*w
# if(fit$family=="cox") errfun=function(yhat,coxinfo) {devc(no,coxinfo$kq, coxinfo$ddq, yhat, coxinfo$iriskq, status)}
if(fit$family=="cox" & is.null(status)) stop("Must supply status arg with family='cox'")
BIG=10e9
ni=ncol(x)
no=length(y)
nz=ncol(z)
if(fit$family=="cox") coxinfo.folds=vector("list",nfolds)
ggg=vector("list",nfolds)
yhat=array(NA,c(no,length(fit$lambda)))
if(is.null(foldid)) foldid = sample(rep(seq(nfolds), length = no))
nfolds=length(table(foldid))
status.in=NULL
for(ii in 1:nfolds){
oo=foldid==ii
if(fit$family=="cox") status.in=status[!oo]
if(verbose) cat(c("\n","FOLD=",ii),fill=T)
ggg[[ii]]=pliable(x[!oo,,drop=F],z[!oo,,drop=F],y[!oo],family=fit$family,lambda=fit$lambda,alpha=fit$args$alpha ,lambda.min.ratio=fit$args$lambda.min.ratio,w=fit$w[!oo],thr=fit$args$thr,maxit=fit$args$maxit, tt=fit$args$tt, mxthit=fit$args$mxthit ,mxkbt=fit$args$mxkbt, mxth=fit$args$mxth,kpmax=fit$args$kpmax,kthmax=fit$args$kthmax,screen=fit$args$screen)
if(fit$family=="gaussian" | fit$family=="binomial") yhat[oo,]=predict.pliable(ggg[[ii]],x[oo,,drop=F],z[oo,])
if(fit$family=="cox") coxinfo.folds[[ii]]=ggg[[ii]]$coxinfo
}
nonzero=colSums(fit$beta!=0)
yhat.preval=NULL
ym=array(y,dim(yhat))
if(type.measure=="class") yhat=1*(yhat>mean(y))
if(fit$family=="gaussian" | fit$family=="binomial") {
err=errfun(ym,yhat,fit$w)
cvm=apply(err,2,mean,na.rm=T)
nn=apply(!is.na(err),2,sum,na.rm=T)
cvsd=sqrt(apply(err,2,var,na.rm=T)/nn)
if(keep) yhat.preval=yhat
}
if(fit$family=="cox") {
err=matrix(NA,nfolds,length(fit$lambda))
for(ii in 1:nfolds){
oo=foldid==ii
fit1=predict.pliable(ggg[[ii]],x,z)
fit2=predict.pliable(ggg[[ii]],x[!oo,,drop=F],z[!oo,])
for(k in 1:length(fit$lambda)){
dev1= devc(no, fit$coxinfo$kq, fit$coxinfo$ddq, fit1[,k], fit$coxinfo$iriskq, status)
dev2= devc(sum(!oo), coxinfo.folds[[ii]]$kq, coxinfo.folds[[ii]]$ddq, fit2[,k], coxinfo.folds[[ii]]$iriskq, status[!oo])
err[ii,k]=dev1-dev2
}
}
cvm=apply(err,2,mean,na.rm=T)
nn=apply(!is.na(err),2,sum,na.rm=T)
cvsd=sqrt(apply(err,2,var,na.rm=T)/nn)
}
cvm.nz=cvm; cvm.nz[nonzero==0]=BIG
imin=which.min(cvm.nz)
imin.1se=which(cvm< cvm[imin]+cvsd[imin])[1]
out=list(lambda=fit$lambda,cvm=cvm,cvsd=cvsd,cvup = cvm +
cvsd, cvlo = cvm - cvsd, nz=nonzero, df=nonzero,yhat.preval=yhat.preval,lambda.min=fit$lambda[imin],
lambda.1se=fit$lambda[imin.1se],name="Error")
class(out)="cv.pliable"
return(out)
}
plot.pliable=
function (x, xvar = c("norm", "lambda", "dev"), label =TRUE,
...)
{
xvar = match.arg(xvar)
plotCoef(x$beta, theta=x$theta, lambda = x$lambda, df = x$df, dev = x$dev.ratio,
label = label, xvar = xvar, ...)
}
plot.cv.pliable =
function (x, sign.lambda = 1, ...)
{
cvobj = x
xlab = "log(Lambda)"
if (sign.lambda < 0)
xlab = paste("-", xlab, sep = "")
plot.args = list(x = sign.lambda * log(cvobj$lambda), y = cvobj$cvm,
ylim = range(cvobj$cvup, cvobj$cvlo), xlab = xlab, ylab = cvobj$name,
type = "n")
new.args = list(...)
if (length(new.args))
plot.args[names(new.args)] = new.args
do.call("plot", plot.args)
error.bars(sign.lambda * log(cvobj$lambda), cvobj$cvup, cvobj$cvlo,
width = 0.01, col = "darkgrey")
points(sign.lambda * log(cvobj$lambda), cvobj$cvm, pch = 20,
col = "red")
axis(side = 3, at = sign.lambda * log(cvobj$lambda), labels = paste(cvobj$nz),
tick = FALSE, line = 0)
abline(v = sign.lambda * log(cvobj$lambda.min), lty = 3)
abline(v = sign.lambda * log(cvobj$lambda.1se), lty = 3)
invisible()
}
error.bars <-function(x, upper, lower, width = 0.02, ...) {
xlim <- range(x)
barw <- diff(xlim) * width
segments(x, upper, x, lower, ...)
segments(x - barw, upper, x + barw, upper, ...)
segments(x - barw, lower, x + barw, lower, ...)
range(upper, lower)
}
plotCoef=
function (beta, theta, norm, lambda, df, dev, label = FALSE, xvar = c("norm","lambda",
"dev"), xlab = iname, ylab = "Coefficients", ...)
{
which = nonzeroCoef(beta)
nwhich = length(which)
switch(nwhich + 1, `0` = {
warning("No plot produced since all coefficients zero")
return()
}, `1` = warning("1 or less nonzero coefficients; glmnet plot is not meaningful"))
sbeta=beta
beta = as.matrix(beta[which, , drop = FALSE])
xvar = match.arg(xvar)
switch(xvar, norm = {
index = if (missing(norm)) {apply(abs(beta), 2, sum)+apply(abs(theta),3,sum)} else norm
iname = "L1 Norm"
approx.f = 1
}, lambda = {
index = log(lambda)
iname = "Log Lambda"
approx.f = 0
}, dev = {
index = dev
iname = "Fraction Deviance Explained"
approx.f = 1
})
dotlist = list(...)
type = dotlist$type
if (is.null(type))
matplot(index, t(beta), lty = 1, xlab = xlab, ylab = ylab,
type = "l", ...)
else matplot(index, t(beta), lty = 1, xlab = xlab, ylab = ylab,
...)
atdf = pretty(index)
prettydf = approx(x = index, y = df, xout = atdf, rule = 2,
method = "constant", f = approx.f)$y
axis(3, at = atdf, labels = prettydf, tcl = NA)
if (label) {
nnz = length(which)
xpos = max(index)
pos = 4
if (xvar == "lambda") {
xpos = min(index)
pos = 2
}
xpos = rep(xpos, nnz)
ypos = beta[, ncol(beta)]
text(xpos, ypos, paste(which), cex = 0.5, pos = pos)
}
act=which(rowSums(abs(beta))>0)
ntheta= apply(theta!=0,c(1,3),sum)
for(j in act){
for(i in 1:length(index)){
if(ntheta[j,i]>0) text(index[i],sbeta[j,i],label="x",cex=.7)
}}
}
print.pliable=function(x,digits = max(3, getOption("digits") - 3),...){
cat("\nCall: ", deparse(x$call), "\n\n")
print(cbind(Lambda=signif(x$lambda,digits), "%Dev"=signif(x$dev.ratio,digits),"Num of main effects"=x$nbeta,"Num with ints"=x$nbeta.with.int,"Tot num of ints"=x$ntheta
))
}
#' Return estimated coefficients from a fitted pliable model.
#' @param object object returned from a call to pliable
#' @param lambda values of lambda at whcih predictions are desired. If NULL (default), the path of lambda values from the fitted model.
#' are used. If lambda is not NULL, the predictions are made at the closest values to lambda in the lambda path from the fitted model;
#' @param ... Further arguments (not used)
#.
#'
#' @return estimated parameters
coef.pliable=
function (object, lambda = NULL,...)
predict(object, lambda=lambda, type = "coefficients")
encode=function(x,vals=NULL){
if(is.null(vals)) vals=names(table(x))
xx=matrix(0,length(x),length(vals))
for(i in 1:length(x)){ xx[i,which(x[i]==vals)]=1}
dimnames(xx)=list(NULL,vals)
return(xx)
}
nonzeroCoef=
function (beta, bystep = FALSE)
{
nr = nrow(beta)
if (nr == 1) {
if (bystep)
apply(beta, 2, function(x) if (abs(x) > 0)
1
else NULL)
else {
if (any(abs(beta) > 0))
1
else NULL
}
}
else {
beta = abs(beta) > 0
which = seq(nr)
ones = rep(1, ncol(beta))
nz = as.vector((beta %*% ones) > 0)
which = which[nz]
if (bystep) {
if (length(which) > 0) {
beta = as.matrix(beta[which, , drop = FALSE])
nzel = function(x, which) if (any(x))
which[x]
else NULL
which = apply(beta, 2, nzel, which)
if (!is.list(which))
which = data.frame(which)
which
}
else {
dn = dimnames(beta)[[2]]
which = vector("list", length(dn))
names(which) = dn
which
}
}
else which
}
}
errfun.gaussian=function(y,yhat,w=rep(1,length(y))){ ( w*(y-yhat)^2) }
errfun.binomial=function(y,yhat,w=rep(1,length(y))){
prob_min = 1e-05
prob_max = 1 - prob_min
predmat = pmin(pmax(yhat, prob_min), prob_max)
-2*w*(y*log(predmat)+(1-y)*log(1-predmat))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.