#' 2-Wasserstein distance linear projections with an \eqn{L_1} penalty
#'
#' @param X An n x p matrix of covariates
#' @param Y An n x s matrix of predictions
#' @param theta optional parameter matrix for selection methods. Should be p x s.
#' @param penalty Form of penalty. One of "lasso", "ols", "mcp", "elastic.net","selection.lasso", "scad", "mcp.net", "scad.net", "grp.lasso", "grp.lasso.net", "grp.mcp","grp.scad", "grp.mcp.net", "grp.scad.net", "sparse.grp.lasso"
#' @param method "selection.variable" or "projection
#' @param transport.method Method for calculating the Wasserstein distance. One of "exact", "sinkhorn", "greenkhorn","hilbert"
#' @param epsilon Penalty parameter for Sinkhorn and Greenkhorn and optimal transport
#' @param OTmaxit Maximum iterations for the optimal transport iterations
#' @param model.size The maximum number of desired covariates. Defaults to the number of covariates.
#' @param lambda Penalty parameter for lasso regression. See \link[oem]{oem}.
#' @param nlambda Number of lambda values. See \link[oem]{oem}.
#' @param lambda.min.ratio Minimum lambda ratio for self selected lambda. See \link[oem]{oem}.
#' @param alpha elastic net mixing. See \link[oem]{oem}.
#' @param gamma tuning parameters for SCAD and MCP. See \link[oem]{oem}.
#' @param tau mixing parameter for sparse group lasso. See \link[oem]{oem}.
#' @param groups A vector of grouping values. See \link[oem]{oem}.
#' @param scale.factor Value to standardize the covariates by. Typically, is the standard deviation. Should have length 1 or length same as the number of covariates
#' @param penalty.factor Penalty factor for OEM. See \link[oem]{oem}.
#' @param group.weights Weights for group lasso. See \link[oem]{oem}.
#' @param maxit Max iteration for OEM. See \link[oem]{oem}.
#' @param tol Tolerance for OEM. See \link[oem]{oem}.
#' @param irls.maxit IRLS max iterations for OEM. See \link[oem]{oem}.
#' @param irls.tol IRLS tolerance for OEM. See \link[oem]{oem}.
#' @param infimum.maxit Maximum number of iterations alternating optimization and Wasserstein distance calculation. Irrelevant for projection method.
#' @param display.progress Display intermediate progress?
#'
#' @return Object of class `WpProj`
#' @keywords internal
#'
# @examples
# if(rlang::is_installed("stats")) {
# n <- 128
# p <- 10
# s <- 99
# x <- matrix( stats::rnorm( p * n ), nrow = n, ncol = p )
# beta <- (1:10)/10
# y <- x %*% beta + stats::rnorm(n)
# post_beta <- matrix(beta, nrow=p, ncol=s) + stats::rnorm(p*s, 0, 0.1)
# post_mu <- x %*% post_beta
#
# # for selection method, must specify an OT method and theta
# fit.s <- W2L1(X=x, Y=t(post_mu), theta = t(post_beta),
# penalty = "lasso",
# method = "selection.variable",
# transport.method = "sinkhorn",
# epsilon = 0.05, OTmaxit = 100
# )
#
# # for projection method, no OT method or theta need be specified
# fit.p <- W2L1(X=x, Y=t(post_mu),
# penalty = "lasso",
# method = "projection"
# )
# }
W2L1 <- function(X, Y=NULL, theta = NULL,
penalty = c("lasso", "ols", "mcp", "elastic.net",
"selection.lasso",
"scad", "mcp.net",
"scad.net",
"grp.lasso",
"grp.lasso.net", "grp.mcp",
"grp.scad", "grp.mcp.net",
"grp.scad.net",
"sparse.grp.lasso"),
method = c("projection","selection.variable","location.scale","scale"),
transport.method = transport_options(),
epsilon = 0.05,
OTmaxit = 0,
model.size = NULL,
lambda = numeric(0),
nlambda = 100L,
lambda.min.ratio = NULL, alpha = 1,
gamma = 1, tau = 0.5,
groups = numeric(0),
scale.factor = numeric(0),
penalty.factor = NULL,
group.weights = NULL, maxit = 500L,
tol = 1e-07, irls.maxit = 100L,
irls.tol = 0.001,
# pseudo_observations = 0.0,
infimum.maxit=NULL,
display.progress=FALSE)
{
this.call <- as.list(match.call()[-1])
family <- "gaussian"
# family <- match.arg(family)
if ("penalty" %in% names(this.call)) {
penalty <- match.arg(penalty, several.ok = TRUE)
}
else {
penalty <- match.arg(penalty, several.ok = FALSE)
}
if(is.null(method)){
method <- "projection"
}
method <- match.arg(method)
# make sure X is a matrix
if(!is.matrix(X)) X <- as.matrix(X)
if(dim(X)[2] == 1) X <- t(X)
# confirm dims
dims <- dim(X)
n_obs <- dims[1L]
p <- dims[2L]
# make sure isn't a sparse matrix
if (inherits(X, "sparseMatrix")) {
stop("Sparse matrices not allowed at this time")
}
#transpose X so covariates are by row
X_ <- t(X)
# is Y the same as X*theta ?
same <- FALSE # default is false
if(!is.null(theta)) {
# make sure theta is a matrix
if(!is.matrix(theta)) theta <- as.matrix(theta)
# transpose theta if covariates are not by row
if(ncol(theta) == p){
theta_ <- t(theta)
} else {
theta_ <- theta
}
# create Y if needed
if (is.null(Y) ) {
same <- TRUE
Y <- crossprod(X_,theta_)
}
} else if (is.null(Y)) {
stop("Must specify Y or theta if method == 'projection'. If method != 'projection' you must always specify theta. In the latter case, Y is optional")
}
if(!is.matrix(Y)) Y <- as.matrix(Y)
# properly orient Y
if (nrow(Y) == n_obs) {
Y_ <- Y
} else if (ncol(Y) == n_obs) {
Y_ <- t(Y)
} else {
stop("The number of observations of Y do not match X")
}
if(!any(dim(Y) %in% dim(X_))) stop("Number of observations of Y must match X")
if(method != "projection") if(all(Y_==crossprod(X_, theta_))) same <- TRUE
# create dummy theta for projection method
if (!is.null(Y_) && method == "projection") {
theta_ <- matrix(1, nrow = p, ncol = ncol(Y_))
}
theta_save <- theta_
if (method != "projection") {
if(nrow(theta_) != p) stop("dimensions of theta must match X")
if(ncol(Y_) != ncol(theta_)) stop("ncol of Y should be same as ncol of theta")
}
if(nrow(Y_) != n_obs) stop("The number of observations in Y and X don't line up. Make sure X is input with observations in rows.")
# xty <- drop(xty)
# if (p != NROW(xty))
# stop("xty must have length equal to the number of columns and rows of xtx. do NOT provide response vector")
if (family == "binomial")
stop("binomial not implemented yet")
if (is.null(penalty.factor)) {
penalty.factor <- rep(1, p)
}
varnames <- colnames(X)
if (is.null(varnames))
varnames = paste("V", seq(p), sep = "")
if (length(penalty.factor) != p) {
stop("penalty.factor must have same length as number of columns in x")
}
if ( any(penalty.factor < 0) ) {
penalty.factor <- abs( penalty.factor )
warning("penalty.factor had negative values. These were turned to positive values via abs().")
}
penalty.factor <- penalty.factor * p/sum(penalty.factor)
penalty.factor <- drop(penalty.factor)
if (any(grep("grp", penalty) > 0) & method !="location.scale") {
if (length(groups) != p) {
stop("groups must have same length as number of columns in x")
}
unique.groups <- sort(unique(groups))
zero.idx <- unique.groups[which(unique.groups == 0)]
groups <- drop(groups)
if (length(group.weights)!=0) {
if (length(zero.idx) > 0) {
group.weights[zero.idx] <- 0
}
group.weights <- drop(group.weights)
if (length(group.weights) != length(unique.groups)) {
stop("group.weights must have same length as the number of groups")
}
group.weights <- as.numeric(group.weights)
} else {
group.weights <- numeric(0)
}
} else if ( method != "location.scale" ) {
unique.groups <- numeric(0)
group.weights <- numeric(0)
}
if (is.null(lambda.min.ratio)) {
lambda.min.ratio <- 1e-04
} else {
lambda.min.ratio <- as.numeric(lambda.min.ratio)
}
if (lambda.min.ratio >= 1 | lambda.min.ratio <= 0) {
stop("lambda.min.ratio must be between 0 and 1")
}
if (nlambda[1] <= 0) {
stop("nlambda must be a positive integer")
}
if (!is.list(lambda)) {
lambda <- sort(as.numeric(lambda), decreasing = TRUE)
if (length(lambda) > 0) {
lambda <- as.double(lambda)
}
lambda <- rep(list(lambda), length(penalty))
}
else {
if (length(lambda) != length(penalty)) {
stop("If list of lambda vectors is provided, it must be the same length as the number of penalties fit")
}
nlambda.tmp <- length(lambda[[1]])
for (l in 1:length(lambda)) {
if (is.null(lambda[[l]]) || length(lambda[[l]]) <
1) {
stop("Provided lambda vector must have at least one value")
}
if (length(lambda[[l]]) != nlambda.tmp) {
stop("All provided lambda vectors must have same length")
}
lambda[[l]] <- as.double(sort(as.numeric(lambda[[l]]),
decreasing = TRUE))
}
}
if (is.null(model.size)) {
model.size <- p
}
infm.maxit <- infimum.maxit
if(is.null(infm.maxit)){
infm.maxit <-maxit
}
orig.method <- method
# if(method == "selection.variable" & penalty != "selection.lasso"){
# penalty <- "selection.lasso"
# warning("Penalty changed to 'selection.lasso' since 'selection.variable' method was chosen.")
# }
if(is.null(transport.method)){
transport.method <- "exact"
} else {
transport.method <- match.arg(transport.method, transport_options())
}
if(nrow(X) == 1) {
transport.method <- "univariate.approximation.pwr"
}
rmv.idx <- NULL
if(any(apply(theta_,1, function(x) all(x == 0)))) {
rmv.idx <- which(apply(theta_,1, function(x) all(x == 0)))
X_ <- X_[-rmv.idx, ,drop=FALSE]
theta_ <- theta_[-rmv.idx,, drop = FALSE]
penalty.factor <- penalty.factor[-rmv.idx]
warning("Some dimensions of 'theta' have no variation. These have been removed")
}
if ( method == "projection") {
# if(any(grep("grp", penalty) > 0)) stop("don't specify a group penalty")
if(penalty == "selection.lasso") penalty <- "lasso"
if(penalty != "ols") penalty <- paste0("projection.",penalty)
if( infm.maxit != 1){
infm.maxit <- 1
# warning("Infimum iterations set to 1 for projection method.")
}
}
if ( method == "location.scale" ) {
if (!any(grep("grp", penalty) > 0)) penalty <- paste0("grp.",penalty)
if (length(groups)>0) {
unique.groups <- sort(unique(groups))
zero.idx <- unique.groups[which(unique.groups == 0)]
groups <- drop(groups)
} else {
unique.groups <- 1:p
groups <- rep(unique.groups,2)
zero.idx <- which(penalty == 0)
}
if ( length( group.weights ) > 0 ) {
# group.weights <- penalty.factor
# } else {
if (length(zero.idx) > 0) {
group.weights[zero.idx] <- 0
}
group.weights <- drop(group.weights)
if (length(group.weights) != length(unique.groups)) {
stop("group.weights must have same length as the number of groups (2 times ncol X)")
}
group.weights <- as.numeric(group.weights)
} else {
group.weights <- penalty.factor
if (length(group.weights) != length(unique.groups)) {
stop("group.weights or penalty.factor must have same length as the number of groups (ncol X)")
}
}
if ( length( penalty.factor ) != 2 * p ) penalty.factor <- rep( penalty.factor, 2 )
if ( length( penalty.factor ) != 2 * p ) stop( "penalty.factor must have same length as ncol(X)" )
if(!any(grep("sparse", penalty) > 0)) penalty.factor <- rep(1, length(penalty.factor))
X_ <- rbind(X_,X_)
m_theta <- matrix(rowMeans(theta_), p, ncol(theta_))
c_theta <- theta_ - m_theta
theta_ <- rbind(c_theta,m_theta)
method <- "scale"
}
if(is.null(epsilon)) {
epsilon <- 0.05
}
if(is.null(OTmaxit) || missing(OTmaxit)) {
OTmaxit <- switch(transport.method, "exact" = 0L, 100)
}
# pseudo_observations <- 0.0
#make R types align with c types
groups <- as.integer(groups)
unique.groups <- as.integer(unique.groups)
nlambda <- as.integer(nlambda)
alpha <- as.double(alpha)
gamma <- as.double(gamma)
tau <- as.double(tau)
tol <- as.double(tol)
irls.tol <- as.double(irls.tol)
irls.maxit <- as.integer(irls.maxit)
maxit <- as.integer(maxit)
# pseudo_observations <- as.double(pseudo_observations)
infm.maxit <- as.integer(infm.maxit)
display.progress <- as.logical(display.progress)
method <- as.character(method)
model.size <- as.integer(model.size)
not_same <- as.logical(!same)
epsilon <- as.double(epsilon)
OTmaxit <- as.integer(OTmaxit)
if (length(scale.factor) > 0) {
if (length(scale.factor) != p)
stop("scale.factor must be same length as xty (nvars)")
scale.factor <- as.double(scale.factor)
}
if (maxit <= 0 | irls.maxit <= 0 | infm.maxit <=0) {
stop("maxit, irls.maxit, and infm.maxit should be greater than 0")
}
if (tol < 0 | irls.tol < 0) {
stop("tol and irls.tol should be nonnegative")
}
options <- list(maxit = maxit, tol = tol, irls_maxit = irls.maxit,
irls_tol = irls.tol, infm_maxit = infm.maxit,
display_progress = display.progress,
method = method,
transport_method = transport.method,
model_size = model.size,
not_same = not_same,
epsilon = epsilon,
OTmaxit = OTmaxit)
# if (method == "projection" & (penalty == "projection.lasso") | (penalty == "projection.lasso.net")) {
# lambdas <- lambda[[1]]
# xglmnet <- t(X_)
# yglmnet <- t(Y_)
# if(length(lambdas)==0) lambdas <- NULL
# dimnames(yglmnet)[[1]] <- dimnames(xglmnet)[[1]]
# dimnames(yglmnet)[[2]] <- paste0("sample_",1:ncol(yglmnet))
# output <- glmnet::glmnet(xglmnet, yglmnet, family="mgaussian",
# lambda.min.ratio = lambda.min.ratio,
# nlambda=nlambda, alpha = alpha, lambda=lambdas,
# penalty.factor = penalty.factor, intercept = FALSE,
# standardize.response = FALSE)
# beta_list <- lapply(output$beta, function(bb) data.frame(as.matrix(bb)))
# beta <- data.table::rbindlist(beta_list)
# output$beta <- beta
# output$niter <- matrix(0, nrow=1,ncol=length(output$lambda))
# output$innerIter <- rep(1, length(output$lambda))
# } else {
# cat("\n")
output <- W2penalized(X_,Y_, theta_, family,
penalty, groups, unique.groups, group.weights,
lambda, nlambda, lambda.min.ratio, alpha, gamma, tau,
scale.factor, penalty.factor, options)
# }
if (any(output$innerIter == infm.maxit) & infm.maxit>1) warning("Maximum iterations hit when optimizing 2-Wasserstein distance over possible infimums. Increase infimum.maxit to ensure have a local minimum.", call. = FALSE)
if (any(output$niter == maxit)) warning("Maximum iterations hit when optimizing parameters. Consider increasing maxit.", call. = FALSE)
if(penalty != "ols") {
if (not_same) {
options_ols <- options
options_ols$display_progress <- FALSE
if(method == "selection.variable"){
penalty_ols <- penalty
} else {
penalty_ols <- "ols"
}
options_ols$infm_maxit <- 1
options_ols$model_size <- p
ols.out <- W2penalized(X_,Y_, theta_, family,
penalty_ols, groups, unique.groups, group.weights,
list(as.double(0)), as.integer(1), lambda.min.ratio, alpha, gamma, tau,
scale.factor, penalty.factor, options_ols)[c("beta","niter")]
output$niter <- cbind(output$niter, 0)
output$niter[1,ncol(output$niter)] <- ols.out$niter[1]
output$innerIter <- c(output$innerIter,1)
output$lambda <- c(output$lambda, 0)
output$beta <- cbind(output$beta, ols.out$beta)
rm(ols.out)
} else if (same & method == "selection.variable") {
output$lambda <- c(output$lambda, 0)
output$beta <- cbind(output$beta, rep(1, nrow(output$beta)))
output$niter <- cbind(output$niter, 0)
output$innerIter <- c(output$innerIter,0)
} else if (same & method == "projection") {
output$lambda <- c(output$lambda, 0)
output$beta <- cbind(output$beta, c(theta))
output$niter <- cbind(output$niter, 0)
output$innerIter <- c(output$innerIter,0)
}
}
output$nvars <- p
output$maxit <- maxit
output$power <- 2.0
output$penalty <- penalty
# output$family <- family
output$varnames <- varnames
output$call <- formals(W2L1)
output$call[names(this.call)] <- this.call
output$method <- orig.method
output$remove.idx <- rmv.idx
output$nonzero_beta <- colSums(output$beta != 0)
# output$nzero <- nz
class(output) <- c("WpProj", "optimization")
extract <- extractTheta(output, theta_)
output$nzero <- extract$nzero
output$eta <- lapply(extract$theta, function(tt) crossprod(X_, tt))
output$theta <- extract$theta
if(!is.null(rmv.idx)) {
for(i in seq_along(output$theta)){
output$theta[[i]] <- theta_save
output$theta[[i]][-rmv.idx,] <- extract$theta[[i]]
}
}
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.