Nothing
log1exp <- function(xi, j, n_y_is_0) {
k <- length(xi)
si <- c(rep.int(1, k), 3, 4, 6)
sj <- c(seq_along(xi), k + j, k + n_y_is_0 + j, k + j)
sv <- c( -xi, -1, -1, 1)
simple_triplet_matrix(si, sj, sv, nrow = 6L, ncol = (k + 2 * n_y_is_0))
}
check_data <- function(x, y) {
assert(check_vector(y), check_true(length(y) == NROW(x)), combine = "and")
}
#
# Binomial
#
# TODO:
# The likelihood should store the variables names, so we
# later know on which part of the matrices we have to set the constriants.
# @title Binomial Log Log-Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y a numeric vector giving the response variable.
# @param eps a numeric giving the epsilon for the linear constraint.
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @export
loglike_binomial_log <- function(x, y, weights = rep.int(1L, NROW(x)), eps = 1e-7, ...) {
wy <- y * weights
assert_integerish(wy, any.missing = FALSE)
if (is_zero_one(y)) {
yx <- y %*% x
y_is_0 <- y == 0L
weights <- rep.int(1, sum(y_is_0))
} else {
yx <- round(wy) %*% x
y0_count <- round((1 - y) * weights)
y_is_0 <- y0_count > 0L
weights <- y0_count[y_is_0]
}
n_y_is_0 <- sum(y_is_0)
op <- OP(-c(beta=yx, double(n_y_is_0), weights), maximum = FALSE)
L1 <- cbind(x, stzm(nrow(x), 2 * n_y_is_0))
L2 <- mapply(log1exp, split(x[y_is_0,], seq_len(n_y_is_0)),
seq_len(n_y_is_0), MoreArgs = list(n_y_is_0 = n_y_is_0),
SIMPLIFY = FALSE)
rhs <- c(c(0, 1, 0), c(0, 1, 1))
rhs <- c(rep(-eps, nrow(x)), rep(rhs, n_y_is_0))
cones <- c(K_lin(nrow(x)), K_expp(2 * n_y_is_0))
L <- do.call(rbind, c(list(L1), L2))
constraints(op) <- C_constraint(L, cones, rhs)
bounds(op) <- V_bound(ld = -Inf, nobj = length(objective(op)))
op
}
loglike_binomial_log_approx <- function(x, y, weights = rep.int(1L, NROW(x)), eps = 1e-7, ...) {
if ("tangent_points" %in% names(attributes(x))) {
Vx <- attributes(x)$tangent_points
} else {
Vx <- c(-16.53, -11.88, -8, -7.01, -4.68, -4.23, -3.12, -2.34, -1.92, -1.58, -1.28, -1.03, -0.82, -0.64, -0.49, -0.25, -0.1)
}
wy <- y * weights
assert_integerish(wy, any.missing = FALSE)
if (is_zero_one(y)) {
yx <- y %*% x
y_is_0 <- y == 0L
weights <- rep.int(1, sum(y_is_0))
} else {
yx <- round(wy) %*% x
y0_count <- round((1 - y) * weights)
y_is_0 <- y0_count > 0L
weights <- y0_count[y_is_0]
}
n_y_is_0 <- sum(y_is_0)
n <- nrow(x); m <- ncol(x)
f <- function(v) -log(1-exp(v))
fd <- function(v) 1/(exp(-v)-1)
tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(x)[y_is_0,], stdm(1, n_y_is_0)), geq(n_y_is_0), rep(f(vk)-fd(vk)*vk, n_y_is_0))
# - x * \beta + -log(1-exp(x * \beta))
op <- OP(c(-(yx), weights), maximum = FALSE)
C1 = L_constraint(cbind(drop(x), stzm(n, n_y_is_0)), leq(n), double(n))
C2 = do.call(rbind, lapply(Vx, tangent))
constraints(op) <- rbind(C1, C2)
bounds(op) <- V_bound(li=seq(m), lb=rep.int(-Inf, m), nobj = length(objective(op)))
op
}
# @title Binomial Logit Log-Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y response variable
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @export
loglike_binomial_logit <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if (is.null(weights)) {
yx <- y %*% x
weights <- rep.int(1L, NROW(x))
} else {
yx <- ((weights * y) %*% x)
}
m <- nrow(x); n <- ncol(x)
i <- 3 * seq_len(m) - 2
# op <- OP(c(beta=-(y %*% x), rep.int(1, m), double(m)), maximum = FALSE)
op <- OP(c(beta=-yx, weights, double(m)), maximum = FALSE)
C11 <- stm(rep(i, n), rep(seq_len(n), each = m), -drop(x), 3 * m, n)
C12 <- stm(i, seq_len(m), rep.int(1, m), 3 * m, m)
C13 <- stm(i + 2, seq_len(m), rep.int(-1, m), 3 * m, m)
C1 <- cbind(C11, C12, C13)
C2 <- cbind(stzm(3 * m, n), C12, -C13)
C <- rbind(C1, C2)
cones <- K_expp(2 * m)
rhs <- c(rep(c(0, 1, 1), m), rep(c(0, 1, 0), m))
constraints(op) <- C_constraint(C, cones, rhs)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(C))
op
}
loglike_binomial_logit_approx <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if ("tangent_points" %in% names(attributes(x))) {
Vx <- attributes(x)$tangent_points
} else {
Vx <- c(-8.02, -5.88, -4.74, -3.37, -1.75, -0.65, -0.33, -0.04, 0.37, 1.07, 1.69, 2.74, 4.01, 7.08, 8.82)
#Vx <- c(-6.37, -4.78, -3.79, -3.03, -2.14, -1.47, -0.87, -0.32, 0.2, 0.6, 1.03, 2.02, 3.26, 4.01, 5.4)
}
y[y == 0] <- -1
if (is.null(weights)) {
yx <- y * x
weights <- rep.int(1L, NROW(x))
} else {
yx <- (weights * y) * x
}
n <- nrow(x); m <- ncol(x)
f <- function(v) log(1+exp(v))
fd <- function(v) 1/(exp(-v)+1)
tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(-yx), stdm(1, n)), geq(n), rep(f(vk)-fd(vk)*vk, n))
# build_tangent <- function(Vx) {
# mat = as.simple_triplet_matrix(-yx)
# mat1 = do.call(rbind, lapply(Vx, function(vk) -fd(vk)*(-yx)))
# mat2 = stm(seq(n*length(Vx)), rep(seq(n), length(Vx)), rep(1L, length(Vx)*n))
# L_constraint(cbind(mat1, mat2), geq(n*length(Vx)), rep(sapply(Vx, function(vk) f(vk)-fd(vk)*vk), each=n))
# }
# tangent_cons <- build_tangent(Vx)
op <- OP(c(beta=double(m), weights), maximum = FALSE)
tangent_cons <- do.call(rbind, lapply(Vx, tangent))
# f'(v1)(v-v1)+f(v1)=-v
v1_cons <- L_constraint(cbind(drop(yx), stdm(1,n)), geq(n), numeric(n))
# f'(v4)(v-v4)+f(v4)=0
v4_cons <- L_constraint(cbind(stzm(n, m), stdm(1,n)), geq(n), numeric(n))
constraints(op) <- rbind(tangent_cons, v1_cons, v4_cons)
bounds(op) <- V_bound(li=seq(m), lb=rep.int(-Inf, m), nobj = length(objective(op)))
op
}
# @title Binomial Logit Log-Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y response variable
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @references
# K.D. Tocher:The art of simulation. (1963) English Universities Press,London.
# @export
# NOTE: We use the Kocher approximation
loglike_binomial_probit <- loglike_binomial_logit
loglike_binomial_probit_approx <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if ("tangent_points" %in% names(attributes(x))) {
Vx <- attributes(x)$tangent_points
} else {
Vx <- c(-7.79, -6.94, -6.01, -5.04, -4.55, -4.04, -3.3, -2.55, -1.94, -1.32, -0.79, -0.24, 0.27, 0.84, 1.62, 7.11, 8.13)
}
y[y == 0] <- -1
if (is.null(weights)) {
yx <- y * x
weights <- rep.int(1L, NROW(x))
} else {
yx <- (weights * y) * x
}
n <- nrow(x); m <- ncol(x)
f <- function(v) -log(pnorm(v))
fd <- function(v) -dnorm(v)/pnorm(v)
tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(yx), stdm(1, n)), geq(n), rep(f(vk)-fd(vk)*vk, n))
op <- OP(c(double(m), rep(1, n)), maximum = FALSE)
constraints(op) = do.call(rbind, lapply(Vx, tangent))
bounds(op) <- V_bound(li=seq(m), lb=rep.int(-Inf, m), nobj = length(objective(op)))
op
}
#
# Poisson
#
# @title Poisson Log Log-Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y response variable
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @export
loglike_poisson_log <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if (is.null(weights)) {
yx <- y %*% x
weights <- rep.int(1L, NROW(x))
} else {
yx <- (weights * y) %*% x
}
m <- nrow(x); n <- ncol(x)
i <- 3 * seq_len(m) - 2
# minimize -y(x*\beta) + exp(x*\beta)
op <- OP(c(-yx, weights))
A <- cbind(stm(rep(i, n), rep(seq_len(n), each = m), -drop(x), 3 * m, n),
stm(i + 2, seq_len(m), rep.int(-1, m), 3 * m, m))
rhs <- rep(c(0, 1, 0), m)
cones <- K_expp(m)
constraints(op) <- C_constraint(A, cones, rhs)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(A))
op
}
loglike_poisson_log_approx <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if ("tangent_points" %in% names(attributes(x))) {
Vx <- attributes(x)$tangent_points
} else {
Vx <- c(1.04, 1.99, 3.34, 4.25, 5.26, 5.64, 5.98, 6.28, 7.24, 7.66, 8.03, 8.48, 8.91, 9.1, 9.29, 9.54, 9.78)
}
if (is.null(weights)) {
yx <- y %*% x
weights <- rep.int(1L, NROW(x))
} else {
yx <- (weights * y) %*% x
}
n <- nrow(x); m <- ncol(x)
op <- OP(c(-yx, weights), maximum=FALSE)
f <- fd <- exp
tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(x), stdm(1, n)), geq(n), rep(f(vk)-fd(vk)*vk, n))
constraints(op) <- do.call(rbind, lapply(Vx, tangent))
bounds(op) <- V_bound(li=seq(m), lb=rep.int(-Inf, m), nobj = length(objective(op)))
op
}
# loglike_poisson_log_approx <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
# if (is.null(weights)) {
# yx <- y %*% x
# weights <- rep.int(1L, NROW(x))
# } else {
# yx <- (weights * y) %*% x
# }
# n <- nrow(x); m <- ncol(x)
# op <- OP(c(-yx, weights), maximum=FALSE)
# f <- function(v) exp(v)
# fd <- function(v) exp(v)
# tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(x), stdm(1, n)), geq(n), rep(f(vk)-fd(vk)*vk, n))
# Vx = seq(0, 10, length.out=50)
# #Vx = c(0, 1, 2, 3, 4, 5)
# constraints(op) <- do.call(rbind, lapply(Vx, tangent))
# bounds(op) <- V_bound(li=seq(m), lb=rep.int(-Inf, m), nobj = length(objective(op)))
# op
# }
# @title Poisson Identity Log-Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y response variable
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @export
loglike_poisson_identity <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if (is.null(weights)) {
wx <- x
wy <- y
} else {
wx <- diag(weights) %*% x
wy <- weights * y
}
m <- nrow(x); n <- ncol(x)
i <- 3 * seq_len(m) - 2
op <- OP(c(colSums(wx), -wy))
A <- cbind(stm(rep(i+2, n), rep(seq_len(n), each = m), -drop(x), 3 * m, n),
stm(i, seq_len(m), rep.int(-1, m), 3 * m, m))
rhs <- rep(c(0, 1, 0), m)
cones <- K_expp(m)
nonneg_const <- C_constraint(cbind(-wx, stzm(m)), K_lin(m), rep(0, m))
constraints(op) <- c(C_constraint(A, cones, rhs), nonneg_const)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(A))
op
}
loglike_poisson_identity_approx <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if ("tangent_points" %in% names(attributes(x))) {
Vx <- attributes(x)$tangent_points
} else {
Vx <- c(1e3, 2.95, 3.7, 4.32, 5.17, 5.51, 5.82, 6.07, 6.3, 6.72, 7.07, 7.4, 7.65, 7.89, 8.31, 8.67, 9)
}
if (is.null(weights)) {
wx <- x
wy <- y
} else {
wx <- diag(weights) %*% x
wy <- weights * y
}
n <- nrow(x); m <- ncol(x)
# x * \beta + y * -\log(x * \beta)
op <- OP(c(colSums(wx), wy))
f <- function(x) -log(x)
fd <- function(x) -1/x
tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(x), stdm(1, n)), geq(n), rep(f(vk)-fd(vk)*vk, n))
constraints(op) <- do.call(rbind, lapply(Vx, tangent))
bounds(op) <- V_bound(li=seq(m), lb=rep.int(-Inf, m), nobj = length(objective(op)))
op
}
# @title Poisson Sqrt Log-Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y response variable
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @export
loglike_poisson_sqrt <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if (is.null(weights)) {
wx <- x
wy <- y
} else {
wx <- diag(sqrt(weights)) %*% x
wy <- weights * y
}
m <- nrow(x); n <- ncol(x)
# X zeta delta
op <- OP(c(double(n), 1, -2 * wy))
i <- 3 * seq_len(m) - 2
A <- rbind(stm(c(1, 2), c(n + 1, n + 1), c(-1, 1), ncol = n + 1 + m),
stm(rep(seq_len(m), n), rep(seq_len(n), each = m), -2 * drop(wx), ncol = n + 1 + m),
cbind(stm(rep(i + 2, n), rep(seq_len(n), each = m), -drop(x), 3 * m, n),
stm(i, seq_len(m) + 1, rep.int(-1, m), 3 * m, m + 1))
)
rhs <- c(1, 1, integer(m), rep(c(0, 1, 0), m))
cones <- c(K_soc(m + 2), K_expp(m))
constraints(op) <- C_constraint(A, cones, rhs)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(A))
op
}
loglike_poisson_sqrt_approx <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
# could rewrite this for non SOCP solver like gurobi
if ("tangent_points" %in% names(attributes(x))) {
Vx <- attributes(x)$tangent_points
} else {
Vx <- c(0.01, 2.95, 3.7, 4.32, 5.17, 5.51, 5.82, 6.07, 6.3, 6.72, 7.07, 7.4, 7.65, 7.89, 8.31, 8.67, 9)
# approx works but tangent points are garbage :/
}
if (is.null(weights)) {
wx <- x
wy <- y
} else {
wx <- diag(sqrt(weights)) %*% x
wy <- weights * y
}
n <- nrow(x); m <- ncol(x)
# X zeta delta
op <- OP(c(double(m), 1, 2 * wy))
A <- rbind(stm(c(1, 2), c(m + 1, m + 1), c(-1, 1), ncol = m + 1 + n),
stm(rep(seq_len(n), m), rep(seq_len(m), each = n), -2 * drop(wx), ncol = m + 1 + n))
rhs <- c(1, 1, integer(n))
cones <- K_soc(n + 2)
f <- function(x) -log(x)
fd <- function(x) -1/x
tangent <- function(vk) L_constraint(cbind(-fd(vk)*drop(x), stzm(n, 1), stdm(1, n)), geq(n), rep(f(vk)-fd(vk)*vk, n))
constraints(op) <- rbind(C_constraint(A, cones, rhs), do.call(rbind, lapply(Vx, tangent)))
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(A))
op
}
#
# Gaussian data
#
# @title Gaussian Identity Likelihood
# @param x design matrix of dimension \eqn{n \times p}.
# @param y response variable
# @param weights an optional numeric vector (or \code{NULL}) of prior
# weights to be used in the fitting process. The length
# of numeric weights should be equal to \code{NROW(x)}.
# @param solver a character string giving the solver name.
# @param ... optional additional variables, currently not used.
# @return An object of class \code{"OP"}.
# @export
loglike_gaussian_identity <- function(x, y, weights = rep.int(1L, NROW(x)), solver = "auto", ...) {
SOCP_solvers = c("ecos", "clarabel", "scs")
if (solver %in% SOCP_solvers || solver=="auto" && "ecos" %in% names(ROI::ROI_registered_solvers())) {
# FIXME: Use a better algorithm to select the solver.
return(loglike_gaussian_identity_SOCP(x, y, weights = weights, ...))
}
if (is.null(weights)) {
Q <- 2 * t(x) %*% x
L <- -2 * t(y) %*% x
} else {
Q <- 2 * t(x) %*% diag(weights) %*% x
L <- -2 * t(y) %*% diag(weights) %*% x
}
op <- OP(objective = Q_objective(Q = Q, L = L))
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(x))
op
}
# SOCP formulation
loglike_gaussian_identity_SOCP <- function(x, y, weights = rep.int(1L, NROW(x)), ...) {
if (!is.null(weights)) {
x <- diag(sqrt(weights)) %*% x
y <- diag(sqrt(weights)) %*% y
}
m <- NROW(x); n <- NCOL(x)
op <- OP(c(beta=double(n), SOC=1))
A <- rbind(
stm(i = c(1,2), j = c(n+1, n+1), v = c(-1, 1), ncol = n+1),
cbind(2*x, stzm(m, 1))
)
constraints(op) <- C_constraint(A, c(K_soc(m+2)), rhs = c(1, 1, 2*y))
bounds(op) <- V_bound(ld = -Inf, nobj = n+1)
op
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.