#' Robust Envelope Regression for Sufficient Dimension Reduction
#'
#' @description This is a modification of the \code{\link{ENV}} function that uses a robust
#' estimator of location and scatter to calculate weights, and then the envelope regression
#' is run on the re-weighted data.
#'
#' @param formula model formula
#' @param data data frame
#' @param rank number of dimensions
#' @param yrank number of dimensions for a multivariate response.
#' @param se should standard errors for the regression basis be calculated? defaults to TRUE.
#'
#'
#' @return an sdr object
#' @export
#'
RENV <- function(formula, data, rank = "all", yrank = "all", se = TRUE){
X = model.matrix(formula, data)[,-1]
Y = model.frame(formula, data)
Y = model.response(Y)
Y <- as.matrix(Y)
if (rank == "all" || is.null(rank)){
rank = ncol(X)
}
if (ncol(Y)==1){
ENV.runi(formula, data, rank, se, init= NULL)
}
else{
if (yrank == "all" || yrank == ncol(Y)){ENV.rmv(formula, data, rank, se, init= NULL)}
else{ENV.rmvxy(formula, data, rank, yrank, se, Pinit = NULL, Ginit = NULL)}
}
}
ENV.runi <- function(formula, data, rank, se = TRUE, init = NULL) {
X = model.matrix(formula, data)[,-1]
Y = model.frame(formula, data)
Y = model.response(Y)
Y <- as.matrix(Y)
X <- as.matrix(X)
a <- dim(Y)
n <- a[1]
r <- a[2]
p <- ncol(X)
if (a[1] != nrow(X)) stop("X and Y should have the same number of observations.")
if (rank > p || rank < 0) stop("rank must be an integer between 0 and p.")
if(sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0) stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
ZY <- sweep(Y, 2, mean(Y), "-")/sd(Y); ZX <- sweep(sweep(X, 2, mean(X), "-"), 2, apply(X,2,sd), "/")
ZY <- sweep(sweep(ZY, 2, tauLocScale(Y[,1], mu=F), "*"), 2, tauLocScale(Y[,1], mu=T)[1], "+")
ZX <- sweep(sweep(ZX, 2, apply(X,2,function(i)tauLocScale(i,mu=F)), "*"), 2, apply(X,2,function(i)tauLocScale(i,mu=T)[1]), "+")
MCD <- cov.mrcd(cbind(ZY, ZX), kappa = 0.50, method = "tau")
MCD <- .ENVwtfun(cbind(Y,X),MCD$center, MCD$cov)
OY <- Y; OX <- X
X <- X[MCD$w,]; Y<-Y[MCD$w,]
SigmaY <- matrix(diag(MCD$cov)[1], 1, 1)
SigmaX <- MCD$cov[-1, -1]
SigmaYX <- matrix(MCD$cov[1, -1], nrow = 1)
TauY <- cvreg:::.ridgeinv(SigmaY)
TauX <- cvreg:::.ridgeinv(SigmaX)
eig.SigmaY <- eigen(SigmaY)
eig.SigmaX <- eigen(SigmaX)
U <- crossprod(SigmaYX, TauY) %*% SigmaYX
M <- SigmaX - U
ymean <- MCD$center[1]
xmeans <- MCD$center[-1]
a <- dim(Y);n <- nrow(X);r <- 1;p <- ncol(X)
if (is.null(init)){
SigmaOY <- matrix(var(ZY), 1, 1)
SigmaOX <- cov(ZX)
SigmaOYOX <- cov(ZY, ZX)
TauOY <- cvreg:::.ridgeinv(SigmaOY)
TauOX <- cvreg:::.ridgeinv(SigmaOX)
OU <- crossprod(SigmaOYOX, TauOY) %*% SigmaOYOX
OM <- SigmaOX - OU
tmp <- cvreg:::envlp_MU(OM, OU, rank)
rm(SigmaOY, SigmaOX, SigmaOYOX, TauOY, TauOX, OU, OM, ZX, ZY)
}
if (!is.null(init)) {
if (nrow(init) != p || ncol(init) != rank) stop("The initial value should have p rows and rank columns.")
tmp0 <- qr.Q(qr(init), complete = TRUE)
tmp$Gammahat <- as.matrix(tmp0[, 1:rank])
tmp$Gamma0hat <- as.matrix(tmp0[, (rank+1):p])
}
Gammahat <- tmp$Gammahat;Gamma0hat <- tmp$Gamma0hat;objfun <- tmp$objfun
covMatrix <- NULL;std.err <- NULL;ratio <- NULL
if (rank == p) {
olscoef <- tcrossprod(TauX, SigmaYX)
etahat <- olscoef
Omegahat <- SigmaX
Omega0hat <- NULL
muhat <- ymean - crossprod(olscoef, xmeans)
betahat <- olscoef
SigmaXhat <- M + U
SigmaYcXhat <- SigmaY - SigmaYX %*% olscoef
log_lik <- - nrow(OX) * (r + p) / 2 * (log(2 * pi) + 1) - nrow(OX) / 2 * (objfun + sum(log(eig.SigmaY$values)))
if (se == T) {
covMatrix <- kronecker(SigmaYcXhat, TauX)
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- matrix(1, p, r)
}
} else {
etahat <- crossprod(Gammahat, t(SigmaYX))
Omegahat <- crossprod(Gammahat, SigmaX) %*% Gammahat
Omega0hat <- crossprod(Gamma0hat, SigmaX) %*% Gamma0hat
invOmegahat <- cvreg:::.ridgeinv(Omegahat)
betahat <- Gammahat %*% invOmegahat %*% etahat
muhat <- ymean - crossprod(betahat, xmeans)
SigmaXhat <- Gammahat %*% tcrossprod(Omegahat, Gammahat) + Gamma0hat %*% tcrossprod(Omega0hat, Gamma0hat)
olscoef <- tcrossprod(TauX, SigmaYX)
PGamma <- tcrossprod(Gammahat)
SigmaYcXhat <- SigmaY - SigmaYX %*% PGamma %*% cvreg:::.ridgeinv(SigmaXhat) %*% tcrossprod(PGamma , SigmaYX)
log_lik <- - nrow(OX) * (r + p) / 2 * (log(2 * pi) + 1) - nrow(OX) / 2 * (objfun + sum(log(eig.SigmaY$values)))
if (se == T) {
covMatrix <- kronecker(SigmaYcXhat, TauX)
seFm <- matrix(sqrt(diag(covMatrix)), nrow = p)
TaumaYcXhat <- cvreg:::.ridgeinv(SigmaYcXhat)
invOmega0hat <- cvreg:::.ridgeinv(Omega0hat)
temp <- kronecker(etahat %*% tcrossprod(TaumaYcXhat, etahat), Omega0hat) + kronecker(invOmegahat, Omega0hat) + kronecker(Omegahat, invOmega0hat) - 2 * kronecker(diag(rank), diag(p - rank))
temp2 <- kronecker(t(etahat), Gamma0hat)
covMatrix <- kronecker(SigmaYcXhat, Gammahat %*% tcrossprod(invOmegahat, Gammahat)) + temp2 %*% tcrossprod(cvreg:::.ridgeinv(temp), temp2)
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- seFm / std.err
}
}
betahat <- as.vector(betahat)
names(betahat) <- colnames(X)
predictors <- OX %*% Gammahat
fitted <- as.vector(OX %*% betahat)
colnames(predictors) <- paste0("SP", 1:ncol(predictors))
colnames(Gammahat) <- paste0("SP", 1:ncol(predictors))
rownames(Gammahat) <- colnames(X)
if (se){
std.err <- std.err * (1/sqrt(n))
p.values <- 2*pnorm(-abs((betahat/std.err)))
} else {
std.err <- NULL
p.values <- NULL
}
return(structure(list(basis = Gammahat, coef = betahat, fitted = fitted, predictors = predictors, eta = etahat, muhat = muhat, Omega = Omegahat, SigmaYcX = SigmaYcXhat, SigmaX = SigmaXhat, log_lik = log_lik, n = nrow(OX), covMatrix = covMatrix, std.err = std.err, p.values = p.values, ratio = ratio, y.obs = OY, formula = formula, mf = model.frame(formula, data)), class = "sdr", model = "ENV"))
}
ENV.rmv <- function(formula, data, rank, se = TRUE, init = NULL) {
X = model.matrix(formula, data)[,-1];Y = model.frame(formula, data)
Y = model.response(Y);Y <- as.matrix(Y);X <- as.matrix(X)
a <- dim(Y);n <- a[1];r <- a[2];p <- ncol(X);R <- rep(1, p)
if (a[1] != nrow(X)) stop("X and Y should have the same number of observations.")
if (rank > p || rank < 0) stop("rank must be an integer between 0 and p.")
if(sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0) stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
expan <- function(d) {
E <- matrix(0, d^2,d*(d+1)/2)
for (i in 1:d) {
for (j in 1:d) {
if (j >= i) {
E[d*(i-1)+j,(2*d-i)/2*(i-1)+j] <- 1
} else {
E[d*(i-1)+ j,(2*d-j)/2*(j-1)+i] <- 1
}
}
}
return(E)
}
contr <- function(d) {
C <- matrix(0, d * (d + 1) / 2, d ^ 2)
for (i in 1:d) {
for (j in 1:d) {
if (j == i) {
C[(2*d-i)/2*(i-1)+j,d*(i-1)+j] <- 1
} else if (j > i) {
C[(2*d-i)/2*(i-1)+j,d*(i-1)+j] <- 0.50
} else {
C[(2*d-j)/2*(j-1)+i,d*(i-1)+j] <- 0.50
}
}
}
return(C)
}
MCD <- cov.mrcd(cbind(Y, X), kappa = 0.50, method = "tau")
MCD <- .ENVwtfun(cbind(Y,X),MCD$center, MCD$cov)
OX <- X; OY <- Y
X <- X[MCD$w,]; Y<-Y[MCD$w,]
SigmaY <- MCD$cov[1:ncol(Y),1:ncol(Y)]
SigmaYX <- MCD$cov[1:ncol(Y) , seq(ncol(Y)+1,ncol(MCD$cov),by=1)]
SigmaX <- MCD$cov[seq(ncol(Y)+1,ncol(MCD$cov),by=1),seq(ncol(Y)+1,ncol(MCD$cov), by = 1)]
TauY <- cvreg:::.ridgeinv(SigmaY)
TauX <- cvreg:::.ridgeinv(SigmaX)
ymeans <- MCD$center[1:ncol(Y)]
xmeans <- MCD$center[-c(1:ncol(Y))]
eig.SigmaY <- eigen(SigmaY)
eig.SigmaX <- eigen(SigmaX)
ymeans <- MCD$center[1:ncol(Y)]
xmeans <- MCD$center[-c(1:ncol(Y))]
U <- crossprod(SigmaYX, TauY) %*% SigmaYX
M <- SigmaX - U
q <- length(R)
tmp <- cvreg:::envlp_MU(M, U, rank)
if (!is.null(init)) {
if (nrow(init) != p || ncol(init) != rank) stop("The initial value should have p rows and rank columns.")
tmp0 <- qr.Q(qr(init), complete = TRUE)
tmp$Gammahat <- as.matrix(tmp0[, 1:rank])
tmp$Gamma0hat <- as.matrix(tmp0[, (rank+1):p])
}
Gammahat <- tmp$Gammahat
Gamma0hat <- tmp$Gamma0hat
objfun <- tmp$objfun
covMatrix <- NULL
std.err <- NULL
ratio <- NULL
if (rank >= (p - (q - 1) / r)) {
Gammahat <- diag(p)
Gamma0hat <- NULL
olscoef <- tcrossprod(TauX, SigmaYX)
etahat <- olscoef
Omegahat <- SigmaX
Omega0hat <- NULL
muYhat <- ymeans
muXhat <- xmeans
betahat <- olscoef
SigmaXhat <- SigmaX
SigmaYcXhat <- SigmaY - SigmaYX %*% olscoef
eig.SigmaXhat <- eig.SigmaX
eig.SigmaYcXhat <- eigen(SigmaYcXhat)
objfun <- (sum(log(eig.SigmaXhat$values)) + sum(log(eig.SigmaYcXhat$values)) + p)
log_lik <- -nrow(OX) * (r + p)/2 * log(2 * pi) - nrow(OX) * r/2 - nrow(OX)/2 * objfun
if (se == T) {
covMatrix <- kronecker(SigmaYcXhat, TauX)
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- matrix(1, p, r)
}
} else {
tmp1 <- cvreg:::sxenvlp_MU(X, Y, rank, R)
if (!is.null(init)) {
if (nrow(init) != p || ncol(init) != rank) stop("The initial value should have p rows and rank columns.")
tmp0 <- qr.Q(qr(init), complete = TRUE)
tmp1$Gammahat <- as.matrix(tmp0[, 1:rank])
tmp1$Gamma0hat <- as.matrix(tmp0[, (rank+1):p])
if (length(R) == p) {
make_objfun <- function(SigmaY, SigmaYX, SigmaX, TauX, TauY){
function(d, Gamma, X, Y){
X <- as.matrix(X);Y <- as.matrix(Y);a <- dim(Y);n <- a[1];r <- a[2];p <- ncol(X)
t1 <- crossprod(SigmaYX, TauY);U <- t1 %*% SigmaYX; M <- SigmaX - U
d1 <- c(1, d);Lambda <- diag(d1);invLambda <- diag(1 / d1)
m1 <- crossprod(Gamma, Lambda);eigtem1 <- eigen(m1 %*% tcrossprod(TauX, m1))
m2 <- crossprod(Gamma, invLambda);eigtem2 <- eigen(m2 %*% tcrossprod(M, m2))
temp1 <- sum(log(eigtem1$values));temp2 <- sum(log(eigtem2$values))
objfun <- temp1 + temp2
return(objfun)
}
}
objfun <- make_objfun(SigmaY, SigmaYX, SigmaX, TauX, TauY)
d.init <- rep(1, (p - 1))
k2 <- rep(0, (p - 1))
tmp.init <- Rsolnp::solnp(pars = d.init, fun = objfun, LB = k2,
control = list(delta = 1e-6, tol = 0.0005, trace = 0),
Gamma = tmp1$Gammahat, X = X, Y = Y)
d <- tmp.init$pars
d1 <- c(1, d)
tmp1$Lambdahat <- diag(d1)
} else {
make_objfun1 <- function(SigmaY, SigmaYX, SigmaX, TauX, TauY){
function(d, Gamma, X, Y, R){
X <- as.matrix(X);Y <- as.matrix(Y)
a <- dim(Y);n <- a[1];r <- a[2];p <- ncol(X)
t1 <- crossprod(SigmaYX, TauY)
U <- t1 %*% SigmaYX;M <- SigmaX - U;Lambda <- diag(d);invLambda <- diag(1 / d)
m1 <- crossprod(Gamma, Lambda);eigtem1 <- eigen(m1 %*% tcrossprod(TauX, m1))
m2 <- crossprod(Gamma, invLambda);eigtem2 <- eigen(m2 %*% tcrossprod(M, m2))
temp1 <- sum(log(eigtem1$values));temp2 <- sum(log(eigtem2$values));objfun <- temp1 + temp2
return(objfun)
}
}
objfun1 <- make_objfun1(SigmaY, SigmaYX, SigmaX, TauX, TauY)
heq <- function (d, Gamma, X, Y, R){
iter = sum(R)
i <- 1
cont <- NULL
while (i < iter) {
C <- matrix(0, (R[i] - 1), sum(R))
for (j in 2 : (sum(R) - 1)){
if (R[i] == j) {
for (k in 1 : (j - 1)){
s <- sum(R[0 : (i - 1)]) + 1
C[k , s] <- 1
C[k , (s + k)] <- -1
}
}
}
cont <- rbind(cont, C)
if (i == length(R)) {
c1 <- c(1, rep(0, (sum(R) - 1)))
cont <- rbind(c1, cont)
break
} else {
i <- i + 1
}
}
g <- rep(NA, nrow(cont))
g[1] <- d[1]
for (m in 2 : nrow(cont)){
g[m] <- d[which(cont[m, ] == 1)] - d[which(cont[m, ] == -1)]
}
g
}
d.init <- rep(1, p)
g1 <- heq(d.init, tmp1$Gammahat, X, Y, R)
k1 <- c(1, rep(0, length(g1) - 1))
k2 <- rep(0, p)
tmp.init <- Rsolnp::solnp(pars = d.init, fun = objfun1, eqfun = heq, eqB = k1, LB = k2,
control = list(delta = 1e-6, tol = 0.0005, trace = 0),
R = R, Gamma = tmp1$Gammahat, X = X, Y = Y)
d <- tmp.init$pars
tmp1$Lambdahat <- diag(d)
}
}
Gammahat <- tmp1$Gammahat
Gamma0hat <- tmp1$Gamma0hat
Lambdahat <- tmp1$Lambdahat
d1 <- diag(Lambdahat)
invLambdahat <- diag(1 / d1)
E1 <- crossprod(Gammahat, invLambdahat)
Omegahat <- E1 %*% tcrossprod(SigmaX, E1)
E2 <- crossprod(Gamma0hat, invLambdahat)
Omega0hat <- E2 %*% tcrossprod(SigmaX, E2)
invOmegahat <- cvreg:::.ridgeinv(Omegahat)
etahat <- invOmegahat %*% tcrossprod(E1, SigmaYX)
betahat <- invLambdahat %*% Gammahat %*% etahat
muYhat <- ymeans
muXhat <- xmeans
E3 <- Lambdahat %*% Gammahat
sig1 <- E3 %*% tcrossprod(Omegahat, E3)
E4 <- Lambdahat %*% Gamma0hat
sig2 <- E4 %*% tcrossprod(Omega0hat, E4)
SigmaXhat <- sig1 + sig2
Yc <- Y - tcrossprod(rep(1, nrow(Y)), ymeans)
Xc <- X - tcrossprod(rep(1, nrow(X)), xmeans)
E5 <- Yc - Xc %*% betahat
SigmaYcXhat <- crossprod(E5) / n
TauYcX <- cvreg:::.ridgeinv(mpd(SigmaYcXhat))
eig.SigmaXhat <- eigen(mpd(SigmaXhat))
eig.SigmaYcXhat <- eigen(mpd(SigmaYcXhat))
TauXhat <-cvreg:::.ridgeinv(mpd(SigmaXhat))
s1 <- TauXhat %*% SigmaX
eig.invX <- eigen(s1)
objfun <- (sum(log(eig.SigmaXhat$values)) + sum(log(eig.SigmaYcXhat$values)) + sum(eig.invX$values))
log_lik <- -nrow(OX) * (r + p)/2 * log(2 * pi) - nrow(OX) * r/2 - nrow(OX)/2 * objfun
if (se == T) {
U1 <- SigmaYX %*% TauX
U2 <- tcrossprod(U1, SigmaYX)
M2 <- SigmaY - U2
covMatrix <- kronecker(M2, TauX)
seFm <- matrix(sqrt(diag(covMatrix)), nrow = p)
Ep <- expan(p)
Eu <- expan(rank)
Epu <- expan((p - rank))
Cp <- contr(p)
j11 <- kronecker(TauYcX, SigmaXhat)
j222 <- kronecker(TauXhat, TauXhat)
j22 <- crossprod(Ep, j222) %*% Ep / 2
J <- cbind(rbind(j11, matrix(0, nrow = nrow(j22), ncol = ncol(j11))), rbind(matrix(0, nrow = nrow(j11), ncol = ncol(j22)), j22))
diagvec <- function(R){
E <- NULL
p <- sum(R)
for(i in 1 : (length(R) - 1)){
e <- rep(0, p)
s <- sum(R[1 : i])
e[s + 1] <- 1
t <- kronecker(e, e)
E <- cbind(E, t)
}
E <- matrix(E, nrow = p^2)
}
L <- diagvec(R)
SigmaL <- Gammahat %*% tcrossprod(Omegahat, Gammahat) + Gamma0hat %*% tcrossprod(Omega0hat, Gamma0hat)
h1 <- crossprod(Gammahat, invLambdahat)
h2 <- crossprod(etahat, h1)
h11 <- - kronecker(h2, invLambdahat) %*% L
h3 <- kronecker(Lambdahat %*% SigmaL, diag(p))
h21 <- 2 * Cp %*% h3 %*% L
h12 <- kronecker(diag(r), invLambdahat %*% Gammahat)
h13 <- kronecker(t(etahat), invLambdahat)
h4 <- Lambdahat %*% Gammahat %*% Omegahat
h5 <- kronecker(h4, Lambdahat)
h6 <- Lambdahat %*% Gammahat
h7 <- Lambdahat %*% Gamma0hat
h78 <- h7 %*% tcrossprod(Omega0hat, Gamma0hat)
h8 <- kronecker(h6, h78)
h23 <- 2 * Cp %*% (h5 - h8)
h9 <- Lambdahat %*% Gammahat
h24 <- Cp %*% kronecker(h9, h9) %*% Eu
h10 <- Lambdahat %*% Gamma0hat
h25 <- Cp %*% kronecker(h10, h10) %*% Epu
H <- rbind(cbind(h11, h12, h13, matrix(0, nrow = nrow(h11), ncol = ncol(h24)), matrix(0, nrow = nrow(h11), ncol = ncol(h25))), cbind(h21, matrix(0, nrow = nrow(h21), ncol = ncol(h12)), h23, h24, h25))
M1 <- crossprod(H, J) %*% H
invM1 <- cvreg:::.ridgeinv(M1)
V <- H %*% tcrossprod(invM1, H)
covMatrix <- V[1:(p * r), 1:(p * r)]
colpreds <- ncol(covMatrix)/ncol(Y)
colresps <- ncol(covMatrix)/ncol(X)
cvreg:::.robustse(X, )
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- seFm/std.err
}
}
rownames(betahat) <- colnames(X)
predictors <- OX %*% Gammahat
fitted <- OX %*% betahat
colnames(predictors) <- paste0("SP", 1:ncol(predictors))
colnames(Gammahat) <- paste0("SP", 1:ncol(predictors))
rownames(Gammahat) <- colnames(X)
if (se){
std.err <- std.err * (1/sqrt(n))
p.values <- 2*pnorm(-abs((betahat / std.err)))
} else {
std.err <- NULL
p.values <- NULL
}
return(structure(list(basis = Gammahat, coef = betahat, fitted = fitted, predictors = predictors, eta = etahat, muhat = muYhat, Gamma0 = Gamma0hat, Omega = Omegahat, SigmaYcX = SigmaYcXhat, SigmaX = SigmaXhat, log_lik = log_lik, n = nrow(OX), covMatrix = covMatrix, std.err = std.err, p.values = p.values, ratio = ratio, formula = formula, mf = model.frame(formula, data), y.obs = OY), class = "sdr", model = "ENV"))
}
ENV.rmvxy <- function(formula, data, xrank, yrank, se = TRUE, Pinit = NULL, Ginit = NULL) {
X = model.matrix(formula, data)[,-1];Y = model.frame(formula, data)
Y = model.response(Y);Y <- as.matrix(Y);X <- as.matrix(X)
a <- dim(Y);n <- a[1];r <- a[2];p <- ncol(X)
if (a[1] != nrow(X))
stop("X and Y should have the same number of observations.")
if (yrank > r || yrank < 0)
stop("yrank must be an integer between 0 and r.")
if (xrank > p || xrank < 0)
stop("xrank must be an integer between 0 and p.")
if (sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0)
stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
MCD <- cov.mrcd(cbind(Y, X), kappa = 0.60, method = "aad")
MCD <- .ENVwtfun(cbind(Y,X),MCD$center, MCD$cov)
OX <- X; OY <- Y
X <- X[MCD$w,]; Y<-Y[MCD$w,]
SigmaY <- MCD$cov[1:ncol(Y),1:ncol(Y)]
SigmaYX <- MCD$cov[1:ncol(Y) , seq(ncol(Y)+1,ncol(MCD$cov),by=1)]
SigmaX <- MCD$cov[seq(ncol(Y)+1,ncol(MCD$cov),by=1),seq(ncol(Y)+1,ncol(MCD$cov), by = 1)]
TauY <- cvreg:::.ridgeinv(SigmaY)
TauX <- cvreg:::.ridgeinv(SigmaX)
ymeans <- MCD$center[1:ncol(Y)]
xmeans <- MCD$center[-c(1:ncol(Y))]
eig.SigmaY <- eigen(SigmaY)
eig.SigmaX <- eigen(SigmaX)
a <- dim(Y);n <- a[1];r <- a[2];p <- ncol(X)
betaOLS <- SigmaYX %*% TauX
covMatrix <- NULL
std.err <- NULL
if (yrank == 0 || xrank ==0) {
Gammahat <- NULL
Gamma0hat <- diag(r)
Phihat <- NULL
Phi0hat <- diag(p)
etahat <- NULL
Omegahat <- NULL
Omega0hat <- SigmaY
Deltahat <- NULL
Delta0hat <- SigmaX
muhat <- ymeans
betahat <- matrix(0, p, r)
SigmaYcXhat <- SigmaY
SigmaXhat <- SigmaX
tmp1 <- eig.SigmaX
tmp2 <- eig.SigmaY
objfun <- sum(log(tmp1$values)) + sum(log(tmp2$values))
log_lik <- - n/2 * objfun
}
else if (yrank == r && xrank == p) {
M <- SigmaY - tcrossprod(betaOLS, SigmaYX)
Gammahat <- diag(r)
Gamma0hat <- NULL
Phihat <- diag(p)
Phi0hat <- NULL
etahat <- betaOLS
Omegahat <- M
Omega0hat <- NULL
Deltahat <- SigmaX
Delta0hat <- NULL
muhat <- ymeans - betaOLS %*% xmeans
betahat <- t(betaOLS)
SigmaYcXhat <- M
SigmaXhat <- SigmaX
tmp.e1 <- eigen(Omegahat)
tmp.e3 <- eigen(Deltahat)
objfun <- sum(log(tmp.e1$values)) + sum(log(tmp.e3$values))
log_lik <- -n * (p + r) / 2 - n/2 * objfun
if (se == T) {
covMatrix <- kronecker(M, TauX)
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
}
}
else if (xrank == p) {
tmp.envfun <- function(SigmaX, SigmaY, SigmaYX, TauX, yrank){
betaOLS <- SigmaYX %*% TauX
U <- tcrossprod(betaOLS, SigmaYX)
M <- SigmaY - U
tmp <- cvreg:::envlp_MU(M, U, yrank)
Gammahat <- tmp$Gammahat
Gamma0hat <- tmp$Gamma0hat
etahat <- crossprod(Gammahat, betaOLS)
betahat <- Gammahat %*% etahat
muhat <- ymeans - betahat %*% xmeans
Omegahat <- crossprod(Gammahat, M) %*% Gammahat
Omega0hat <- crossprod(Gamma0hat, sigY) %*% Gamma0hat
Sigma1 <- Gammahat %*% tcrossprod(Omegahat, Gammahat)
Sigmahat <- Sigma1 + Gamma0hat %*% tcrossprod(Omega0hat, Gamma0hat)
return(list(Gamma = Gammahat, Gamma0 = Gamma0hat, Omega = Omegahat, Omega0 = Omega0hat, beta = betahat, Sigma = Sigmahat))
}
tmp.env <- tmp.envfun(SigmaX, SigmaY, SigmaYX, TauX, yrank)
Gammahat <- tmp.env$Gamma
Gamma0hat <- tmp.env$Gamma0
Phihat <- diag(p)
Phi0hat <- NULL
etahat <- TauX %*% crossprod(SigmaYX, Gammahat)
Omegahat <- tmp.env$Omega
Omega0hat <- tmp.env$Omega0
Deltahat <- SigmaX
Delta0hat <- NULL
betahat <- t(tmp.env$beta)
muhat <- ymeans - crossprod(betahat, xmeans)
SigmaYcXhat <- tmp.env$Sigma
SigmaXhat <- SigmaX
tmp.e1 <- eigen(Omegahat)
tmp.e2 <- eigen(Omega0hat)
tmp.e3 <- eigen(Deltahat)
invome <- cvreg:::.ridgeinv(Omegahat)
invome0 <- cvreg:::.ridgeinv(Omega0hat)
invdel <- cvreg:::.ridgeinv(Deltahat)
t3 <- crossprod(Gammahat, SigmaY) %*% Gammahat %*% invome
t5 <- crossprod(etahat, SigmaX) %*% etahat %*% invome
t6 <- t(etahat) %*% crossprod(SigmaYX, Gammahat) %*% invome
obj1 <- sum(log(tmp.e1$values)) + sum(log(tmp.e2$values))
obj2 <- sum(log(tmp.e3$values)) + p
obj3 <- sum(diag(t3)) + (r - yrank)
obj4 <- sum(diag(t5)) - 2 * sum(diag(t6))
objfun <- obj1 + obj2 + obj3 + obj4
log_lik <- - (nrow(OX)/2) * objfun
if (se == T) {
M <- SigmaY - tcrossprod(betaOLS, SigmaYX)
covMatrix <- kronecker(M, TauX)
seFm <- matrix(sqrt(diag(covMatrix)), nrow = p)
m1 <- Gammahat %*% tcrossprod(Omegahat, Gammahat)
temp1 <- kronecker(m1, TauX)
m2 <- crossprod(etahat, SigmaX) %*% etahat
m3 <- kronecker(invome0, m2) + kronecker(invome0, Omegahat)
m4 <- kronecker(Omega0hat, invome) - 2 * kronecker(diag(r - yrank), diag(yrank))
m5 <- m3 + m4
invm5 <- cvreg:::.ridgeinv(m5)
m6 <- kronecker(Gamma0hat, etahat)
temp2 <- m6 %*% tcrossprod(invm5, m6)
covMatrix <- temp1 + temp2
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
}
}
else{
tmp.stenv <- cvreg:::xyenvlp_MU(X, Y, xrank, yrank)
if (!is.null(Ginit)) {
if (nrow(Ginit) != r || ncol(Ginit) != yrank) stop("The initial value should have r rows and yrank columns.")
tmp0 <- qr.Q(qr(Ginit), complete = TRUE)
tmp.stenv$Gammahat <- as.matrix(tmp0[, 1:yrank])
tmp.stenv$Gamma0hat <- as.matrix(tmp0[, (yrank+1):r])
}
if (!is.null(Pinit)) {
if (nrow(Pinit) != p || ncol(Pinit) != xrank) stop("The initial value should have p rows and xrank columns.")
tmp0 <- qr.Q(qr(Pinit), complete = TRUE)
tmp.stenv$Phihat <- as.matrix(tmp0[, 1:xrank])
tmp.stenv$Phi0hat <- as.matrix(tmp0[, (xrank+1):p])
}
Gammahat <- tmp.stenv$Gammahat
Gamma0hat <- tmp.stenv$Gamma0hat
Phihat <- tmp.stenv$Phihat
Phi0hat <- tmp.stenv$Phi0hat
Deltahat <- crossprod(Phihat, SigmaX) %*% Phihat
Delta0hat <- crossprod(Phi0hat, SigmaX) %*% Phi0hat
invdel <- cvreg:::.ridgeinv(Deltahat)
et1 <- t(Phihat) %*% crossprod(SigmaYX, Gammahat)
etahat <- invdel %*% et1
om1 <- SigmaYX %*% Phihat
om2 <- om1 %*% tcrossprod(invdel, om1)
om3 <- SigmaY - om2
Omegahat <- crossprod(Gammahat, om3) %*% Gammahat
Omega0hat <- crossprod(Gamma0hat, SigmaY) %*% Gamma0hat
betahat <- Phihat %*% tcrossprod(etahat, Gammahat)
muhat <- ymeans - crossprod(betahat, xmeans)
s1 <- Gammahat %*% tcrossprod(Omegahat, Gammahat)
SigmaYcXhat <- s1 + Gamma0hat %*% tcrossprod(Omega0hat, Gamma0hat)
s2 <- Phihat %*% tcrossprod(Deltahat, Phihat)
SigmaXhat <- s2 + Phi0hat %*% tcrossprod(Delta0hat, Phi0hat)
tmp.e1 <- eigen(Omegahat)
tmp.e2 <- eigen(Omega0hat)
tmp.e3 <- eigen(Deltahat)
tmp.e4 <- eigen(Delta0hat)
invome <- cvreg:::.ridgeinv(Omegahat)
invome0 <- cvreg:::.ridgeinv(Omega0hat)
invdel0 <- cvreg:::.ridgeinv(Delta0hat)
t3 <- crossprod(Gammahat, SigmaY) %*% Gammahat %*% invome
peta <- Phihat %*% etahat
t5 <- crossprod(peta, SigmaX) %*% peta %*% invome
t6 <- t(peta) %*% crossprod(SigmaYX, Gammahat) %*% invome
obj1 <- sum(log(tmp.e1$values)) + sum(log(tmp.e2$values))
obj2 <- sum(log(tmp.e3$values)) + sum(log(tmp.e4$values)) + p
obj3 <- sum(diag(t3)) + (r - yrank)
obj4 <- sum(diag(t5)) - 2 * sum(diag(t6))
objfun <- obj1 + obj2 + obj3 + obj4
log_lik <- - (nrow(OX)/2) * objfun
if (se == T) {
M <- SigmaY - tcrossprod(betaOLS, SigmaYX)
covMatrix <- kronecker(M, TauX)
seFm <- matrix(sqrt(diag(covMatrix)), nrow = p)
m1 <- Gammahat %*% tcrossprod(Omegahat, Gammahat)
m2 <- Phihat %*% tcrossprod(invdel, Phihat)
temp1 <- kronecker(m1, m2)
d1 <- etahat %*% tcrossprod(invome, etahat)
m3 <- kronecker(d1, Delta0hat)
m4 <- kronecker(Deltahat, invdel0) + kronecker(invdel,Delta0hat) - 2 * kronecker(diag(xrank), diag(p - xrank))
m5 <- m3 + m4
invm5 <- cvreg:::.ridgeinv(m5)
m6 <- kronecker(tcrossprod(Gammahat, etahat), Phi0hat)
temp2 <- m6 %*% tcrossprod(invm5, m6)
d2 <- crossprod(etahat, Deltahat) %*% etahat
m8 <- kronecker(invome0, d2) + kronecker(invome0, Omegahat)
m9 <- kronecker(Omega0hat, invome) - 2 * kronecker(diag(r - yrank), diag(yrank))
m10 <- m8 + m9
invm10 <- cvreg:::.ridgeinv(m10)
m11 <- kronecker(Gamma0hat, Phihat %*% etahat)
temp3 <- m11 %*% tcrossprod(invm10, m11)
covMatrix <- temp1 + temp2 + temp3
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
}
}
colnames(betahat) <- colnames(OY)
rownames(betahat) <- colnames(OX)
predictors <- OX %*% Phihat
y.reduced <- OY %*% Gammahat
fitted <- OX %*% betahat
colnames(predictors) <- paste0("SP", 1:ncol(predictors))
colnames(Phihat) <- paste0("SP", 1:ncol(predictors))
rownames(Phihat) <- colnames(X)
if (se){
std.err <- std.err * (1/sqrt(n))
p.values <- 2*pnorm(-abs((betahat / std.err)))
} else {
std.err <- NULL
p.values <- NULL
}
return(structure(list(coef = betahat, SigmaYcX = SigmaYcXhat, SigmaX = SigmaXhat, Gammahat = Gammahat, Gamma0 = Gamma0hat,
eta = etahat, Omega = Omegahat, Omega0 = Omega0hat, muhat = muhat, basis = Phihat, Phi0 = Phi0hat,
Delta = Deltahat, Delta0 = Delta0hat, log_lik = log_lik, covMatrix = covMatrix, std.err = std.err,
p.values = p.values, n = nrow(OX), y.obs = OY, x.obs = OX, fitted = fitted, y.reduced = y.reduced, predictors = predictors), class = "sdr", model = "ENV"))
}
.ENVwtfun <-function(x, center, cov){
n <- nrow(x); p <- ncol(x)
if (p<=10){pcrit <- (0.241-0.0029*p)/sqrt(n)}
if (p>=11){pcrit <- (0.252-0.0018*p)/sqrt(n)}
delta<-qchisq(0.975, p);d2<-mahalanobis_dist(x, center, cov);d2ord <- sort(d2)
dif <- pchisq(d2ord,p) - (0.5:n)/n;i <- (d2ord>=delta) & (dif>0)
if (sum(i)==0) alfan<-0 else alfan<-max(dif[i]) ; if (alfan<pcrit) alfan<-0
if (alfan>0) cn<-max(d2ord[n-ceiling(n*alfan)],delta) else cn<-Inf
w <- d2 < cn
if(sum(w)==0) {
m <- center; c <- cov
}
else {
newx <- x[w, ]
newcenter <- numeric(); newscale <- numeric()
for (j in 1:ncol(newx)){
LocScale <- cvreg:::tauLocScale(newx[,j])
newcenter[j] <- LocScale[1]; newscale[j] <- LocScale[2]
}
newx <- scale(x); newx <- sweep(sweep(newx, 2, newscale, "*"), 2, newcenter, "+")
c <- try(covShrink(newx, target = "adaptive"), silent = T)
if (class(c)=="try.error"){
c <-covShrink(newx,target="pooled")
}
center <- newcenter
}
list(center = center, cov = c, cn = cn, w = w)
}
#' Generalized Linear Envelopes for Robust Sufficient Dimension Reduction
#'
#' @description This is an adaptation with several improvements on functions in the Renvlp package. Incorporated
#' here is the robust maximum quasilikelihood estimation method also utilized in the \code{\link{robglm}} function
#' for fitting predictor envelopes for binomial and poisson regressions. The benefit of this is that outliers in
#' the design matrix are less likely to have a negative impact on the estimation process. A further benefit of this
#' separation of responses classes in binomial regression is less likely to result in degeneracy of the maximum
#' likelihood estimate. \cr
#' \cr
#' @param formula model formula
#' @param data data frame
#' @param rank number of dimensions
#' @param se should standard errors for the regression basis be calculated? defaults to TRUE.
#' @param init an optional initial covariate dimension reduction subspace. defaults to NULL.
#'
#' @return an sdr object
#' @export
#'
RGLENV <- function(formula, data, rank = "all", family = c("binomial", "poisson"), se = TRUE, init = NULL){
family <- match.arg(family)
X = model.matrix(formula, data)[,-1]
Y = model.frame(formula, data)
Y = model.response(Y)
Y <- as.matrix(Y)
if (rank == "all" || is.null(rank)){
rank = ncol(X)
}
if (family == "binomial"){
GLENV.rbinom(formula, data, rank, se, init)
}
else if (family == "poisson"){
GLENV.rpoisson(formula, data, rank, se, init)
}
}
GLENV.rbinom <- function(formula, data, rank,se = TRUE, init = NULL) {
X = model.matrix(formula, data)[,-1]
Y = model.frame(formula, data)
Y = as.matrix(as.numeric(as.factor(model.response(Y)))-1)
a <- dim(Y)
n <- a[1]
r <- a[2]
p <- ncol(X)
if (a[1] != nrow(X))
stop("X and Y should have the same number of observations.")
if (rank > p || rank < 0)
stop("rank must be an integer between 0 and p.")
if (sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0)
stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
SigmaY <- var(Y)*(n-1)/n
SigmaYX <- cov(Y,X)*(n-1)/n
SigmaX <- solve(.ridgeinv(cov(X))*((n-1)/n))
TauY <- .ridgeinv(SigmaY)
tmp <- rbinom.envlp_MU(X, Y, rank)
if (!is.null(init)) {
if (nrow(init) != p || ncol(init) != rank) stop("The initial value should have p rows and rank columns.")
tmp0 <- qr.Q(qr(init), complete = TRUE)
tmp$Gammahat <- tmp0[, 1:rank]
tmp$Gamma0hat <- tmp0[, (rank+1):p]
GX <- X %*% tmp$Gammahat
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = quasibinomial())
tmp$muhat <- as.vector(fit$coef[1])
tmp$etahat <- matrix(fit$coef[2 : (rank + 1)])
beta <- tmp$Gammahat %*% tmp$etahat
theta <- matrix(1, n, 1) %*% tmp$muhat + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
tmp$weighthat <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
tmp$Vhat <- theta + ((Y - mu1) / tmp$weighthat)
W <- diag(as.vector(tmp$weighthat))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% tmp$Vhat
mean.wv <- mean(wv)
wvv <- tmp$Vhat - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
tmp$avar <- Tauwx / (- c.theta.mean)
M <- SigmaX
MU <- SigmaX
tmp.MU <- eigen(MU)
invMU <- tcrossprod(sweep(tmp.MU$vectors, MARGIN = 2, 1/tmp.MU$values, "*") ,tmp.MU$vectors)
e1 <- eigen(crossprod(tmp$Gammahat, M) %*% tmp$Gammahat)
e2 <- eigen(crossprod(tmp$Gammahat, invMU) %*% tmp$Gammahat)
e3 <- eigen(mpd(M))
e4 <- matrix(1, n, 1) %*% tmp$muhat + X %*% tmp$Gammahat %*% tmp$etahat
e5 <- crossprod(Y, e4) - colSums(log(1 + exp(e4)))
tmp$objfun <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
}
Gammahat <- tmp$Gammahat
Gamma0hat <- tmp$Gamma0hat
objfun <- tmp$objfun
muhat <- tmp$muhat
etahat <- tmp$etahat
weighthat <- tmp$weighthat
Vhat <- tmp$Vhat
avarhat <- tmp$avar
covMatrix <- NULL
std.err <- NULL
ratio <- NULL
if (rank == 0) {
Omegahat <- NULL
Omega0hat <- SigmaX
betahat <- matrix(0, p, 1)
SigmaXhat <- SigmaX
log_lik <- objfun
if (se == T)
ratio <- matrix(1, p, r)
}
else if (rank == p) {
TauX <- .ridgeinv(SigmaX)
Omegahat <- SigmaX
Omega0hat <- NULL
betahat <- etahat
SigmaXhat <- SigmaX
log_lik <- objfun
if (se == T) {
covMatrix <- avarhat
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- matrix(1, p, r)
}
}
else {
TauX <- .ridgeinv(SigmaX)
Omegahat <- crossprod(Gammahat, SigmaX) %*% Gammahat
Omega0hat <- crossprod(Gamma0hat, SigmaX) %*% Gamma0hat
betahat <- Gammahat %*% etahat
SigmaXhat <- Gammahat %*% tcrossprod(Omegahat, Gammahat) + Gamma0hat %*% tcrossprod(Omega0hat, Gamma0hat)
betaOLS <- tcrossprod(TauX, SigmaYX)
PGamma <- tcrossprod(Gammahat)
log_lik <- objfun
if (se == T) {
options(warn = -1)
fit <- cvreg:::.Mqle(cbind(1,X), as.vector(Y), family = quasibinomial())
mu <- as.vector(fit$coef[1])
beta <- as.vector(fit$coef[2 : (p + 1)])
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
V <- theta + ((Y - mu1) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
avar <- Tauwx / (- c.theta.mean)
covMatrix <- avar
seFm <- matrix(sqrt(diag(covMatrix)), nrow = p)
PGamma <- tcrossprod(Gammahat)
t1 <- avarhat
T1 <- PGamma %*% t1 %*% PGamma
invt1 <- .ridgeinv(t1)
t2 <- kronecker(etahat, t(Gamma0hat))
invOme <- .ridgeinv(Omegahat)
invOme0 <- .ridgeinv(Omega0hat)
t3 <- kronecker(Omegahat, invOme0) + kronecker(invOme, Omega0hat) - 2 * kronecker(diag(rank), diag(p - rank))
M <- t2 %*% tcrossprod(invt1, t2) + t3
invM <- .ridgeinv(M)
T2 <- crossprod(t2, invM) %*% t2
covMatrix <- T1 + T2
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- seFm/std.err
}
}
rownames(betahat) <- colnames(X)
predictors <- X %*% Gammahat
fitted <- X %*% betahat
colnames(predictors) <- paste0("SP", 1:ncol(predictors))
colnames(Gammahat) <- paste0("SP", 1:ncol(predictors))
rownames(Gammahat) <- colnames(X)
if (se){
std.err <- std.err * (1/sqrt(n))
p.values <- 2*pnorm(-abs((betahat / std.err)))
} else {
std.err <- NULL
p.values <- NULL
}
return(structure(list(basis = Gammahat, coef = betahat, fitted = fitted, predictors = predictors, eta = etahat, Omega = Omegahat, SigmaX = SigmaXhat, log_lik = log_lik, n = n, covMatrix = covMatrix, std.err = std.err, p.values = p.values, ratio = ratio, formula = formula, mf = model.frame(formula, data), y.obs = Y), class = "sdr", model = "ENV"))
}
GLENV.rpoisson <- function(X, Y, rank, se = TRUE, init = NULL){
X <- as.matrix(X)
Y <- as.matrix(Y)
a <- dim(Y)
n <- a[1]
r <- a[2]
p <- ncol(X)
if (a[1] != nrow(X))
stop("X and Y should have the same number of observations.")
if (rank > p || rank < 0)
stop("rank must be an integer between 0 and p.")
if (sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0)
stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
SigmaY <- var(Y) * ((n - 1) / n)
SigmaYX <- cov(Y, X) * ((n - 1) / n)
SigmaX <- solve(.ridgeinv(cov(X)))*((n-1)/n)
TauY <- .ridgeinv(SigmaY)
tmp <- rpoisson.envlp_MU(X, Y, rank)
if (!is.null(init)) {
if (nrow(init) != p || ncol(init) != rank) stop("The initial value should have p rows and rank columns.")
tmp0 <- qr.Q(qr(init), complete = TRUE)
tmp$Gammahat <- tmp0[, 1:rank]
tmp$Gamma0hat <- tmp0[, (rank+1):p]
GX <- X %*% tmp$Gammahat
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = poisson())
tmp$muhat <- as.vector(fit$coef[1])
tmp$etahat <- matrix(fit$coef[2 : (rank + 1)])
beta <- tmp$Gammahat %*% tmp$etahat
theta <- matrix(1, n, 1) %*% tmp$muhat + X %*% beta
c.theta <- - exp(theta)
c.theta.mean <- mean(c.theta)
tmp$weighthat <- c.theta / c.theta.mean
tmp$Vhat <- theta + ((Y - c.theta) / tmp$weighthat)
W <- diag(as.vector(tmp$weighthat))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% tmp$Vhat
mean.wv <- mean(wv)
wvv <- tmp$Vhat - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
tmp$avar <- Tauwx / (- c.theta.mean)
M <- SigmaX
MU <- SigmaX
tmp.MU <- eigen(MU)
invMU <- tcrossprod(sweep(tmp.MU$vectors, MARGIN = 2, 1/tmp.MU$values, "*") ,tmp.MU$vectors)
e1 <- eigen(crossprod(tmp$Gammahat, M) %*% tmp$Gammahat)
e2 <- eigen(crossprod(tmp$Gammahat, invMU) %*% tmp$Gammahat)
e3 <- eigen(mpd(M))
e4 <- matrix(1, n, 1) %*% tmp$muhat + X %*% tmp$Gammahat %*% tmp$etahat
e5 <- crossprod(Y, e4) - colSums(exp(e4))
tmp$objfun <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
}
Gammahat <- tmp$Gammahat
Gamma0hat <- tmp$Gamma0hat
objfun <- tmp$objfun
muhat <- tmp$muhat
etahat <- tmp$etahat
weighthat <- tmp$weighthat
Vhat <- tmp$Vhat
avarhat <- tmp$avar
covMatrix <- NULL
std.err <- NULL
ratio <- NULL
if (rank == 0) {
Omegahat <- NULL
Omega0hat <- SigmaX
betahat <- matrix(0, p, 1)
SigmaXhat <- SigmaX
log_lik <- objfun
if (se == T)
ratio <- matrix(1, p, r)
}
else if (rank == p) {
Omegahat <- SigmaX
Omega0hat <- NULL
betahat <- etahat
SigmaXhat <- SigmaX
log_lik <- objfun
if (se == T) {
covMatrix <- avarhat
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- matrix(1, p, r)
}
}
else {
TauX<- .ridgeinv(SigmaX)
Omegahat <- crossprod(Gammahat, SigmaX) %*% Gammahat
Omega0hat <- crossprod(Gamma0hat, SigmaX) %*% Gamma0hat
betahat <- Gammahat %*% etahat
SigmaXhat <- Gammahat %*% tcrossprod(Omegahat, Gammahat) + Gamma0hat %*% tcrossprod(Omega0hat, Gamma0hat)
PGamma <- tcrossprod(Gammahat)
log_lik <- objfun
if (se == T) {
options(warn = -1)
fit <- cvreg:::.Mqle(cbind(1,X), as.vector(Y), family = poisson())
mu <- as.vector(fit$coef[1])
beta <- as.vector(fit$coef[2 : (p + 1)])
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta)
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
V <- theta + ((Y - c.theta) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
avar <- Tauwx / (- c.theta.mean)
covMatrix <- avar
asyFm <- matrix(sqrt(diag(covMatrix)), nrow = p)
PGamma <- tcrossprod(Gammahat)
t1 <- avarhat
T1 <- PGamma %*% t1 %*% PGamma
invt1 <- .ridgeinv(t1)
t2 <- kronecker(etahat, t(Gamma0hat))
invOme <- .ridgeinv(Omegahat)
invOme0 <- .ridgeinv(Omega0hat)
t3 <- kronecker(Omegahat, invOme0) + kronecker(invOme, Omega0hat) - 2 * kronecker(diag(rank), diag(p - rank))
M <- t2 %*% tcrossprod(invt1, t2) + t3
invM <- .ridgeinv(M)
T2 <- crossprod(t2, invM) %*% t2
covMatrix <- T1 + T2
std.err <- matrix(sqrt(diag(covMatrix)), nrow = p)
ratio <- asyFm/std.err
}
}
rownames(betahat) <- colnames(X)
predictors <- X %*% Gammahat
fitted <- X %*% betahat
colnames(predictors) <- paste0("SP", 1:ncol(predictors))
colnames(Gammahat) <- paste0("SP", 1:ncol(predictors))
rownames(Gammahat) <- colnames(X)
if (se){
std.err <- std.err * (1/sqrt(n))
p.values <- 2*pnorm(-abs((betahat / std.err)))
} else {
std.err <- NULL
p.values <- NULL
}
return(structure(list(basis = Gammahat, coef = betahat, fitted = fitted, predictors = predictors, eta = etahat, Omega = Omegahat, SigmaX = SigmaXhat, log_lik = log_lik, n = n, covMatrix = covMatrix, std.err = std.err, p.values = p.values, ratio = ratio, formula = formula, mf = model.frame(formula, data), y.obs = Y), class = "sdr", model = "ENV"))
}
rbinom.envlp_MU <- function(X, Y, rank){
X <- as.matrix(X);Y <- as.matrix(Y)
a <- dim(Y);n <- a[1];r <- a[2];p <- ncol(X)
if (a[1] != nrow(X))
stop("X and Y should have the same number of observations.")
if (rank > p || rank < 0)
stop("rank must be an integer between 0 and r.")
if (sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0)
stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
options(warn = -1)
fit <- cvreg:::.Mqle(cbind(1,X), as.vector(Y), family = quasibinomial())
mu <- as.vector(fit$coef[1])
beta <- as.vector(fit$coef[2 : (p + 1)])
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
V <- theta + ((Y - mu1) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
M.init <- Tauwx / (- c.theta.mean)
U.init <- tcrossprod(beta)
tmp1 <- envlp_MU(M.init, U.init, rank)
gamma.init <- tmp1$Gammahat
gamma0.init <- tmp1$Gamma0hat
SigmaX <- stats::cov(X) * ((n - 1) / n)
TauX <- .ridgeinv(SigmaX)
SigmaX <- solve(SigmaX)
if (rank == 0) {
Gammahat <- NULL
Gamma0hat <- diag(r)
tmp.M <- eigen(SigmaX)
mu <- log(mean(Y) / (1 - mean(Y)))
muh <- matrix(1, n, 1) %*% mu
c1 <- log(1 + exp(muh))
Cn1 <- crossprod(Y, muh) - colSums(c1)
eta <- NULL
var <- NULL
weight <- rep(1, n)
V <- Y
objfun <- Cn1 - n/2 * sum(log(tmp.M$values))
} else if (rank == p) {
Gammahat <- diag(r)
Gamma0hat <- NULL
tmp.M <- eigen(SigmaX)
eta <- as.matrix(beta)
var <- Tauwx / (- c.theta.mean)
c1 <- log(1 + exp(theta))
Cn1 <- crossprod(Y, theta) - colSums(c1)
objfun <- Cn1 - n/2 * sum(log(tmp.M$values))
} else if (rank == p - 1) {
maxiter = 250
ftol = 0.0005
M <- SigmaX
MU <- SigmaX
tmp.MU <- eigen(MU)
invMU <- tcrossprod(sweep(tmp.MU$vectors, MARGIN = 2, 1/tmp.MU$values, "*"), tmp.MU$vectors)
c1 <- log(1 + exp(theta))
temp1 <- crossprod(Y, theta) - colSums(c1)
e1 <- eigen(crossprod(gamma.init, M) %*% gamma.init)
e2 <- eigen(crossprod(gamma.init, invMU) %*% gamma.init)
e3 <- eigen(SigmaX)
temp2 <- sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))
obj1 <- temp1 - (n / 2) * temp2
GiX <- X %*% gamma.init
fit <- cvreg:::.Mqle(cbind(1,GiX), as.vector(Y), family = quasibinomial())
eta <- as.matrix(fit$coef[2 : (rank + 1)])
init <- gamma.init
GEidx <- .GaussElim(init)
Ginit <- init %*% pseudoinverse(init[GEidx[1:rank], ])
j <- GEidx[p]
g <- as.matrix(Ginit[j, ])
g1 <- Ginit[-j, ]
t2 <- crossprod(g1, as.matrix(M[-j, j]))/M[j, j]
t3 <- crossprod(g1, as.matrix(invMU[-j, j]))/invMU[j, j]
GUGt2 <- g + t2
GUG <- crossprod(Ginit, (M %*% Ginit)) - tcrossprod(GUGt2, GUGt2) * M[j, j]
GVGt2 <- g + t3
GVG <- crossprod(Ginit, (invMU %*% Ginit)) - tcrossprod(GVGt2, GVGt2) * invMU[j, j]
invC1 <- .ridgeinv(GUG)
invC2 <- .ridgeinv(GVG)
X1 <- as.matrix(X[ , j])
X2 <- as.matrix(X[ , -j])
t5 <- tcrossprod(X1,g) %*% eta
t6 <- matrix(1, n, 1) %*% mu + X2 %*% g1 %*% eta
t7 <- t6 + t5
t8 <- crossprod(Y, t7)
et6 <- log(1 + exp(t7))
t9 <- colSums(et6)
fobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1,x) %*% eta + t6
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
T4 <- log(1 + exp(tmp4))
n /2 * (- 2 * log(1 + sum(x^2)) + log(1 + M[j, j] * crossprod(tmp2,T2)) + log(1 + invMU[j, j] * crossprod(tmp3,T3))) - t(Y) %*% tmp4 + colSums(T4)
}
gobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1,x) %*% eta + t6
Cn <- Y - (exp(tmp4) / (1 + exp(tmp4)))
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
A1 <- crossprod(g1, Sigmawx[-j, -j]) %*% g1 + as.matrix(x) %*% Sigmawx[j, -j] %*% g1 + tcrossprod(crossprod(g1, Sigmawx[-j, j]), x) + tcrossprod(x) * Sigmawx[j , j]
invC3 <- .ridgeinv(A1)
T5 <- tcrossprod(crossprod(X, Cn),eta)
tmp5 <- tcrossprod(X1,x) + X2 %*% g1
T6 <- tcrossprod(Sigmawxv, Cn) %*% tmp5 %*% invC3
tmp6 <- as.matrix(Sigmawx[ , -j]) %*% g1 + tcrossprod(Sigmawx[ , j], x)
T7 <- tmp6 %*% invC3 %*% tcrossprod(crossprod(tmp5, Cn), eta)
T8 <- tmp6 %*% eta %*% crossprod(Cn, tmp5) %*% invC3
r1 <- rep(0, p)
r1[j] <- 1
T9 <- crossprod(r1, (T5 + T6 - T7 - T8))
- t(T9) + n * (- 4 * x %*% solve(1 + sum(x^2)) + 2 * T2/as.numeric(1/M[j, j] + crossprod(tmp2, T2)) + 2 * T3/as.numeric(1/invMU[j, j] + crossprod(tmp3, T3)))
}
i <- 1
while (i < maxiter) {
res <- nlminb(Ginit[j, ], fobj, gobj)
Ginit[j, ] <- res$par
GiX <- X %*% Ginit
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = quasibinomial())
mu <- as.vector(fit$coef[1])
eta <- as.matrix(fit$coef[2 : (rank + 1)])
beta <- Ginit %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
V <- theta + ((Y - mu1) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
e1 <- eigen(crossprod(Ginit, M) %*% Ginit)
e2 <- eigen(crossprod(Ginit, invMU) %*% Ginit)
e3 <- eigen(crossprod(Ginit))
e4 <- matrix(1, n, 1) %*% mu + X %*% beta
e5 <- crossprod(Y, e4) - colSums(log(1 + exp(e4)))
obj2 <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
if (abs(obj1 - obj2) < ftol) {
break
}
else {
obj1 <- obj2
i <- i + 1
}
}
a <- qr(Ginit)
Gammahat <- qr.Q(a)
GX <- X %*% Gammahat
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = quasibinomial())
mu <- as.vector(fit$coef[1])
eta <- matrix(fit$coef[2 : (rank + 1)])
beta <- Gammahat %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
V <- theta + ((Y - mu1) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
var <- Tauwx / (- c.theta.mean)
e1 <- eigen(crossprod(Gammahat, M) %*% Gammahat)
e2 <- eigen(crossprod(Gammahat, invMU) %*% Gammahat)
e3 <- eigen(mpd(M))
e4 <- matrix(1, n, 1) %*% mu + X %*% Gammahat %*% eta
e5 <- crossprod(Y, e4) - colSums(log(1 + exp(e4)))
Gamma0hat <- qr.Q(a, complete = TRUE)[, (rank + 1):p, drop = FALSE]
objfun <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
Gammahat <- as.matrix(Gammahat)
Gamma0hat <- as.matrix(Gamma0hat)
} else {
maxiter = 250
ftol = 0.0005
M <- SigmaX
MU <- SigmaX
tmp.MU <- eigen(MU)
invMU <- tcrossprod(sweep(tmp.MU$vectors, MARGIN = 2, 1/tmp.MU$values, "*"), tmp.MU$vectors)
c1 <- log(1 + exp(theta))
temp1 <- crossprod(Y, theta) - colSums(c1)
e1 <- eigen(crossprod(gamma.init, M) %*% gamma.init)
e2 <- eigen(crossprod(gamma.init, invMU) %*% gamma.init)
e3 <- eigen(SigmaX)
temp2 <- sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))
obj1 <- temp1 - (n / 2) * temp2
options(warn = -1)
GiX <- X %*% gamma.init
fit <- cvreg:::.Mqle(cbind(1,GiX), as.vector(Y), family = quasibinomial())
eta <- as.matrix(fit$coef[2 : (rank + 1)])
init <- gamma.init
GEidx <- .GaussElim(init)
Ginit <- init %*% pseudoinverse(init[GEidx[1:rank], ])
GUG <- crossprod(Ginit, (M %*% Ginit))
GVG <- crossprod(Ginit, (invMU %*% Ginit))
t4 <- crossprod(Ginit[GEidx[(rank + 1):p], ], Ginit[GEidx[(rank + 1):p], ]) + diag(rank)
i <- 1
while (i < maxiter) {
for (j in GEidx[(rank + 1):p]) {
g <- as.matrix(Ginit[j, ])
g1 <- as.matrix(Ginit[-j, ])
t2 <- crossprod(g1, as.matrix(M[-j, j]))/M[j, j]
t3 <- crossprod(g1, as.matrix(invMU[-j, j]))/invMU[j, j]
GUGt2 <- g + t2
GUG <- GUG - tcrossprod(GUGt2, GUGt2) * M[j, j]
GVGt2 <- g + t3
GVG <- GVG - tcrossprod(GVGt2, GVGt2) * invMU[j, j]
t4 <- t4 - tcrossprod(g, g)
invC1 <- .ridgeinv(GUG)
invC2 <- .ridgeinv(GVG)
invt4 <- .ridgeinv(t4)
X1 <- X[ , j]
X2 <- X[ , -j]
t5 <- tcrossprod(X1,g) %*% eta
t6 <- matrix(1, n, 1) %*% mu + X2 %*% g1 %*% eta
t7 <- t5 + t6
t8 <- crossprod(Y, t7)
et6 <- log(1 + exp(t7))
t9 <- colSums(et6)
fobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1, x) %*% eta + t6
T1 <- invt4 %*% x
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
T4 <- log(1 + exp(tmp4))
n /2 * (-2 * log(1 + x %*% T1) + log(1 + M[j, j] * crossprod(tmp2, T2)) + log(1 + invMU[j, j] *crossprod(tmp3, T3))) - t(Y) %*% tmp4 + colSums(T4)
}
gobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1, x) %*% eta + t6
Cn <- Y - (exp(tmp4) / (1 + exp(tmp4)))
T1 <- invt4 %*% x
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
A1 <- crossprod(g1, Sigmawx[-j, -j]) %*% g1 + as.matrix(x) %*% Sigmawx[j, -j] %*% g1 + tcrossprod(crossprod(g1, Sigmawx[-j, j]) , x) + tcrossprod(x) * Sigmawx[j , j]
invC3 <- .ridgeinv(A1)
T5 <- tcrossprod(crossprod(X, Cn), eta)
tmp5 <- tcrossprod(X1, x) + X2 %*% g1
T6 <- tcrossprod(Sigmawxv, Cn) %*% tmp5 %*% invC3
tmp6 <- Sigmawx[ , -j] %*% g1 + tcrossprod(Sigmawx[ , j], x)
T7 <- tmp6 %*% invC3 %*% tcrossprod(crossprod(tmp5, Cn), eta)
T8 <- tmp6 %*% eta %*% crossprod(Cn, tmp5) %*% invC3
r1 <- rep(0, p)
r1[j] <- 1
T9 <- crossprod(r1, (T5 + T6 - T7 - T8))
- t(T9) + n * (- 4 * T1/as.numeric(1 + x %*% T1) + 2 * T2/as.numeric(1/M[j, j] + crossprod(tmp2, T2)) + 2 * T3/as.numeric(1/invMU[j, j] + crossprod(tmp3, T3)))
}
res <- nlminb(Ginit[j, ], fobj, gobj)
Ginit[j, ] <- res$par
g <- as.matrix(Ginit[j, ])
t4 <- t4 + tcrossprod(g, g)
GUGt2 <- g + t2
GUG <- GUG + tcrossprod(GUGt2, GUGt2) * M[j, j]
GVGt2 <- g + t3
GVG <- GVG + tcrossprod(GVGt2, GVGt2) * invMU[j, j]
}
GiX <- X %*% Ginit
fit <- cvreg:::.Mqle(cbind(1,GiX), as.vector(Y), family = quasibinomial())
mu <- as.vector(fit$coef[1])
eta <- as.matrix(fit$coef[2 : (rank + 1)])
beta <- Ginit %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
V <- theta + ((Y - mu1) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
e1 <- eigen(crossprod(Ginit, M) %*% Ginit)
e2 <- eigen(crossprod(Ginit, invMU) %*% Ginit)
e3 <- eigen(crossprod(Ginit))
e4 <- matrix(1, n, 1) %*% mu + X %*% beta
e5 <- crossprod(Y, e4) - colSums(log(1 + exp(e4)))
obj2 <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
if (abs(obj1 - obj2) < ftol) {
break
}
else {
obj1 <- obj2
i <- i + 1
}
}
a <- qr(Ginit)
Gammahat <- qr.Q(a)
GX <- X %*% Gammahat
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = quasibinomial())
mu <- as.vector(fit$coef[1])
eta <- matrix(fit$coef[2 : (rank + 1)])
beta <- Gammahat %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta) / (1 + exp(theta))^2
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
mu1 <- exp(theta) / (1 + exp(theta))
V <- theta + ((Y - mu1) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
var <- Tauwx / (- c.theta.mean)
e1 <- eigen(crossprod(Gammahat, M) %*% Gammahat)
e2 <- eigen(crossprod(Gammahat, invMU) %*% Gammahat)
e3 <- eigen(mpd(M))
e4 <- matrix(1, n, 1) %*% mu + X %*% Gammahat %*% eta
e5 <- crossprod(Y, e4) - colSums(log(1 + exp(e4)))
Gamma0hat <- qr.Q(a, complete = TRUE)[, (rank + 1):p]
objfun <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
Gammahat <- as.matrix(Gammahat)
Gamma0hat <- as.matrix(Gamma0hat)
}
return(list(Gammahat = Gammahat, Gamma0hat = Gamma0hat, muhat = mu,
etahat = eta, weighthat = weight, Vhat = V,
avar = var, objfun = objfun))
}
rpoisson.envlp_MU <- function(X, Y, rank){
X <- as.matrix(X)
Y <- as.matrix(Y)
a <- dim(Y)
n <- a[1]
r <- a[2]
p <- ncol(X)
if (a[1] != nrow(X))
stop("X and Y should have the same number of observations.")
if (rank > p || rank < 0)
stop("rank must be an integer between 0 and r.")
if (sum(duplicated(cbind(X, Y), MARGIN = 2)) > 0)
stop("Some responses also appear in the predictors, or there maybe duplicated columns in X or Y.")
options(warn = -1)
fit <- cvreg:::.Mqle(cbind(1,X), as.vector(Y), family = poisson())
mu <- as.vector(fit$coef[1])
beta <- as.vector(fit$coef[2 : (p + 1)])
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- -exp(theta)
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
V <- theta + ((Y - c.theta) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
M.init <- Tauwx / (- c.theta.mean)
U.init <- tcrossprod(beta)
tmp1 <- envlp_MU(M.init, U.init, rank)
gamma.init <- tmp1$Gammahat
gamma0.init <- tmp1$Gamma0hat
SigmaX <- solve(.ridgeinv(stats::cov(X))) * ((n-1)/n)
TauX <- .ridgeinv(SigmaX)
if (rank == 0) {
Gammahat <- NULL
Gamma0hat <- diag(r)
tmp.M <- eigen(SigmaX)
mu <- log(mean(Y))
muh <- matrix(1, n, 1) %*% mu
Cn1 <- crossprod(Y, muh) - colSums(exp(muh))
eta <- NULL
var <- NULL
weight <- rep(1, n)
V <- Y
objfun <- Cn1 - n/2 * sum(log(tmp.M$values))
} else if (rank == p) {
Gammahat <- diag(r)
Gamma0hat <- NULL
tmp.M <- eigen(SigmaX)
eta <- beta
var <- Tauwx / (- c.theta.mean)
Cn1 <- crossprod(Y, theta) - colSums(exp(theta))
objfun <- Cn1 - n/2 * sum(log(tmp.M$values))
} else if (rank == p - 1) {
maxiter = 250
ftol = 0.0005
M <- SigmaX
MU <- SigmaX
tmp.MU <- eigen(MU)
invMU <- tcrossprod(sweep(tmp.MU$vectors, MARGIN = 2, 1/tmp.MU$values, "*") ,tmp.MU$vectors)
Cn1 <- crossprod(Y, theta) - colSums(exp(theta))
e1 <- eigen(crossprod(gamma.init, M) %*% gamma.init)
e2 <- eigen(crossprod(gamma.init, invMU) %*% gamma.init)
e3 <- eigen(SigmaX)
temp1 <- sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))
obj1 <- Cn1 - (n / 2) * temp1
GiX <- X %*% gamma.init
fit <- cvreg:::.Mqle(cbind(1,GiX), as.vector(Y), family = poisson())
eta <- as.matrix(fit$coef[2 : (rank + 1)])
init <- gamma.init
GEidx <- .GaussElim(init)
Ginit <- init %*% pseudoinverse(init[GEidx[1:rank], ])
j <- GEidx[p]
g <- as.matrix(Ginit[j, ])
g1 <- Ginit[-j, ]
t2 <- crossprod(g1, as.matrix(M[-j, j]))/M[j, j]
t3 <- crossprod(g1, as.matrix(invMU[-j, j]))/invMU[j, j]
GUGt2 <- g + t2
GUG <- crossprod(Ginit, (M %*% Ginit)) - tcrossprod(GUGt2, GUGt2) * M[j, j]
GVGt2 <- g + t3
GVG <- crossprod(Ginit, (invMU %*% Ginit)) - tcrossprod(GVGt2, GVGt2) * invMU[j, j]
invC1 <- .ridgeinv(GUG)
invC2 <- .ridgeinv(GVG)
X1 <- X[ , j]
X2 <- X[ , -j]
t5 <- tcrossprod(X1,g) %*% eta
t6 <- matrix(1, n, 1) %*% mu + X2 %*% g1 %*% eta
t7 <- t6 + t5
t8 <- crossprod(Y, t7)
et6 <- exp(t7)
t9 <- colSums(et6)
fobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1, x) %*% eta + t6
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
T4 <- exp(tmp4)
n /2 * (- 2 * log(1 + sum(x^2)) + log(1 + M[j, j] * crossprod(tmp2,T2)) + log(1 + invMU[j, j] * crossprod(tmp3,T3))) - t(Y) %*% tmp4 + colSums(T4)
}
gobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1, x) %*% eta + t6
Cn <- Y - exp(tmp4)
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
A1 <- crossprod(g1, Sigmawx[-j, -j]) %*% g1 + as.matrix(x) %*% Sigmawx[j, -j] %*% g1 + tcrossprod(crossprod(g1, Sigmawx[-j, j]),x) + tcrossprod(x) * Sigmawx[j , j]
invC3 <- .ridgeinv(A1)
T5 <- tcrossprod(crossprod(X, Cn), eta)
tmp5 <- tcrossprod(X1, x) + X2 %*% g1
T6 <- tcrossprod(Sigmawxv, Cn) %*% tmp5 %*% invC3
tmp6 <- as.matrix(Sigmawx[ , -j]) %*% g1 + tcrossprod(Sigmawx[ , j], x)
T7 <- tmp6 %*% invC3 %*% tcrossprod(crossprod(tmp5, Cn), eta)
T8 <- tmp6 %*% eta %*% crossprod(Cn, tmp5) %*% invC3
r1 <- rep(0, p)
r1[j] <- 1
T9 <- crossprod(r1, (T5 + T6 - T7 - T8))
- t(T9) + n * (- 4 * x %*% .ridgeinv(1 + sum(x^2)) + 2 * T2/as.numeric(1/M[j, j] + crossprod(tmp2, T2)) + 2 * T3/as.numeric(1/invMU[j, j] + crossprod(tmp3, T3)))
}
i <- 1
while (i < maxiter) {
res <- nlminb(Ginit[j, ], fobj, gobj)
Ginit[j, ] <- res$par
GX <- X %*% Ginit
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = poisson())
mu <- as.vector(fit$coef[1])
eta <- as.matrix(fit$coef[2 : (rank + 1)])
beta <- Ginit %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta)
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
V <- theta + ((Y - c.theta) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
e1 <- eigen(crossprod(Ginit, M) %*% Ginit)
e2 <- eigen(crossprod(Ginit, invMU) %*% Ginit)
e3 <- eigen(crossprod(Ginit))
e4 <- matrix(1, n, 1) %*% mu + X %*% beta
e5 <- crossprod(Y, e4) - colSums(exp(e4))
obj2 <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
if (abs(obj1 - obj2) < ftol) {
break
}
else {
obj1 <- obj2
i <- i + 1
}
}
a <- qr(Ginit)
Gammahat <- qr.Q(a)
GX <- X %*% Gammahat
fit <- cvreg:::.Mqle(cbind(1,X), as.vector(Y), family = poisson())
mu <- as.vector(fit$coef[1])
eta <- matrix(fit$coef[2 : (rank + 1)])
beta <- Gammahat %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta)
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
V <- theta + ((Y - c.theta) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
var <- Tauwx / (- c.theta.mean)
e1 <- eigen(crossprod(Gammahat, M) %*% Gammahat)
e2 <- eigen(crossprod(Gammahat, invMU) %*% Gammahat)
e3 <- eigen(mpd(M))
e4 <- matrix(1, n, 1) %*% mu + X %*% Gammahat %*% eta
e5 <- crossprod(Y, e4) - colSums(exp(e4))
Gamma0hat <- qr.Q(a, complete = TRUE)[, (rank + 1):r, drop = FALSE]
objfun <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
Gammahat <- as.matrix(Gammahat)
Gamma0hat <- as.matrix(Gamma0hat)
} else {
maxiter = 250
ftol = 0.0005
M <- SigmaX
MU <- SigmaX
tmp.MU <- eigen(MU)
invMU <- tcrossprod(sweep(tmp.MU$vectors, MARGIN = 2, 1/tmp.MU$values, "*") ,tmp.MU$vectors)
Cn1 <- crossprod(Y, theta) - colSums(exp(theta))
e1 <- eigen(crossprod(gamma.init, M) %*% gamma.init)
e2 <- eigen(crossprod(gamma.init, invMU) %*% gamma.init)
e3 <- eigen(SigmaX)
temp1 <- sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))
obj1 <- Cn1 - (n / 2) * temp1
GiX <- X %*% gamma.init
fit <- cvreg:::.Mqle(cbind(1,GiX), as.vector(Y), family = poisson())
eta <- as.matrix(fit$coef[2 : (rank + 1)])
init <- gamma.init
GEidx <- .GaussElim(init)
Ginit <- init %*% pseudoinverse(init[GEidx[1:rank], ])
GUG <- crossprod(Ginit, (M %*% Ginit))
GVG <- crossprod(Ginit, (invMU %*% Ginit))
t4 <- crossprod(Ginit[GEidx[(rank + 1):p], ], Ginit[GEidx[(rank + 1):p], ]) + diag(rank)
i <- 1
while (i < maxiter) {
for (j in GEidx[(rank + 1):p]) {
g <- as.matrix(Ginit[j, ])
g1 <- as.matrix(Ginit[-j, ])
t2 <- crossprod(g1, as.matrix(M[-j, j]))/M[j, j]
t3 <- crossprod(g1, as.matrix(invMU[-j, j]))/invMU[j, j]
GUGt2 <- g + t2
GUG <- GUG - tcrossprod(GUGt2, GUGt2) * M[j, j]
GVGt2 <- g + t3
GVG <- GVG - tcrossprod(GVGt2, GVGt2) * invMU[j, j]
t4 <- t4 - tcrossprod(g)
invC1 <-.ridgeinv(GUG)
invC2 <-.ridgeinv(GVG)
invt4 <-.ridgeinv(t4)
X1 <- X[ , j]
X2 <- X[ , -j]
t5 <- tcrossprod(X1,g) %*% eta
t6 <- matrix(1, n, 1) %*% mu + X2 %*% g1 %*% eta
t7 <- t5 + t6
t8 <- crossprod(Y, t7)
et7 <- exp(t7)
t9 <- colSums(et7)
fobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1, x) %*% eta + t6
T1 <- invt4 %*% x
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
T4 <- exp(tmp4)
n /2 * (-2 * log(1 + x %*% T1) + log(1 + M[j, j] * crossprod(tmp2, T2)) + log(1 + invMU[j, j] *crossprod(tmp3, T3))) - t(Y) %*% tmp4 + colSums(T4)
}
gobj <- function(x) {
tmp2 <- x + t2
tmp3 <- x + t3
tmp4 <- tcrossprod(X1, x) %*% eta + t6
Cn <- Y - exp(tmp4)
T1 <- invt4 %*% x
T2 <- invC1 %*% tmp2
T3 <- invC2 %*% tmp3
A1 <- crossprod(g1, Sigmawx[-j, -j]) %*% g1 + as.matrix(x) %*% Sigmawx[j, -j] %*% g1 + tcrossprod(crossprod(g1, Sigmawx[-j, j]), x) + tcrossprod(x) * Sigmawx[j , j]
invC3 <-.ridgeinv(A1)
T5 <- tcrossprod(crossprod(X, Cn), eta)
tmp5 <- tcrossprod(X1, x) + X2 %*% g1
T6 <- tcrossprod(Sigmawxv, Cn) %*% tmp5 %*% invC3
tmp6 <- as.matrix(Sigmawx[ , -j]) %*% g1 + tcrossprod(Sigmawx[ , j], x)
T7 <- tmp6 %*% invC3 %*% tcrossprod(crossprod(tmp5, Cn), eta)
T8 <- tmp6 %*% eta %*% crossprod(Cn, tmp5) %*% invC3
r1 <- rep(0, p)
r1[j] <- 1
T9 <- crossprod(r1, (T5 + T6 - T7 - T8))
- t(T9) + n * (- 4 * T1/as.numeric(1 + x %*% T1) + 2 * T2/as.numeric(1/M[j, j] + crossprod(tmp2, T2)) + 2 * T3/as.numeric(1/invMU[j, j] + crossprod(tmp3, T3)))
}
res <- nlminb(Ginit[j, ], fobj, gobj)
Ginit[j, ] <- res$par
g <- as.matrix(Ginit[j, ])
t4 <- t4 + tcrossprod(g, g)
GUGt2 <- g + t2
GUG <- GUG + tcrossprod(GUGt2, GUGt2) * M[j, j]
GVGt2 <- g + t3
GVG <- GVG + tcrossprod(GVGt2, GVGt2) * invMU[j, j]
}
GX <- X %*% Ginit
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = poisson())
mu <- as.vector(fit$coef[1])
eta <- as.matrix(fit$coef[2 : (rank + 1)])
beta <- Ginit %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta)
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
V <- theta + ((Y - c.theta) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <-.ridgeinv(Sigmawx)
e1 <- eigen(crossprod(Ginit, M) %*% Ginit)
e2 <- eigen(crossprod(Ginit, invMU) %*% Ginit)
e3 <- eigen(crossprod(Ginit))
e4 <- matrix(1, n, 1) %*% mu + X %*% beta
e5 <- crossprod(Y, e4) - colSums(exp(e4))
obj2 <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
if (abs(obj1 - obj2) < ftol) {
break
}
else {
obj1 <- obj2
i <- i + 1
}
}
}
a <- qr(Ginit)
Gammahat <- qr.Q(a)
GX <- X %*% Gammahat
fit <- cvreg:::.Mqle(cbind(1,GX), as.vector(Y), family = poisson())
mu <- as.vector(fit$coef[1])
eta <- matrix(fit$coef[2 : (rank + 1)])
beta <- Gammahat %*% eta
theta <- matrix(1, n, 1) %*% mu + X %*% beta
c.theta <- - exp(theta)
c.theta.mean <- mean(c.theta)
weight <- c.theta / c.theta.mean
V <- theta + ((Y - c.theta) / weight)
W <- diag(as.vector(weight))
wx <- W %*% X
mean.wx <- apply(wx, 2, mean)
wxx <- X - matrix(1, nrow = n) %*% mean.wx
Sigmawx <- crossprod(wxx, W) %*% wxx / n
wv <- W %*% V
mean.wv <- mean(wv)
wvv <- V - matrix(1, nrow = n) %*% mean.wv
Sigmawxv <- crossprod(wxx, W) %*% wvv / n
Tauwx <- .ridgeinv(Sigmawx)
var <- Tauwx / (- c.theta.mean)
e1 <- eigen(crossprod(Gammahat, M) %*% Gammahat)
e2 <- eigen(crossprod(Gammahat, invMU) %*% Gammahat)
e3 <- eigen(mpd(M))
e4 <- matrix(1, n, 1) %*% mu + X %*% Gammahat %*% eta
e5 <- crossprod(Y, e4) - colSums(exp(e4))
Gamma0hat <- qr.Q(a, complete = TRUE)[, (rank + 1):p]
objfun <- - n/2 * (sum(log(e1$values)) + sum(log(e2$values)) + sum(log(e3$values))) + e5
Gammahat <- as.matrix(Gammahat)
Gamma0hat <- as.matrix(Gamma0hat)
return(list(Gammahat = Gammahat, Gamma0hat = Gamma0hat, muhat = mu,
etahat = eta, weighthat = weight, Vhat = V,
avar = var, objfun = objfun))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.