#' predict method for sdr objects
#'
#' @description This is a predict method for the sdr models in this package. It can project new
#' data onto the lower dimensional space to obtain the sufficient predictors for test data, or it
#' can return linear predictions of the response based on the sufficient predictors. For nonlinear
#' relationships, you may be better off fitting a generalized additive model (GAM) or a generalized
#' additive model for location, scale, and shape (GAMLSS) that regresses the original response
#' variable on the sufficient predictors. This function can then be used to project new test data
#' onto the lower dimensional space, and the predict method of the GAM/GAMLSS can be applied to
#' those projections to obtain response predictions for the new data (see the example below).
#'
#' @param fit model fit
#' @param newdata a data frame or matrix of new data containing the same predictor variables,
#' in the same order, as those used to fit the model (the response is not needed). If NULL,
#' results for the training data are returned.
#' @param type one of "response" (the default, which returns linear predictions of the response)
#' or "SP" (which returns the sufficient predictors, i.e., the projections onto the lower dimensional space).
#'
#' @return a numeric vector of predicted values (type = "response") or a matrix of sufficient predictors (type = "SP")
#' @method predict sdr
#' @export
#'
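#' @examples
#' \dontrun{
#' ## A sketch of the GAM workflow described above, not a definitive recipe.
#' ## It assumes `fit` is an sdr model from this package with at least two
#' ## sufficient predictors, `test` is a data frame containing only the
#' ## predictor columns used to fit the model, and mgcv is installed;
#' ## all object names are illustrative.
#' library(mgcv)
#' sp_train <- predict(fit, type = "SP")
#' gam_fit <- gam(y ~ s(SP1) + s(SP2),
#' data = cbind.data.frame(y = fit$y.obs, sp_train))
#' sp_test <- predict(fit, newdata = test, type = "SP")
#' colnames(sp_test) <- colnames(sp_train)
#' predict(gam_fit, newdata = as.data.frame(sp_test))
#' }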
predict.sdr <- function(fit, newdata = NULL, type = c("response", "SP")){
type <- match.arg(type)
if (type == "response"){
if (is.null(newdata)){
fit$fitted
}
else{
newpredictors = as.matrix(newdata) %*% as.matrix(fit$basis)
colnames(newpredictors) = paste0("SP",1:ncol(newpredictors))
X = as.matrix(newpredictors)
form = as.formula(paste0("y ~ 0 + ", paste0("SP", 1:ncol(newpredictors),collapse="+")))
betas = lmSolve(form, cbind.data.frame(y = fit$fitted, fit$predictors))
y.pred = as.vector(X %*% betas)
return(y.pred)
}
}
else if (type == "SP"){
if (is.null(newdata)){
fit$predictors
}
else{
as.matrix(newdata) %*% as.matrix(fit$basis)
}
}
}
#' plot method for sdr objects
#'
#' @param fit model fit
#' @param type one of "facet", "splom", or "fitted". facet plots each sufficient predictor's
#' relationship with the response. splom plots the same as facet but also the inter-relationships
#' of the sufficient predictors as a scatterplot matrix. fitted plots the relationship between
#' the fitted values of y and the observed values.
#' @param wch for "facet" and "splom", selects which sufficient predictors to plot. If left as NULL, all are plotted.
#' @param smooth should a smooth line be fit to the plots? Defaults to FALSE.
#'
#' @return a plot
#' @method plot sdr
#' @export
#'
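#' @examples
#' \dontrun{
#' ## Illustrative sketch only; assumes `fit` is an sdr model fitted by this
#' ## package with at least two sufficient predictors.
#' plot(fit, type = "facet", smooth = TRUE) # response vs. each sufficient predictor
#' plot(fit, type = "splom", wch = 1:2) # scatterplot matrix of y, SP1, SP2
#' plot(fit, type = "fitted", smooth = TRUE) # observed vs. fitted values
#' }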
plot.sdr <- function(fit, type = c("facet", "splom", "fitted"), wch = NULL, smooth = F){
type <- match.arg(type)
if (type == "facet"){
if (is.null(wch)) wch = 1:ncol(fit$predictors)
dat <- cbind.data.frame(y = fit$y.obs, fit$predictors[, wch, drop = FALSE])
dat <- as.data.frame(tidyr::pivot_longer(dat, -y))
dat$name <- factor(dat$name, levels = paste0("SP", sort(wch)))
g = ggplot(dat, aes(x = value, y = y, color = name)) +
facet_wrap(~ name, ncol = ifelse(length(unique(dat$name)) < 4, length(unique(dat$name)), 2), scales="free") +
geom_point(shape = 1, alpha = 0.95, size = 2) +
theme(legend.position = "none")
if (smooth){
g + geom_line(stat = "smooth", method = RobStatTM::lmrobdetMM, formula = y ~ splines::ns(x, df = 3),
alpha = 0.60, size = 1, se = F)
}
else{
g
}
}
else if (type == "splom"){
if (is.null(wch)) wch = 1:ncol(fit$predictors)
dat <- cbind.data.frame(y = fit$y.obs, fit$predictors[, wch, drop = FALSE])
scatMat(dat, smooth = smooth, smoother = F, points.col = "#3293f3BF", pch = 1)
}
else if (type == "fitted"){
dat <- cbind.data.frame(yobs = fit$y.obs, yhat = fit$fitted)
g = ggplot(dat, aes(x = yhat, y = yobs)) +
geom_point(shape = 1, alpha = 0.95, color = "#e70e4e", size = 2)
if (smooth){
g + geom_line(stat = "smooth", method = RobStatTM::lmrobdetMM, formula = y ~ splines::ns(x, df = 3),
alpha = 0.60, size = 1, se = F)
}
else{
g
}
}
}
#' print method for sdr objects
#'
#' @param fit model fit
#'
#' @return a printed summary of the model
#' @export
#' @method print sdr
#' @keywords internal
#'
print.sdr <- function(fit){
orangered <- crayon::make_style(rgb(1, 0.291, 0.018))
brightblue <- crayon::make_style(rgb(0.12, 0.76, 0.712))
if (attr(fit, "model") == "grpOLS" || attr(fit, "model") == "grpGLM"){
cat(orangered("\nBasis Vectors:"), "\n\n")
Matrix::printSpMatrix(Matrix::Matrix(round(fit$basis, 4), sparse = TRUE), zero.print = " ", digits = 3, col.names=TRUE)
}
else{
cat(orangered("\nBasis Vectors:"), "\n\n")
print(round(fit$basis, 4), digits = 4)
}
if (attr(fit, "model") == "grpOLS" || attr(fit, "model") == "grpGLM"){
bink <- crayon::make_style(rgb(0.57,0.39,1))
cat(bink("\nEstimated Regression Coefficients\n\n"))
ols = round(signif(as.vector(t(rowSums(as.matrix(fit$coef)))), 4), 3)
olsnames = abbreviate(rownames(fit$coef),method = "both.sides", named = FALSE, minlength = 5)
names(ols) = olsnames
print(ols, digits = 2)
}
else if (attr(fit, "model") == "ENV"){
bink <- crayon::make_style(rgb(0.57,0.39,1))
cat(bink("\nEstimated Regression Coefficients\n\n"))
if (is.vector(fit$coef) || ncol(fit$coef) == 1){
coefs <- as.vector(round(fit$coef, 4))
names(coefs) <- names(fit$coef)
print(coefs, digits = 3)
}
else{
coefs <- round(fit$coef, 4)
print(round(coefs, 4), digits = 3)
}
}
else if (attr(fit, "model") == "HENV"){
bink <- crayon::make_style(rgb(0.57,0.39,1))
cat(bink("\nEstimated Regression Coefficients\n\n"))
print(fit$coef, digits = 3)
ogreen <- crayon::make_style(rgb(0.714, 0.712, 0.099))
cat(ogreen("\nEstimated Group Means\n\n"))
print(fit$means, digits = 3)
}
else{
cat("\n\n", brightblue("Eigenvalues of Estimated Dimension Reduction Subspace:", "\n\n"))
print(fit$evalues)
}
}
#' Estimate the true number of dimensions for sdr models
#'
#' @description This function offers two BIC-type statistics for selecting the rank of an
#' sdr model: the ZMP variant (Zhu, Miao, & Peng, 2006) and the LAL variant
#' (Li, Artemiou, & Li, 2011). Note, however, that despite the BIC-type penalty these
#' statistics do not include the -2 multiplier, so the largest value is optimal rather
#' than the smallest, as with the usual BIC.
#'
#' @param fit the model fit
#' @param criterion one of "zmp" (the default) or "lal"
#' @param max.rank the maximum rank to consider. defaults to the number of predictors.
#' @param plot should the BIC-curve be plotted? defaults to TRUE.
#'
#' @return a list
#' @export
#'
#' @references
#' Zhu, L., Miao, B., & Peng, H. (2006). On Sliced Inverse Regression With High-Dimensional Covariates. Journal of the American Statistical Association, 101(474), 630–643. doi:10.1198/016214505000001285 \cr
#' \cr
#' Li, B., Artemiou, A., & Li, L. (2011). Principal support vector machines for linear and nonlinear sufficient dimension reduction. The Annals of Statistics, 39(6), 3182–3210. doi:10.1214/11-aos932
#'
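#' @examples
#' \dontrun{
#' ## Illustrative sketch only; assumes `fit` is a SIR/SAVE-type sdr model
#' ## fitted with a generous initial rank so that all eigenvalues are available.
#' out <- rankest.sdr(fit, criterion = "zmp")
#' out$rank.est # estimated dimension (largest criterion value)
#' out$bic.zmp # the full criterion curve
#' }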
rankest.sdr <- function(fit, criterion = c("zmp", "lal"), max.rank = ncol(fit$mf[,-1]), plot = T){
if (attr(fit, "model") == "ENV" || attr(fit, "model") == "HENV"){
return(cat(crayon::red("Please use the rankest.env or rankest.henv function.")))
}
if (attr(fit, "model") == "grpOLS"){
return(cat(crayon::red("Please use the rankest.grpols function.")))
}
criterion <- match.arg(criterion)
maximizer=function(x,y) return(x[order(y)[length(y)]])
n = length(fit$y.obs);
p = ncol(fit$mf[,-1])
evals=fit$evalues
bic=vector()
if(criterion=="lal"){
for(k in 0:max.rank){
if(k==0) {
bic=c(bic,0)
}
else if (k != 0){
bic=c(bic,sum(evals[1:k])-(2*evals[1])*n^(-1/2)*(log(n))^(1/2)*k)
names(bic) <- paste0("d=", 0:(length(bic)-1))
}
}
if (plot){
size = ifelse(length(bic) <= 20, 1, 0.75)
plot(bic, xlab = "rank", ylab = "lal", xaxt = "n", type = "b", cex = size, lwd = 1.125)
axis(1, labels = 0:(length(bic)-1), at = 1:length(bic))
points(x = which.max(bic), y = max(bic), col = "red", pch = 19, cex= size)
}
return(list(rank.est = maximizer(0:p,bic), bic.lal = bic))
}
if(criterion=="zmp"){
for(k in 0:(max.rank-1)){
c1=(evals[1]/3)*(0.5* log(n)+0.1*n^(1/3))/(2*n)
c2=k*(2*p-k+1)
ll=sum(log(evals[(k+1):p]+1)-evals[(k+1):p])
bic=c(bic,ll-c1*c2)
}
bic=c(bic,-c1*p*(2*p-p+1))
names(bic) <- paste0("d=", 0:(length(bic)-1))
if (plot){
size = ifelse(length(bic) <= 20, 1, 0.75)
plot(bic, xlab = "rank", ylab = "zmp", xaxt = "n", type = "b", cex = size, lwd = 1.125)
axis(1, labels = 0:(length(bic)-1), at = 1:length(bic))
points(x = which.max(bic), y = max(bic), col = "red", pch = 19, cex = size)
}
return(list(rank.est = maximizer(0:p, bic), bic.zmp = bic))
}
}
# Sequential chi-square tests of dimension for sdr fits; requires the fit to
# contain the eigenvalues and the number of slices.
hyptest.sdr <- function(object, numdir = length(object$evalues), ...) {
ev<-sort(object$evalues)
p<-length(ev)
n<-length(object$y.obs)
L<-df<-p.value<-0
nt <- min(p,numdir)
for (i in 0:(nt-1)){
L[i+1]<-n*(p-i)*mean(ev[seq(1,p-i)])
df[i+1]<-(p-i)*sum(object$slices-i-1)
p.value[i+1]<-1-pchisq(L[i+1], df[i+1])
}
out<-data.frame(cbind(round(L,5),df,p.value))
rr<-paste(0:(nt-1),"R vs >= ",1:nt,"R",sep="")
dimnames(out)<-list(rr,c("L", "df", "p.value"))
return(out)
}
#' Estimate dimensions for an ENV model
#'
#' @param formula model formula
#' @param data data frame
#' @param plot whether to plot. defaults to TRUE.
#'
#' @return a list and plot
#' @export
#'
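#' @examples
#' \dontrun{
#' ## Illustrative sketch only; `mydata`, the responses y1/y2, and the
#' ## predictors x1-x3 are placeholders for your own data.
#' rankest.env(cbind(y1, y2) ~ x1 + x2 + x3, data = mydata)
#' }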
rankest.env <- function(formula, data, plot = T){
X = model.matrix(formula, data)[,-1]
Y = model.frame(formula, data)
Y = model.response(Y)
if (is.vector(Y)){r = 1;n=length(Y)} else{a <- dim(Y);n <- a[1];r <- a[2]}
p = ncol(X)
ll = sapply(1:p, function(i) ENV(formula, data, rank = i, se = F)$loglik)
npara.seq <- r * (1:p) + r + p - 1 + p * (p + 1)/2 + r * (r + 1)/2
bic <- -2 * ll + log(n) * npara.seq
if (plot){
size = ifelse(length(bic) <= 20, 1, 0.75)
plot(bic, xlab = "rank", ylab = "bic", xaxt = "n", type = "b", cex = size, lwd = 1.125)
axis(1, labels = 1:length(bic), at = 1:length(bic))
points(x = which.min(bic), y = min(bic), col = "red", pch = 19, cex = size)
}
return(list(rank.est = which.min(bic), bic = bic))
}
#' Estimate dimensions for a HENV model
#'
#' @param formula model formula
#' @param data data frame
#' @param plot whether to plot. defaults to TRUE.
#'
#' @return a list and plot
#' @export
#'
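#' @examples
#' \dontrun{
#' ## Illustrative sketch only; `mydata` is a placeholder. For HENV the
#' ## right-hand side is a grouping variable and the response is multivariate.
#' rankest.henv(cbind(y1, y2, y3) ~ group, data = mydata)
#' }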
rankest.henv <- function(formula, data, plot = T){
X = model.matrix(formula, data)[,-1]
Y = model.frame(formula, data)
Y = model.response(Y)
XX <- as.factor(X)
a <- dim(Y)
n <- a[1]
r <- a[2]
p <- nlevels(XX)
paranum = loglik.seq <- c()
for (u in 1:r) {
loglik.seq[u] <- HENV(formula, data, rank = u, se = FALSE)$loglik
paranum[u] <- (r - u) + u * (r - u + p) + p * u *
(u + 1)/2 + (r - u) * (r - u + 1)/2
}
bic <- -2 * loglik.seq + log(n) * paranum
if (plot){
size = ifelse(length(bic) <= 20, 1, 0.75)
plot(bic, xlab = "rank", ylab = "bic", xaxt = "n", type = "b", cex = size, lwd = 1.125)
axis(1, labels = 1:(length(bic)), at = 1:length(bic))
points(x = which.min(bic), y = min(bic), col = "red", pch = 19, cex = size)
}
return(list(rank.est = which.min(bic), bic = bic))
}
#' Estimate dimensions for a group OLS, Qreg, or Theil-Sen model
#'
#' @param X the covariates
#' @param Y the response
#' @param idx the group assignment for each covariate
#' @param method one of "ols", "quantile", or "theil-sen".
#' @param q a quantile if method is "quantile". defaults to 0.50.
#' @return a list
#' @export
#'
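#' @examples
#' \dontrun{
#' ## Illustrative sketch only; X, Y, and the group labels are simulated here
#' ## purely to show the calling convention.
#' X <- matrix(rnorm(200 * 6), 200, 6)
#' Y <- X[, 1] - X[, 2] + rnorm(200, sd = 0.5)
#' idx <- c(1, 1, 2, 2, 3, 3) # two covariates per group
#' rankest.grpols(X, Y, idx, method = "ols")
#' }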
rankest.grpols <-function(X, Y, idx, method = c("ols","quantile","theil-sen"), q = 0.5){
method <- match.arg(method)
method <- switch(method,
"ols" = grpOLS,
"quantile" = function(X,Y,idx,ranks){grpQreg(X,Y,idx,ranks,q)},
"theil-sen" = grpTheilSen
)
n<-nrow(X)
ngrp<-length(unique(idx))
dsw<-rep(1, ngrp)
d<-rep(0, ngrp)
mu = apply(X, 2, mean)
sig = var(X)
signrt = matpower(sig, -1/2)
Z <- t(t(X) - mu) %*% signrt
Y <- (Y-mean(Y))/sd(Y)
out.d <- method(Z,Y,idx,dsw)
M <- t(as.matrix(out.d$coef)) %*% (as.matrix(out.d$coef))
vecM <- diag(M)
orderM <- order(vecM, decreasing = TRUE)
crit <- -1/(n^(1/ngrp)*log(n))
for (i in 1:length(vecM)){
crit.d <- sum(vecM[orderM][1:i]) - (i+1)/(n^(1/ngrp)*log(n))
crit<-c(crit, crit.d)
}
if (which(crit == max(crit)) > 1){
d[orderM[1:(which(crit == max(crit))-1)]] <- 1
}
ans<-list(rankest=d, bic=crit)
return(ans)
}
#' Estimate dimensions for a (robust) group GLM model
#'
#' @param X the covariates
#' @param Y the response
#' @param idx the group assignment for each covariate
#' @param family a glm family compatible with the chosen method.
#' @param method one of "glm" or "rob".
#' @return a list
#' @export
#'
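#' @examples
#' \dontrun{
#' ## Illustrative sketch only, mirroring the group OLS example above with a
#' ## binary response and the binomial family.
#' X <- matrix(rnorm(200 * 6), 200, 6)
#' Y <- rbinom(200, 1, plogis(X[, 1] - X[, 2]))
#' idx <- c(1, 1, 2, 2, 3, 3)
#' rankest.grpglm(X, Y, idx, family = binomial(), method = "glm")
#' }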
rankest.grpglm <-function(X, Y, idx, family = gaussian(), method = c("glm","rob")){
method <- match.arg(method)
method <- switch(method, "glm" = grpGLM, "rob" = grpRobGLM)
n<-nrow(X)
ngrp<-length(unique(idx))
dsw<-rep(1, ngrp)
d<-rep(0, ngrp)
mu = apply(X, 2, mean)
sig = var(X)
signrt = matpower(sig, -1/2)
Z <- t(t(X) - mu) %*% signrt
out.d <- method(Z,Y,idx,dsw,family=family)
M <- t(as.matrix(out.d$coef)) %*% (as.matrix(out.d$coef))
vecM <- diag(M)
orderM <- order(vecM, decreasing = TRUE)
crit <- -1/(n^(1/ngrp)*log(n))
for (i in 1:length(vecM)){
crit.d <- sum(vecM[orderM][1:i]) - (i+1)/(n^(1/ngrp)*log(n))
crit<-c(crit, crit.d)
}
if (which(crit == max(crit)) > 1){
d[orderM[1:(which(crit == max(crit))-1)]] <- 1
}
ans<-list(rankest=d, bic=crit)
return(ans)
}
#' Discretize a numeric variable for SIR and SAVE
#'
#'
#' @description returns integers indicating bin assignments for a continuous random variable "x"
#' based on the quantiles of "x".
#'
#' @param x variable
#' @param q number of cuts
#'
#' @return a numeric vector
#' @export
#'
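#' @examples
#' ## A small illustration: assign 200 simulated values to 4 quantile-based bins.
#' x <- rnorm(200)
#' table(discretize(x, q = 4))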
discretize <- function(x, q=4){
na.rm=TRUE
q <- seq(0.025,0.975,length.out=q+1)
quant <- hdquantile(x, probs = q)
quant <- c(min(x), quant[-c(1, length(quant))], max(x))
dups <- duplicated(quant)
if(any(dups)){
flag <- x %in% unique(quant[dups])
retval <- ifelse(flag,paste("[", as.character(x),"]", sep=''), NA)
uniqs <- unique(quant)
reposition <- function(cut){
flag <- x>=cut
if(sum(flag, na.rm=na.rm)==0)
return(cut)
else
return(min(x[flag], na.rm=na.rm))
}
newquant <- sapply(uniqs, reposition)
retval[!flag] <- as.character(cut(x[!flag], breaks=newquant,include.lowest=TRUE))
levs <- unique(retval[order(x)])
retval <- factor(retval, levels=levs)
mkpairs <- function(x) sapply(x, function(y) if(length(y)==2) y[c(2,2)] else y[2:3])
pairs <- mkpairs(strsplit(levs, '[^0-9+\\.\\-]+'))
rownames(pairs) <- c("lower.bound","upper.bound")
colnames(pairs) <- levs
closed.lower <- rep(F,ncol(pairs))
closed.upper <- rep(T,ncol(pairs))
closed.lower[1] <- TRUE
for(i in 2:ncol(pairs)){if(pairs[1,i]==pairs[1,i-1] && pairs[1,i]==pairs[2,i-1]) closed.lower[i] <- FALSE}
for(i in 1:(ncol(pairs)-1)){if(pairs[2,i]==pairs[1,i+1] && pairs[2,i]==pairs[2,i+1]) closed.upper[i] <- FALSE}
levs <- ifelse(pairs[1,]==pairs[2,], pairs[1,],paste(ifelse(closed.lower,"[","("), pairs[1,],",",pairs[2,],ifelse(closed.upper,"]",")"),sep=''))
levels(retval) <- levs
}
else
retval <- cut(x, quant, include.lowest=TRUE)
retval <- as.numeric(retval)
return(retval)
}
#' Generate a Basis Matrix for a Gaussian Process
#'
#' @description The function has a usage similar to \code{\link{bs}} and \code{\link{ns}} in the \pkg{splines} package.
#' However, this function utilizes the \code{\link[=smooth.construct.gp.smooth.spec]{smooth constructor}} in the \pkg{mgcv} package
#' to construct a Gaussian process basis matrix.
#'
#' @param x the predictor variable. Missing values are allowed.
#' @param df degrees of freedom of the basis matrix. The default is 5. The minimum allowed is 3.
#' @param knots breakpoints that define the spline. by default these are automatically selected,
#' and not defined by the user.
#' @param intercept If TRUE, an intercept is included in the basis matrix.
#' @param fx If TRUE, it removes the penalization.
#' @param S penalty matrix, defined internally if NULL (the default).
#' @param m a numeric vector that selects the covariance function and sets the scale
#' parameter and, if applicable, the shape parameter. m[1], an integer between 1 and 5,
#' selects the correlation function: spherical, power exponential, or Matern with
#' kappa = 1.5, 2.5, or 3.5, respectively. m[2], if present, specifies the scale parameter,
#' with a non-positive or absent value indicating that the Kammann and Wand estimate
#' should be used. m[3] can be used to specify the shape parameter for the power
#' exponential function. See \code{\link{smooth.construct.gp.smooth.spec}} for more details.
#' The default here is c(3, -1, 1.4), which indicates the power exponential covariance
#' function with the Kammann and Wand scale estimate and a shape parameter of 1.4.
#'
#' @return A matrix with class \code{"gp"} and class \code{"basis"}.
#' @export
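#' @examples
#' ## A minimal sketch: build a Gaussian process basis for 100 simulated points
#' ## using the default settings (m = c(3, -1, 1.4)); requires the mgcv package.
#' x <- sort(runif(100))
#' B <- gpSpline(x, df = 5)
#' dim(B) # basis dimensions
#' attr(B, "knots") # automatically selected knots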
gpSpline <- function(x, df=5, knots=NULL, intercept=FALSE, fx= FALSE, S=NULL, m = c(3,-1,1.4)){
#
################################################################################
#
nx <- names(x)
x <- as.vector(x)
nax <- is.na(x)
if(nas <- any(nax)) x <- x[!nax]
#
# DEFINE KNOTS AND DF
if(is.null(knots)) {
if(df<3) stop("'df' must be >=3")
knots <- try(hdquantile(unique(x), seq(0,1,length=df+!intercept)), silent = TRUE)
if (inherits(knots, "try-error")){knots <- quantile(unique(x), seq(0,1,length=df+!intercept))}
} else df <- length(knots) - !intercept
#
# CHECK NUMBER OF UNIQUE x VALUES
# (ADD SOME IF NEEDED TO PREVENT ERROR IN smooth.construct.cr.smooth.spec)
if(add <- length(unique(x)) < length(knots))
x <- c(seq(min(knots), max(knots), length=length(knots)), x)
#
# TRANSFORMATION: CALL FUNCTION FROM MGCV
oo <- mgcv::smooth.construct.gp.smooth.spec(mgcv::s(x, bs="gp", k=df+!intercept, m = m),
data=list(x=x), knots=list(x=knots))
basis <- oo$X
if(!intercept) basis <- basis[,-1L,drop=FALSE]
#
# REMOVE ADDED VALUES AND RE-INSERT MISSING
if(add) basis <- basis[-seq(length(knots)),,drop=FALSE]
if(nas) {
nmat <- matrix(NA, length(nax), ncol(basis))
nmat[!nax,] <- basis
basis <- nmat
}
#
# RELATED PENALTY MATRIX
if(fx) {
S <- NULL
} else if(is.null(S)) {
S <- oo$S[[1]]
S <- (S+t(S))/2
if(!intercept) S <- S[-1L,-1L,drop=FALSE]
} else if(any(dim(S)!=ncol(basis))) stop("dimensions of 'S' not compatible")
#
# NAMES AND ATTRIBUTES
dimnames(basis) <- list(nx, seq(ncol(basis)))
attributes(basis) <- c(attributes(basis), list(df=df,
knots=knots,
intercept=intercept,
fx=fx,
S=S))
class(basis) <- c("matrix", "gp","basis")
return(basis)
}
#' Generate a Basis Matrix for Penalized Cubic Regression Splines
#'
#' @description The function has a usage similar to \code{\link{bs}} and \code{\link{ns}} in the \pkg{splines} package.
#' However, this function utilizes the \code{\link[=smooth.construct.cr.smooth.spec]{smooth constructor}} in the \pkg{mgcv} package
#' to construct cubic regression splines with an optional penalty.
#'
#' @param x the predictor variable. Missing values are allowed.
#' @param df degrees of freedom of the basis matrix. The default is 5. The minimum allowed is 3.
#' @param knots breakpoints that define the spline. by default these are automatically selected,
#' and not defined by the user.
#' @param intercept If TRUE, an intercept is included in the basis matrix.
#' @param fx If TRUE, it removes the penalization.
#' @param S penalty matrix, defined internally if NULL (the default).
#'
#' @return A matrix with class \code{"cr"} and class \code{"basis"}.
#' @export
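#' @examples
#' ## A minimal sketch: build a penalized cubic regression spline basis for 100
#' ## simulated points and inspect the associated penalty matrix; requires mgcv.
#' x <- sort(runif(100))
#' B <- crSpline(x, df = 5)
#' dim(B) # basis dimensions
#' dim(attr(B, "S")) # dimensions of the penalty matrix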
crSpline <- function(x, df=5, knots=NULL, intercept=FALSE, fx= FALSE, S=NULL) {
#
################################################################################
#
nx <- names(x)
x <- as.vector(x)
nax <- is.na(x)
if(nas <- any(nax)) x <- x[!nax]
#
# DEFINE KNOTS AND DF
if(is.null(knots)) {
if(df<3) stop("'df' must be >=3")
knots <- try(hdquantile(unique(x), seq(0,1,length=df+!intercept)), silent = TRUE)
if (inherits(knots, "try-error")){knots <- quantile(unique(x), seq(0,1,length=df+!intercept))}
} else df <- length(knots) - !intercept
#
# CHECK NUMBER OF UNIQUE x VALUES
# (ADD SOME IF NEEDED TO PREVENT ERROR IN smooth.construct.cr.smooth.spec)
if(add <- length(unique(x)) < length(knots))
x <- c(seq(min(knots), max(knots), length=length(knots)), x)
#
# TRANSFORMATION: CALL FUNCTION FROM MGCV
oo <- mgcv::smooth.construct.cr.smooth.spec(mgcv::s(x, bs="cr", k=df+!intercept),
data=list(x=x), knots=list(x=knots))
basis <- oo$X
if(!intercept) basis <- basis[,-1L,drop=FALSE]
#
# REMOVE ADDED VALUES AND RE-INSERT MISSING
if(add) basis <- basis[-seq(length(knots)),,drop=FALSE]
if(nas) {
nmat <- matrix(NA, length(nax), ncol(basis))
nmat[!nax,] <- basis
basis <- nmat
}
#
# RELATED PENALTY MATRIX
if(fx) {
S <- NULL
} else if(is.null(S)) {
S <- oo$S[[1]]
S <- (S+t(S))/2
if(!intercept) S <- S[-1L,-1L,drop=FALSE]
} else if(any(dim(S)!=ncol(basis))) stop("dimensions of 'S' not compatible")
#
# NAMES AND ATTRIBUTES
dimnames(basis) <- list(nx, seq(ncol(basis)))
attributes(basis) <- c(attributes(basis), list(df=df, knots=knots,
intercept=intercept, fx=fx, S=S))
#
class(basis) <- c("matrix", "cr", "basis")
#
return(basis)
}
#' Mean Orthogonal Projection (Combining Dimension Reduction Subspaces)
#'
#' @param p a list of symmetric projection matrices.
#' @param method one of "const" (constant weights) or "sqrtinv" (sqrt(1/rank) weights).
#'
#' @return a list
#' @export
#' @examples
#' fit1 = SIR(bmi ~ ., diabetes[,-2], rank = 1)
#' fit2 = SIR(bmi ~ ., diabetes[,-2], rank = 3)
#' fit3 = SIR(bmi ~ ., diabetes[,-2], rank = 5)
#' fit4 = SIR(bmi ~ ., diabetes[,-2], rank = 9)
#' MOP(p = list(fit1$sirmat, fit2$sirmat, fit3$sirmat, fit4$sirmat), method = "sqrtinv")
#'
#' @references
#' Liski E., Nordhausen K., Oja H., and Ruiz-Gazen A. (2016), Combining Linear Dimension Reduction Subspaces. In: Recent Advances in
#' Robust Statistics: Theory and Applications. doi:10.1007/978-81-322-3643-6_7. \cr
#' \cr
#' Liski, E., Nordhausen, K., Oja, H., & Ruiz-Gazen, A. (2012). Averaging orthogonal projectors. https://arxiv.org/pdf/1210.2575.pdf
#'
MOP <-function(p, method = c("const", "sqrtinv")){
method <- match.arg(method)
if (method=="const") out = .mopwtconstant(p)
else out = .mopwtsqrtinv(p)
out
}
# subfunction MOP.constant
.mopwtconstant <- function(x){
n.mat <- length(x)
p <- dim(x[[1]])[1]
k <- sapply(x, function(i) sum(diag(i)))
if(any(k==0)) {
if (all(k==0)){
return(cat(crayon::red("All projections are degenerate. The problem is severely ill-conditioned.")))
}
else{
x <- x[-which(k==0)]
k <- k[-which(k==0)]
}
}
n.mat.new <- length(x)
P.matbar <- Reduce("+",x) / n.mat.new
E.P.matbar <- eigen(P.matbar)
# Estimate of k
k.estim <- sum(E.P.matbar$values >= 0.5)
O.mat <- E.P.matbar$vectors[,1:k.estim]
# MOP
P.mathat <- tcrossprod(O.mat)
list(sdrmat = P.mathat, evectors = O.mat, evalues = E.P.matbar$values[1:k.estim], rank.est = k.estim)
}
# subfunction MOP.sq.inverse
.mopwtsqrtinv <- function(x){
n.mat <- length(x)
p <- dim(x[[1]])[1]
# Individual k-estimates
k <- sapply(x, function(i) sum(diag(i)))
# Check for projectors with rank 0.
if(any(k==0)) {
if (all(k==0)){
return(cat(crayon::red("All projections are degenerate. The problem is severely ill-conditioned.")))
}
else{
# drop the degenerate (rank 0) projectors and their traces
x <- x[-which(k==0)]
k <- k[-which(k==0)]
}
}
n.mat.new <- length(x)
x.weighted <- mapply("/", x, sqrt(k), SIMPLIFY=FALSE)
P.star <- Reduce("+", x.weighted) / n.mat.new
EVD.P.star <- eigen(P.star)
f.criterion <- cumsum(EVD.P.star$values) / sqrt(1:p)
# Estimate of k
k.estim <- which.max(f.criterion)
O.mat <- EVD.P.star$vectors[,1:k.estim]
# MOP
P.mathat <- tcrossprod(O.mat)
list(sdrmat = P.mathat, evectors = O.mat, evalues = EVD.P.star$values[1:k.estim], k = k.estim)
}