#' @importFrom optimr optimr
#' @importFrom shapes procOPA
#' @importFrom stats runif
#' @importFrom stats aggregate
#' @importFrom stats quantile
#' @export
#'
#' @title
#' Fitting a Binary Logistic Biplot using bootstrap methodology
#' @description
#' This function estimates the vector \eqn{\mu}, matrix A and matrix B using the optimization algorithm chosen by the user and applies a bootstrap methodology to determine the confidence ellipses.
#' @return
#' Coordenates of the matrix A and B in resamples and Biplot
#' @details
#' Fitting when sup=TRUE ... whereas sup=FALSE ...
#' @author Giovany Babativa <gbabativam@@gmail.com>
#' @param x Binary matrix.
#' @param k Dimensions number. By default \code{k = 2}.
#' @param L Penalization parameter. By default \code{L = 0}.
#' @param method Method to be used to estimate the parameters. By default \code{ method="CG"}
#' @param type For the conjugate-gradients method. Takes value 1 for the Fletcher–Reeves update, 2 for Polak–Ribiere and 3 for Beale–Sorenson.
#' @param plot Plot the Bootstrap Logistic Biplot.
#' @param sup Boolean, if TRUE, rows that are not selected in each resample are treated as supplementary individuals. See details.
#' @param ellipses Draw confidence ellipses. By default is FALSE.
#' @param maxit The maximum number of iterations. Defaults to 100 for the gradient methods, and 500 without gradient.
#' @param resamples Number of iterations in the bootstrap process. By default \code{100}.
#' @param conf Level confidence in the ellipses. By default \code{conf=0.90}
#' @param col.ind Color for the rows.
#'
#' @references
#' John C. Nash (2011). Unifying Optimization Algorithms to Aid Software System Users:optimx for R. Journal of Statistical Software. 43(9). 1--14.
#'
#' John C. Nash (2014). On Best Practice Optimization Methods in R. Journal of Statistical Software. 60(2). 1--14.
#'
#' Milan, L., & Whittaker, J. (1995). Application of the parametric bootstrap to models that incorporate a singular value decomposition. Applied Statistics, 44, 31–49.
#'
#' Vicente-Villardon, J.L. and Galindo, M. Purificacion (2006), \emph{Multiple Correspondence Analysis and related Methods. Chapter: Logistic Biplots}. Chapman-Hall
#' @seealso \code{\link{plotBLB}, \link{performanceBLB}}
#' @examples
#' \donttest{
#' data("Methylation")
#' set.seed(02052020)
#' out.sup <- bootBLB(x = Methylation, ellipses = FALSE)
#' out <- bootBLB(x = Methylation, sup = FALSE, ellipses = TRUE)
#' }
bootBLB <- function(x, k=2, L=0, method="CG", type = 1, plot=TRUE, sup=TRUE,
ellipses=FALSE, maxit=NULL, resamples = 100, conf = 0.9, col.ind = NULL){
n=nrow(x); p=ncol(x); aik=n*k; bjk=p*(k+1)
dTheta = aik + bjk; s=k+1
if (!requireNamespace("dplyr", quietly = TRUE)) {
stop("Package \"dplyr\" needed for this function to work. Please install it.",
call. = FALSE)
}
if(method == "CG"){
res = optimr(par=runif(dTheta), fn = J.BipLog.BIN, gr = Grad.BipLog.BIN,
xt=x, k = k, lambda = L, method = method, control = list(type = type))
}else{
res = optimr(par=runif(dTheta), fn = J.BipLog.BIN, gr = Grad.BipLog.BIN,
xt=x, k = k, lambda = L, method = method)
}
par=res$par
A = data.frame(matrix(res$par[1:aik], n, k))
colnames(A) = c(paste0("Dim", seq(1,k,1)))
rownames(A) = rownames(x)
B = data.frame(matrix(par[(aik + 1):dTheta], p, k+1))
rownames(B) = colnames(x)
colnames(B) = c(paste0("b", seq(0,k,1)))
Ai = A |> dplyr::mutate(rowId = dplyr::row_number())
Bi = as.matrix(B[,1:s])
listA = list()
listB = list()
indica = rep(1:nrow(x))
xtemp <- x |> `rownames<-`(seq_len(nrow(x)))
for (i in 1:resamples) {
sample_ind = as.matrix(sample(indica,replace = T))
xb = xtemp[sample_ind,]
n=nrow(xb); p=ncol(xb);
aik=n*k; bjk=p*(k+1)
ran1 = as.matrix(runif(aik))
sample_ind2=rbind(sample_ind,sample_ind+n)
par1 = ran1[sample_ind2,]
par2 = runif(bjk)
param = c(par1, par2)
if(method == "CG"){
res.b = optimr(par=param, fn = J.BipLog.BIN, gr = Grad.BipLog.BIN,
xt=xb, k = k, lambda = L, method = method, control = list(type = type))
}else{
res.b = optimr(par=param, fn = J.BipLog.BIN, gr = Grad.BipLog.BIN,
xt=xb, k = k, lambda = L, method = method)
}
Bb = matrix(res.b$par[(aik + 1):dTheta], p, k+1)
Ab = matrix(res.b$par[1:aik], n, k)
Ab_j = data.frame(Ab, rowId=sample_ind)
Ab_j = dplyr::distinct(Ab_j, rowId, .keep_all=T)
if(sup){
Asup = dplyr::anti_join(Ai, Ab_j, by="rowId")
n_sup = nrow(Asup)
pars = runif(dim(Asup)[1] * (dim(Asup)[2]-1))
xsup = xtemp[Asup$rowId,]
res.sup = optimr(par=pars, fn = Indsup.BIN, gr = Indsup.GradBIN,
xs=xsup, B=Bb, k = k, lambda = L, method = method)
Asup = data.frame(matrix(res.sup$par, n_sup, k), rowId = Asup[,ncol(Asup)])
Abr = rbind(Ab_j, Asup) |>
dplyr::arrange(rowId)
Ab = Abr |> dplyr::select(-rowId)
outA = procOPA(as.matrix(A), as.matrix(Ab), scale = TRUE, reflect = TRUE)
Arot = outA$Bhat
R = outA$R
SCR_A = outA$OSS
}else{
Abr = Ab_j |> dplyr::arrange(rowId)
Ai_b = dplyr::inner_join(Ai, Abr, by="rowId") |>
dplyr::select(paste0("Dim", seq(1,k,1)))
Ab = Abr |> dplyr::select(-rowId)
outA = procOPA(as.matrix(Ai_b), as.matrix(Ab), scale = TRUE, reflect = TRUE)
Arot = outA$Bhat
R = outA$R
SCR_A = outA$OSS
}
Brot = Bb[,2:s] %*% R
Arot = data.frame(Arot, ind=Abr$rowId)
colnames(Arot)=c(paste0("Dim", seq(1,k,1)), "ind")
rownames(Arot)=rownames(x[Abr$rowId,])
Brot = data.frame(Bb[,1], Brot, param = seq(1,p,1))
colnames(Brot)=c(paste0("b", seq(0,k,1)), "param")
rownames(Brot)=rownames(B)
Arot$resample = i; Brot$resample = i
listA[[i]] <- Arot
listB[[i]] <- Brot
}
ResultA <- dplyr::bind_rows(listA)
ResultB <- dplyr::bind_rows(listB)
rows <- rownames(x)
cols <- colnames(x)
EspA <- aggregate(.~ind, ResultA, mean)
colnames(EspA) <- c("ind", paste0("Dimb", seq(1,k,1)), "resampleb")
EspB <- aggregate(.~param, ResultB, mean)
colnames(EspB) <- c("param", paste0("bb", seq(0,k,1)), "resampleb")
Ahat <- EspA |> dplyr::select(dplyr::starts_with("Dim"))
Bhat <- EspB |> dplyr::select(dplyr::starts_with("bb"))
if(ellipses){
CentBootA <- dplyr::left_join(ResultA, EspA, by="ind")|>
dplyr::mutate(Dim1c = Dim1-Dimb1, Dim2c=Dim2-Dimb2) |>
dplyr::select(ind, resample, Dim1c, Dim2c)
CentBootB <- dplyr::left_join(ResultB, EspB, by="param") |>
dplyr::mutate(bj0c = b0-bb0, bj1c = b1-bb1, bj2c=b2-bb2) |>
dplyr::select(param, resample, bj0c, bj1c, bj2c)
n = nrow(EspA); p = nrow(EspB)
datalist = list()
q <- conf
for (a in 1:n){
temp <- dplyr::filter(CentBootA, ind == a) |>
dplyr::select(Dim1c, Dim2c)
sol <- svd(temp)
k <- sqrt(rowSums(sol$u * sol$u))
r <- quantile(k, q)
theta <- seq(0, 2 * pi, length = 500)
z <- rbind(r * cos(theta), r * sin(theta))
esp <- dplyr::filter(EspA, ind == a) |>
dplyr::select(-ind, -resampleb)
v <- as.data.frame(as.matrix(rep(1, 500))%*% as.matrix(esp) + r*t(z)%*%diag(sol$d)%*%t(sol$v))
v$ind <- a
datalist[[a]] <- v
}
Ellip <- dplyr::bind_rows(datalist)
}
LogP = as.matrix(cbind(rep(1,nrow(x)),Ahat))%*%t(Bhat)
P = exp(LogP)/(1+exp(LogP))
Pr = ifelse(P>=0.5, 1, 0)
PCC = ifelse((x==1 & Pr==1) | (x==0 & Pr==0), 1, 0)
ones <- apply(x, 2, sum)
zeros <- n - ones
confusion <- data.frame( Sensitivy = round(100*apply((Pr == 1) & (x == 1), 2, sum)/ ones, 1),
Specificity = round(100*apply((Pr == 0) & (x == 0), 2, sum)/ zeros, 1),
Global = round(100*colSums(PCC)/nrow(PCC), 1))
rownames(Ahat) <- rownames(x)
rownames(Bhat) <- colnames(x)
if(method == "CG" & type == 1) method <- "CG: Fletcher--Reeves"
if(method == "CG" & type == 2) method <- "CG: Polak--Ribiere"
if(method == "CG" & type == 3) method <- "CG: Beale--Sorenson"
if(method == "Rcgmin") method <- "CG: Dai--Yuan"
if(ellipses){
out <- list(Ahat = Ahat, Bhat = Bhat, BootA=ResultA, BootB=ResultB, pred= Pr, fit = confusion, rows=rows, cols = cols, method=method, Ellip=Ellip)
}else{
out <- list(Ahat = Ahat, Bhat = Bhat, BootA=ResultA, BootB=ResultB, pred= Pr, fit = confusion, rows=rows, cols = cols, method=method)
}
if (plot & ncol(Ahat)>1) {
print(plotBLB(x=out, ellipses = ellipses, endsegm = 0.95, col.ind = col.ind))
}
class(out) <- c("BiplotML", "list")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.