Nothing
#' Run the whole archetypoid analysis with the functional multivariate robust Frobenius norm
#'
#' @aliases do_fada_multiv_robust
#'
#' @description
#' This function executes the entire procedure involved in the functional archetypoid
#' analysis. Firstly, the initial vector of archetypoids is obtained using the
#' functional archetypal algorithm and finally, the optimal vector of archetypoids is
#' returned.
#'
#' @usage
#' do_fada_multiv_robust(subset, numArchoid, numRep, huge, prob, compare = FALSE, PM,
#' method = "adjbox")
#'
#' @param subset Data to obtain archetypes. In fadalara this is a subset of the
#' entire data frame.
#' @param numArchoid Number of archetypes/archetypoids.
#' @param numRep For each \code{numArch}, run the archetype algorithm \code{numRep} times.
#' @param huge Penalization to solve the convex least squares problem,
#' see \code{\link[Anthropometry]{archetypoids}}.
#' @param prob Probability with values in [0,1].
#' @param compare Boolean argument to compute the non-robust residual sum of squares
#' to compare these results with the ones provided by \code{\link{do_fada}}.
#' @param PM Penalty matrix obtained with \code{\link[fda]{eval.penalty}}.
#' @param method Method to compute the outliers. So far the only option allowed is
#' 'adjbox' for using adjusted boxplots for skewed distributions. The use of
#' tolerance intervals might also be explored in the future for the multivariate case.
#'
#' @return
#' A list with the following elements:
#' \itemize{
#' \item cases: Final vector of archetypoids.
#' \item alphas: Alpha coefficients for the final vector of archetypoids.
#' \item rss: Residual sum of squares corresponding to the final vector of archetypoids.
#' \item rss_non_rob: If \code{compare=TRUE}, this is the residual sum of squares using
#' the non-robust Frobenius norm. Otherwise, NULL.
#' \item resid Vector of residuals.
#' \item outliers: Outliers.
#' \item local_rel_imp Matrix with the local (casewise) relative importance
#' (in percentage) of each variable for the outlier identification. Only for
#' the multivariate case. It is relative to the outlier observation itself.
#' The other observations are not considered for computing this importance.
#' This procedure works because the functional variables are in the same scale,
#' after standardizing. Otherwise, it couldn't be interpreted like that.
#' \item margi_rel_imp Matrix with the marginal relative importance of each variable
#' (in percentage) for the outlier identification. Only for the multivariate case.
#' In this case, the other points are considered, since the value of the outlier
#' observation is compared with the remaining points.
#' }
#'
#' @author
#' Guillermo Vinue, Irene Epifanio
#'
#' @seealso
#' \code{\link{stepArchetypesRawData_funct_multiv_robust}},
#' \code{\link{archetypoids_funct_multiv_robust}}
#'
#' @references
#' Moliner, J. and Epifanio, I., Robust multivariate and functional archetypal analysis
#' with application to financial time series analysis, 2019.
#' \emph{Physica A: Statistical Mechanics and its Applications} \bold{519}, 195-208.
#' \url{https://doi.org/10.1016/j.physa.2018.12.036}
#'
#' @examples
#' \dontrun{
#' library(fda)
#' ?growth
#' str(growth)
#' hgtm <- growth$hgtm
#' hgtf <- growth$hgtf[,1:39]
#'
#' # Create array:
#' nvars <- 2
#' data.array <- array(0, dim = c(dim(hgtm), nvars))
#' data.array[,,1] <- as.matrix(hgtm)
#' data.array[,,2] <- as.matrix(hgtf)
#' rownames(data.array) <- 1:nrow(hgtm)
#' colnames(data.array) <- colnames(hgtm)
#' str(data.array)
#'
#' # Create basis:
#' nbasis <- 10
#' basis_fd <- create.bspline.basis(c(1,nrow(hgtm)), nbasis)
#' PM <- eval.penalty(basis_fd)
#' # Make fd object:
#' temp_points <- 1:nrow(hgtm)
#' temp_fd <- Data2fd(argvals = temp_points, y = data.array, basisobj = basis_fd)
#'
#' X <- array(0, dim = c(dim(t(temp_fd$coefs[,,1])), nvars))
#' X[,,1] <- t(temp_fd$coef[,,1])
#' X[,,2] <- t(temp_fd$coef[,,2])
#'
#' # Standardize the variables:
#' Xs <- X
#' Xs[,,1] <- scale(X[,,1])
#' Xs[,,2] <- scale(X[,,2])
#'
#' suppressWarnings(RNGversion("3.5.0"))
#' set.seed(2018)
#' res_fada <- do_fada_multiv_robust(subset = Xs, numArchoid = 3, numRep = 5, huge = 200,
#' prob = 0.75, compare = FALSE, PM = PM, method = "adjbox")
#' str(res_fada)
#' res_fada$cases
#' #[1] 8 24 29
#' res_fada$rss
#' #[1] 2.301741
#' }
#'
#' @export
do_fada_multiv_robust <- function(subset, numArchoid, numRep, huge, prob, compare = FALSE, PM,
method = "adjbox") {
nbasis <- dim(subset)[2] # number of basis.
nvars <- dim(subset)[3] # number of variables.
lass <- stepArchetypesRawData_funct_multiv_robust(data = subset, numArch = numArchoid,
numRep = numRep, verbose = FALSE,
saveHistory = FALSE, PM, prob, nbasis, nvars)
fada_subset <- archetypoids_funct_multiv_robust(numArchoid, subset, huge = huge,
ArchObj = lass, PM, prob)
k_subset <- fada_subset$cases # The same with the S3 method anthrCases(fada_subset)
alphas_subset <- fada_subset$alphas
# Outliers:
# t(ada_subset$resid) is the residuals matrix with the same dimension
# as the original data matrix.
if (method == "adjbox") {
seq_pts <- sort(c(seq(1, nbasis * nvars, by = nbasis),
rev(nbasis * nvars - nbasis * (1:(nvars-1))),
nbasis * nvars))
odd_pos <- seq(1, length(seq_pts), 2)
r_list <- list()
for (i in odd_pos) {
r_list[[i]] <- apply(fada_subset$resid[seq_pts[i]:seq_pts[i+1],], 2, int_prod_mat_funct, PM = PM)
}
r_list1 <- r_list[odd_pos]
aux <- Reduce(`+`, r_list1)
resid_vect <- sqrt(aux)
outl_boxb <- boxB(x = resid_vect, k = 1.5, method = method)
outl <- which(resid_vect > outl_boxb$fences[2])
# Local relative importance of the variables in the outlier identification:
local_rel_imp <- sapply(1:nvars, function(i, j, x, y) round((x[[i]][j]/y[j]) * 100, 2), outl, r_list1, aux)
if (length(outl) > 0) { # For the case when there are outliers.
if (length(outl) == 1) { # For the case when there are a single outlier.
local_rel_imp <- t(local_rel_imp) # We have to create the matrix, otherwise local_rel_imp is a vector.
}
dimnames(local_rel_imp) <- list(as.character(outl), paste("V", 1:nvars, sep = ""))
}
# Marginal relative importance of the variables in the outlier identification:
margi_rel_imp <- sapply(1:nvars, function(i, j, x) round((x[[i]][j]/sum(x[[i]])) * 100, 2), outl, r_list1)
if (length(outl) > 0) { # For the case when there are outliers.
if (length(outl) == 1) { # For the case when there are a single outlier.
margi_rel_imp <- t(margi_rel_imp) # We have to create the matrix, otherwise margi_rel_imp is a vector.
}
dimnames(margi_rel_imp) <- list(as.character(outl), paste("V", 1:nvars, sep = ""))
}
#}else if (method == "toler") {
# resid_vect <- apply(fada_subset$resid, 2, int_prod_mat_funct, PM = PM)
# # Degree of outlierness:
# outl <- do_outl_degree(vect_tol, resid_vect, alpha,
# paste(outl_degree, "_non_rob", sep = ""))
}else{
stop("So far methods allowed are 'adjbox'.") # and 'toler'
}
# -----------------------------
rss_subset <- fada_subset$rss
rss_non_rob <- NULL
if (compare) {
n <- ncol(t(subset))
x_gvv <- rbind(t(subset), rep(huge, n))
zs <- x_gvv[,k_subset]
zs <- as.matrix(zs)
alphas <- matrix(0, nrow = numArchoid, ncol = n)
for (j in 1:n) {
alphas[, j] = coef(nnls(zs, x_gvv[,j]))
}
resid <- zs[1:(nrow(zs) - 1),] %*% alphas - x_gvv[1:(nrow(x_gvv) - 1),]
rss_non_rob <- frobenius_norm_funct(resid, PM) / n
}
#return(c(list(k_subset = k_subset, alphas_subset = alphas_subset,
# rss_subset = rss_subset, rss_non_rob = rss_non_rob), out_tol1))
return(list(cases = k_subset, alphas = alphas_subset,
rss = rss_subset, rss_non_rob = rss_non_rob,
resid = resid_vect, outliers = outl,
local_rel_imp = local_rel_imp, margi_rel_imp = margi_rel_imp))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.