Nothing
######
## VT::22.11.2019
##
##
## roxygen2::roxygenise("C:/users/valen/onedrive/myrepo/R/fsdaR", load_code=roxygen2:::load_installed)
##
#' Computes \code{tclustreg} for different number of groups \code{k}
#' and restriction factors \code{c}.
#'
#' @description (the last two letters stand for 'Information Criterion') computes
#' the values of BIC (MIXMIX), ICL (MIXCLA) or CLA (CLACLA), for different values
#' of \code{k} (number of groups) and different values of \code{c}
#' (restriction factor for the variances of the residuals), for
#' a prespecified level of trimming. In order to minimize randomness, given \code{k},
#' the same subsets are used for each value of \code{c}.
#'
#' @param y Response variable. A vector with \code{n} elements that
#' contains the response variable.
#'
#' @param x An n x p data matrix (n observations and p variables).
#' Rows of x represent observations, and columns represent variables.
#'
#' Missing values (NA's) and infinite values (Inf's) are allowed,
#' since observations (rows) with missing or infinite values will
#' automatically be excluded from the computations.
#'
#' @param alphaLik Trimming level, a scalar between 0 and 0.5 or an
#' integer specifying the number of observations which have to be trimmed.
#' If \code{alphaLik=0}, there is no trimming. More in detail, if \code{0 < alphaLik < 1}
#' clustering is based on \code{h = floor(n * (1 - alphaLik))} observations.
#' If \code{alphaLik} is an integer greater than 1 clustering is
#' based on \code{h = n - floor(alphaLik)}. More in detail, likelihood
#' contributions are sorted and the units associated with the smallest \code{n - h}
#' contributions are trimmed.
#'
#' @param alphaX Second-level trimming or constrained weighted model for \code{x}.
#
#' @param intercept wheather to use constant term (default is \code{intercept=TRUE}
#'
#' @param plot If \code{plot=FALSE} (default) or \code{plot=0} no plot is produced.
#' If \code{plot=TRUE} a plot with the final allocation is shown (using the spmplot function).
#' If \code{X} is 2-dimensional, the lines associated to the groups are shown too.
#'
#' @param nsamp If a scalar, it contains the number of subsamples which will be extracted.
#' If \code{nsamp = 0} all subsets will be extracted. Remark - if the number of all possible
#' subset is greater than 300 the default is to extract all subsets, otherwise just 300.
#' If \code{nsamp} is a matrix it contains in the rows the indexes of the subsets which
#' have to be extracted. \code{nsamp} in this case can be conveniently generated by
#' function \code{subsets()}. \code{nsamp} must have \code{k * p} columns. The first \code{p}
#' columns are used to estimate the regression coefficient of group 1, ..., the last \code{p}
#' columns are used to estimate the regression coefficient of group \code{k}.
#'
#' @param refsteps Number of refining iterations in each subsample. Default is \code{refsteps=10}.
#' \code{refsteps = 0} means "raw-subsampling" without iterations.
#'
#' @param reftol Tolerance of the refining steps. The default value is 1e-14
#'
#' @param equalweights A logical specifying wheather cluster weights in the concentration
#' and assignment steps shall be considered. If \code{equalweights=TRUE} we are (ideally)
#' assuming equally sized groups, else if \code{equalweights = false} (default) we allow for
#' different group weights. Please, check in the given references which functions
#' are maximized in both cases.
#'
#' @param wtrim How to apply the weights on the observations - a flag taking values in c(0, 1, 2, 3, 4).
#' \itemize{
#' \item If \code{wtrim==0} (no weights), the algorithm reduces to the standard \code{tclustreg} algorithm.
#' \item If \code{wtrim==1}, trimming is done by weighting the observations using values specified in vector
#' \code{we}. In this case, vector \code{we} must be supplied by the user.
#' \item If \code{wtrim==2}, trimming is again done by weighting the observations
#' using values specified in vector \code{we}. In this case, vector \code{we}
#' is computed from the data as a function of the density estimate pdfe.
#' Specifically, the weight of each observation is the probability of retaining
#' the observation, computed as
#' \deqn{pretain_{ig} = 1-pdfe_{ig}/max_{ig}(pdfe_{ig})}
#' \item If \code{wtrim==3}, trimming is again done by weighting the observations using
#' values specified in vector \code{we}. In this case, each element wei of vector
#' \code{we} is a Bernoulli random variable with probability of success
#' \eqn{pdfe_{ig}}.
#' In the clustering framework this is done under the constraint that no group is empty.
#' \item If \code{wtrim==4}, trimming is done with the tandem approach of Cerioli and Perrotta (2014).
#' }
#'
#'
#' @param we Weights. A vector of size n-by-1 containing application-specific weights
#' Default is a vector of ones.
#'
#' @param msg Controls whether to display or not messages on the screen If \code{msg==TRUE} (default)
#' messages are displayed on the screen. If \code{msg=2}, detailed messages are displayed,
#' for example the information at iteration level.
#'
#' @param RandNumbForNini pre-extracted random numbers to initialize proportions.
#' Matrix of size k-by-nrow(nsamp) containing the random numbers which
#' are used to initialize the proportions of the groups. This option is effective only if
#' \code{nsamp} is a matrix which contains pre-extracted subsamples. The purpose of this
#' option is to enable the user to replicate the results when the function \code{tclustreg()}
#' is called using a parfor instruction (as it happens for example in routine IC, where
#' \code{tclustreg()} is called through a parfor for different values of the restriction factor).
#' The default is that \code{RandNumbForNini} is empty - then uniform random numbers are used.
#'
#' @param trace Whether to print intermediate results. Default is \code{trace=FALSE}.
#'
#' @param ... potential further arguments passed to lower level functions.
#'
#' @return An S3 object of class \code{\link{tclustreg.object}}
#'
#' @references
#'
#' Torti F., Perrotta D., Riani, M. and Cerioli A. (2019). Assessing Robust Methodologies for Clustering Linear Regression Data,
#' Advances in Data Analysis and Classification, Vol. 13, pp 227-257.
#'
#' @export
#' @author FSDA team, \email{valentin.todorov@@chello.at}
tclustregIC <- function(y, x, alphaLik, alphaX, intercept=TRUE, plot=FALSE,
nsamp, refsteps=10, reftol=10e-14, equalweights=FALSE, wtrim=0, we, msg=TRUE, RandNumbForNini,
trace=FALSE, ...)
{
if(is.data.frame(x))
x <- data.matrix(x)
else if(!is.matrix(x))
x <- matrix(x, length(x), 1,
dimnames = list(names(x), deparse(substitute(x))))
if(!is.numeric(x)) stop("x is not a numeric")
if(is.data.frame(y))
y <- data.matrix(y)
else if(!is.matrix(y))
y <- matrix(y, length(y), 1,
dimnames = list(names(y), deparse(substitute(y))))
if(!is.numeric(y)) stop("y is not a numeric")
dx <- dim(x)
xn <- (dnx <- dimnames(x))[[2]]
xn <- if (!is.null(xn))
xn
else if (dx[2] > 1)
paste("X", 1:dx[2], sep = "")
else if(dx[2])
"X"
dimnames(x) <- list(dnx[[1]], xn)
n <- nrow(x)
p <- ncol(x)
control <- list(...)
control$intercept <- ifelse(intercept, 1, 0)
control$plots <- ifelse(plot, 1, 0)
if(!missing(nsamp)){
control$nsamp <- nsamp
if(is.matrix(nsamp) && !missing(RandNumbForNini))
control$RandNumbForNini <- RandNumbForNini
}
control$refsteps <- refsteps
control$reftol <- reftol
control$equalweights <- equalweights
if(is.null(wtrim) || is.na(wtrim) || !is.numeric(wtrim) || wtrim < 0 || wtrim > 4)
stop("'wtrim' must be numeric, one of 0, 1, 2, 3 or 4!")
control$wtrim <- wtrim
if(!missing(we))
{
if(!is.numeric(we) || length(we) != n)
stop("Parameter 'we' must be a numeric vector of length ", n)
control$we <- we
}
xmsg <- 0
if(is.logical(msg))
xmsg <- ifelse(msg, 1, 0)
else if(is.numeric(msg) && msg >= 0 && msg <= 2)
xmsg <- msg
else
stop("Invalid parameter 'msg'. Should be TRUE/FALSE or 0, 1, 2.")
control$msg <- xmsg
outclass <- "tclustregIC"
## ES 27.06.2018: parameters that are mandatory to the MATLAB function
## cannot be put into the MATLAB function because they have to be supplied
## to the function individually and not in (name, value) pairs
parlist = c(.jarray(y, dispatch=TRUE), .jarray(x, dispatch=TRUE))
if(length(alphaLik) == 1)
parlist = c(parlist, .jnew("java/lang/Double", as.double(alphaLik))) # alpha 1
else
parlist = c(parlist, .jarray(alphaLik, dispatch=TRUE))
if(length(alphaX) == 1)
parlist = c(parlist, .jnew("java/lang/Double", as.double(alphaX))) # alpha 2
else
parlist = c(parlist, .jarray(alphaX, dispatch=TRUE))
paramNames = names(control)
if(trace)
print(control)
if(length(paramNames) > 0)
{
for (i in 1:length(paramNames)) {
paramName = paramNames[i]
paramValue = control[[i]]
matlabValue = rType2MatlabType(paramName, paramValue)
parlist = c(parlist, .jnew("java/lang/String", paramName), matlabValue)
}
}
out <- callFsdaFunction("tclustregIC", "[Ljava/lang/Object;", 1, parlist)
if(is.null(out))
return(NULL)
arr1 = .jcast(out[[1]], "com/mathworks/toolbox/javabuilder/MWStructArray")
arr = .jnew("org/jrc/ipsc/globesec/sitaf/fsda/FsdaMWStructArray", arr1)
if(trace)
{
cat("\nReturning from MATLAB tclust(). Fields returned by MATLAB: \n")
print(arr$fieldNames())
}
bopt = as.matrix(.jevalArray(arr$get("bopt", as.integer(1)), "[[D", simplify = TRUE))
sigma2opt = as.matrix(.jevalArray(arr$get("sigma2opt", as.integer(1)), "[[D", simplify = TRUE))
sigma2opt_corr = as.matrix(.jevalArray(arr$get("sigma2opt_corr", as.integer(1)), "[[D", simplify = TRUE))
sigmaXopt = if(as.integer(arr$hasField("sigmaXopt", as.integer(1))) != 1) NULL
else as.matrix(.jevalArray(arr$get("sigmaXopt", as.integer(1)), "[[D", simplify = TRUE))
idx = as.vector(as.matrix(.jevalArray(arr$get("idx", as.integer(1)), "[[D", simplify = TRUE)))
size = as.matrix(.jevalArray(arr$get("siz", as.integer(1)), "[[D", simplify = TRUE))
rows <- 1:nrow(size)
cols <- c("Cluster", "Size", "Percent")
dimnames(size) <- list(rows, cols)
idx_before_tr = if(as.integer(arr$hasField("idx_before_tr", as.integer(1))) != 1) NULL
else as.vector(as.matrix(.jevalArray(arr$get("idx_before_tr", as.integer(1)), "[[D", simplify = TRUE)))
we <- if(as.integer(arr$hasField("we", as.integer(1))) != 1) NULL
else as.vector(as.matrix(.jevalArray(arr$get("we", as.integer(1)), "[[D", simplify = TRUE)))
postprobopt <- if(as.integer(arr$hasField("postprobopt", as.integer(1))) != 1) NULL
else as.matrix(.jevalArray(arr$get("postprobopt", as.integer(1)), "[[D", simplify = TRUE))
MIXMIX <- if(as.integer(arr$hasField("MIXMIX", as.integer(1))) != 1) NULL
else as.vector(as.matrix(.jevalArray(arr$get("MIXMIX", as.integer(1)), "[[D", simplify = TRUE)))
MIXCLA <- if(as.integer(arr$hasField("MIXCLA", as.integer(1))) != 1) NULL
else as.vector(as.matrix(.jevalArray(arr$get("MIXCLA", as.integer(1)), "[[D", simplify = TRUE)))
CLACLA <- if(as.integer(arr$hasField("CLACLA", as.integer(1))) != 1) NULL
else as.vector(as.matrix(.jevalArray(arr$get("CLACLA", as.integer(1)), "[[D", simplify = TRUE)))
ans <- list(call=match.call(), alphaLik=alphaLik, alphaX=alphaX,
MIXMIX=MIXMIX, MIXCLA=MIXCLA, CLACLA=CLACLA)
freeMatlabResources(out)
class(ans) <- outclass
return (ans)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.