#' Outliergram for univariate functional data sets
#'
#' This function performs the outliergram of a univariate functional data set,
#' possibly with an adjustment of the true positive rate of outliers discovered
#' under assumption of gaussianity.
#'
#' @section Adjustment:
#'
#' When the adjustment option is selected, the value of \eqn{F} is optimized for
#' the univariate functional dataset provided with \code{fData}. In practice,
#' a number \code{adjust$N_trials} of times a synthetic population
#' (of size \code{adjust$trial_size} with the same covariance (robustly
#' estimated from data) and centerline as \code{fData} is simulated without
#' outliers and each time an optimized value \eqn{F_i} is computed so that a
#' given proportion (\code{adjust$TPR}) of observations is flagged as outliers.
#' The final value of \code{F} for the outliergram is determined as an average
#' of \eqn{F_1, F_2, \ldots, F_{N_{trials}}}. At each time step the optimization
#' problem is solved using \code{stats::uniroot} (Brent's method).
#'
#' @param fData the univariate functional dataset whose outliergram has to be
#' determined.
#' @param MBD_data a vector containing the MBD for each element of the dataset.
#' If missing, MBDs are computed.
#' @param MEI_data a vector containing the MEI for each element of the dataset.
#' If not not provided, MEIs are computed.
#' @param p_check percentage of observations with either low or high MEI to be
#' checked for outliers in the secondary step (shift towards the center of the
#' dataset).
#' @param Fvalue the \eqn{F} value to be used in the procedure that finds the
#' shape outliers by looking at the lower parabolic limit in the outliergram.
#' Default is \code{1.5}. You can also leave the default value and, by providing
#' the parameter \code{adjust}, specify that you want \code{Fvalue} to be
#' adjusted for the dataset provided in \code{fData}.
#' @param adjust either \code{FALSE} if you would like the default value for the
#' inflation factor, \eqn{F = 1.5}, to be used, or a list specifying the
#' parameters required by the adjustment.
#' \itemize{
#' \item{"\code{N_trials}"}{: the number of repetitions of the adjustment
#' procedure based on the simulation of a gaussian population of functional
#' data, each one producing an adjusted value of \eqn{F}, which will lead
#' to the averaged adjusted value \eqn{\bar{F}}. Default is 20;}
#' \item{"\code{trial_size}"}{: the number of elements in the gaussian
#' population of functional data that will be simulated at each repetition of
#' the adjustment procedure. Default is \code{5 * fData$N};}
#' \item{"\code{TPR}"}{: the True Positive Rate of outliers, i.e. the proportion
#' of observations in a dataset without shape outliers that have to be considered
#' outliers. Default is \code{2 * pnorm( 4 * qnorm( 0.25 ) )};}
#' \item{"\code{F_min}"}{: the minimum value of \eqn{F}, defining the left
#' boundary for the optimization problem aimed at finding, for a given dataset
#' of simulated gaussian data associated to \code{fData}, the optimal value of
#' \eqn{F}. Default is 0.5;}
#' \item{"\code{F_max}"}{: the maximum value of \eqn{F}, defining the right
#' boundary for the optimization problem aimed at finding, for a given dataset
#' of simulated gaussian data associated to \code{fData}, the optimal value of
#' \eqn{F}. Default is 20;}
#' \item{"\code{tol}"}{: the tolerance to be used in the optimization problem
#' aimed at finding, for a given dataset of simulated gaussian data associated
#' to \code{fData}, the optimal value of \eqn{F}. Default is \code{1e-3};}
#' \item{"\code{maxiter}"}{: the maximum number of iterations to solve the
#' optimization problem aimed at finding, for a given dataset of simulated
#' gaussian data associated to \code{fData}, the optimal value of \eqn{F}.
#' Default is \code{100};}
#' \item{"\code{VERBOSE}"}{: a parameter controlling the verbosity of the
#' adjustment process;}
#' }
#' @param display either a logical value indicating whether you want the
#' outliergram to be displayed, or the number of the graphical device
#' where you want the outliergram to be displayed.
#' @param xlab a list of two labels to use on the x axis when displaying the
#' functional dataset and the outliergram
#' @param ylab a list of two labels to use on the y axis when displaying the
#' functional dataset and the outliergram;
#' @param main a list of two titles to be used on the plot of the functional
#' dataset and the outliergram;
#' @param ... additional graphical parameters to be used \emph{only} in the plot
#' of the functional dataset
#'
#' @return
#'
#' Even when used graphically to plot the outliergram, the function returns a
#' list containing:
#' \itemize{
#' \item{\code{Fvalue}}{: the value of the parameter F used;}
#' \item{\code{d}}{: the vector of values of the parameter \eqn{d} for each observation
#' (distance to the parabolic border of the outliergram);}
#' \item{\code{ID_outliers}}{: the vector of observations id corresponding to outliers.}}
#'
#' @references
#'
#' Arribas-Gil, A., and Romo, J. (2014). Shape outlier detection and visualization
#' for functional data: the outliergram, \emph{Biostatistics}, 15(4), 603-619.
#'
#' @seealso \code{\link{fData}}, \code{\link{MEI}}, \code{\link{MBD}},
#' \code{\link{fbplot}}
#'
#' @examples
#' set.seed(1618)
#'
#' N <- 200
#' P <- 200
#' N_extra <- 4
#'
#' grid <- seq(0, 1, length.out = P)
#'
#' Cov <- exp_cov_function(grid, alpha = 0.2, beta = 0.8)
#'
#' Data <- generate_gauss_fdata(
#' N = N,
#' centerline = sin(4 * pi * grid),
#' Cov = Cov
#' )
#'
#' Data_extra <- array(0, dim = c(N_extra, P))
#'
#' Data_extra[1, ] <- generate_gauss_fdata(
#' N = 1,
#' centerline = sin(4 * pi * grid + pi / 2),
#' Cov = Cov
#' )
#'
#' Data_extra[2, ] <- generate_gauss_fdata(
#' N = 1,
#' centerline = sin(4 * pi * grid - pi / 2),
#' Cov = Cov
#' )
#'
#' Data_extra[3, ] <- generate_gauss_fdata(
#' N = 1,
#' centerline = sin(4 * pi * grid + pi / 3),
#' Cov = Cov
#' )
#'
#' Data_extra[4, ] <- generate_gauss_fdata(
#' N = 1,
#' centerline = sin(4 * pi * grid - pi / 3),
#' Cov = Cov
#' )
#'
#' Data <- rbind(Data, Data_extra)
#'
#' fD <- fData(grid, Data)
#'
#' # Outliergram with default Fvalue = 1.5
#' outliergram(fD, display = TRUE)
#'
#' # Outliergram with Fvalue enforced to 2.5
#' outliergram(fD, Fvalue = 2.5, display = TRUE)
#'
#' \donttest{
#' # Outliergram with estimated Fvalue to ensure TPR of 1%
#' outliergram(
#' fData = fD,
#' adjust = list(
#' N_trials = 10,
#' trial_size = 5 * nrow(Data),
#' TPR = 0.01,
#' VERBOSE = FALSE
#' ),
#' display = TRUE
#' )
#' }
#'
#' @export
outliergram = function( fData, MBD_data = NULL, MEI_data = NULL, p_check = 0.05,
Fvalue = 1.5,
adjust = FALSE, display = TRUE,
xlab = NULL, ylab = NULL, main = NULL, ... )
{
N = fData$N
grid = seq( fData$t0,
fData$tP,
length.out = fData$P )
if( ! is.list( adjust ) )
{
# Plain outliergram with default F value: F = 1.5
stopifnot( is.numeric(Fvalue) )
out = .outliergram( fData,
MBD_data = MBD_data, MEI_data = MEI_data,
p_check = p_check,
Fvalue = Fvalue,
shift = TRUE )
} else {
nodenames = c( 'N_trials', 'trial_size', 'TPR', 'F_min', 'F_max',
'tol', 'maxiter', 'VERBOSE' )
unused = setdiff( names( adjust ), nodenames )
# Checking for unused parameters
if( length( unused ) > 0 )
{
for( i in unused )
warning( 'Warning: unused parameter ', i, ' in adjust argument of outliergram' )
}
N_trials = ifelse( is.null( adjust$N_trials ),
20,
adjust$N_trials )
trial_size = ifelse( is.null( adjust$trial_size ),
5 * fData$N,
adjust$trial_size )
TPR = ifelse( is.null( adjust$TPR ),
2 * stats::pnorm( 4 * stats::qnorm( 0.25 ) ),
adjust$TPR )
F_min = ifelse( is.null( adjust$F_min ),
0.5,
adjust$F_min )
F_max= ifelse( is.null( adjust$F_max ),
20,
adjust$F_max )
tol = ifelse( is.null( adjust$tol ),
1e-3,
adjust$tol )
maxiter = ifelse( is.null( adjust$maxiter ),
100,
adjust$maxiter )
VERBOSE = ifelse( is.null( adjust$VERBOSE ),
FALSE,
adjust$VERBOSE )
Cov = robustbase::covOGK( fData$values, sigmamu = robustbase::s_Qn )$cov
CholCov <- chol( Cov )
if( is.null ( MBD_data ) )
{
MBD_data = MBD( fData$values, manage_ties = TRUE )
}
centerline = fData$values[ which.max( MBD_data ), ]
Fvalues = rep( 0, N_trials )
obj_function = function( F_curr )( length(
.outliergram( fData_gauss,
MBD_data = NULL,
MEI_data = NULL,
Fvalue = F_curr,
shift = FALSE )$ID_SO ) / trial_size - TPR )
for( iTrial in 1 : N_trials )
{
if( VERBOSE > 0 )
{
cat( ' * * * Iteration ', iTrial, ' / ', N_trials, '\n' )
}
fData_gauss = fData( grid,
generate_gauss_fdata( N = trial_size,
centerline = centerline,
CholCov = CholCov ) )
if( VERBOSE > 0 )
{
cat( ' * * * * beginning optimization\n' )
}
opt = stats::uniroot(
obj_function,
interval = c( F_min, F_max ),
tol = tol,
maxiter = maxiter
)
Fvalues[ iTrial ] = opt$root
}
Fvalue = mean( Fvalues )
out = .outliergram( fData, MBD_data, MEI_data, p_check, Fvalue = Fvalue, shift = TRUE )
}
a_0_2 = -2 / ( N * ( N - 1 ) )
a_1 = 2 * ( N + 1 ) / ( N - 1 )
# Plot
if( display )
{
if( is.null( xlab ) )
{
xlab = list( '', 'MEI' )
}
if( is.null( ylab ) )
{
ylab = list( '', 'MBD' )
}
if( is.null( main ) )
{
main = list( '', 'Outliergram' )
}
# Setting up palettes
col_non_outlying = scales::hue_pal( h = c( 180, 270 ),
l = 60 )( length( out$ID_NO ) )
col_non_outlying = set_alpha( col_non_outlying, 0.5 )
if( length( out$ID_SO ) > 0 )
{
col_outlying = scales::hue_pal( h = c( - 90, 180 ),
c = 150 )( length( out$ID_SO ) )
} else
{
col_outlying = scales::hue_pal( h = c( - 90, 180 ),
c = 150 )( 1 )
}
grDevices::dev.cur()
oldpar <- graphics::par(mfrow = c(1, 1))
on.exit(graphics::par(oldpar))
graphics::par(mfrow = c(1, 2))
# Plotting functional data
if( length( out$ID_SO ) > 0 )
{
graphics::matplot( grid, t( fData$values[ - out$ID_SO, ] ), type = 'l', lty = 1,
ylim = range( fData$values ),
col = col_non_outlying,
xlab = xlab[[1]],
ylab = ylab[[1]],
main = main[[1]],
... )
graphics::matplot( grid, t( toRowMatrixForm( fData$values[ out$ID_SO, ] ) ),
type = 'l', lty = 1, lwd = 3, ylim = range( fData$values ),
col = col_outlying, add = TRUE )
} else {
graphics::matplot( grid, t( fData$values ), type = 'l', lty = 1,
ylim = range( fData$values ),
col = col_non_outlying,
xlab = xlab[[1]],
ylab = ylab[[1]],
main = main[[1]],
... )
}
# Adding text labels with curve ID
w_spacing = diff( range( grid ) ) / ( 2 * length( out$ID_SO ) )
for( iOut in seq_along( out$ID_SO ) )
{
graphics::text( grid[ 1 ] + ( 2 * iOut - 1 ) * w_spacing,
fData$values[ out$ID_SO[ iOut ],
which.min( abs( grid - grid[ 1 ] -
( 2 * iOut - 1 ) * w_spacing ) ) ] +
diff( range( fData$values[ out$ID_SO[ iOut ] ] ) ) / 30,
out$ID_SO[ iOut ],
col = col_outlying[ iOut ] )
}
# Plotting outliergram
# Upper parabolic limit
grid_1D = seq( 0, 1, length.out = 100 )
graphics::plot( grid_1D, a_0_2 + a_1 * grid_1D + a_0_2 * N^2 * grid_1D^2,
lty = 2, type = 'l', col = 'darkblue', lwd = 2,
ylim = c( 0, a_0_2 + a_1 / 2 + a_0_2 * N^2/4 ),
xlab = xlab[[2]],
ylab = ylab[[2]],
main = main[[2]] )
if( length( out$ID_SO ) > 0 )
{
graphics::points( out$MEI_data[ - out$ID_SO ], out$MBD_data[ - out$ID_SO ],
pch = 16, col = col_non_outlying )
graphics::points( out$MEI_data[ out$ID_SO ], out$MBD_data[ out$ID_SO ],
pch = 16, cex = 1.5, col = col_outlying )
for( idOut in out$ID_SO )
{
graphics::text( out$MEI_data[ idOut ],
out$MBD_data[ idOut ] + 0.5 / 30,
idOut,
col = col_outlying[ match( idOut, out$ID_SO ) ] )
}
} else {
graphics::points( out$MEI_data, out$MBD_data,
pch = 16, col = col_non_outlying )
}
# lower parabolic limit
graphics::lines( grid_1D, a_0_2 + a_1 * grid_1D + a_0_2 * N^2 * grid_1D^2 -
out$Q_d3 - Fvalue * out$IQR_d,
lty = 2, lwd = 2, col = 'lightblue' )
}
return( list( Fvalue = Fvalue,
d = out$d,
ID_outliers = out$ID_SO ) )
}
.outliergram = function( fData, MBD_data = NULL, MEI_data = NULL,
p_check = 0.05,
Fvalue = 1.5, shift = TRUE )
{
N = fData$N
a_0_2 = -2 / ( N * ( N - 1 ) )
a_1 = 2 * ( N + 1 ) / ( N - 1 )
# Computing MBD
if( is.null( MBD_data ) ){
MBD_data = MBD( fData$values )
}
# Computing MEI
if( is.null( MEI_data ) )
{
MEI_data = MEI( fData$values )
}
d = a_0_2 + a_1 * MEI_data + N^2 * a_0_2 * MEI_data^2 - MBD_data
Q = stats::quantile( d )
Q_d3 = Q[ 4 ]
Q_d1 = Q[ 2 ]
IQR_d = Q[ 4 ] - Q[ 2 ]
# Computing surely outlying curves
ID_shape_outlier = which( d >= Q_d3 + Fvalue * IQR_d )
# Computing non outlying curves ids
ID_non_outlying = setdiff( 1 : nrow( fData$values ), ID_shape_outlier )
if( shift )
{
# Low MEI curves will be checked for upward shift
ID_non_outlying_Low_MEI = ID_non_outlying[
which( MEI_data[ - ID_shape_outlier ] <=
stats::quantile( MEI_data,
probs = p_check ) ) ]
# High MEI curves will be checked for downward shift
ID_non_outlying_High_MEI = ID_non_outlying[
which( MEI_data[ - ID_shape_outlier ] >=
stats::quantile( MEI_data, probs = 1 - p_check ) ) ]
# Manage high MEI data
lst = manage_high_MEI_data(fData = fData,
ID_non_outlying_High_MEI = ID_non_outlying_High_MEI,
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying,
Q_d1 = Q_d1,
Q_d3 = Q_d3,
IQR_d = IQR_d,
Fvalue = Fvalue)
ID_non_outlying = lst[['ID_non_outlying']]
ID_shape_outlier = lst[['ID_shape_outlier']]
# Manage low MEI data
lst = manage_low_MEI_data(fData = fData,
ID_non_outlying_Low_MEI = ID_non_outlying_Low_MEI,
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying,
Q_d1 = Q_d1,
Q_d3 = Q_d3,
IQR_d = IQR_d,
Fvalue = Fvalue)
ID_non_outlying = lst[['ID_non_outlying']]
ID_shape_outlier = lst[['ID_shape_outlier']]
}
return( list( ID_SO = ID_shape_outlier,
ID_NO = ID_non_outlying,
MEI_data = MEI_data,
MBD_data = MBD_data,
Q_d3 = Q_d3,
Q_d1 = Q_d1,
IQR_d = IQR_d,
d = d) )
}
#' Outliergram for multivariate functional datasets
#'
#' This function performs the outliergram of a multivariate functional dataset.
#'
#' The method applies the extension of the univariate outliergram to the case of multivariate
#' functional datasets. Differently from the function for the univariate case, only the
#' outliergram plot is displayed.
#'
#' @section Adjustment:
#'
#' Differently from the case of univariate functional data, in this case the function does not apply
#' an automatic tuning of the F parameter, since the related procedure would become computationally
#' too heavy for general datasets. If a good value of F is sought, it is recommended to run several
#' trials of the outliergram and manually select the best value.
#'
#' @param mfData the multivariate functional dataset whose outliergram has to be
#' determined;
#' @param MBD_data a vector containing the MBD for each element of the dataset; If missing, MBDs
#' are computed with the specified choice of weights;
#' @param weights the weights choice to be used to compute multivariate MBDs and MEIs;
#' @param MEI_data a vector containing the MEI for each element of the dataset.
#' If not not provided, MEIs are computed;
#' @param p_check percentage of observations with either low or high MEI to be
#' checked for outliers in the secondary step (shift towards the center of the
#' dataset).
#' @param Fvalue the \eqn{F} value to be used in the procedure that finds the
#' shape outliers by looking at the lower parabolic limit in the outliergram.
#' Default is \code{1.5};
#' @param shift whether to apply the shifting algorithm to properly manage observations having low
#' or high MEI. Default is TRUE.
#' @param display either a logical value indicating whether you want the
#' outliergram to be displayed, or the number of the graphical device
#' where you want the outliergram to be displayed;
#' @param xlab the label to use on the x axis in the outliergram plot;
#' @param ylab the label to use on the x axis in the outliergram plot;
#' @param main the title to use in the outliergram;
#'
#' @references
#'
#' Ieva, F. & Paganoni, A.M. Stat Papers (2017). https://doi.org/10.1007/s00362-017-0953-1.
#'
#' @seealso \code{\link{outliergram}}, \code{\link{mfData}}, \code{\link{MBD}},
#' \code{\link{MEI}}
#'
#' @examples
#'
#' N = 2e2
#' P = 1e2
#'
#' t0 = 0
#' t1 = 1
#'
#' set.seed(1)
#'
#' # Defining the measurement grid
#' grid = seq( t0, t1, length.out = P )
#'
#' # Generating an exponential covariance matrix to be used in the simulation of
#' # the functional datasets (see the related help for details)
#' C = exp_cov_function( grid, alpha = 0.3, beta = 0.2)
#'
#' # Simulating the measurements of two univariate functional datasets with
#' # required center and covariance function
#' f1 = function(x) x * ( 1 - x )
#' f2 = function(x) x^3
#' Data = generate_gauss_mfdata( N, L = 2,
#' centerline = matrix(c(sin(2 * pi * grid),
#' cos(2 * pi * grid)), nrow=2, byrow=TRUE),
#' listCov = list(C, C), correlations = 0.1 )
#'
#' # Building the mfData object
#' mfD = mfData( grid, Data )
#'
#'
#' dev.new()
#' out = multivariate_outliergram(mfD, Fvalue = 2., shift=TRUE)
#' col_non_outlying = scales::hue_pal( h = c( 180, 270 ),
#' l = 60 )( N - length( out$ID_outliers ) )
#' col_non_outlying = set_alpha( col_non_outlying, 0.5 )
#' col_outlying = scales::hue_pal( h = c( - 90, 180 ),
#' c = 150 )( length( out$ID_outliers ) )
#' colors = rep('black', N)
#' colors[out$ID_outliers] = col_outlying
#' colors[colors == 'black'] = col_non_outlying
#'
#' lwd = rep(1, N)
#' lwd[out$ID_outliers] = 2
#'
#' dev.new()
#' plot(mfD, col=colors, lwd=lwd)
#'
#' @export
multivariate_outliergram = function( mfData,
MBD_data = NULL,
MEI_data = NULL,
weights='uniform',
p_check = 0.05,
Fvalue = 1.5, shift = TRUE,
display = TRUE, xlab = NULL, ylab = NULL, main = NULL )
{
N = mfData$N
out = .outliergram_mfData( mfData,
MBD_data = MBD_data, MEI_data = MEI_data,
p_check = p_check,
Fvalue = Fvalue,
shift = shift )
a_0_2 = -2 / ( N * ( N - 1 ) )
a_1 = 2 * ( N + 1 ) / ( N - 1 )
# Plot
if( display )
{
if( is.null( xlab ) )
{
xlab = 'MEI'
}
if( is.null( ylab ) )
{
ylab = 'MBD'
}
if( is.null( main ) )
{
main = 'Outliergram'
}
# Setting up palettes
col_non_outlying = scales::hue_pal( h = c( 180, 270 ),
l = 60 )( length( out$ID_NO ) )
col_non_outlying = set_alpha( col_non_outlying, 0.5 )
if( length( out$ID_SO ) > 0 )
{
col_outlying = scales::hue_pal( h = c( - 90, 180 ),
c = 150 )( length( out$ID_SO ) )
} else {
col_outlying = scales::hue_pal( h = c( - 90, 180 ),
c = 150 )( 1 )
}
grDevices::dev.cur()
# Plotting outliergram
## Upper parabolic limit
grid_1D = seq( 0, 1, length.out = 100 )
graphics::plot( grid_1D, a_0_2 + a_1 * grid_1D + a_0_2 * N^2 * grid_1D^2,
lty = 2, type = 'l', col = 'darkblue', lwd = 2,
ylim = c( 0, a_0_2 + a_1 / 2 + a_0_2 * N^2/4 ),
xlab = xlab,
ylab = ylab,
main = main )
if( length( out$ID_SO ) > 0 )
{
graphics::points( out$MEI_data[ - out$ID_SO ], out$MBD_data[ - out$ID_SO ],
pch = 16, col = col_non_outlying )
graphics::points( out$MEI_data[ out$ID_SO ], out$MBD_data[ out$ID_SO ],
pch = 16, cex = 1.5, col = col_outlying )
for( idOut in out$ID_SO )
{
graphics::text( out$MEI_data[ idOut ],
out$MBD_data[ idOut ] + 0.5 / 30,
idOut,
col = col_outlying[ match( idOut, out$ID_SO ) ] )
}
} else {
graphics::points( out$MEI_data, out$MBD_data,
pch = 16, col = col_non_outlying )
}
# lower parabolic limit
graphics::lines( grid_1D, a_0_2 + a_1 * grid_1D + a_0_2 * N^2 * grid_1D^2 -
out$Q_d3 - Fvalue * out$IQR_d,
lty = 2, lwd = 2, col = 'lightblue' )
}
return( list( Fvalue = Fvalue,
d = out$d,
ID_outliers = out$ID_SO ) )
}
.outliergram_mfData = function( mfData, MBD_data = NULL, MEI_data = NULL, weights='uniform',
p_check = 0.05,
Fvalue = 1.5, shift = TRUE )
{
N = mfData$N
a_0_2 = -2 / ( N * ( N - 1 ) )
a_1 = 2 * ( N + 1 ) / ( N - 1 )
# Computing MBD
if( is.null( MBD_data ) ){
MBD_data = multiMBD( toListOfValues(mfData), weights = weights )
}
# Computing MEI
if( is.null( MEI_data ) ){
MEI_data = multiMEI( toListOfValues(mfData), weights = weights )
}
d = a_0_2 + a_1 * MEI_data + N^2 * a_0_2 * MEI_data^2 - MBD_data
Q = stats::quantile( d )
Q_d3 = Q[ 4 ]
Q_d1 = Q[ 2 ]
IQR_d = Q[ 4 ] - Q[ 2 ]
# Computing surely outlying curves
ID_shape_outlier = which( d >= Q_d3 + Fvalue * IQR_d )
# Computing non outlying curves ids
ID_non_outlying = setdiff( 1 : N, ID_shape_outlier )
if( shift )
{
for( iDim in seq_along(mfData$fDList))
{
# Low MEI curves will be checked for upward shift
ID_non_outlying_Low_MEI = ID_non_outlying[
which( MEI_data[ - ID_shape_outlier ] <=
stats::quantile( MEI_data,
probs = p_check ) ) ]
# High MEI curves will be checked for downward shift
ID_non_outlying_High_MEI = ID_non_outlying[
which( MEI_data[ - ID_shape_outlier ] >=
stats::quantile( MEI_data, probs = 1 - p_check ) ) ]
# Manage high MEI data
lst = manage_high_MEI_data(fData = mfData$fDList[[iDim]],
ID_non_outlying_High_MEI = ID_non_outlying_High_MEI,
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying,
Q_d1 = Q_d1,
Q_d3 = Q_d3,
IQR_d = IQR_d,
Fvalue = Fvalue)
ID_non_outlying = lst[['ID_non_outlying']]
ID_shape_outlier = lst[['ID_shape_outlier']]
# Manage low MEI data
lst = manage_low_MEI_data(fData = mfData$fDList[[iDim]],
ID_non_outlying_Low_MEI = ID_non_outlying_Low_MEI,
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying,
Q_d1 = Q_d1,
Q_d3 = Q_d3,
IQR_d = IQR_d,
Fvalue = Fvalue)
ID_non_outlying = lst[['ID_non_outlying']]
ID_shape_outlier = lst[['ID_shape_outlier']]
}
}
return( list( ID_SO = ID_shape_outlier,
ID_NO = ID_non_outlying,
MEI_data = MEI_data,
MBD_data = MBD_data,
Q_d3 = Q_d3,
Q_d1 = Q_d1,
IQR_d = IQR_d,
d = d) )
}
manage_high_MEI_data = function(fData,
ID_non_outlying_High_MEI,
ID_shape_outlier,
ID_non_outlying,
Q_d1,
Q_d3,
IQR_d,
Fvalue=1.5)
{
if (length(ID_non_outlying_High_MEI) == 0) {
return(list(
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying
))
}
N = fData$N
a_0_2 = -2 / ( N * ( N - 1 ) )
a_1 = 2 * ( N + 1 ) / ( N - 1 )
stopifnot( length(intersect(ID_shape_outlier, ID_non_outlying)) == 0 )
# Managing High MEI data
obs = min_diffs = NULL
mins = data.frame(min_diffs = apply(fData$values, 2, compute_min_diff_min),
obs = apply(fData$values, 2, which.min))
min_diff_min = mins %>%
dplyr::filter(obs %in% ID_non_outlying_High_MEI )
if (nrow(min_diff_min) == 0) {
return(list(
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying
))
}
min_diff_min = min_diff_min %>%
dplyr::group_by(obs) %>%
dplyr::summarize(min_diff_min = min(min_diffs))
ID_to_check = as.list(min_diff_min)[['obs']]
aux_function_MBD = function( ID )(
MBD( rbind( fData$values[ - ID, ],
Data_tilde[ grep( ID, ID_to_check ), ] ) )[ N ] )
aux_function_MEI = function( ID )(
MEI( rbind( fData$values[ - ID, ],
Data_tilde[ grep( ID, ID_to_check ), ] ) )[ N ] )
if( nrow( min_diff_min ) > 0 )
{
Data_tilde = toRowMatrixForm( fData$values[ ID_to_check, ] +
as.list(min_diff_min)[['min_diff_min']])
MBD_curr = sapply( ID_to_check, aux_function_MBD )
MEI_curr = sapply( ID_to_check, aux_function_MEI )
d_curr = a_0_2 + a_1 * MEI_curr + N^2 * a_0_2 * MEI_curr^2 - MBD_curr
ID_out_extra = ID_to_check[ which( d_curr >= Q_d3 + Fvalue * IQR_d ) ]
ID_shape_outlier = c( ID_shape_outlier, ID_out_extra )
ID_non_outlying = setdiff( ID_non_outlying, ID_out_extra )
}
return( list(ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying) )
}
manage_low_MEI_data = function(fData,
ID_non_outlying_Low_MEI,
ID_shape_outlier,
ID_non_outlying,
Q_d1,
Q_d3,
IQR_d,
Fvalue = 1.5)
{
if (length(ID_non_outlying_Low_MEI) == 0) {
return(list(
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying
))
}
N = fData$N
a_0_2 = -2 / ( N * ( N - 1 ) )
a_1 = 2 * ( N + 1 ) / ( N - 1 )
stopifnot( length(intersect(ID_shape_outlier, ID_non_outlying)) == 0 )
# Managing Low MEI data
obs = max_diffs = NULL
maxs = data.frame(max_diffs = apply(fData$values, 2, compute_max_diff_max),
obs = apply(fData$values, 2, which.max))
max_diff_max = maxs %>%
dplyr::filter(obs %in% ID_non_outlying_Low_MEI )
if (nrow(max_diff_max) == 0) {
return(list(
ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying
))
}
max_diff_max = max_diff_max %>%
dplyr::group_by(obs) %>%
dplyr::summarize(max_diff_max = max(max_diffs))
ID_to_check = as.list(max_diff_max)[['obs']]
aux_function_MBD = function( ID )(
MBD( rbind( fData$values[ - ID, ],
Data_tilde[ grep( ID, ID_to_check ), ] ) )[ N ] )
aux_function_MEI = function( ID )(
MEI( rbind( fData$values[ - ID, ],
Data_tilde[ grep( ID, ID_to_check ), ] ) )[ N ] )
if( nrow( max_diff_max ) > 0 )
{
Data_tilde = toRowMatrixForm( fData$values[ ID_to_check, ] +
as.list(max_diff_max)[['max_diff_max']])
MBD_curr = sapply( ID_to_check, aux_function_MBD )
MEI_curr = sapply( ID_to_check, aux_function_MEI )
d_curr = a_0_2 + a_1 * MEI_curr + N^2 * a_0_2 * MEI_curr^2 - MBD_curr
ID_out_extra = ID_to_check[ which( d_curr >= Q_d3 + Fvalue * IQR_d ) ]
ID_shape_outlier = c( ID_shape_outlier, ID_out_extra )
ID_non_outlying = setdiff( ID_non_outlying, ID_out_extra )
}
return( list(ID_shape_outlier = ID_shape_outlier,
ID_non_outlying = ID_non_outlying) )
}
compute_min_diff_min = function(x)
{
return(diff(sort(x, partial=c(1,2), decreasing=FALSE)[c(1,2)]))
}
compute_max_diff_max = function(x)
{
return(diff(-sort(-x, partial=c(1,2), decreasing=FALSE)[c(1,2)]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.