R/CreateOutliersPlot.R

Defines functions monotoniseMatrix monotonise quickNNeval makeSlicePlot CreateOutliersPlot

Documented in CreateOutliersPlot

#' Functional Principal Component or Functional Singular Value Decomposition Scores Plot using 'bagplot' or 'KDE' methodology
#'
#' This function will create, using the first components scores, a set of convex hulls of the scores based on 'bagplot' or 'KDE' methodology.
#'
#' @param fObj A class object returned by FPCA() or FSVD().   
#' @param optns A list of options control parameters specified by \code{list(name=value)}. See `Details'.
#' @param ... Additional arguments for the 'plot' function. 
#'
#' @details Available control options are 
#' \describe{
#' \item{ifactor}{inflation ifactor for the bag-plot defining the loop of bag-plot or multiplying ifactor the KDE pilot bandwidth matrix. (see ?aplpack::compute.bagplot; ?ks::Hpi respectively; default: 2.58; 2 respectively).}
#' \item{variant}{string defining the outlier method used ('KDE', 'NN' or 'bagplot') (default: 'KDE')}
#' \item{unimodal}{logical specifying if the KDE estimate should be unimodal (default: FALSE, relevant only for variant='KDE')}
#' \item{maxVar}{logical specifying if during slicing we should used the directions of maximum variance (default: FALSE for FPCA, TRUE for FSVD)}
#' \item{nSlices}{integer between 3 and 16, denoting the number of slices to be used (default: 4, relevant only for groupingType='slice') }
#' \item{showSlices}{logical specifying if during slicing we should show the outline of the slice (default: FALSE)}
#' \item{colSpectrum}{character vector to be use as input in the 'colorRampPalette' function defining the outliers colours (default: c("red",  "yellow", 'blue'), relevant only for groupingType='slice') }
#' \item{groupingType}{string specifying if a slice grouping ('slice') or a standard percentile/bagplot grouping ('standard') should be returned (default: 'standard')} 
#' \item{fIndices}{a two-component vector with the index of the mode of variation to consider (default: c(1,2) for FPCA and c(1,1) for FSVD)}
#' }
#'
#' @return An (temporarily) invisible copy of a list containing the labels associated with each of sample curves. 
#'
#' @examples
#' \donttest{
#' set.seed(1)
#' n <- 420
#' pts <- seq(0, 1, by=0.05)
#' sampWiener <- Wiener(n, pts)
#' sampWiener <- Sparsify(sampWiener, pts, 10)
#' res <- FPCA(sampWiener$Ly, sampWiener$Lt, 
#'             list(dataType='Sparse', error=FALSE, kernel='epan', verbose=TRUE))
#' CreateOutliersPlot(res)
#' }
#' @references
#' \cite{P. J. Rousseeuw, I. Ruts, J. W. Tukey (1999): The bagplot: a bivariate boxplot, The American Statistician, vol. 53, no. 4, 382-387}
#' \cite{R. J. Hyndman and H. L. Shang. (2010) Rainbow plots, bagplots, and boxplots for functional data, Journal of Computational and Graphical Statistics, 19(1), 29-45}
#' \cite{}
#' @export

CreateOutliersPlot <- function(fObj, optns = NULL, ...){
  
  fObjClass <- class(fObj)
  if( !(fObjClass %in% c('FSVD','FPCA')) ){
    stop("CreateOutliersPlot() expects an FPCA or an FSVD object as input.")
  } 
  
  newOptns <- CheckAndCreateCOPoptions(optns,fObjClass);
  
  nSlices = newOptns$nSlices;    ifactor = newOptns$ifactor; 
  colFunc = newOptns$colFunc;    fIndices = newOptns$fIndeces; #note the typo in newOptns "Indeces"
  variant = newOptns$variant;    groupingType = newOptns$groupingType; 
  unimodal = newOptns$unimodal;  outlierList = newOptns$outlierList;
  maxVar = newOptns$maxVar;      showSlices = newOptns$showSlices
  
  fVarAlls <- c();
  if(fObjClass == 'FPCA'){
    fVarAlls <-  fObj$lambda
  } else { 
    fVarAlls <-  (fObj$sValues)^2  
  }
  
  if(fIndices[2] > length(fVarAlls)){
    stop("You requested a mode of variation that is not available.")
  }
  
  fScores1 <- fScores2 <- c();
  if(fObjClass == 'FPCA'){
    fScores1 <- fObj$xiEst[,fIndices[1]]
    fScores2 <- fObj$xiEst[,fIndices[2]]
  } else { 
    fScores1 <- fObj$sScores1[,fIndices[1]]
    fScores2 <- fObj$sScores2[,fIndices[2]]
  }
  fScoresAll <- cbind(fScores1, fScores2) 
  
  xedge = 1.05 * max( abs(fScores1))
  yedge = 1.05 * max( abs(fScores2))  
  
  args1 <- list();
  if(fObjClass == 'FSVD'){
    args1 <- list(  pch=10,  xlab=paste('S1 FSC', fIndices[1] ,' scores ', sep=''   ),
                    ylab=paste('S2 FSC', fIndices[2] ,' scores ', sep='' ),  
                    xlim = c(-xedge, xedge), ylim =c(-yedge, yedge), lwd= 2)
  } else {
    args1 <- list(  pch=10,  xlab=paste('FPC', fIndices[1] ,' scores ', round(100* fObj$cumFVE[fIndices[1]]), '%', sep=''   ),
                    ylab=paste('FPC', fIndices[2] ,' scores ', round( diff( 100* fObj$cumFVE[c(fIndices[2]-1, fIndices[2])])), '%', sep='' ),  
                    xlim = c(-xedge, xedge), ylim =c(-yedge, yedge), lwd= 2)
  } 
  inargs <- list(...)
  args1[names(inargs)] <- inargs
  
  nComp <- length(fVarAlls) 
  
  if(nComp <2 ){
    stop("This plotting utility function needs at least two functional components.")
    return(NULL)
  } 
  
  if ( variant == 'bagplot' ){
    
    if ( is.null((ifactor))){ ifactor = 2.58 } 
    bgObj = aplpack::compute.bagplot(x= fScores1, y= fScores2, approx.limit=3333 , factor = ifactor)     
    # I do this because panel.first() does not work with the do.call()
    
    if(groupingType =='standard'){
      
      args2 = list(x= fScores1, y= fScores2,  cex= .33,  type='n' )
      do.call(plot, c(args2, args1))   
      points(x = fScores1, y = fScores2, cex= .33,  panel.first = grid(),  lwd= 2) 
      lines( bgObj$hull.bag[c(1:nrow(bgObj$hull.bag),1),], col=2, lwd=2)
      lines( bgObj$hull.loop[c(1:nrow(bgObj$hull.loop),1),], col=4, lwd=2) 
      legend(legend= c('0.500', 'The fence'), x='topright', col=c(2,4), lwd=2)
      
      return( invisible( list( 
        'bag' = match( apply(bgObj$pxy.bag,1, prod), apply( bgObj$xydata,1, prod)),
        'loop'= match( apply(bgObj$pxy.outer,1, prod), apply( bgObj$xydata,1, prod)), 
        'outlier' = ifelse( is.null(bgObj$pxy.outlier), NA, 
                            match( apply(bgObj$pxy.outlier,1, prod) ,apply( bgObj$xydata,1, prod)))
      ) ) ) 
    } else { # groupingType : slice
      
      N <- nrow(fScoresAll) 
      kNNIndices95plus <- (1:N %in% match( apply(bgObj$pxy.outlier,1, prod) ,apply( bgObj$xydata,1, prod)))
      return( makeSlicePlot(nSlices, colFunc, p95plusInd = kNNIndices95plus, N, args1, 
                            scoreEsts = fScoresAll , varEsts = fVarAlls[fIndices], 
                            useDirOfMaxVar = maxVar, showSlices = showSlices) )
      
    } 
  } else if (variant == 'KDE') {  # variant 'kde'
    
    if ( is.null((ifactor))){ ifactor = 2 } 
    fhat <- ks::kde(x=fScoresAll, gridsize = c(400,400), compute.cont = TRUE, 
                    H = ks::Hpi( x=fScoresAll, binned=TRUE,  pilot="dscalar"  ) *  ifactor) 
    zin = fhat$estimate
    if( !is.null(unimodal) && unimodal ){
      maxIndex = which( fhat$estimate == max(fhat$estimate), arr.ind = TRUE)
      zin = monotoniseMatrix( fhat$estimate, maxIndex[1], maxIndex[2])
    }
    qq = quickNNeval(xin = fhat$eval.points[[1]], yin = fhat$eval.points[[2]], zin =  zin, 
                     xout = fScores1, yout = fScores2 ) 
    
    if(groupingType =='standard'){ 
      args2 = list (x= fhat$eval.points[[1]], y=fhat$eval.points[[2]], z = zin, 
                    labcex=1.66, col= c('black','blue','red'), levels = fhat$cont[c(50, 95, 99)], labels = c('50%', '95%', '99%')) 
      do.call(graphics::contour, c(args2, args1)); 
      grid(col = "#e6e6e6")
      
      points(fScoresAll[qq <=  fhat$cont[99], ],cex=0.5, col='orange', pch=10 , lwd =2 ) 
      points(fScoresAll[qq >  fhat$cont[99] & qq <=  fhat$cont[95], ],cex=0.33, col='red', pch=10, lwd =2 ) 
      points(fScoresAll[qq >  fhat$cont[95] & qq <=  fhat$cont[50], ],cex=0.33, col='blue', pch=10 , lwd =2 ) 
      points(fScoresAll[qq >=  fhat$cont[50], ],cex=0.33, col='black' , pch=10, lwd =2 ) 
      legend('bottomleft', c('< 50%','50%-95%','95%-99%','> 99%'), pch = 19, 
             col= c('black','blue','red', 'orange'), pt.cex=1.5, bg='white' )
      
      return( invisible( list( 'p0to50'= which(qq >=  fhat$cont[50]),
                               'p50to95' = which(qq >  fhat$cont[95] & qq <=  fhat$cont[50]),
                               'p95to99' = which(qq >  fhat$cont[99] & qq <=  fhat$cont[95]),
                               'p99plus' = which(qq <=  fhat$cont[99]) )))
    } else { # groupingType : slice 
      
      kNNIndices95plus <- qq <=  fhat$cont[95]
      return( makeSlicePlot(nSlices, colFunc, p95plusInd = kNNIndices95plus, N, args1, 
                            scoreEsts = fScoresAll , varEsts = fVarAlls[fIndices], 
                            useDirOfMaxVar = maxVar, showSlices = showSlices) )
      
    }
  } else if (variant == 'NN') {
    
    centrePoint = c(0,0);
    distName = 'euclidean';
    N <- nrow(fScoresAll)
    k99 <- floor(0.99*N);
    k95 <- floor(0.95*N);
    k50 <- floor(0.50*N);
    scaledXi <- apply(fScoresAll, 2, scale)
    distances <- apply(scaledXi, 1, function(aRow) dist(x = rbind(aRow, centrePoint), method = distName) ) 
    kNNIndices0to99  <- sort(x = distances, index.return = TRUE)$ix[1:k99] # Partial sort should be better
    kNNIndices0to50  <- kNNIndices0to99[1:k50] 
    kNNIndices50to95 <- kNNIndices0to99[(1+k50):k95]  
    kNNIndices95to99 <- kNNIndices0to99[(1+k95):k99]  
    kNNIndices99plus <- setdiff(1:N, kNNIndices0to99)
    
    if(groupingType =='standard'){ 
      
      args2 = list (x = fScores1, y = fScores2, cex= .33,  type='n' )
      do.call(plot, c(args2, args1))   
      grid(col = "#e6e6e6")
      points(fScoresAll[kNNIndices99plus,],cex=0.5, col='orange', pch=10 , lwd =2 ) 
      points(fScoresAll[kNNIndices95to99,],cex=0.33, col='red', pch=10, lwd =2 ) 
      points(fScoresAll[kNNIndices50to95,],cex=0.33, col='blue', pch=10 , lwd =2 ) 
      points(fScoresAll[kNNIndices0to50, ],cex=0.33, col='black' , pch=10, lwd =2 ) 
      legend('bottomleft', c('< 50%','50%-95%','95%-99%','> 99%'), pch = 19, 
             col= c('black','blue','red', 'orange'), pt.cex=1.5, bg='white' )
      
      return( invisible( list( 'p0to50'= kNNIndices0to50,
                               'p50to95' = kNNIndices50to95,
                               'p95to99' = kNNIndices95to99,
                               'p99plus' = kNNIndices99plus)))
    } else { # groupingType : slice
      
      kNNIndices95plus <- (1:N %in% setdiff(1:N, kNNIndices0to99[1:k95]))
      return( makeSlicePlot(nSlices, colFunc, p95plusInd = kNNIndices95plus, N, args1, 
                            scoreEsts = fScoresAll, varEsts = fVarAlls[fIndices],
                            useDirOfMaxVar = maxVar, showSlices = showSlices) )
      
    }
  }
}

makeSlicePlot <- function( nSlices, colFunc, p95plusInd, N, args1, args2, scoreEsts, varEsts, useDirOfMaxVar, showSlices){
  
  
  kNNIndices95plus <- p95plusInd
  
  args2 = list (x = scoreEsts[,1], y = scoreEsts[,2], cex= .33,  type='n' )
  do.call(plot, c(args2, args1))   
  grid(col = "#e6e6e6")
  
  points(scoreEsts[!p95plusInd, ],cex=0.33, col='black' , pch=10, lwd =2 )
  Qstr = apply(scoreEsts, 2, scale, center = FALSE) #
  #scoreEsts /  matrix( c( rep( sqrt(varEsts ), each= length(scoreEsts[,1]))), ncol=2); 
  
  dirOfMaxVar <- c(1,0);
  if(useDirOfMaxVar){
    dirOfMaxVar <- svd(scoreEsts, nv = 1)$v;
    if(all(dirOfMaxVar <0) ){
      dirOfMaxVar = -dirOfMaxVar
    }
    abline(0,  dirOfMaxVar[2]/dirOfMaxVar[1], col='magenta', lty=2) # Uncomment if you want to see the direction of max variance
  }
  
  colPal = colFunc( nSlices )
  v = 1:nSlices;
  colPal = colPal[v] # this just gives a smooth change and maximized the diffference between oppositve slices
  outlierList <- list()
  # steps =  seq(-1, (nSlices-1) *2 -1 , by =2 ) 
  angles <- seq(0,2*pi, length.out = nSlices + 1) -  1*pi/nSlices + atan2(dirOfMaxVar[2],dirOfMaxVar[1])
  sd1 = sd(scoreEsts[,1]); 
  sd2 = sd(scoreEsts[,2]); 
  for( i in 1:nSlices){
    angle = angles[i] # atan2(dirOfMaxVar[2],dirOfMaxVar[1]) + steps[i] * pi/nSlices
    multiplier1 = sign( sin( angle + pi/2) )
    multiplier2 = sign( cos( angle + pi/ (nSlices/2)))
    qrtIndx =  multiplier1 * Qstr[,2] > multiplier1 * tan(angle) * Qstr[,1] & 
      multiplier2 * Qstr[,2] < multiplier2 * tan(angle + pi/ (nSlices/2) ) * Qstr[,1] 
    outlierList[[i]] = qrtIndx & kNNIndices95plus
    points(scoreEsts[ outlierList[[i]], c(1,2), drop=FALSE], cex=0.93, col= colPal[i], pch=3, lwd =2 )
    if(showSlices){
      bigNumber = 10 * max(abs(as.vector(scoreEsts)))
      lines(x = c(0, bigNumber * multiplier1), col=colPal[i],  
            y = c(0, bigNumber * multiplier1 * tan(angle) * sd2 / sd1))
      #lines(x = c(0, bigNumber * multiplier2), col=colPal[i],  
      #      y = c(0, bigNumber * multiplier2 * tan(angle + pi/ (nSlices/2) ) * sd2 / sd1))
    }
  } 
  return( invisible( list(  'p0to95'= which(!p95plusInd), 
                            'outlier' = sapply(outlierList, which),
                            'outlierColours' = colPal)) ) 
}

quickNNeval <- function(xin,yin, zin, xout, yout){
  xIndices = sapply( xout, function(myArg) which.min( abs( xin - myArg) ), simplify = TRUE)
  yIndices = sapply( yout, function(myArg) which.min( abs( yin - myArg) ), simplify = TRUE )
  return( zin[ cbind(xIndices,yIndices)] )
}

monotonise <- function(x, maxIndex = NULL){
  xq = x;
  if (is.null(maxIndex)){
    maxIndex = which.max(x);
  }
  
  if( maxIndex != length(x) ){
    for (i in 1:( length(x) - maxIndex)){
      if( xq[ i + maxIndex] > xq[maxIndex + i - 1] ){
        xq[ i + maxIndex] = xq[maxIndex + i - 1]
      }
    }
  }
  if (maxIndex >= 3){
    for (i in 1:(maxIndex - 2 )){
      if( xq[ - 1 - i + maxIndex] > xq[maxIndex - i] ){
        xq[ - 1- i + maxIndex] = xq[maxIndex - i]
      }
    }
  }
  return(xq)
} 

monotoniseMatrix = function(zin, xmaxind, ymaxind){
  if(is.null(xmaxind) && is.null(ymaxind)){
    maxIndx = which( max(zin) == zin, arr.ind = TRUE)
    xmaxind = maxIndx[1]
    ymaxind = maxIndx[2]
  }
  zq = zin;
  for (j in 1:dim(zin)[2]){
    for (i in 1:dim(zin)[1]){
      if (i == 1 || j == 1 || j == dim(zin)[1] || i == dim(zin)[2]){
        sizeOut = max( abs(xmaxind - i) +1, abs(ymaxind - j) +1 )
        xcoord = round(   ( seq(i, xmaxind , length.out = sizeOut) ) )
        ycoord = round(   ( seq(j, ymaxind , length.out = sizeOut) ) ) 
        zq[ cbind(xcoord,ycoord) ] = monotonise( zq[ cbind(xcoord,ycoord) ]) 
      }
    }
  }
  return(zq)
}

Try the fdapace package in your browser

Any scripts or data that you put into this service are public.

fdapace documentation built on Aug. 16, 2022, 5:10 p.m.