R/multivariate_tsout_ensemble.R
In composits: Compositional, Multivariate and Univariate Time Series Outlier Ensemble

Documented in mv_tsout_ens

#' Performs multivariate time series outlier ensembling.
#'
#' @param x A  data frame or a matrix object containing a multivariate time series
#' @param m1 Variable indicating dimension reduction methods. Default is set to using all 4 methods: PCA, DOBIN, ICS and ICA.
#' @param ncomp The number of components for each dimension reduction method. Default is set to 2.
#' @param sds The random seed for generating a no-outlier time series.
#' @param rept The number of repetitions for generating a no-outlier time series.
#' @param compr To adjust for multiple testing, the results of the ensemble are compared with the results of a time series without outliers. If \code{compr =1}, a time series is simulated as in \code{simulate_comp_ts} without outliers. If \code{compr = 2}, the top outliers are removed from the outlier series and interpolated values are used for those time points. If \code{compr = 3} both methods of simulation are used for comparison.
#' @param rat A comparison is done with the outliers removed time series. The variable \code{rat} denotes the ratio of outliers to be removed as a proportion of the whole dataset for this comparison.
#' @param fast For faster computation skip ICS decomposition method.
#'
#' @return A list with the following components:
#' \item{\code{outliers}}{The outliers detected from the multivariate ensemble after comparing with the comparison time series without outliers. }
#' \item{\code{all}}{All the outliers detected from the multivariate ensemble. }
#' \item{\code{outmat}}{A matrix with outlier scores organised by outlier method.}
#' \item{\code{wts}}{The weights of the outlier detection methods.}
#' \item{\code{pca_loadings}}{The basis vectors from PCA.}
#' \item{\code{dobin_loadings}}{The basis vectors from DOBIN. See R package \code{dobin} for more details.}
#' \item{\code{ics_loadings}}{The basis vectors from ICS. See R package \code{ICS} for more details.}
#' \item{\code{ica_loadings}}{The basis vectors from Independent Component Analysis.}
#' \item{\code{decomp_wts}}{Each decomposition method has several components. For example if \code{ncomp}=2, then there are 2 PC components, 2 DOBIn components, etc ... The weight of each component is given different and depends on the decomposition method. These weights are given in \code{decomp_wts}.}
#' \item{\code{outmat4D}}{A 4D array with outlier scores organised by outlier method, decomposition method, components for each decomposition method and time.}
#'
#' @examples
#' \dontrun{
#' set.seed(100)
#' n <- 600
#' x <- sample(1:100, n, replace=TRUE)
#' x[25] <- 200
#' x[320] <- 300
#' x2 <- sample(1:100, n, replace=TRUE)
#' x3 <- sample(1:100, n, replace=TRUE)
#' x4 <- sample(1:100, n, replace=TRUE)
#' X <- cbind.data.frame(x, x2, x3, x4)
#' out <- mv_tsout_ens(X, m1=c(1,2,4), compr=2)
#' }
#'
#' @importFrom stats prcomp rnorm runif
#' @export
mv_tsout_ens <- function(x, m1 =NULL, ncomp=2,  sds=1, rept=1, compr=2, rat=0.05, fast=TRUE){ # wts = NULL,
  d <- dim(x)[2]
  n <- dim(x)[1]
  if(is.null(m1)){
    m1 <- 1:4
  }
  if(fast){
    if(is.null(m1)){
      m1 <- c(1,2,4)
    }else{
      m1 <- intersect(m1, c(1,2,4))
    }
  }
  out1 <- tsout_ensemble(x, m1, ncomp,  sds) # wts,
  res <- out1$all

  if((compr==1)|(compr==3)){
    # thresholds for actual outliers - from no outliers
    noout <- no_outlier_scores_1(d, n, ncomp, sd=123, rept = rept, m1)
    thres1 <- noout$thres1
    thres2 <- noout$thres2
    gap <- thres2 - thres1

    indthres1 <- which(res[ ,2] > thres1)
    outthres1 <- res[indthres1, ]
    outthres1$gapscore1 <- rep(0, length(indthres1))


    stars <-  (outthres1$Total_Score - thres2)/gap
    stars <- ceiling(stars)
    stars[stars < 0] <- 0
    outthres1$gapscore1 <- stars
  }
  if((compr==2)|(compr==3)){
    # Remove outliers over mean + sd from the list and get the outlier scores
    noout2 <- no_outlier_scores_2(x, ncomp, out1, rat=rat, m1)
    thres1 <- noout2$thres1
    thres2 <- noout2$thres2
    if(thres1!=0){
      gap <- thres2 - thres1

      indthres1 <- which(res[ ,2] > thres1)
      outthres2 <- res[indthres1, ]
      outthres2$gapscore2 <- rep(0, length(indthres1))


      stars <-  (outthres2[ ,2] - thres2)/gap
      stars <- ceiling(stars)
      stars[stars < 0] <- 0
      outthres2$gapscore2 <- stars
    }else{
      # thresholds are equal to zero
      # The second comparison method of removing outliers did not produce any outliers to compare with - so return the original data frame
      outthres2 <- res
    }
  }

  if(compr==1){
    outthres <- outthres1
  }else if(compr==2){
    outthres <- outthres2
  }else if(compr==3){
    outthres <- merge(outthres1, outthres2, all = TRUE)
  }

  outthres <- as.data.frame(outthres)
  gp1 <- which(colnames(outthres) %in% c("gapscore1"))
  if(length(gp1)>0){
    colnames(outthres)[gp1] <- "Gap_Score_1"
  }
  gp2 <- which(colnames(outthres) %in% c("gapscore2"))
  if(length(gp2)>0){
    colnames(outthres)[gp2] <- "Gap_Score_2"
  }

  out <- list()
  out$outliers <- as.data.frame(outthres)
  out$all<- res
  out$outmat <- out1$outmat
  out$wts <- out1$wts
  out$pca_loadings <- out1$pca_loadings
  out$dobin_loadings <- out1$dobin_loadings
  out$ics_loadings <- out1$ics_loadings
  out$ica_loadings <- out1$ica_loadings
  out$decomp_wts <- out1$decomp_wts
  out$outmat4D <- out1$outmat4D

  return(out)

}



tsout_ensemble <- function(x, m1 =NULL, ncomp=NULL,  sds=1){  # wts = NULL,
  x <- as.data.frame(x)
  d <- dim(x)[2]
  n <- dim(x)[1]

  if(is.null(m1)){
    m1 <- 1:4
  }
  if(is.null(ncomp)){
    ncomp = 2
  }
  # if(is.null(wts)){
  #   wts <- 1/(1:ncomp)
  # }

  pca_loadings <- dobin_loadings <- ics_loadings <- ica_loadings <- NA
  result <- matrix(0, nrow=n, ncol=length(m1))
  outmat <- matrix(0, nrow=n, ncol=4)
  colnames(outmat) <- c("forecast", "tsoutliers", "otsad", "anomalize" )
  wts_mat <- matrix(0, nrow=ncomp, ncol=4)
  colnames(wts_mat) <- c("dobin", "pca", "ics", "ica")
  outmat4D <- array(0, dim=c(n, 4, ncomp, 4))
  # second index for 4 outlier methods
  # fourth index for 4 coord transformation methods
  for(i in 1:length(m1)){
    j=m1[i]
    obj <- tsout_ensemble_1M(x, pre=m1[i], ncomp )  # , wts
    result[ ,i] <- obj$score
    outmat <- outmat +  obj$outmat
    if(j==1){
      dobin_loadings <- obj$load_vectors
      wts_mat[ ,1] <- obj$wts
      outmat4D[ , , ,1] <- obj$outmat3D
    }else if(j==2){
      pca_loadings <- obj$load_vectors
      wts_mat[ ,2] <- obj$wts
      outmat4D[ , , ,2] <- obj$outmat3D
    }else if(j==3){
      ics_loadings <- obj$load_vectors
      wts_mat[ ,3] <- obj$wts
      outmat4D[ , , ,3] <- obj$outmat3D
    }else if(j==4){
      ica_loadings <- obj$load_vectors
      wts_mat[ ,4] <- obj$wts
      outmat4D[ , , ,4] <- obj$outmat3D
    }
  }
  # Get the weights for the different outlier methods
  wts <- compute_method_weights(outmat)
  # Scale the results by these weights
  # result <- result %*% diag(wts) # I think this needs to be outmat not result

  # Output scaled according to outlier method weights
  outmat2 <- outmat %*% diag(wts)

  result_rowsum <- apply(result, 1, sum)
  outmat_rowsum <- apply(outmat2, 1, sum)

  avg <- apply(outmat2, 1, sum)
  inds <- which(avg > 0)
  # Number of coordinates which found that point as an outlier
  num_comps_all <- apply(result, 1, function(x) sum(x>0))
  num_comps <- num_comps_all[inds]

  result[inds, ] <- result[inds, ]*(outmat_rowsum[inds]/result_rowsum[inds])

  # outmatsum_inds <- which(apply(outmat2, 1, sum) > 0 )
  outmat_positive <- outmat2 > 0
  num_methods_all <- apply( outmat_positive, 1, sum )
  num_methods <- num_methods_all[inds]

  colnames(result) <- c("DOBIN", "PCA", "ICS", "ICA")[m1]

  if(length(inds)==1){
    res <- c(inds, avg[inds], num_comps,  num_methods,  result[inds, ], outmat2[inds, ])
    res <- t(data.frame(res))
  }else{
    res <- cbind.data.frame(inds, avg[inds], num_comps,  num_methods,  result[inds, ], outmat2[inds, ])
  }

  colnames(res) <- c("Indices", "Total_Score", "Num_Coords", "Num_Methods", colnames(result), colnames(outmat) )

  tot_vals <- avg[inds]
  # thres <- mean(tot_vals) + sd(tot_vals)
  # indsgrtr <- which(res[ ,2] > thres)
  #
  # outl <- res[indsgrtr, ]

  out <- list()
  #out$outliers <- outl
  out$all<- res
  out$outmat <- outmat
  out$wts <- wts
  out$pca_loadings <- pca_loadings
  out$dobin_loadings <- dobin_loadings
  out$ics_loadings <- ics_loadings
  out$ica_loadings <- ica_loadings
  out$decomp_wts <- wts_mat
  out$outmat4D <- outmat4D
  return(out)
}


compute_method_weights <- function(outmat){
  wts <- rep(0, 4)
  for(i in 1:4){
    non_zero_inds_i <- which(outmat[ ,i] >0)
    if(length(non_zero_inds_i) >0 ){
      outmat_i <- outmat[ ,-i]
      non_zero_inds_others <- which( apply(outmat_i, 1, sum) > 0 )
      wts[i] <- length(intersect(non_zero_inds_others, non_zero_inds_i))/length(non_zero_inds_i)
    }else{
      wts[i] <- 1
    }
  }
  wts <- wts/sum(wts)
  return(wts)
}

Any scripts or data that you put into this service are public.

composits documentation built on May 25, 2022, 9:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

composits
Compositional, Multivariate and Univariate Time Series Outlier Ensemble

R/multivariate_tsout_ensemble.R
In composits: Compositional, Multivariate and Univariate Time Series Outlier Ensemble

Defines functions compute_method_weights tsout_ensemble mv_tsout_ens

Documented in mv_tsout_ens

Try the composits package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

composits Compositional, Multivariate and Univariate Time Series Outlier Ensemble

R/multivariate_tsout_ensemble.R In composits: Compositional, Multivariate and Univariate Time Series Outlier Ensemble

Defines functions compute_method_weights tsout_ensemble mv_tsout_ens

Documented in mv_tsout_ens

Try the composits package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

composits
Compositional, Multivariate and Univariate Time Series Outlier Ensemble

R/multivariate_tsout_ensemble.R
In composits: Compositional, Multivariate and Univariate Time Series Outlier Ensemble