R/outliers.ts.oga.R
In outliers.ts.oga: Efficient Outlier Detection for Large Time Series Databases

"single_oga" <- function(yt,s=NULL){
  
  ##########################################################################################################
  # Set up an execution indicator
  
  run <- "Yes"
  
  ##########################################################################################################
  # Validate frequency s
  
  if (is.ts(yt)==T){s <- frequency(yt)}
  if (is.null(s)==T){s <- 1}
  if (length(s)==1 && is.numeric(s)){
    if ((s %in% c(1,4,7,12,24,52,60))==F){message("s should be 1, 4, 7, 12, 24, 52 or 60");run <- "No"}
  } else {message("s should be 1, 4, 7, 12, 24, 52 or 60");run <- "No"}
  
  ##########################################################################################################
  # Verify yt is a numeric vector or a ts object. Attempt conversion otherwise
  
  if (is.ts(yt)==T){
    if (is.null(ncol(yt))==F){message("yt should be a single time series");run <- "No"}
  } else {
    if (is.vector(yt)==T){
      if (is.numeric(yt)==F){message("yt must be a numeric vector or a ts object");run <- "No"
      } else {yt <- ts(yt,start=1,frequency=s)}
    } else {
      if (is.matrix(yt) && ncol(yt)){yt <- ts(as.vector(yt),start=1,frequency=s)} else {
        message("yt must be of class vector or ts");run <- "No"}
    }
  }
  if (anyNA(yt)==T){message("Series with NAs are not allowed");run <- "No"}
  
  ##########################################################################################################
  # Check that sample size is large enough
  
  nT <- length(yt)
  if ((s==1)&&(nT<22)){message("Time series is too short");run <- "No"}
  if ((s==4)&&(nT<26)){message("Time series is too short");run <- "No"}
  if ((s==7)&&(nT<29)){message("Time series is too short");run <- "No"}
  if ((s==12)&&(nT<34)){message("Time series is too short");run <- "No"}
  if ((s==24)&&(nT<46)){message("Time series is too short");run <- "No"}
  if ((s==52)&&(nT<74)){message("Time series is too short");run <- "No"}
  if ((s==60)&&(nT<82)){message("Time series is too short");run <- "No"}
  
  ##########################################################################################################
  # Detect long runs of identical values
  
  n_rep <- sum(diff(yt)==0)
  if ((n_rep/nT)>0.5){message("Time series has many consecutive repeated values");run <- "No"}
  
  ##########################################################################################################
  # If all input conditions are met, proceed with the procedure. Otherwise, define outputs and conclude
  
  if (run == "Yes"){
    
    ########################################################################################################
    # First step: (i) Robust AR fitting
    
    rob_fit <- rob_ar(yt,s)
    rob_et <- rob_fit$rob_et
    rob_pis <- rob_fit$rob_pis
    max_lag <- rob_fit$max_lag
    
    ########################################################################################################
    # First step: (ii) Outlier detection with OGA with residuals after robust fitting and get cleaned series
    
    beg_eff <- max_lag
    end_eff <- 6
    clean <- T
    oga_rob <- det_oga(yt,rob_et,rob_pis,beg_eff,end_eff,clean)
    aos_rob <- oga_rob$aos
    lss_rob <- oga_rob$lss
    yt_clean_rob <- oga_rob$yt_clean
    
    ########################################################################################################
    # Second step: Repeat until no more outliers are found
    
    all_aos <- aos_rob
    all_lss <- lss_rob
    yt_clean <- yt_clean_rob
    out <- T
    while (out==T){
      
      # (i) Fit an ARIMA model to the cleaned series and obtain new AR representation and residuals
      
      clean_sarima <- sarima_fit(yt_clean)
      clean_et <- clean_sarima$con_et
      clean_pis <- clean_sarima$con_pis
      arima_fit <- clean_sarima$arima_fit
      
      # Second step: (ii) Detection of outliers with OGA with new residuals
      
      beg_eff <- max(c(arima_fit$arma[1]+arima_fit$arma[6]+s*(arima_fit$arma[3]+arima_fit$arma[7]),
                       arima_fit$arma[2]+arima_fit$arma[4]*s))
      oga_fit <- det_oga(yt_clean,clean_et,clean_pis,beg_eff,end_eff,clean=T)
      aos_fit <- oga_fit$aos
      lss_fit <- oga_fit$lss
      yt_clean_fit <- oga_fit$yt_clean
      
      # Checking

      n_aos_fit <- length(aos_fit)
      n_lss_fit <- length(lss_fit)
      n_out_fit <- n_aos_fit + n_lss_fit
      if (n_out_fit>0){
        yt_clean <- yt_clean_fit
        if (all(aos_fit %in% all_aos) && all(lss_fit %in% all_lss)){
          out <- F
        } else {
          all_aos <- sort(unique(c(all_aos,aos_fit)))
          all_lss <- sort(unique(c(all_lss,lss_fit)))
        }       
      } else {
        out <- F
      }
      
    }
    n_all_aos <- length(all_aos)
    n_all_lss <- length(all_lss)
    n_all_out <- n_all_aos + n_all_lss
    
    ##########################################################################################################
    # Third step: Joint estimation of ARIMA model and outlying effects, if any
    
    if (n_all_out>0){
      out_joint_fit <- joint_fit(yt,all_aos,all_lss,arima_fit)
      aos <- out_joint_fit$aos
      lss <- out_joint_fit$lss
      yt_clean <- out_joint_fit$yt_clean
    } else {
      yt_clean <- yt
      aos <- lss <- NULL
    }
    
  } else {
    yt_clean <- yt
    aos <- lss <- NULL
  }
  
  ##########################################################################################################
  # Return outputs
  
  return(list("yt_clean"=yt_clean,"aos"=aos,"lss"=lss))
  
}

"rob_ar" <- function(yt,s){
  
  ##########################################################################################################
  # Define the vector of lags
  
  nT <- length(yt)
  if (s==1){
    p_lags <- 1:6
  } else if ((s>1)&&((3*s+6)<nT)){
    p_lags <- unique(c(1:6,s:(s+6)))
  } else {
    p_lags <- unique(c(1:6,s:(s+6),2*s:(2*s+6),3*s:(3*s+6)))
  }
  max_lag <- max(p_lags)
  
  ##########################################################################################################
  # Run robust regression
  
  XX <- embed(yt,max_lag+1)
  XX <- data.frame(XX[,c(1,p_lags+1)])
  m1 <- suppressWarnings(robust::lmRob(XX$X1~.,data=XX,control=robust::lmRob.control(seed=1,trace=0)))
  #invisible(capture.output(m1 <- robust::lmRob(XX$X1~.,data=XX,control=robust::lmRob.control(seed=1))))
  rob_et <- c(rep(0,max_lag),as.vector(m1$residuals))
  rob_pis <- rep(0,max_lag)
  rob_pis[p_lags] <- as.vector(m1$coefficients[-1])
  
  ##########################################################################################################
  # Return results
  
  return(list("rob_et"=rob_et,"rob_pis"=rob_pis,"max_lag"=max_lag))
  
}

"det_oga" <- function(yt,et,pis,beg_eff,end_eff,clean){
  
  ##########################################################################################################
  # Compute time series length and number of fitted AR parameters
  
  nT <- length(yt)
  p <- length(pis)
  
  ##########################################################################################################
  # Build predictor matrix of size nT x 2nT (all times for aos and lss go into the matrix)
  
  pis_aos <- if (p<(nT-1)){c(1,-pis,rep(0,nT-1-p))}else{c(1,-pis[1:(nT-1)])}
  pis_lss <- cumsum(pis_aos)
  M <- matrix(1,nrow=nT,ncol=2*nT)
  M[,1:nT] <- toeplitz(pis_aos)
  M[,1:nT][upper.tri(M[,1:nT])] <- 0
  M[,(nT+1):(2*nT)] <- toeplitz(pis_lss)
  M[,(nT+1):(2*nT)][upper.tri(M[,(nT+1):(2*nT)])] <- 0
  
  ##########################################################################################################
  # Run OGA
  
  out_oga <- oga(M,et,beg_eff,end_eff)
  J_Trim <- out_oga$J_Trim
  
  ##########################################################################################################
  # Two scenarios: 
  # (i) If OGA finds no outliers, proceed to defining the outputs; 
  # (ii) If OGA identifies outliers, consider alternative configurations if necessary. Then, define AOs and LSs, 
  # and if required, compute the series without the outliers
  
  n_out <- length(J_Trim)
  if (n_out==0){
    aos <- lss <- NULL
    if (clean){yt_clean <- yt}
  } else {
    if (n_out>1){
      J_Trim <- config_oga(J_Trim,M,et)
      n_out <- length(J_Trim)
    }
    aos <- J_Trim[J_Trim<=nT]
    lss <- J_Trim[J_Trim>nT] - nT
    if (clean){
      M_Trim <- as.data.frame(as.matrix(M[,J_Trim]))
      colnames(M_Trim) <- names(data.frame(M))[J_Trim]
      fit_Trim <- lm(et~.-1,data=M_Trim)
      betahat_Trim <- summary(fit_Trim)
      yt_clean <- yt
      n_aos <- length(aos)
      n_lss <- length(lss)
      if (n_aos>0){
        w_aos <- as.numeric(coefficients(betahat_Trim)[1:n_aos,1])
        yt_clean[aos] <- yt_clean[aos] - w_aos
      } else {
        aos <- NULL
      }
      if (n_lss > 0){
        w_lss <- as.numeric(coefficients(betahat_Trim)[(n_aos+1):n_out,1])
        for (i in 1:n_lss){yt_clean[lss[i]:nT] <- yt_clean[lss[i]:nT] - w_lss[i]}
      } else {
        lss <- NULL
      }
    }
    
  }
  
  ##########################################################################################################
  # Return outputs
  
  if (clean){
    return(list("aos"=aos,"lss"=lss,"yt_clean"=yt_clean))
  } else {
    return(list("aos"=aos,"lss"=lss))
  }
  
}

"oga" <- function(M,et,beg_eff,end_eff){
  
  ##########################################################################################################
  # Preliminary steps
  
  nT <- nrow(M) # Number of rows
  nC <- ncol(M) # Number of columns
  K <- max(1,floor(5*sqrt(nT/log(nC)))) # Maximum number of predictors allowed
  
  ##########################################################################################################
  # Center responses and predictors
  
  d_et <- et - mean(et) # Centered residuals
  d_M <- M - rep(colMeans(M),rep.int(nT,nC)) # Centered predictors
  
  ##########################################################################################################
  # Set up objects and compute predictor norms
  
  J <- sig2 <- rep(0,K) # Initial vectors of selected predictors and residual variances
  MJ <- matrix(0,nT,K) # Initial matrix of centered predictors
  u <- as.matrix(d_et) # Initial vector of residuals
  M_norm <- sqrt(colSums(d_M^2)) # Norms of centered predictors
  d_M_st <- t(t(d_M)/M_norm) # Normalized centered predictors
  
  ##########################################################################################################
  # Select the first predictor by maximizing the squared root of absolute SSE
  
  aSSE <- abs(t(u)%*%d_M_st) # Vector with square roots of the absolute values of SSEs
  aSSE_0 <- c(1:beg_eff,(nT+1):(nT+beg_eff),(nC-end_eff+1):nC) # Beginning and end indices
  aSSE[aSSE_0] <- 0 # Remove beginning and end aSSEs
  J[1] <- which.max(as.vector(aSSE)) # First selected predictor
  
  ##########################################################################################################
  # Define first standardized predictor, residuals and residual variance
  
  MJ[,1] <- d_M_st[,J[1]] # First standardized centered predictor
  u <- u - MJ[,1] %*% t(MJ[,1]) %*% u # First residuals
  sig2[1] <- mean(u^2) # First residual variance
  
  ##########################################################################################################
  # Loop with orthogonal predictors
  
  if (K>1){
    for (k in 2:K){
      aSSE <- abs(t(u)%*%d_M_st) # Recompute aSSEs
      aSSE[aSSE_0] <- 0 # Remove beginning and end aSSEs
      aSSE[J[1:(k-1)]] <- 0 # Remove those that have appeared previously
      J[k] <- which.max(as.vector(aSSE)) # k-th selected predictor
      rq <- d_M[,J[k]]-MJ[,1:(k-1)]%*%t(MJ[,1:(k-1)])%*%d_M[,J[k]] # Orthogonalize new predictor
      MJ[,k] <- (rq/sqrt(sum(rq^2))) # k-th normalized orthogonal predictor
      u <- u - MJ[,k] %*% t(MJ[,k]) %*% u # k-th residuals
      sig2[k] <- mean(u^2) # k-th residual variance
    }
  }
  
  ##########################################################################################################
  # Compute HDAIC for 1:K and select the one that gives the minimum value
  
  hdic <- nT * log(sig2) + (1:K) * 2 * log(nC)
  kn <- which.min(hdic)
  
  ##########################################################################################################
  # Compare with model without predictors
  
  hdic_0 <- nT * log(mean(d_et^2))
  if (hdic_0 < hdic[kn]){
    J_Trim <- NULL # Define output
  } else {
    
    ########################################################################################################
    # Carry out trimming by skipping predictors that leads to large values of HDAIC
    
    benchmark <- hdic[kn] # HDAIC for the selected model
    J_Trim <- J[1:kn] # All indices
    trim_pos <- rep(0,kn) # Initially, all the indices are excluded (value 0)
    if (kn > 1) {
      for (l in 1:(kn-1)){
        JDrop1 <- J_Trim[-l] # Eliminate l-th predictor
        fit <- lm(d_et~.-1,data=data.frame(d_M[,JDrop1])) # Fit with OLS after skipping l-th predictor
        uDrop1 <- fit$residuals # Residuals of the fit
        HDICDrop1 <- nT * log(mean(uDrop1^2)) + (kn-1) * 2 * log(nC) # Compute HDAIC
        if (HDICDrop1>benchmark){trim_pos[l] <- 1} # Include predictor if HDAIC is greater (value 1)
      }
      trim_pos[kn] <- 1 # The last predictor is always included
      J_Trim <- J_Trim[which(trim_pos==1)] # Indices selected
    }
    J_Trim <- sort(J_Trim) # Sort indices selected
    
  }
  
  ##########################################################################################################
  # Return outputs
  
  return(list("J_Trim"=J_Trim))
  
}

"config_oga" <- function(J_Trim,M,et){
  
  ##########################################################################################################
  # Obtain dimensions of M
  
  nT <- nrow(M)
  nC <- ncol(M)
  
  ##########################################################################################################
  # Identify all runs in J_Trim
  
  runs_J_Trim <- runs_vec(J_Trim)
  n_runs <- runs_J_Trim$n_runs
  
  ##########################################################################################################
  # If there are runs, obtain all configurations
  
  if (n_runs>0){
    configs <- list()
    configs[[1]] <- J_Trim
    for (i in 1:n_runs){
      n_configs <- length(configs)
      n_run <- length(runs_J_Trim$runs[[i]])
      if (all(runs_J_Trim$runs[[i]]<=nT)){
        for (j in 1:n_configs){
          configs[[n_configs+j]] <- configs[[j]]
          ind_run <- which(configs[[n_configs+j]] %in% runs_J_Trim$runs[[i]])
          configs[[n_configs+j]] <- configs[[n_configs+j]][-ind_run]
          configs[[n_configs+j]] <- sort(unique(c(configs[[n_configs+j]],runs_J_Trim$runs[[i]][1]+nT,
                                                  runs_J_Trim$runs[[i]][n_run]+1+nT)))
        }
      } else {
        for (j in 1:n_configs){
          configs[[n_configs+2*j-1]] <- configs[[j]]
          ind_run <- which(configs[[n_configs+2*j-1]] %in% runs_J_Trim$runs[[i]])
          configs[[n_configs+2*j-1]] <- configs[[n_configs+2*j-1]][-ind_run]
          configs[[n_configs+2*j-1]] <- sort(unique(c(configs[[n_configs+2*j-1]],runs_J_Trim$runs[[i]][1:(n_run-1)]-nT)))
          configs[[n_configs+2*j]] <- sort(unique(c(configs[[n_configs+2*j-1]],runs_J_Trim$runs[[i]][n_run])))    
        }
      }
    }
    
    ##########################################################################################################
    # Compute HDAIC for all configurations
    
    n_configs <- length(configs)
    HDAIC <- vector(length=n_configs)
    for (i in 1:n_configs){
      M_config <- as.data.frame(M[,configs[[i]]])
      rank_M_config <- qr(M_config)$rank
      if (rank_M_config<ncol(M_config)){
        HDAIC[i] <- NA
      } else {
        fit_M_config <- lm(et~.-1,data=M_config)
        HDAIC[i] <- nT * log(mean(fit_M_config$residuals^2)) + length(configs[[i]]) * 2 * log(nC)
      }
    }
    if (all(is.na(HDAIC))==F){J_Trim <- configs[[which.min(HDAIC)]]}
    
  } 
  
  ##########################################################################################################
  # Return outputs
  
  return(J_Trim)
  
}

"runs_vec" <- function(vec){
  n_vec <- length(vec)
  if (n_vec<2) {
    runs <- NULL
    n_runs <- 0
  } else {
    all_runs <- split(vec,cumsum(c(0,diff(vec)>1)))
    which_runs <- as.numeric(which(lengths(all_runs)>1))
    n_runs <- length(which_runs)
    if (n_runs>0){
      runs <- vector("list",length=n_runs)
      runs[1:n_runs] <- all_runs[which_runs]
    } else {
      runs <- NULL
    }
  } 
  
  ##########################################################################################################
  # Return outputs
  
  return(list("runs"=runs,"n_runs"=n_runs))
  
}

"sarima_fit" <- function(yt_clean){
  
  ##########################################################################################################
  # Carry out identification depending on the frequency
  
  s <- frequency(yt_clean)
  if (s>1) {
    suppressMessages(suppressWarnings(spec_arima <- SLBDD::sarimaSpec(yt_clean,maxorder=c(3,1,0),period=s,method="CSS")))
    order <- spec_arima$order[1:3]
    sorder <- spec_arima$order[4:6]
    include.mean <- spec_arima$include.mean
    suppressMessages(suppressWarnings(arima_fit <- forecast::Arima(yt_clean,order=order,seasonal=list(order=sorder,period=s),
                                                  include.mean=include.mean,method="CSS")))
  } else {
    suppressMessages(suppressWarnings(spec_arima <- SLBDD::arimaSpec(yt_clean,maxorder=c(3,1,0),method="CSS")))
    order <- spec_arima$order
    sorder <- NULL
    include.mean <- spec_arima$include.mean
    suppressMessages(suppressWarnings(arima_fit <- forecast::Arima(yt_clean,order=order,include.mean=include.mean,method="CSS")))
  }
  
  ##########################################################################################################
  # Obtain residuals
  
  con_et <- suppressMessages(suppressWarnings(residuals(forecast::Arima(yt_clean,model=arima_fit))))
  if (anyNA(con_et)==T){con_et <- suppressMessages(suppressWarnings(forecast::na.interp(con_et)))}
  
  ##########################################################################################################
  # Obtain AR representation
  
  n_phi <- arima_fit$arma[1]
  n_theta <- arima_fit$arma[2]
  n_PHI <- arima_fit$arma[3]
  n_THETA <- arima_fit$arma[4]
  s <- arima_fit$arma[5]
  d <- arima_fit$arma[6]
  Ds <- arima_fit$arma[7]
  n_ALL <- 0
  if (n_phi>0){
    phi <- arima_fit$coef[(n_ALL+1):(n_ALL+n_phi)]
    n_ALL <- n_ALL + n_phi
  } else {
    phi <- 0
  }
  if (n_theta>0){
    theta <- arima_fit$coef[(n_ALL+1):(n_ALL+n_theta)]
    n_ALL <- n_ALL + n_theta
  } else {
    theta <- 0
  }
  if (n_PHI>0){
    PHI <- arima_fit$coef[(n_ALL+1):(n_ALL+n_PHI)]
    n_ALL <- n_ALL + n_PHI
  } else {
    PHI <- 0
  }
  if (n_THETA>0){
    THETA <- arima_fit$coef[(n_ALL+1):(n_ALL+n_THETA)]
    n_ALL <- n_ALL + n_THETA
  } else {
    THETA <- 0
  }
  con_pis <- gsarima::arrep(notation="arima",phi=phi,d=d,theta=theta,Phi=PHI,D=Ds,Theta=THETA,frequency=s)
  
  ##########################################################################################################
  # Return outputs
  
  return(list("con_et"=con_et,"con_pis"=con_pis,"arima_fit"=arima_fit))
  
}

"joint_fit" <- function(yt,aos,lss,arima_fit){
  
  ##########################################################################################################
  # Compute length of time series, AOs and LSs
  
  nT <- length(yt)
  n_aos <- length(aos)
  n_lss <- length(lss)
  n_out <- n_aos + n_lss
  
  ##########################################################################################################
  # Define ARIMA model to fit
  
  add_seas <- (arima_fit$arma[3]+arima_fit$arma[4]+arima_fit$arma[7]>0)
  if (add_seas==F){
    n_phi <- arima_fit$arma[1]
    n_theta <- arima_fit$arma[2]
    d <- arima_fit$arma[6]
    if ((n_phi+n_theta)<length(arima_fit$coef)){inc_mean <- T}else{inc_mean <- F}
  } else {
    n_phi <- arima_fit$arma[1]
    n_theta <- arima_fit$arma[2]
    n_PHI <- arima_fit$arma[3]
    n_THETA <- arima_fit$arma[4]
    s <- arima_fit$arma[5]
    d <- arima_fit$arma[6]
    Ds <- arima_fit$arma[7]
    if ((n_phi+n_theta+n_PHI+n_THETA)<length(arima_fit$coef)){inc_mean <- T}else{inc_mean <- F}
  }
  
  ##########################################################################################################
  # Joint estimation of ARIMA and outlier sizes
  
  X_reg <- matrix(0,nrow=nT,ncol=n_out)
  if (n_aos>0){for (i in 1:n_aos){X_reg[aos[i],i] <- 1}}
  if (n_lss>0){for (i in 1:n_lss){X_reg[lss[i]:nT,n_aos+i] <- 1}}
  
  if ((n_aos>0) && (n_lss>0)){
    are_lc <- caret::findLinearCombos(X_reg)
    n_are_lc <- length(are_lc$linearCombos)
    if (n_are_lc > 0){
      elim_col <- NULL
      for (i in 1:n_are_lc){
        are_lc_col <- are_lc$linearCombos[[i]]
        elim_col <- c(elim_col,are_lc_col[-which.max(colSums(X_reg[,are_lc_col]))])
      }
      elim_col <- sort(unique(elim_col))
      
      X_reg <- X_reg[,-elim_col]
      
      elim_col_aos <- elim_col[elim_col<=n_aos]
      elim_col_lss <- elim_col[elim_col>n_aos]-n_aos
      aos <- aos[-elim_col_aos]
      lss <- lss[-elim_col_lss]
      n_aos <- length(aos)
      n_lss <- length(lss)
      n_out <- n_aos + n_lss
    }
  }
  
  if (add_seas==F){
    suppressWarnings(con_sarima_end <- arima(yt,order=c(n_phi,d,n_theta),xreg=X_reg,
                                             include.mean=inc_mean,method="CSS"))
  } else {
    suppressWarnings(con_sarima_end <- arima(yt,order=c(n_phi,d,n_theta),seasonal=list(order=c(n_PHI,Ds,n_THETA),period=s),
                                             xreg=X_reg,include.mean=inc_mean,method="CSS"))
  }
  
  ##########################################################################################################
  # Define outliers and sizes
  
  n_par <- length(coef(con_sarima_end)) - n_out # Number of estimated ARIMA parameters
  w_est <- as.numeric(coef(con_sarima_end)[(n_par+1):(n_par+n_out)]) # Outlier size estimates
  if (n_aos>0){
    aos_end <- matrix(0,nrow=n_aos,ncol=2)
    aos_end[,1] <- aos
    aos_end[,2] <- w_est[1:n_aos]
  } else {
    aos_end <- NULL
  }
  if (n_lss>0){
    lss_end <- matrix(0,nrow=n_lss,ncol=2)
    lss_end[,1] <- lss
    lss_end[,2] <- w_est[(n_aos+1):n_out]
  } else {
    lss_end <- NULL
  }
  
  ##########################################################################################################
  # Obtain the clean time series
  
  if (n_out>0){
    yt_clean <- yt
    if (n_aos>0){for (i in 1:n_aos){yt_clean[aos_end[i]] <- yt_clean[aos_end[i]] - aos_end[i,2]}}
    if (n_lss>0){for (i in 1:n_lss){yt_clean[lss_end[i]:nT] <- yt_clean[lss_end[i]:nT] - lss_end[i,2]}}
  }
  
  #############################################################################################################
  # Return results
  
  return(list("yt_clean"=yt_clean,"aos"=aos_end,"lss"=lss_end))
  
}

"db_hom_oga" <- function(Y,s=NULL){
  
  run <- "Yes"
  
  ##########################################################################################################
  # Check validity of the frequency s
  
  if (is.null(s)==T){s <- 1}
  if (length(s)==1 && is.numeric(s)){
    if ((s %in% c(1,4,7,12,24,52,60))==F){result <- "s should be 1, 4, 7, 12, 24, 52 or 60";run <- "No"}
  } else {
    result <- "s should be 1, 4, 7, 12, 24, 52 or 60";run <- "No"
  }
  
  ##########################################################################################################
  # Check that Y is class matrix and transform if it is class data.frame or mts
  
  if (is.data.frame(Y)==T){Y <- as.matrix(Y)} # Check that Y is class data.frame
  if (is.mts(Y)==T){Y <- as.matrix(Y)} # Check that Y is class mts
  if (is.matrix(Y)==F){result <- "Y should be a matrix or a data.frame";run <- "no"} # Stop if Y is not class matrix
  if (is.numeric(Y)==F){result <- "Y must be numeric";run <- "No"}
  
  ##########################################################################################################
  # Check if there are NAs
  
  if (anyNA(Y)==T){result <- "Series with NAs are not allowed";run <- "No"}
  
  ##########################################################################################################
  # Check sufficiently large sample size
  
  nT <- nrow(Y)
  if ((s==1)&&(nT<22)){message("Time series is too short");run <- "No"}
  if ((s==4)&&(nT<26)){message("Time series is too short");run <- "No"}
  if ((s==7)&&(nT<29)){message("Time series is too short");run <- "No"}
  if ((s==12)&&(nT<34)){message("Time series is too short");run <- "No"}
  if ((s==24)&&(nT<46)){message("Time series is too short");run <- "No"}
  if ((s==52)&&(nT<74)){message("Time series is too short");run <- "No"}
  if ((s==60)&&(nT<82)){message("Time series is too short");run <- "No"}
  
  ##########################################################################################################
  # Run procedure if
  
  if (run == "Yes"){
    
    ##########################################################################################################
    # Set up parallel stuff and run procedure
    
    plan(multisession,workers=availableCores())
    out_bd_oga <- future_apply(Y,2,single_oga,s=s)
    plan(sequential)

    ##########################################################################################################
    # Save results
    
    p <- ncol(Y)
    AOs <- LSs <- vector(mode='list',length=p)
    n_AOs <- n_LSs <- vector(mode='numeric',length=p)
    Y_clean <- matrix(Y,nT,p)
    for (i in 1:p){
      if (is.matrix(out_bd_oga[[i]]$aos)){
        n_AOs[i] <- nrow(out_bd_oga[[i]]$aos)
        AOs[[i]] <- out_bd_oga[[i]]$aos[,1]
      }
      if (is.matrix(out_bd_oga[[i]]$lss)){
        n_LSs[i] <- nrow(out_bd_oga[[i]]$lss)
        LSs[[i]] <- out_bd_oga[[i]]$lss[,1]
      }
      Y_clean[,i] <- as.vector(out_bd_oga[[i]]$yt_clean)
    }
    result <- "The procedure has been successfully completed"
  } else {
    n_AOs <- n_LSs <- AOs <- LSs <- NULL
    Y_clean <- Y
  }
  
  ##########################################################################################################
  # Return results
  
  return(list("n_AOs"=n_AOs,"n_LSs"=n_LSs,"AOs"=AOs,"LSs"=LSs,"Y_clean"=Y_clean,"result"=result))
  
}

"db_het_oga" <- function(Y){
  
  run <- "Yes"
  
  ##########################################################################################################
  # Check that Y is a list of time series (ts objects)
  
  if (is.list(Y)==F){result <- "Y must be a list of time series";run <- "No"}
  if (all(sapply(Y,is.ts))==F){result <- "Y must be a list of time series";run <- "No"}
  
  ##########################################################################################################
  # Obtain number of series and frequencies and check validity of frequencies
  
  nY <- length(Y)
  nT <- lengths(Y)
  s <- unlist(lapply(Y,frequency))
  for (i in 1:nY){
    if ((s[i]==1)&&(nT[i]<22)){result <- "Time series is too short";run <- "No"}
    if ((s[i]==4)&&(nT[i]<26)){result <- "Time series is too short";run <- "No"}
    if ((s[i]==7)&&(nT[i]<29)){result <- "Time series is too short";run <- "No"}
    if ((s[i]==12)&&(nT[i]<34)){result <- "Time series is too short";run <- "No"}
    if ((s[i]==24)&&(nT[i]<46)){result <- "Time series is too short";run <- "No"}
    if ((s[i]==52)&&(nT[i]<74)){result <- "Time series is too short";run <- "No"}
    if ((s[i]==60)&&(nT[i]<82)){result <- "Time series is too short";run <- "No"}
    if ((s[i] %in% c(1,4,7,12,24,52,60))==F){result <- "s should be 1, 4, 7, 12, 24, 52 or 60";run <- "No"}
  }
  
  ##########################################################################################################
  # Check if there are NAs
  
  if (anyNA(Y)==T){result <- "Series with NAs are not allowed";run <- "No"}
  
  ##########################################################################################################
  # Run procedure if everything is fine
  
  if (run == "Yes"){
    
    ##########################################################################################################
    # Set up parallel stuff and run procedure
    
    plan(multisession,workers=availableCores())
    out_bd_oga <- future_lapply(Y,FUN=single_oga)
    plan(sequential)

    ##########################################################################################################
    # Save results
    
    AOs <- LSs <- Y_clean <- vector(mode='list',length=nY)
    n_AOs <- n_LSs <- vector(mode='numeric',length=nY)
    for (i in 1:nY){
      if (is.matrix(out_bd_oga[[i]]$aos)){
        n_AOs[i] <- nrow(out_bd_oga[[i]]$aos)
        AOs[[i]] <- out_bd_oga[[i]]$aos[,1]
      }
      if (is.matrix(out_bd_oga[[i]]$lss)){
        n_LSs[i] <- nrow(out_bd_oga[[i]]$lss)
        LSs[[i]] <- out_bd_oga[[i]]$lss[,1]
      }
      Y_clean[[i]] <- ts(out_bd_oga[[i]]$yt_clean,frequency=s[i])
    }
    result <- "The procedure has been successfully completed"
  } else {
    n_AOs <- n_LSs <- AOs <- LSs <- NULL
    Y_clean <- Y
  }
  
  ##########################################################################################################
  # Return results
  
  return(list("n_AOs"=n_AOs,"n_LSs"=n_LSs,"AOs"=AOs,"LSs"=LSs,"Y_clean"=Y_clean,"result"=result))
  
}
Any scripts or data that you put into this service are public.
outliers.ts.oga documentation built on Sept. 9, 2025, 5:37 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
outliers.ts.oga
Efficient Outlier Detection for Large Time Series Databases

R/outliers.ts.oga.R
In outliers.ts.oga: Efficient Outlier Detection for Large Time Series Databases

Try the outliers.ts.oga package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

outliers.ts.oga Efficient Outlier Detection for Large Time Series Databases

R/outliers.ts.oga.R In outliers.ts.oga: Efficient Outlier Detection for Large Time Series Databases

Try the outliers.ts.oga package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

outliers.ts.oga
Efficient Outlier Detection for Large Time Series Databases

R/outliers.ts.oga.R
In outliers.ts.oga: Efficient Outlier Detection for Large Time Series Databases