R/dimensionalityReduction.R


#' dimensionalityReduction
#' 
#' Function implementing the dimensionality reduction module in the DFML architecture.
#' The function implements a batch dimensionality reduction.
#'
#' @param X - n x N matrix containing the N time series as columns, each one of length n time steps
#' @param k - Number of desired factors as output - Integer
#' @param family - Family of methods to use - String among those defined in DIMENSIONALITY_METHODS
#' @param params - Parameters to be passed to the dimensionality reduction method - List
#'                 
#'                 An (optional) pre-existing model is passed through \code{params$model}.
#'                 If NULL, the model will be computed by the function. 
#'
#'
#' @return List containing:
#'         \itemize{
#'         \item{\code{Z}: }{n x k matrix containing the k factors generated by the dimensionality reduction module}
#'         \item{\code{time_dim}: }{Computational time (in s) required to fit the dimensionality model - numeric scalar}
#'         \item{\code{model}: }{Fitted model constructed via the function}
#'         }
#'
#' @import autoencoder
#' @export
#'
#' @examples 
#' #See tests/testthat directory on https://github.com/jdestefani/ExtendedDFML
#' 
dimensionalityReduction <- function(X,k,family=DIMENSIONALITY_METHODS,params=list(model=NULL,embedding_params=NULL)){
    # Coerce the input to a matrix if needed
    if(!is.matrix(X)){
        X <- as.matrix(X)
    }
    
    family <- match.arg(family)
    switch(family,
           PCA={
               if(is.null(params[["model"]])){
                   ptm <- proc.time()
                   X_cov <- stats::cov(X)
                   X_eigen <- eigen(X_cov,TRUE)
                   time_fit <- (proc.time() - ptm)[3]
                   model <- list(eigenvalues=X_eigen$values[1:k],eigenvectors=t(X_eigen$vectors[,1:k]))
               }
               else{
                   model <- params$model
                   time_fit <- params$time_dim
               }
               ptm <- proc.time()
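               # Project X onto the retained eigenvectors: Z = X %*% V_k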
               Z <- X%*%t(model$eigenvectors)
               time_dim <- time_fit + (proc.time() - ptm)[3]
           },
           Autoencoder={
               if(is.null(params[["model"]])){
                   # Sparse autoencoder hyperparameters
                   rho <- 0.1      # Target sparsity of the hidden units
                   lambda <- 0.001 # Weight decay parameter
                   beta <- 5       # Weight of the sparsity penalty term
                   epsilon <- 0.01 # Spread of the initial random weights
                   
                   ptm <- proc.time()
                   model <- autoencoder::autoencode(X,nl=3,N.hidden=k,lambda=lambda,
                                       unit.type='tanh', beta=beta, rho=rho,epsilon=epsilon,
                                       max.iterations=50000,rescale.flag=TRUE)
                   time_fit <- (proc.time() - ptm)[3]
               }
               else{
                   model <- params$model
                   time_fit <- params$time_dim
               }
               ptm <- proc.time()
               Z <- predict(model,X,hidden.output=TRUE)$X.output
               time_dim <- time_fit + (proc.time() - ptm)[3]
           },
           Autoencoder_Keras={
               if(is.null(params[["epochs"]])){
                   stop("[ERROR] - A number of epochs must be explicitly supplied for autoencoder training.")
               }
               
               if(is.null(params[["time_window"]])){
                   if(is.null(params[["model"]])){
                       ptm <- proc.time()
                       if(is.null(params[["deep_layers"]])){
                           model <- autoencoder_keras(X,method=params$method,latent_dim=k,epochs=params$epochs)
                       }
                       else{
                           model <- autoencoder_keras(X,method=params$method,latent_dim=params$deep_layers,epochs=params$epochs)
                       }
                       time_fit <- (proc.time() - ptm)[3]
                   }
                   else{
                       model <- params$model
                       time_fit <- params$time_dim
                   }
                   ptm <- proc.time()
                   Z <- model$encoder %>% predict(X)
                   time_dim <- time_fit + (proc.time() - ptm)[3]
               }
               else{
                   
                   if(is.null(params[["embedding_params"]])){
                       params$embedding_params <- list()
                       params$embedding_params$shift = params$time_window
                       params$embedding_params$padNA = FALSE
                   }
                   
                   if(is.null(params[["model"]])){
                       ptm <- proc.time()
                       if(is.null(params[["deep_layers"]])){
                           model <- autoencoder_keras(X,method=params$method,latent_dim=k,time_window=params$time_window,epochs=params$epochs,embedding_params=params$embedding_params)
                       }else{
                           model <- autoencoder_keras(X,method=params$method,latent_dim=params$deep_layers,time_window=params$time_window,epochs=params$epochs,embedding_params=params$embedding_params)
                       }
                       time_fit <- (proc.time() - ptm)[3]
                   }
                   else{
                       model <- params$model
                       time_fit <- params$time_dim
                   }
                   ptm <- proc.time()
                   Z_tensor <- model$encoder %>% predict(matrix2Tensor3D(X,
                                                                         time_window = params$time_window,
                                                                         shift = params$embedding_params$shift,
                                                                         padNA= params$embedding_params$padNA))
                   Z <- tensor3D2matrix(Z_tensor,shift = params$time_window)
                   time_dim <- time_fit + (proc.time() - ptm)[3]
               }
           },
           {
               stop('Wrong dimensionality reduction method used - Choose among "PCA", "Autoencoder" or "Autoencoder_Keras"')
           }
    )
    return(list(Z=stats::na.omit(Z),time_dim=time_dim,model=model))
}
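# A minimal usage sketch (commented out, since this is a package source file and
# the data below is purely illustrative), assuming DIMENSIONALITY_METHODS
# contains "PCA":
#
#   X <- matrix(rnorm(100 * 10), nrow = 100, ncol = 10)  # 10 series, 100 time steps
#   reduction <- dimensionalityReduction(X, k = 3, family = "PCA")
#   dim(reduction$Z)    # 100 x 3 factor matrix
#   reduction$time_dim  # fitting + projection time, in seconds
#
#   # Reuse the fitted model (and its fitting time) to skip the eigendecomposition:
#   reduction2 <- dimensionalityReduction(X, k = 3, family = "PCA",
#                                         params = list(model = reduction$model,
#                                                       time_dim = reduction$time_dim))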


#' incrementalDimensionalityReduction
#' 
#' Function implementing the dimensionality reduction module in the DFML architecture.
#' The function implements an incremental dimensionality reduction model with a rolling approach.
#' A fraction of \code{X}, defined by \code{init_sample_percentage}, is used to initialize the dimensionality reduction technique.
#' The remaining part of \code{X} is used to incrementally fit the dimensionality reduction model.
#'
#' @param X - n x N matrix containing the N time series as columns, each one of length n time steps
#' @param init_sample_percentage - Fraction (between 0 and 1) of \code{X} to be used for the initial model fit - Numeric
#' @param k - Number of desired factors as output - Integer
#' @param family - Family of methods to use - String among those defined in DIMENSIONALITY_METHODS
#' @param params - Parameters to be passed to the function - List
#'                 
#'                 An (optional) pre-existing model is passed through \code{params$model}.
#'                 If NULL, the model will be computed by the function. 
#'                 
#' @import autoencoder
#'
#' @return List containing:
#'         \itemize{
#'         \item{\code{Z}: }{n x k matrix containing the k factors generated by the dimensionality reduction module}
#'         \item{\code{time_dim}: }{Computational time (in s) required to fit the dimensionality model - numeric scalar}
#'         \item{\code{model}: }{Fitted model constructed via the function}
#'         }
#'         
#' @examples 
#' #See tests/testthat directory on https://github.com/jdestefani/ExtendedDFML
incrementalDimensionalityReduction <- function(X,init_sample_percentage,k,family=DIMENSIONALITY_METHODS,params=list(model=NULL)){
    # Coerce the input to a matrix if needed
    if(!is.matrix(X)){
        X <- as.matrix(X)
    }
    
    n <- nrow(X)
    init_sample_size <- round(init_sample_percentage*n)
    
    # 1. If there is no model for the initialization, fit a model using the standard dimensionality reduction function
    if(is.null(params[["model"]])){
        dim_red_results <- dimensionalityReduction(X[1:init_sample_size,],k,family,params)
        params$model <- dim_red_results$model
    }
    
    family <- match.arg(family)
    switch(family,
           PCA={
               if(is.null(params[["pca_type"]])){
                   stop("An incremental PCA type must be supplied in order to perform incremental PCA.")
               }
               ptm <- proc.time()
               xbar <- apply(X, 2, mean)
               incremental_pca_results <- incrementalPCA(X,
                                                         xbar = xbar,
                                                         base_pca_values = params$model$eigenvalues,
                                                         base_pca_vectors = t(params$model$eigenvectors),
                                                         pca_type = params$pca_type,
                                                         components = k,
                                                         start_value = (init_sample_size+1),
                                                         end_value = n)
               time_dim <- proc.time() - ptm
               model <- list(eigenvalues=incremental_pca_results$pca$values[1:k],
                             eigenvectors=t(incremental_pca_results$pca$vectors[,1:k]))
               Z <- X%*%t(model$eigenvectors)
           },
           Autoencoder_Keras={
               ptm <- proc.time()
               if(is.null(params[["time_window"]])){
                   if(is.null(params[["model"]])){
                       if(is.null(params[["deep_layers"]])){
                           model <- autoencoder_keras(X,method=params$method,latent_dim=k,epochs=params$epochs)
                       }
                       else{
                           model <- autoencoder_keras(X,method=params$method,latent_dim=params$deep_layers,epochs=params$epochs)
                       }
                   }
                   Z <- model$encoder %>% predict(X)
               }
               else{
                   if(is.null(params[["model"]])){
                       if(is.null(params[["deep_layers"]])){
                           model <- autoencoder_keras(X,method=params$method,latent_dim=k,time_window=params$time_window,epochs=params$epochs)
                       }else{
                           model <- autoencoder_keras(X,method=params$method,latent_dim=params$deep_layers,time_window=params$time_window,epochs=params$epochs)
                       }
                   }
                   Z_tensor <- model$encoder %>% predict(matrix2Tensor3D(X,time_window = params$time_window,shift=params$time_window))
                   Z <- tensor3D2matrix(Z_tensor,shift = params$time_window)
               }
               
               time_dim <- proc.time() - ptm
           },
           {
               stop('Wrong dimensionality reduction method used - Choose between "PCA" and "Autoencoder_Keras"')
           }
    )
    return(list(Z=stats::na.omit(Z),time_dim=time_dim[3],model=model))
}
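# Usage sketch (not run): half of the data initializes the PCA model, the rest is
# processed incrementally. The pca_type value below is an assumption and must be
# one of the types accepted by incrementalPCA.
#
#   X <- matrix(rnorm(200 * 10), nrow = 200, ncol = 10)
#   res <- incrementalDimensionalityReduction(X, init_sample_percentage = 0.5, k = 3,
#                                             family = "PCA",
#                                             params = list(model = NULL,
#                                                           pca_type = my_pca_type))
#   dim(res$Z)  # 200 x 3 factor matrix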

 
#' incrementalDimensionalityReductionUpdate
#' 
#' Wrapper function for the incremental update of a dimensionality reduction model. 
#' \code{X_base} is used to initialize the dimensionality reduction model.
#' \code{X_update} is used to incrementally update the dimensionality reduction model fitted on \code{X_base}. 
#'
#' @param X_base - n x N matrix containing the N time series as columns, each one of length n time steps used for the initialization of the incremental dimensionality reduction.
#' @param X_update - n' x N matrix containing the N time series as columns, each one of length n' time steps used for the incremental update of the dimensionality reduction model.
#' @param k - Number of desired factors as output - Numeric
#' @param family - Family of methods to use - String among those defined in DIMENSIONALITY_METHODS
#' @param params - Parameters to be passed to the function - List
#'                 
#'                 An (optional) pre-existing model is passed through the \code{parameters$model}.
#'                 If NULL it will be computed by the function. 
#'
#' @return List containing:
#'         \itemize{
#'         \item{\code{Z_update}: }{n' x k matrix containing the k factors generated by the dimensionality reduction module for \code{X_update}}
#'         \item{\code{time_dim}: }{Computational time (in s) required to fit the dimensionality model - numeric scalar}
#'         \item{\code{model}: }{Fitted model constructed via the function}
#'         }
#'         
#' @examples 
#' #See tests/testthat directory on https://github.com/jdestefani/ExtendedDFML
incrementalDimensionalityReductionUpdate <- function(X_base,X_update,k,family=DIMENSIONALITY_METHODS,params=list(model=NULL)){
    # Coerce the inputs to matrices if needed
    if(!is.matrix(X_base)){
        X_base <- as.matrix(X_base)
    }
    
    if(!is.matrix(X_update)){
        X_update <- as.matrix(X_update)
    }
    
    # 1. If there is no model for the initialization, fit a model using the standard dimensionality reduction function
    if(is.null(params[["model"]])){
        dim_red_results <- dimensionalityReduction(X_base,k,family,params)
        params$model <- dim_red_results$model
    }
    
    family <- match.arg(family)
    switch(family,
           PCA={
               if(is.null(params[["pca_type"]])){
                   stop("An incremental PCA type must be supplied in order to perform incremental PCA.")
               }
               ## Incremental PCA (IPCA, centered)
               ptm <- proc.time()
               pca <- list(values=params$model$eigenvalues,vectors=t(params$model$eigenvectors))
               xbar <- apply(as.matrix(X_base), 2, mean)
               
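               # Fold in the update samples one at a time, updating the mean and the eigenstructure at each step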
               for(i in 1:dim(X_update)[1]){
                   incremental_pca_results <- iterativePCASingleStep(X_update[i,],dim(X_base)[1]+i,params$pca_type,xbar,pca,NA,k)
                   xbar <- incremental_pca_results$xbar
                   pca <- incremental_pca_results$pca
               }
               time_dim <- proc.time() - ptm
               model <- list(eigenvalues=incremental_pca_results$pca$values[1:k],
                             eigenvectors=t(incremental_pca_results$pca$vectors[,1:k]))
               Z_update <- X_update%*%t(model$eigenvectors)
           },
           Autoencoder_Keras={
               ptm <- proc.time()
               if(is.null(params[["time_window"]])){
                   model <- incremental_autoencoder_keras_update(X_update,
                                                                 method=params$method,
                                                                 model=params$model,
                                                                 epochs=params$epochs)
                   
                   Z_update <- model$encoder %>% predict(X_update)
               }
               else{
                   model <- incremental_autoencoder_keras_update(X_update,
                                                                 method=params$method,
                                                                 model=params$model,
                                                                 time_window=params$time_window,
                                                                 epochs=params$epochs)
                   
                   Z_update_tensor <- model$encoder %>% predict(matrix2Tensor3D(X_update,time_window = params$time_window,shift=params$time_window))
                   Z_update <- tensor3D2matrix(Z_update_tensor,shift = params$time_window)
               }
               
               time_dim <- proc.time() - ptm
           },
           {
               stop('Wrong dimensionality reduction method used - Choose between "PCA" and "Autoencoder_Keras"')
           }
    )
    return(list(Z_update=stats::na.omit(Z_update),time_dim=time_dim[3],model=model))
}
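# Usage sketch (not run): fit on X_base, then fold in the new X_update rows one at
# a time. As above, the pca_type value is an assumption tied to incrementalPCA.
#
#   X_base   <- matrix(rnorm(100 * 10), nrow = 100, ncol = 10)  # initialization window
#   X_update <- matrix(rnorm(20 * 10),  nrow = 20,  ncol = 10)  # new observations
#   upd <- incrementalDimensionalityReductionUpdate(X_base, X_update, k = 3,
#                                                   family = "PCA",
#                                                   params = list(model = NULL,
#                                                                 pca_type = my_pca_type))
#   dim(upd$Z_update)  # 20 x 3: factors for the update window only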

#' dimensionalityIncrease
#' 
#' 
#' Function implementing the dimensionality increase module in the DFML architecture.
#' The function implements a batch dimensionality increase.
#'
#' @param Z_hat - h x k matrix containing the h-step ahead forecasts for the k factors - Numeric
#' @param family - Family of methods to use - String among those defined in DIMENSIONALITY_METHODS
#' @param model - Pre-existing dimensionality reduction model (required) - If NULL, the function stops with an error.
#' @param Z - n x k matrix containing the k factors generated by the dimensionality reduction module, required by the PCA method for the Gram-Schmidt orthogonalization - Numeric
#' @param params - Additional parameters to be passed to the dimensionality increase method - List
#'
#' @return List containing:
#'         \itemize{
#'         \item{\code{X_hat}: }{h x N matrix containing the h-step ahead forecast for the N original series}
#'         \item{\code{time_dim}: }{Computational time (in s) required to run the dimensionality increase - numeric scalar}
#'         }
#'         
#' @export
#' @examples 
#' #See tests/testthat directory on https://github.com/jdestefani/ExtendedDFML
dimensionalityIncrease <- function(Z_hat,family=DIMENSIONALITY_METHODS,model=NULL,Z=NULL,params=NULL){
    # Convert vector to matrix if Z_hat has no dimensions
    if(is.null(dim(Z_hat))){
        Z_hat <- matrix(Z_hat,1,length(Z_hat))
    }
    
    # Convert vector to matrix if Z is present and has no dimensions
    if(!is.null(Z) && is.null(dim(Z))){
        Z <- matrix(Z,1,length(Z))
    }
    
    if(is.null(model)){
        stop("[ERROR] - Dimensionality increase cannot be performed without existing model")
    }
    
    family <- match.arg(family)
    switch(family,
           PCA={
               ptm <- proc.time()
               V <- model$eigenvectors
               
               horizon <- dim(Z_hat)[1]
               factors <- dim(Z_hat)[2]
               
               U <- GramSchmidtOrthogonalization(Z,Z_hat,factors)
               
               if (factors>1){
                   X_hat <- U[,1:factors]%*%V[1:factors,]
               }
               else{
                   X_hat <- (U[,1]%*%array(V[1:factors,],c(1,factors)))
               }
               time_dim <- proc.time() - ptm
           },
           Autoencoder={
               ptm <- proc.time()
               X_hat <- decode(model,Z_hat)
               time_dim <- proc.time() - ptm
           },
           Autoencoder_Keras={
               ptm <- proc.time()
               if(!is.null(params[["time_window"]])){
                   Z_hat <- matrix2Tensor3D(Z_hat,params$time_window,params$time_window)
               }
               X_hat <- model$decoder %>% predict(Z_hat)
               time_dim <- proc.time() - ptm
               if(!is.null(params[["time_window"]])){
                   X_hat <- tensor3D2matrix(X_hat,params$time_window)
               }
           },
           {
               stop('Wrong dimensionality increase method used - Choose among "PCA", "Autoencoder" or "Autoencoder_Keras"')
           }
    )
    return(list(X_hat=X_hat,time_dim=time_dim))
}
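# Round-trip sketch (not run): reduce with PCA, then map the factors back to the
# original N-dimensional space. Passing red$Z as both Z_hat and Z only illustrates
# the shapes involved; in the DFML pipeline Z_hat would be the factor forecast
# produced by the forecasting module.
#
#   X   <- matrix(rnorm(100 * 10), nrow = 100, ncol = 10)
#   red <- dimensionalityReduction(X, k = 3, family = "PCA")
#   inc <- dimensionalityIncrease(red$Z, family = "PCA", model = red$model, Z = red$Z)
#   dim(inc$X_hat)  # 100 x 10 matrix in the original series space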

#' GramSchmidtOrthogonalization
#' 
#' Helper function implementing Gram-Schmidt orthogonalization (applied to the factor forecasts).
#'
#' @param Z - n x k matrix containing the k factors generated by the dimensionality reduction module - Numeric
#' @param Z_hat - h x k matrix containing the h-step ahead forecasts for the k factors - Numeric
#' @param k - Number of factors for the orthogonalization - Integer
#'
#' @return \code{U} - h x k matrix containing the orthogonalized factors corresponding to the rows of \code{Z_hat}
GramSchmidtOrthogonalization <- function(Z,Z_hat,k){
    # Reference implementation:
    # V <- model
    # U <- Z_hat
    # factors <- dim(Z_hat)[2]
    #
    # if (orth & factors>1){## Gram-Schmidt orthogonalization
    #   ZZ=rbind(Ztr[,1:factors],Z_hat[,1:factors])
    #   U=array(NA,c(NROW(ZZ),factors))
    #
    #   U[,1]=ZZ[,1]
    #
    #   for (i in 2:factors){
    #     p<-numeric(NROW(ZZ))
    #     for (j in 1:(i-1))
    #       p=p+proj(U[,j],ZZ[,i])
    #     U[,i]=ZZ[,i]-p
    #
    #   }
    #
    #   U=U[(NROW(Ztr)+1):NROW(U),]
    # }
    
    ## Gram-Schmidt orthogonalization
    # For k = 1 there is nothing to orthogonalize: the forecast is returned as-is
    U <- Z_hat
    
    if (k>1){
        # Stack Z and Z_hat so that the orthogonalization is performed with respect to the full data
        ZZ=rbind(Z[,1:k],Z_hat[,1:k])
        U=array(NA,c(NROW(ZZ),k))
        
        # Initialize U with the first factor
        U[,1]=ZZ[,1]
        
        # Iterative orthogonalization
        for (i in 2:k){
            p<-numeric(NROW(ZZ))
            
            for (j in 1:(i-1)){
                # Project ZZ[,i] onto the already orthogonalized component U[,j]
                p=p+vectorProject(U[,j],ZZ[,i])
            }
            # Remove the projected components from the current factor
            U[,i]=ZZ[,i]-p
        }
        
        # Keep only the rows corresponding to the forecast Z_hat
        U <- U[(nrow(Z)+1):nrow(U),]
    }
    # If U is a 1-dimensional vector, convert it to a matrix
    if(is.null(dim(U))){
        U <- matrix(U,1,length(U))
    }
    
    return(U)
}
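# Shape sketch (not run): the forecast factors are orthogonalized column by column
# against the stacked matrix rbind(Z, Z_hat); only the rows corresponding to Z_hat
# are returned.
#
#   Z     <- matrix(rnorm(50 * 3), 50, 3)  # historical factors
#   Z_hat <- matrix(rnorm(5 * 3),  5,  3)  # factor forecasts
#   U <- GramSchmidtOrthogonalization(Z, Z_hat, 3)
#   dim(U)  # 5 x 3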

#' vectorProject
#' 
#' Auxiliary function to compute the vector projection of vector v onto vector u.
#'
#' @param u - Numeric vector onto which v is projected
#' @param v - Numeric vector to project
#'
#' @return - Numeric vector representing the projection of v onto u
vectorProject <- function(u,v){
    # proj_u(v) = u * (u . v) / (u . u)
    return(u*as.numeric(u%*%v)/as.numeric(u%*%u))
}
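# Sanity check (not run): the residual v - proj_u(v) is orthogonal to u.
#
#   u <- c(1, 0, 0)
#   v <- c(1, 2, 3)
#   p <- vectorProject(u, v)  # c(1, 0, 0)
#   (v - p) %*% u             # 0, up to floating point error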


#' decode
#' 
#' Auxiliary function to perform dimensionality increase with autoencoder techniques (only for those not based on Keras).
#'
#' @param model - Autoencoder model to employ to perform the decoding
#' @param Z_hat - h x k matrix containing the h-step ahead forecasts for the k factors - Numeric
#'
#' @return \code{X_hat} - h x N matrix containing the h-step ahead forecast for the N original series
decode <- function(model,Z_hat){
    # Decode
    X_hat <- model$W[[2]] %*% t(as.matrix(Z_hat))
    # Apply bias
    X_hat <- apply(X_hat, 2, function(x){x+model$b[[2]]})
    # Rescale
    X_hat <- X_hat * (model$rescaling$rescaling.max - model$rescaling$rescaling.min) + model$rescaling$rescaling.min
    return(t(X_hat))
}
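
# Usage sketch (not run), assuming red$model was fitted via
# dimensionalityReduction(..., family = "Autoencoder"), so that it carries the
# W, b and rescaling fields set by autoencoder::autoencode:
#
#   X     <- matrix(rnorm(100 * 10), nrow = 100, ncol = 10)
#   red   <- dimensionalityReduction(X, k = 3, family = "Autoencoder")
#   X_hat <- decode(red$model, red$Z)  # 100 x 10 reconstruction from the factors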