R/sinkDirectory.R

Defines functions sinkDirectory

Documented in sinkDirectory

### sinkDirectory.R --- 
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: mar 22 2018 (09:24) 
## Version: 
## Last-Updated: nov 28 2018 (09:21) 
##           By: Brice Ozenne
##     Update #: 27
##----------------------------------------------------------------------
## 
### Commentary: 
## 
### Change Log:
##----------------------------------------------------------------------
## 
### Code:

## * sinkDirectory (documentation)
##' @title Import All .rds Files in a Directory.
##' @description Import all .rds files in a directory.
##' @name sinkDirectory
##' 
##' @param path [character] path to the directory.
##' @param string.keep [regexp] character string indicating files to import. 
##' @param string.exclude [regexp] character string indicating files to ignore.  
##' @param addMissingCol  [logical] if a dataset does not have the same columns as
##' the other, the necessary empty columns are added to it with \code{NA} as values.
##' @param fixed [logical] If \code{TRUE}, \code{pattern} is a string to be matched as is.
##' Argument passed to \code{\link{grep}}.
##' @param trace [logical] should a progress bar be displayed tracking how many files
##' have been imported.
##'
##' @details The function first read the name of all the files in the directory.
##' Then if the argument \code{string.keep} is not \code{NULL}, it only retains the
##' files whose name contain \code{string.keep}.
##' Then if the argument \code{string.exclude} is not \code{NULL}, it excludes the
##' files whose name contain \code{string.exclude}.
##'
##' Each file must contain a \code{data.table} object with the same columns,
##' so that they can be combined.
##' @return A \code{data.table} object.
##' @author Brice Ozenne
##'

## * sinkDirectoy (code)
##' @rdname sinkDirectory
##' @export
sinkDirectory <- function(path, string.keep = NULL, string.exclude = NULL,
                          addMissingCol = FALSE, fixed = FALSE,
                          trace = TRUE){

    ## ** import all files
    if(!dir.exists(path)){

        possible.dirs <- setdiff(list.dirs(file.path(path,".."), full.names = FALSE), "")
        
        stop("Directory ",path," does not exists \n",
             "Existing dir. in parent dir.: \"",paste0(possible.dirs,collapse = "\" \""),"\"\n")
    }
    
    allFiles <- list.files(path)

    if(length(allFiles)==0){
        warning("Empty directory \n")
        return(NULL)
    }
    index.file <- 1:length(allFiles)

    ## ** subset files
    if(!is.null(string.keep)){
        index.file <- intersect(index.file,grep(string.keep,allFiles,fixed = fixed))
    }
    if(!is.null(string.exclude)){
        index.file <- setdiff(index.file,grep(string.exclude,allFiles,fixed = fixed))
    }

    n.files <- length(index.file)
    if(n.files==0){
        warning("No file matching the query \n")
        return(NULL)
    }
    
    ## ** merge files
    dt.merge <- NULL
    if(trace){
        cat("read ",n.files," files \n", sep = "")
        pb <- txtProgressBar(max = n.files)
    }
    for(iFile in 1:n.files){ # iFile <- 1

        ## *** read file 
        iFilename <- allFiles[index.file[iFile]]
        iRes <- data.table::as.data.table(readRDS(file = file.path(path,iFilename)))
        if(NROW(iRes)==0){next}
        iRes[,iFile := iFilename]

        ## *** solve pb with missing columns        
        if(!is.null(dt.merge) && addMissingCol==TRUE && NCOL(dt.merge)!=NCOL(iRes)){

            missing <- setdiff(names(dt.merge),names(iRes))
            if(!is.null(missing)){
                for(iMiss in missing){ ## iMiss <- missing[1]
                    vec.tempo <- dt.merge[1,.SD,.SDcols = iMiss][[1]]
                    vec.tempo[1] <- NA                    
                    iRes[, c(iMiss) := vec.tempo[1]]                    
                }
            }
            missing <- setdiff(names(iRes),names(dt.merge))
            if(!is.null(missing)){                
                for(iMiss in missing){ ## iMiss <- missing[1]
                    vec.tempo <- iRes[1,.SD,.SDcols = iMiss][[1]]
                    vec.tempo[1] <- NA                    
                    dt.merge[, c(iMiss) := vec.tempo[1]]
                }
            }

        }

        ## *** merge
        dt.merge <- rbind(dt.merge,iRes)
        if(trace){setTxtProgressBar(pb,iFile)}
    }
    if(trace){close(pb)}

    ## ** export
    return(dt.merge)
}

##----------------------------------------------------------------------
### sinkDirectory.R ends here
bozenne/butils documentation built on Oct. 14, 2023, 6:19 a.m.