ff: memory-efficient storage of large data on disk and fast access functions

Documented in ccbind cfun clength cmean cmedian cquantile crbind csum csummary

# some collapsing functions for ff
# (c) 2007 Jens Oehlschägel
# Licence: GPL2
# Provided 'as is', use at your own risk
# Created: 2007-10-09
# Last changed: 2007-10-25

# source("d:/mwp/eanalysis/ff/R/CFUN.R")

# xx TODO: extend this for weighted means, weighted median etc., see http://tolstoy.newcastle.edu.au/R/help/02a/1073.html and http://tolstoy.newcastle.edu.au/R/help/02a/1060.html or google "Re: [R] Weighted median"

#! \name{CFUN}
#! \alias{CFUN}
#! \alias{ccbind}
#! \alias{crbind}
#! \alias{cfun}
#! \alias{cquantile}
#! \alias{csummary}
#! \alias{cmedian}
#! \alias{clength}
#! \alias{csum}
#! \alias{cmean}
#! \title{ Collapsing functions for batch processing }
#! \description{
#!   These are used in aggregating the chunks resulting from batch processing. They are usually called via \command{\link{do.call}}
#! }
#! \usage{
#! ccbind(\dots)
#! crbind(\dots)
#! cfun(\dots, FUN, FUNARGS = list())
#! cquantile(\dots, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type = 7)
#! csummary(\dots, na.rm = "ignored")
#! cmedian(\dots, na.rm = FALSE)
#! clength(\dots, na.rm = FALSE)
#! csum(\dots, na.rm = FALSE)
#! cmean(\dots, na.rm = FALSE)
#! }
#! \arguments{
#!   \item{\dots}{ \code{\dots} }
#!   \item{FUN}{ a aggregating function }
#!   \item{FUNARGS}{ further arguments to the aggregating function }
#!   \item{na.rm}{ TRUE to remove NAs }
#!   \item{probs}{ see \code{\link{quantile}} }
#!   \item{names}{ see \code{\link{quantile}} }
#!   \item{type}{ see \code{\link{quantile}} }
#! }
#! \details{
#!  \tabular{lll}{
#!   \strong{CFUN}        \tab \strong{FUN}              \tab \strong{comment} \cr
#!   \command{ccbind}     \tab \command{\link{cbind}}    \tab like \command{cbind} but respecting names \cr
#!   \command{crbind}     \tab \command{\link{rbind}}    \tab like \command{rbind} but respecting names \cr
#!   \command{cfun}       \tab                           \tab \command{crbind} the input chunks and then apply 'FUN' to each column \cr
#!   \command{cquantile}  \tab \command{\link{quantile}} \tab \command{crbind} the input chunks and then apply 'quantile' to each column \cr
#!   \command{csummary}   \tab \command{\link{summary}}  \tab \command{crbind} the input chunks and then apply 'summary' to each column \cr
#!   \command{cmedian}    \tab \command{\link{median}}   \tab \command{crbind} the input chunks and then apply 'median' to each column \cr
#!   \command{clength}    \tab \command{\link{length}}   \tab \command{crbind} the input chunks and then determine the number of values in each column\cr
#!   \command{csum}       \tab \command{\link{sum}}      \tab \command{crbind} the input chunks and then determine the sum values in each column\cr
#!   \command{cmean}      \tab \command{\link{mean}}     \tab \command{crbind} the input chunks and then determine the (unweighted) mean in each column\cr
#!   }
#!   In order to use CFUNs on the result of \code{\link{lapply}} or \code{\link{ffapply}} use \code{\link{do.call}}.
#! }
#! \note{
#!  Currently - for command line convenience - we map the elements of a single list argument to \dots, but this may change in the future.
#! }
#! \section{ff options}{
#!   xx TODO: extend this for weighted means, weighted median etc., \cr
#!   see http://tolstoy.newcastle.edu.au/R/help/02a/1073.html and http://tolstoy.newcastle.edu.au/R/help/02a/1060.html or google "Re: [R] Weighted median"
#! }
#! \value{
#!   depends on the CFUN used
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ffapply}}, \code{\link{do.call}}, \code{\link{na.count}} }
#! \examples{
#!    X <- lapply(split(rnorm(1000), 1:10), summary)
#!    do.call("crbind", X)
#!    do.call("csummary", X)
#!    do.call("cmean", X)
#!    do.call("cfun", c(X, list(FUN=mean, FUNARGS=list(na.rm=TRUE))))
#!    rm(X)
#! }
#! \keyword{ manip }
#! \keyword{ list }

ccbind <- function(...){
  l <- list(...); if (length(l)==1 && is.list(l[[1]])) l <- l[[1]]
  nl <- length(l)
  if (nl){
    checkit <- sapply(l, function(x)if (is.matrix(x)) c(n=nrow(x), nonam=is.null(rownames(x))) else c(n=length(x), nonam=is.null(names(x))) )
    need.names <- length(unique(checkit["n",])) != 1
    if (need.names){
      if (any(checkit["nonam",]))
        stop("either need equal lengths or names")
      nam <- unique(do.call("c", lapply(l, function(x)if(is.matrix(x)) rownames(x) else names(x))))
      mat <- do.call("cbind", lapply(l, function(x)if (is.matrix(x)) {
        m <- matrix(NA, length(nam), ncol(x), dimnames=list(nam, colnames(x)))
        m[rownames(x),] <- x
        m
      } else {
        v <- rep(NA, length(nam))
        names(v) <- nam
        v[names(x)] <- x
        v
      }))
      mat
    }else{
      do.call("cbind", l)
    }
  }else{
    return(NULL)
  }
}

crbind <- function(...){
  l <- list(...); if (length(l)==1 && is.list(l[[1]])) l <- l[[1]]
  nl <- length(l)
  if (nl){
    checkit <- sapply(l, function(x)if (is.matrix(x)) c(n=ncol(x), nonam=is.null(colnames(x))) else c(n=length(x), nonam=is.null(names(x))) )
    need.names <- length(unique(checkit["n",])) != 1
    if (need.names){
      if (any(checkit["nonam",]))
        stop("either need equal lengths or names")
      nam <- unique(do.call("c", lapply(l, function(x)if(is.matrix(x)) colnames(x) else names(x))))
      mat <- do.call("rbind", lapply(l, function(x)if (is.matrix(x)) {
        m <- matrix(NA, nrow(x), length(nam), dimnames=list(rownames(x), nam))
        m[,colnames(x)] <- x
        m
      } else {
        v <- rep(NA, length(nam))
        names(v) <- nam
        v[names(x)] <- x
        v
      }))
      mat
    }else{
      do.call("rbind", l)
    }
  }else{
    return(NULL)
  }
}



cfun <- function(..., FUN, FUNARGS=list()){
  l <- list(...); if (length(l)==1 && is.list(l[[1]])) l <- l[[1]]
  nl <- length(l)
  if (nl){
    l <- do.call("crbind", l)
    ret <- do.call("apply", c(list(l, 2, FUN), FUNARGS))
    if (is.list(ret)){
      do.call("ccbind", ret)
    }else{
      ret
    }
  }else{
    return(NULL)
  }
}


cmedian <- function(..., na.rm=FALSE){
  cfun(..., FUN=median, FUNARGS=list(na.rm=na.rm))
}

csummary <- function(..., na.rm="ignored"){
  cfun(..., FUN=summary)
}

cquantile <- function(..., probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type = 7){
  cfun(..., FUN=quantile, FUNARGS=list(probs = probs, na.rm = na.rm, names = names, type = type))
}

crange <- function(..., na.rm="ignored"){
  cfun(..., FUN=range)
}



# faster implementations of clength, csum, cmean
clength <- function(..., na.rm=FALSE){
  l <- list(...); if (length(l)==1 && is.list(l[[1]])) l <- l[[1]]
  nl <- length(l)
  if (nl){
    checkit <- sapply(l, function(x)c(n=length(x), nonam=is.null(names(x))))
    need.names <- length(unique(checkit["n",])) != 1
    if (need.names){
      if (any(checkit["nonam",]))
        stop("either need equal lengths or names")
      nam <- unique(do.call("c", lapply(l, names)))
      if (na.rm){
        n <- as.integer(!is.na(l[[1]][nam]))
      }else{
        return(rep(nl, length.out=length(l[[1]][nam])))
      }
      if (nl>1){
        for (i in 2:nl){
            n <- n + !is.na(l[[i]][nam])
        }
      }
      return(n)
    }else{
      if (na.rm){
        n <- as.integer(!is.na(l[[1]]))
      }else{
        return(rep(nl, length.out=length(l[[1]])))
      }
      if (nl>1){
        for (i in 2:nl){
            n <- n + !is.na(l[[i]])
        }
      }
      return(n)
    }
  }else{
    return(NULL)
  }
}

csum <- function(..., na.rm=FALSE){
  l <- list(...); if (length(l)==1 && is.list(l[[1]])) l <- l[[1]]
  nl <- length(l)
  if (nl){
    checkit <- sapply(l, function(x)c(n=length(x), nonam=is.null(names(x))))
    need.names <- length(unique(checkit["n",])) != 1
    if (need.names){
      if (any(checkit["nonam",]))
        stop("either need equal lengths or names")
      nam <- unique(do.call("c", lapply(l, names)))
      if (na.rm)
        s <- ifelse(is.na(l[[1]][nam]), 0L, l[[1]][nam])
      else{
        s <- l[[1]][nam]
      }
      if (nl>1){
        for (i in 2:nl){
          if (na.rm)
            s <- ifelse(is.na(l[[i]][nam]), s, s + l[[1]][nam])
          else{
            s <- s + l[[i]][nam]
          }
        }
      }
      s
    }else{
      if (na.rm)
        s <- ifelse(is.na(l[[1]]), 0L, l[[1]])
      else{
        s <- l[[1]]
      }
      if (nl>1){
        for (i in 2:nl){
          if (na.rm)
            s <- ifelse(is.na(l[[i]]), s, s + l[[1]])
          else{
            s <- s + l[[i]]
          }
        }
      }
      s
    }
  }else{
    return(NULL)
  }
}

cmean <- function(..., na.rm=FALSE){
  l <- list(...); if (length(l)==1 && is.list(l[[1]])) l <- l[[1]]
  nl <- length(l)
  if (nl){
    checkit <- sapply(l, function(x)c(n=length(x), nonam=is.null(names(x))))
    need.names <- length(unique(checkit["n",])) != 1
    if (need.names){
      if (any(checkit["nonam",]))
        stop("either need equal lengths or names")
      nam <- unique(do.call("c", lapply(l, names)))
      if (na.rm){
        na <- is.na(l[[1]][nam])
        s <- ifelse(na, 0L, l[[1]][nam])
        n <- as.integer(!na)
      }else{
        s <- l[[1]][nam]
      }
      if (nl>1){
        for (i in 2:nl){
          if (na.rm){
            na <- is.na(l[[i]][nam])
            s <- ifelse(na, s, s + l[[1]][nam])
            n <- n + !na
          }else{
            s <- s + l[[i]][nam]
          }
        }
      }
      if (na.rm)
        s/n
      else
        s/nl
    }else{
      if (na.rm){
        na <- is.na(l[[1]])
        s <- ifelse(na, 0L, l[[1]])
        n <- as.integer(!na)
      }else{
        s <- l[[1]]
      }
      if (nl>1){
        for (i in 2:nl){
          if (na.rm){
            na <- is.na(l[[i]])
            s <- ifelse(na, s, s + l[[1]])
            n <- n + !na
          }else{
            s <- s + l[[i]]
          }
        }
      }
      if (na.rm)
        s/n
      else
        s/nl
    }
  }else{
    return(NULL)
  }
}

kindlychung/ff documentation built on May 20, 2019, 9:58 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

kindlychung/ff
memory-efficient storage of large data on disk and fast access functions

R/CFUN.R
In kindlychung/ff: memory-efficient storage of large data on disk and fast access functions

Defines functions ccbind crbind cfun cmedian csummary cquantile crange clength csum cmean

Documented in ccbind cfun clength cmean cmedian cquantile crbind csum csummary

R Package Documentation

Browse R Packages

We want your feedback!

kindlychung/ff memory-efficient storage of large data on disk and fast access functions

R/CFUN.R In kindlychung/ff: memory-efficient storage of large data on disk and fast access functions

Defines functions ccbind crbind cfun cmedian csummary cquantile crange clength csum cmean

Documented in ccbind cfun clength cmean cmedian cquantile crbind csum csummary

R Package Documentation

Browse R Packages

We want your feedback!

kindlychung/ff
memory-efficient storage of large data on disk and fast access functions

R/CFUN.R
In kindlychung/ff: memory-efficient storage of large data on disk and fast access functions