R/utils.R
In mipfp: Multidimensional Iterative Proportional Fitting and Alternative Models

Documented in Array2Vector CompareMaxDev ComputeA expand expand.default expand.table flat flat.array flat.default flat.matrix flat.table GetLinInd Vector2Array

# File mipfp/R/utils.R
# by Johan Barthelemy and Thomas Suesse
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 or 3 of the License
#  (at your option).
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/

# ------------------------------------------------------------------------------
# This file provides:
# - functions to convert array to vectors and conversely;
# - a function to remove the linearly dependent columns from a given matrix;
# - a function to compute the Wald confidence interval for the estimates
#   generated by either Ipfp or ObtainModelEstimates;
# - a function to compare the deviations between given targets or a table of a
#   a list of mipfp objects;
# - a S3 method to flatten a multidimensional array.
# ------------------------------------------------------------------------------

Array2Vector <- function(arr) {
  # Transform a N-dimensional array a to vector, where last index moves fastest.
  #
  # Author: T. Suesse
  #
  # Args:
  #   arr: The array to be transformed into a vector.
  #
  # Returns: A vector containing the input array data.
  
  # checking if an input array is specified
  if (is.null(arr) == TRUE) {
    stop('Error: no array arr specified!')
  }
  
  dim.array <- dim(arr)
  if (is.null(dim.array) == FALSE ) {
    arr <- aperm(arr, seq(length(dim.array), 1, by = -1))    
  }
  return(c(arr))
  
}

Vector2Array <- function(vect, dim.out) {
  # Transform a vector to an array with given dimensions, where last index moves 
  # fastest.
  #
  # Author: T. Suesse
  #
  # Args:
  #   vector: The vector to be transformed into an array.
  #   dim.out: The dimension of the generated array.
  #
  # Returns: An array filled with the input vector data.
  
  # checking if an input array is specified
  if (is.null(vect) == TRUE) {
    stop('Error: no vector vect specified!')
  }
  
  # checking if an input array is specified
  if (is.null(dim.out) == TRUE) {
    stop('Error: no dimension dim.out specified for the output array!')
  }
  
  l.dim.out<-length(dim.out)
  arr <- array(vect, dim.out[seq(l.dim.out,1,by=-1)])
  arr <- aperm(arr, seq(l.dim.out, 1, by = -1)) 
  return(arr)
  
}

GetLinInd <- function(mat, tol = 1e-10) {
  # Removing the linearly dependant columns of matrix to obtain a matrix of full
  # rank using QR decomposition. See Matrix Computations from Golub and Van Loan
  # (2012) for a detailed description of the procedure.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   mat: The matrix possibly containing linearly dependant columns.
  #   tol: Rank estimation tolerance.
  #
  # Returns: The extracted columns of mat.
  
  # checking if an input matrix is specified
  if (is.null(mat) == TRUE) {
    stop('Error: mat is missing!')
  }
  
  # checking if tol is positive
  if (tol < 0.0) {
    stop('Error: tol must be positive!')
  }
  
  # QR decomposition
  mat.li <- as.matrix(mat)
  mat.qr = qr(mat.li)
  
  # if input matrix is of full rank, nothing to do
  if( mat.qr$rank == dim(mat.li)[2] ) {
    idx = seq(1,dim(mat.li)[2])
    result = list("mat.li" = mat.li, "idx" = idx)
    return(result) 
  }
  
  if( is.vector(qr.R(mat.qr)) == FALSE ) {
    diagr = abs(diag(qr.R(mat.qr)))
  } else {
    diagr = qr.R(mat.qr)[1]
  }
  
  # rank computation
  r = tail(which(diagr >= tol * diagr[1]), n = 1)
  
  # selection the r linearly independant columns
  idx <- sort(mat.qr$pivot[1:r])
  mat.li <- mat.li[,idx]  
  
  # returning the matrix and the selected row indices
  result = list("mat.li" = mat.li, "idx" = idx)
  return(result)
  
}

CompareMaxDev <- function(list.mipfp = list(), true.table = NULL, 
                          echo = FALSE) {
  # This function compares either the margins errors from different
  # mipfp objects or the absolute maximum deviation between a given table
  # and the estimates in the mipfp objects.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   list.mipfp: The list produced by Estimate().
  #   true.table: The table which is compared agains the estimates given by
  #               the mipfp objects in the list.
  #   echo: Verbose parameter. If TRUE print what is being compared.
  #
  # Returns: A table with as many rows as the number of mipfp objects in
  #          list.mipfp. Each row details the margins errors or the 
  #          maximum absolute deviation of one mipfp object.
  
  if (length(list.mipfp) < 1) {
    stop('Error: list.mipfp is empty!
         It must contain at least one mipfp object.')
  }   
  
  tab <- array(0, dim=c(0))
  method.list <- list()
    
  for (i in 1:length(list.mipfp)) {          
    # check class of the current object
    if (any(class(list.mipfp[[i]]) == "mipfp") == FALSE) {
      stop('Error: element ', i, ' of list.mipfp is not a mipfp object!')
    }
    # if no true table has been provided: comparison of the margins errors
    # else computation of the deviation between the estimates and the table
    if (is.null(true.table) == TRUE) {
      tab <- rbind(tab, summary(list.mipfp[[i]])$error.margins)    
    }     
    else {
      max.dev <- max(abs(true.table - list.mipfp[[i]]$x.hat))
      max.dev.p <- max(abs(true.table / sum(true.table) - 
                             list.mipfp[[i]]$p.hat))
      
      tab <- rbind(tab,cbind("Deviation"=max.dev, "Prop"=max.dev.p))
    }    
    method.list <- append(method.list, list.mipfp[[i]]$method)
  }          
  
  # verbosity
  if (echo == TRUE && is.null(true.table) == TRUE) {
    cat('Maximum absolute deviation between targets and generated margins:\n')    
  }
  if (echo == TRUE && is.null(true.table) == FALSE) {
    cat('Maximum absolute deviation:\n')    
  }
  
  rownames(tab) <- method.list
  return(tab)
  
}

flat <- function(x, ...) {
  # Generic method to flatten its argument.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object to be flattened.
  #   ...: Further arguments passed to or from other methods.
  
  UseMethod("flat", x)
  
}

flat.default <- function(x, ...) {
  # Default method of the genereric flat function. A specific method to
  # flatten the argument has not been implemented yet and the method returns
  # the original object.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: The original, unmodified object.
  
  warning('Can not flatten this object!')
  return(x)
  
}

flat.array <- function(x, sep = ".", label = "value", l.names = 0, ...) {  
  # This function takes a multidimensional array and flattens it to a 
  # 2-dimension for a pretty printing. The row names are the concatenation
  # of the original dimension names while the only column stores the initial
  # data of the array.
  # Note: The function is inspired from the function wrap.array from the
  #       package R.utils written by Henrik Bengtsson.
  #
  # Author: J. Barthelemy (inspired by H. Bengtsson)
  #
  # Args:
  #   x: An array.
  #   sep: The separator used to concatenate the dimension names.
  #   label: The name of the column storing the data.
  #   lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
  #           the original dimnames.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: An array containing a flattened version of x
  
  # testing if input is not NULL
  if (is.null(x) == TRUE) {
    stop('Error: input x is NULL!')
  }
  
  # getting some useful information
  n.dim <- length(dim(x))  
  dimnames.temp <- dimnames(x)    
  
  # generating default names if dimnames(x) is NULL
  if (is.null(dimnames.temp) == TRUE) {
    for (d in seq(1,n.dim)) {
      dimnames.temp[[d]] <- seq(1,dim(x)[d])
      names(dimnames.temp)[d] <- paste0("V",d)
    }
  }
  
  # flattening the array to one dimension
  dim(x) <- c(prod(dim(x)),1)   
  
  # concatening the dimensions names    
  dims.rename <- function(dims) {    
    d.names <- NULL
    for (d in dims) {
      d.names.tmp <- dimnames.temp[[d]]
      if (l.names > 0) {
        d.names.tmp <- substr(d.names.tmp, 1, l.names)
      }
      if (is.null(d.names)) {
        d.names <- d.names.tmp
      }
      else {        
        d.names <- paste(d.names, 
                         rep(d.names.tmp, each = length(d.names)), 
                         sep = sep)
      }      
    }
    return(d.names)
  }
  
  # updating the dimnames
  dimnames(x) <- c(lapply(list(seq(1,n.dim)), dims.rename), label)
  concat.names <- paste(names(dimnames.temp),collapse = sep)
  names(dimnames(x)) <- c(concat.names,"")
  
  return(x)
  
}

flat.table <- function(x, sep = ".", label = "value", l.names = 0, ...) {
  # This function takes a multidimensional table and flattens it to a 
  # 2-dimension for a pretty printing. The row names are the concatenation
  # of the original dimension names while the only column stores the initial
  # data of the array.
  # Note: The function is inspired from the function wrap.array from the
  #       package R.utils written by Henrik Bengtsson.
  #
  # Author: J. Barthelemy (inspired by H. Bengtsson)
  #
  # Args:
  #   x: A table.
  #   sep: The separator used to concatenate the dimension names.
  #   label: The name of the column storing the data.
  #   lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
  #                 the original dimnames.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: An array containing a flattened version of x
  
  return(flat.array(x, sep = sep, label = label, l.names = l.names, ...))
  
}

flat.matrix  <- function(x, sep = ".", label = "value", l.names = 0, ...) {
  # This function takes a matrix and flattens it to a 
  # 2-dimension for a pretty printing. The row names are the concatenation
  # of the original dimension names while the only column stores the initial
  # data of the array.
  # Note: The function is inspired from the function wrap.array from the
  #       package R.utils written by Henrik Bengtsson.
  #
  # Author: J. Barthelemy (inspired by H. Bengtsson)
  #
  # Args:
  #   x: A matrix.
  #   sep: The separator used to concatenate the dimension names.
  #   label: The name of the column storing the data.
  #   lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
  #                 the original dimnames.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: An array containing a flattened version of x
  
  return(flat.array(x, sep = sep, label = label, l.names = l.names, ...))
  
}

expand <- function(x, ...) {
  # Generic method to expand its into a data frame.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object to be expanded.
  #   ...: Further arguments passed to or from other methods.
  
  UseMethod("expand", x)
  
}

expand.default <- function(x, ...) {
  # Default method of the genereric expand function. A specific method to
  # expand the argument into a data frame has not been implemented yet and the 
  # method returns the original object.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: The original, unmodified object.
  
  warning('Can not expand this object into a data frame!')
  return(x)
  
}

expand.table <- function(x, ...) {
  # This function takes a multi-dimensionnal contingency table and expands it 
  # to a data frame containing individual records.
  # Note: The function is inspired from the "Cookbook for R" available at
  # http://www.cookbook-r.com/Manipulating_data/Converting_between_data_frames
  # _and_contingency_tables/
  #
  # Author: J. Barthelemy (inspired by Winston Chang)
  #
  # Args: 
  #   x: A table x.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: A data frame of the individual records derived from x.
  
  # check if the rounded table summed to at least 1, otherwise stops
  if (sum(x) <= 1) {
    stop("The sum of the element of x should be at least 1!")
  }
  
  # coercing x to a data frame
  x.df <- as.data.frame(round(x), ...)
  
  # getting the number of time each row must be replicated
  idx <- rep.int(seq_len(nrow(x.df)), x.df$Freq)
  
  # drop count column
  x.df$Freq <- NULL
  
  # get the rows from result using idx
  result <- x.df[idx, ]
  
  # renaming the row names
  row.names(result) <- 1:dim(result)[1]
  
  # returning the result
  return(result)
  
}

ComputeA <- function(dim.arr, target.list, target.data) {
  # Given two vector x and and a set of target constraints, returns the marginal
  # matrix A and vector t derived from the targets such that A' * x = t.
  #
  # Note that x and t are obtained by removing the first element of
  # each target in order for A to be full rank.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   dim.arr: dimension of the problem, i.e. of the array
  #   target.list: A list of the target margins provided in target.data. Each 
  #                component of the list is an array whose cells indicates which
  #                dimension the corresponding margin relates to.
  #   target.data: A list containing the data of the target margins. Each 
  #                component of the list is an array storing a margin. The list 
  #                order must follow the one defined in target.list. Note that 
  #                the cells of the arrays must be non-negative.
  #
  # Returns: A list whose elements are:
  #   marginal.matrix: The marginal matrix.
  #   margins: A vector containing the margins associated with A.
  #   df: The degree of freedom of the problem.
  
  # checking if a seed is provided
  if (is.null(dim.arr) == TRUE) {
    stop('Error: dimension of the seed not provided!')
  }
  
  # checking if target.list is provided 
  if (is.null(target.list) == TRUE) {
    stop('Error: target.list not provided!')
  }
  
  # checking if target.data is provided
  if (is.null(target.data) == TRUE) {
    stop('Error: target.data not provided!')
  }
  
  # checking if NA in target cells
  if (is.na(min(sapply(target.data, min))) == TRUE)  {
    stop('Error: NA values present for margins contained in target.data!')
  }
  
  # generating some useful variables
  n.cells <- prod(dim.arr)
  
  # generating A such that A' * pi = target.data
  margins.vector <- c(1)
  A.t <- matrix(1, nrow = 1, ncol = n.cells)
  # ... generating the marginal matrix row by row
  
  for (k in 1:length(target.list)) {
    
    # convert k-th margin to an array
    temp.margins <- Array2Vector(target.data[[k]])
    
    # remove first condition as it is redundant information
    temp.margins <- temp.margins[-1] / sum(temp.margins)
    margins.vector <- c(margins.vector, temp.margins)
    
    # construct the current row of the marginal matrix
    A.row <- cmm::MarginalMatrix(var = 1:length(dim.arr), 
                                 marg = target.list[[k]], 
                                 dim = dim.arr)[-1, ]
    
    # remove first entry as it is always redundant
    A.t <- rbind(A.t, A.row)      
    
  }
  A.li <- GetLinInd(t(A.t))
  A <- A.li$mat.li
  margins.vector <- margins.vector[A.li$idx]
  
  # degrees of freedom of the estimates
  deg.free <- dim(A)[1] - dim(A)[2] 
  
  # gathering the results and return
  results <- list("marginal.matrix" = A, "margins" = margins.vector,
                  "df" = deg.free)
  return(results)
  
}

Any scripts or data that you put into this service are public.

mipfp documentation built on May 2, 2019, 6:01 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mipfp
Multidimensional Iterative Proportional Fitting and Alternative Models

R/utils.R
In mipfp: Multidimensional Iterative Proportional Fitting and Alternative Models

Defines functions Array2Vector Vector2Array GetLinInd CompareMaxDev flat flat.default flat.array flat.table flat.matrix expand expand.default expand.table ComputeA

Documented in Array2Vector CompareMaxDev ComputeA expand expand.default expand.table flat flat.array flat.default flat.matrix flat.table GetLinInd Vector2Array

Try the mipfp package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mipfp Multidimensional Iterative Proportional Fitting and Alternative Models

R/utils.R In mipfp: Multidimensional Iterative Proportional Fitting and Alternative Models

Defines functions Array2Vector Vector2Array GetLinInd CompareMaxDev flat flat.default flat.array flat.table flat.matrix expand expand.default expand.table ComputeA

Documented in Array2Vector CompareMaxDev ComputeA expand expand.default expand.table flat flat.array flat.default flat.matrix flat.table GetLinInd Vector2Array

Try the mipfp package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mipfp
Multidimensional Iterative Proportional Fitting and Alternative Models

R/utils.R
In mipfp: Multidimensional Iterative Proportional Fitting and Alternative Models