R/utils.R

Defines functions Array2Vector Vector2Array GetLinInd CompareMaxDev flat flat.default flat.array flat.table flat.matrix expand expand.default expand.table ComputeA

Documented in Array2Vector CompareMaxDev ComputeA expand expand.default expand.table flat flat.array flat.default flat.matrix flat.table GetLinInd Vector2Array

# File mipfp/R/utils.R
# by Johan Barthelemy and Thomas Suesse
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 or 3 of the License
#  (at your option).
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/

# ------------------------------------------------------------------------------
# This file provides:
# - functions to convert array to vectors and conversely;
# - a function to remove the linearly dependent columns from a given matrix;
# - a function to compute the Wald confidence interval for the estimates
#   generated by either Ipfp or ObtainModelEstimates;
# - a function to compare the deviations between given targets or a table of a
#   a list of mipfp objects;
# - a S3 method to flatten a multidimensional array.
# ------------------------------------------------------------------------------

Array2Vector <- function(arr) {
  # Transform a N-dimensional array a to vector, where last index moves fastest.
  #
  # Author: T. Suesse
  #
  # Args:
  #   arr: The array to be transformed into a vector.
  #
  # Returns: A vector containing the input array data.
  
  # checking if an input array is specified
  if (is.null(arr) == TRUE) {
    stop('Error: no array arr specified!')
  }
  
  dim.array <- dim(arr)
  if (is.null(dim.array) == FALSE ) {
    arr <- aperm(arr, seq(length(dim.array), 1, by = -1))    
  }
  return(c(arr))
  
}

Vector2Array <- function(vect, dim.out) {
  # Transform a vector to an array with given dimensions, where last index moves 
  # fastest.
  #
  # Author: T. Suesse
  #
  # Args:
  #   vector: The vector to be transformed into an array.
  #   dim.out: The dimension of the generated array.
  #
  # Returns: An array filled with the input vector data.
  
  # checking if an input array is specified
  if (is.null(vect) == TRUE) {
    stop('Error: no vector vect specified!')
  }
  
  # checking if an input array is specified
  if (is.null(dim.out) == TRUE) {
    stop('Error: no dimension dim.out specified for the output array!')
  }
  
  l.dim.out<-length(dim.out)
  arr <- array(vect, dim.out[seq(l.dim.out,1,by=-1)])
  arr <- aperm(arr, seq(l.dim.out, 1, by = -1)) 
  return(arr)
  
}

GetLinInd <- function(mat, tol = 1e-10) {
  # Removing the linearly dependant columns of matrix to obtain a matrix of full
  # rank using QR decomposition. See Matrix Computations from Golub and Van Loan
  # (2012) for a detailed description of the procedure.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   mat: The matrix possibly containing linearly dependant columns.
  #   tol: Rank estimation tolerance.
  #
  # Returns: The extracted columns of mat.
  
  # checking if an input matrix is specified
  if (is.null(mat) == TRUE) {
    stop('Error: mat is missing!')
  }
  
  # checking if tol is positive
  if (tol < 0.0) {
    stop('Error: tol must be positive!')
  }
  
  # QR decomposition
  mat.li <- as.matrix(mat)
  mat.qr = qr(mat.li)
  
  # if input matrix is of full rank, nothing to do
  if( mat.qr$rank == dim(mat.li)[2] ) {
    idx = seq(1,dim(mat.li)[2])
    result = list("mat.li" = mat.li, "idx" = idx)
    return(result) 
  }
  
  if( is.vector(qr.R(mat.qr)) == FALSE ) {
    diagr = abs(diag(qr.R(mat.qr)))
  } else {
    diagr = qr.R(mat.qr)[1]
  }
  
  # rank computation
  r = tail(which(diagr >= tol * diagr[1]), n = 1)
  
  # selection the r linearly independant columns
  idx <- sort(mat.qr$pivot[1:r])
  mat.li <- mat.li[,idx]  
  
  # returning the matrix and the selected row indices
  result = list("mat.li" = mat.li, "idx" = idx)
  return(result)
  
}

CompareMaxDev <- function(list.mipfp = list(), true.table = NULL, 
                          echo = FALSE) {
  # This function compares either the margins errors from different
  # mipfp objects or the absolute maximum deviation between a given table
  # and the estimates in the mipfp objects.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   list.mipfp: The list produced by Estimate().
  #   true.table: The table which is compared agains the estimates given by
  #               the mipfp objects in the list.
  #   echo: Verbose parameter. If TRUE print what is being compared.
  #
  # Returns: A table with as many rows as the number of mipfp objects in
  #          list.mipfp. Each row details the margins errors or the 
  #          maximum absolute deviation of one mipfp object.
  
  if (length(list.mipfp) < 1) {
    stop('Error: list.mipfp is empty!
         It must contain at least one mipfp object.')
  }   
  
  tab <- array(0, dim=c(0))
  method.list <- list()
    
  for (i in 1:length(list.mipfp)) {          
    # check class of the current object
    if (any(class(list.mipfp[[i]]) == "mipfp") == FALSE) {
      stop('Error: element ', i, ' of list.mipfp is not a mipfp object!')
    }
    # if no true table has been provided: comparison of the margins errors
    # else computation of the deviation between the estimates and the table
    if (is.null(true.table) == TRUE) {
      tab <- rbind(tab, summary(list.mipfp[[i]])$error.margins)    
    }     
    else {
      max.dev <- max(abs(true.table - list.mipfp[[i]]$x.hat))
      max.dev.p <- max(abs(true.table / sum(true.table) - 
                             list.mipfp[[i]]$p.hat))
      
      tab <- rbind(tab,cbind("Deviation"=max.dev, "Prop"=max.dev.p))
    }    
    method.list <- append(method.list, list.mipfp[[i]]$method)
  }          
  
  # verbosity
  if (echo == TRUE && is.null(true.table) == TRUE) {
    cat('Maximum absolute deviation between targets and generated margins:\n')    
  }
  if (echo == TRUE && is.null(true.table) == FALSE) {
    cat('Maximum absolute deviation:\n')    
  }
  
  rownames(tab) <- method.list
  return(tab)
  
}

flat <- function(x, ...) {
  # Generic method to flatten its argument.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object to be flattened.
  #   ...: Further arguments passed to or from other methods.
  
  UseMethod("flat", x)
  
}

flat.default <- function(x, ...) {
  # Default method of the genereric flat function. A specific method to
  # flatten the argument has not been implemented yet and the method returns
  # the original object.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: The original, unmodified object.
  
  warning('Can not flatten this object!')
  return(x)
  
}

flat.array <- function(x, sep = ".", label = "value", l.names = 0, ...) {  
  # This function takes a multidimensional array and flattens it to a 
  # 2-dimension for a pretty printing. The row names are the concatenation
  # of the original dimension names while the only column stores the initial
  # data of the array.
  # Note: The function is inspired from the function wrap.array from the
  #       package R.utils written by Henrik Bengtsson.
  #
  # Author: J. Barthelemy (inspired by H. Bengtsson)
  #
  # Args:
  #   x: An array.
  #   sep: The separator used to concatenate the dimension names.
  #   label: The name of the column storing the data.
  #   lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
  #           the original dimnames.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: An array containing a flattened version of x
  
  # testing if input is not NULL
  if (is.null(x) == TRUE) {
    stop('Error: input x is NULL!')
  }
  
  # getting some useful information
  n.dim <- length(dim(x))  
  dimnames.temp <- dimnames(x)    
  
  # generating default names if dimnames(x) is NULL
  if (is.null(dimnames.temp) == TRUE) {
    for (d in seq(1,n.dim)) {
      dimnames.temp[[d]] <- seq(1,dim(x)[d])
      names(dimnames.temp)[d] <- paste0("V",d)
    }
  }
  
  # flattening the array to one dimension
  dim(x) <- c(prod(dim(x)),1)   
  
  # concatening the dimensions names    
  dims.rename <- function(dims) {    
    d.names <- NULL
    for (d in dims) {
      d.names.tmp <- dimnames.temp[[d]]
      if (l.names > 0) {
        d.names.tmp <- substr(d.names.tmp, 1, l.names)
      }
      if (is.null(d.names)) {
        d.names <- d.names.tmp
      }
      else {        
        d.names <- paste(d.names, 
                         rep(d.names.tmp, each = length(d.names)), 
                         sep = sep)
      }      
    }
    return(d.names)
  }
  
  # updating the dimnames
  dimnames(x) <- c(lapply(list(seq(1,n.dim)), dims.rename), label)
  concat.names <- paste(names(dimnames.temp),collapse = sep)
  names(dimnames(x)) <- c(concat.names,"")
  
  return(x)
  
}

flat.table <- function(x, sep = ".", label = "value", l.names = 0, ...) {
  # This function takes a multidimensional table and flattens it to a 
  # 2-dimension for a pretty printing. The row names are the concatenation
  # of the original dimension names while the only column stores the initial
  # data of the array.
  # Note: The function is inspired from the function wrap.array from the
  #       package R.utils written by Henrik Bengtsson.
  #
  # Author: J. Barthelemy (inspired by H. Bengtsson)
  #
  # Args:
  #   x: A table.
  #   sep: The separator used to concatenate the dimension names.
  #   label: The name of the column storing the data.
  #   lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
  #                 the original dimnames.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: An array containing a flattened version of x
  
  return(flat.array(x, sep = sep, label = label, l.names = l.names, ...))
  
}

flat.matrix  <- function(x, sep = ".", label = "value", l.names = 0, ...) {
  # This function takes a matrix and flattens it to a 
  # 2-dimension for a pretty printing. The row names are the concatenation
  # of the original dimension names while the only column stores the initial
  # data of the array.
  # Note: The function is inspired from the function wrap.array from the
  #       package R.utils written by Henrik Bengtsson.
  #
  # Author: J. Barthelemy (inspired by H. Bengtsson)
  #
  # Args:
  #   x: A matrix.
  #   sep: The separator used to concatenate the dimension names.
  #   label: The name of the column storing the data.
  #   lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
  #                 the original dimnames.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: An array containing a flattened version of x
  
  return(flat.array(x, sep = sep, label = label, l.names = l.names, ...))
  
}

expand <- function(x, ...) {
  # Generic method to expand its into a data frame.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object to be expanded.
  #   ...: Further arguments passed to or from other methods.
  
  UseMethod("expand", x)
  
}

expand.default <- function(x, ...) {
  # Default method of the genereric expand function. A specific method to
  # expand the argument into a data frame has not been implemented yet and the 
  # method returns the original object.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   x: An object.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: The original, unmodified object.
  
  warning('Can not expand this object into a data frame!')
  return(x)
  
}

expand.table <- function(x, ...) {
  # This function takes a multi-dimensionnal contingency table and expands it 
  # to a data frame containing individual records.
  # Note: The function is inspired from the "Cookbook for R" available at
  # http://www.cookbook-r.com/Manipulating_data/Converting_between_data_frames
  # _and_contingency_tables/
  #
  # Author: J. Barthelemy (inspired by Winston Chang)
  #
  # Args: 
  #   x: A table x.
  #   ...: Further arguments passed to or from other methods.
  #
  # Returns: A data frame of the individual records derived from x.
  
  # check if the rounded table summed to at least 1, otherwise stops
  if (sum(x) <= 1) {
    stop("The sum of the element of x should be at least 1!")
  }
  
  # coercing x to a data frame
  x.df <- as.data.frame(round(x), ...)
  
  # getting the number of time each row must be replicated
  idx <- rep.int(seq_len(nrow(x.df)), x.df$Freq)
  
  # drop count column
  x.df$Freq <- NULL
  
  # get the rows from result using idx
  result <- x.df[idx, ]
  
  # renaming the row names
  row.names(result) <- 1:dim(result)[1]
  
  # returning the result
  return(result)
  
}

ComputeA <- function(dim.arr, target.list, target.data) {
  # Given two vector x and and a set of target constraints, returns the marginal
  # matrix A and vector t derived from the targets such that A' * x = t.
  #
  # Note that x and t are obtained by removing the first element of
  # each target in order for A to be full rank.
  #
  # Author: J. Barthelemy
  #
  # Args:
  #   dim.arr: dimension of the problem, i.e. of the array
  #   target.list: A list of the target margins provided in target.data. Each 
  #                component of the list is an array whose cells indicates which
  #                dimension the corresponding margin relates to.
  #   target.data: A list containing the data of the target margins. Each 
  #                component of the list is an array storing a margin. The list 
  #                order must follow the one defined in target.list. Note that 
  #                the cells of the arrays must be non-negative.
  #
  # Returns: A list whose elements are:
  #   marginal.matrix: The marginal matrix.
  #   margins: A vector containing the margins associated with A.
  #   df: The degree of freedom of the problem.
  
  # checking if a seed is provided
  if (is.null(dim.arr) == TRUE) {
    stop('Error: dimension of the seed not provided!')
  }
  
  # checking if target.list is provided 
  if (is.null(target.list) == TRUE) {
    stop('Error: target.list not provided!')
  }
  
  # checking if target.data is provided
  if (is.null(target.data) == TRUE) {
    stop('Error: target.data not provided!')
  }
  
  # checking if NA in target cells
  if (is.na(min(sapply(target.data, min))) == TRUE)  {
    stop('Error: NA values present for margins contained in target.data!')
  }
  
  # generating some useful variables
  n.cells <- prod(dim.arr)
  
  # generating A such that A' * pi = target.data
  margins.vector <- c(1)
  A.t <- matrix(1, nrow = 1, ncol = n.cells)
  # ... generating the marginal matrix row by row
  
  for (k in 1:length(target.list)) {
    
    # convert k-th margin to an array
    temp.margins <- Array2Vector(target.data[[k]])
    
    # remove first condition as it is redundant information
    temp.margins <- temp.margins[-1] / sum(temp.margins)
    margins.vector <- c(margins.vector, temp.margins)
    
    # construct the current row of the marginal matrix
    A.row <- cmm::MarginalMatrix(var = 1:length(dim.arr), 
                                 marg = target.list[[k]], 
                                 dim = dim.arr)[-1, ]
    
    # remove first entry as it is always redundant
    A.t <- rbind(A.t, A.row)      
    
  }
  A.li <- GetLinInd(t(A.t))
  A <- A.li$mat.li
  margins.vector <- margins.vector[A.li$idx]
  
  # degrees of freedom of the estimates
  deg.free <- dim(A)[1] - dim(A)[2] 
  
  # gathering the results and return
  results <- list("marginal.matrix" = A, "margins" = margins.vector,
                  "df" = deg.free)
  return(results)
  
}

Try the mipfp package in your browser

Any scripts or data that you put into this service are public.

mipfp documentation built on May 2, 2019, 6:01 a.m.