Nothing
# File mipfp/R/utils.R
# by Johan Barthelemy and Thomas Suesse
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 or 3 of the License
# (at your option).
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
# ------------------------------------------------------------------------------
# This file provides:
# - functions to convert array to vectors and conversely;
# - a function to remove the linearly dependent columns from a given matrix;
# - a function to compute the Wald confidence interval for the estimates
# generated by either Ipfp or ObtainModelEstimates;
# - a function to compare the deviations between given targets or a table of a
# a list of mipfp objects;
# - a S3 method to flatten a multidimensional array.
# ------------------------------------------------------------------------------
Array2Vector <- function(arr) {
# Transform a N-dimensional array a to vector, where last index moves fastest.
#
# Author: T. Suesse
#
# Args:
# arr: The array to be transformed into a vector.
#
# Returns: A vector containing the input array data.
# checking if an input array is specified
if (is.null(arr) == TRUE) {
stop('Error: no array arr specified!')
}
dim.array <- dim(arr)
if (is.null(dim.array) == FALSE ) {
arr <- aperm(arr, seq(length(dim.array), 1, by = -1))
}
return(c(arr))
}
Vector2Array <- function(vect, dim.out) {
# Transform a vector to an array with given dimensions, where last index moves
# fastest.
#
# Author: T. Suesse
#
# Args:
# vector: The vector to be transformed into an array.
# dim.out: The dimension of the generated array.
#
# Returns: An array filled with the input vector data.
# checking if an input array is specified
if (is.null(vect) == TRUE) {
stop('Error: no vector vect specified!')
}
# checking if an input array is specified
if (is.null(dim.out) == TRUE) {
stop('Error: no dimension dim.out specified for the output array!')
}
l.dim.out<-length(dim.out)
arr <- array(vect, dim.out[seq(l.dim.out,1,by=-1)])
arr <- aperm(arr, seq(l.dim.out, 1, by = -1))
return(arr)
}
GetLinInd <- function(mat, tol = 1e-10) {
# Removing the linearly dependant columns of matrix to obtain a matrix of full
# rank using QR decomposition. See Matrix Computations from Golub and Van Loan
# (2012) for a detailed description of the procedure.
#
# Author: J. Barthelemy
#
# Args:
# mat: The matrix possibly containing linearly dependant columns.
# tol: Rank estimation tolerance.
#
# Returns: The extracted columns of mat.
# checking if an input matrix is specified
if (is.null(mat) == TRUE) {
stop('Error: mat is missing!')
}
# checking if tol is positive
if (tol < 0.0) {
stop('Error: tol must be positive!')
}
# QR decomposition
mat.li <- as.matrix(mat)
mat.qr = qr(mat.li)
# if input matrix is of full rank, nothing to do
if( mat.qr$rank == dim(mat.li)[2] ) {
idx = seq(1,dim(mat.li)[2])
result = list("mat.li" = mat.li, "idx" = idx)
return(result)
}
if( is.vector(qr.R(mat.qr)) == FALSE ) {
diagr = abs(diag(qr.R(mat.qr)))
} else {
diagr = qr.R(mat.qr)[1]
}
# rank computation
r = tail(which(diagr >= tol * diagr[1]), n = 1)
# selection the r linearly independant columns
idx <- sort(mat.qr$pivot[1:r])
mat.li <- mat.li[,idx]
# returning the matrix and the selected row indices
result = list("mat.li" = mat.li, "idx" = idx)
return(result)
}
CompareMaxDev <- function(list.mipfp = list(), true.table = NULL,
echo = FALSE) {
# This function compares either the margins errors from different
# mipfp objects or the absolute maximum deviation between a given table
# and the estimates in the mipfp objects.
#
# Author: J. Barthelemy
#
# Args:
# list.mipfp: The list produced by Estimate().
# true.table: The table which is compared agains the estimates given by
# the mipfp objects in the list.
# echo: Verbose parameter. If TRUE print what is being compared.
#
# Returns: A table with as many rows as the number of mipfp objects in
# list.mipfp. Each row details the margins errors or the
# maximum absolute deviation of one mipfp object.
if (length(list.mipfp) < 1) {
stop('Error: list.mipfp is empty!
It must contain at least one mipfp object.')
}
tab <- array(0, dim=c(0))
method.list <- list()
for (i in 1:length(list.mipfp)) {
# check class of the current object
if (any(class(list.mipfp[[i]]) == "mipfp") == FALSE) {
stop('Error: element ', i, ' of list.mipfp is not a mipfp object!')
}
# if no true table has been provided: comparison of the margins errors
# else computation of the deviation between the estimates and the table
if (is.null(true.table) == TRUE) {
tab <- rbind(tab, summary(list.mipfp[[i]])$error.margins)
}
else {
max.dev <- max(abs(true.table - list.mipfp[[i]]$x.hat))
max.dev.p <- max(abs(true.table / sum(true.table) -
list.mipfp[[i]]$p.hat))
tab <- rbind(tab,cbind("Deviation"=max.dev, "Prop"=max.dev.p))
}
method.list <- append(method.list, list.mipfp[[i]]$method)
}
# verbosity
if (echo == TRUE && is.null(true.table) == TRUE) {
cat('Maximum absolute deviation between targets and generated margins:\n')
}
if (echo == TRUE && is.null(true.table) == FALSE) {
cat('Maximum absolute deviation:\n')
}
rownames(tab) <- method.list
return(tab)
}
flat <- function(x, ...) {
# Generic method to flatten its argument.
#
# Author: J. Barthelemy
#
# Args:
# x: An object to be flattened.
# ...: Further arguments passed to or from other methods.
UseMethod("flat", x)
}
flat.default <- function(x, ...) {
# Default method of the genereric flat function. A specific method to
# flatten the argument has not been implemented yet and the method returns
# the original object.
#
# Author: J. Barthelemy
#
# Args:
# x: An object.
# ...: Further arguments passed to or from other methods.
#
# Returns: The original, unmodified object.
warning('Can not flatten this object!')
return(x)
}
flat.array <- function(x, sep = ".", label = "value", l.names = 0, ...) {
# This function takes a multidimensional array and flattens it to a
# 2-dimension for a pretty printing. The row names are the concatenation
# of the original dimension names while the only column stores the initial
# data of the array.
# Note: The function is inspired from the function wrap.array from the
# package R.utils written by Henrik Bengtsson.
#
# Author: J. Barthelemy (inspired by H. Bengtsson)
#
# Args:
# x: An array.
# sep: The separator used to concatenate the dimension names.
# label: The name of the column storing the data.
# lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
# the original dimnames.
# ...: Further arguments passed to or from other methods.
#
# Returns: An array containing a flattened version of x
# testing if input is not NULL
if (is.null(x) == TRUE) {
stop('Error: input x is NULL!')
}
# getting some useful information
n.dim <- length(dim(x))
dimnames.temp <- dimnames(x)
# generating default names if dimnames(x) is NULL
if (is.null(dimnames.temp) == TRUE) {
for (d in seq(1,n.dim)) {
dimnames.temp[[d]] <- seq(1,dim(x)[d])
names(dimnames.temp)[d] <- paste0("V",d)
}
}
# flattening the array to one dimension
dim(x) <- c(prod(dim(x)),1)
# concatening the dimensions names
dims.rename <- function(dims) {
d.names <- NULL
for (d in dims) {
d.names.tmp <- dimnames.temp[[d]]
if (l.names > 0) {
d.names.tmp <- substr(d.names.tmp, 1, l.names)
}
if (is.null(d.names)) {
d.names <- d.names.tmp
}
else {
d.names <- paste(d.names,
rep(d.names.tmp, each = length(d.names)),
sep = sep)
}
}
return(d.names)
}
# updating the dimnames
dimnames(x) <- c(lapply(list(seq(1,n.dim)), dims.rename), label)
concat.names <- paste(names(dimnames.temp),collapse = sep)
names(dimnames(x)) <- c(concat.names,"")
return(x)
}
flat.table <- function(x, sep = ".", label = "value", l.names = 0, ...) {
# This function takes a multidimensional table and flattens it to a
# 2-dimension for a pretty printing. The row names are the concatenation
# of the original dimension names while the only column stores the initial
# data of the array.
# Note: The function is inspired from the function wrap.array from the
# package R.utils written by Henrik Bengtsson.
#
# Author: J. Barthelemy (inspired by H. Bengtsson)
#
# Args:
# x: A table.
# sep: The separator used to concatenate the dimension names.
# label: The name of the column storing the data.
# lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
# the original dimnames.
# ...: Further arguments passed to or from other methods.
#
# Returns: An array containing a flattened version of x
return(flat.array(x, sep = sep, label = label, l.names = l.names, ...))
}
flat.matrix <- function(x, sep = ".", label = "value", l.names = 0, ...) {
# This function takes a matrix and flattens it to a
# 2-dimension for a pretty printing. The row names are the concatenation
# of the original dimension names while the only column stores the initial
# data of the array.
# Note: The function is inspired from the function wrap.array from the
# package R.utils written by Henrik Bengtsson.
#
# Author: J. Barthelemy (inspired by H. Bengtsson)
#
# Args:
# x: A matrix.
# sep: The separator used to concatenate the dimension names.
# label: The name of the column storing the data.
# lnames: If > 0, set the max lenght of the dimnames, otherwise keeps
# the original dimnames.
# ...: Further arguments passed to or from other methods.
#
# Returns: An array containing a flattened version of x
return(flat.array(x, sep = sep, label = label, l.names = l.names, ...))
}
expand <- function(x, ...) {
# Generic method to expand its into a data frame.
#
# Author: J. Barthelemy
#
# Args:
# x: An object to be expanded.
# ...: Further arguments passed to or from other methods.
UseMethod("expand", x)
}
expand.default <- function(x, ...) {
# Default method of the genereric expand function. A specific method to
# expand the argument into a data frame has not been implemented yet and the
# method returns the original object.
#
# Author: J. Barthelemy
#
# Args:
# x: An object.
# ...: Further arguments passed to or from other methods.
#
# Returns: The original, unmodified object.
warning('Can not expand this object into a data frame!')
return(x)
}
expand.table <- function(x, ...) {
# This function takes a multi-dimensionnal contingency table and expands it
# to a data frame containing individual records.
# Note: The function is inspired from the "Cookbook for R" available at
# http://www.cookbook-r.com/Manipulating_data/Converting_between_data_frames
# _and_contingency_tables/
#
# Author: J. Barthelemy (inspired by Winston Chang)
#
# Args:
# x: A table x.
# ...: Further arguments passed to or from other methods.
#
# Returns: A data frame of the individual records derived from x.
# check if the rounded table summed to at least 1, otherwise stops
if (sum(x) <= 1) {
stop("The sum of the element of x should be at least 1!")
}
# coercing x to a data frame
x.df <- as.data.frame(round(x), ...)
# getting the number of time each row must be replicated
idx <- rep.int(seq_len(nrow(x.df)), x.df$Freq)
# drop count column
x.df$Freq <- NULL
# get the rows from result using idx
result <- x.df[idx, ]
# renaming the row names
row.names(result) <- 1:dim(result)[1]
# returning the result
return(result)
}
ComputeA <- function(dim.arr, target.list, target.data) {
# Given two vector x and and a set of target constraints, returns the marginal
# matrix A and vector t derived from the targets such that A' * x = t.
#
# Note that x and t are obtained by removing the first element of
# each target in order for A to be full rank.
#
# Author: J. Barthelemy
#
# Args:
# dim.arr: dimension of the problem, i.e. of the array
# target.list: A list of the target margins provided in target.data. Each
# component of the list is an array whose cells indicates which
# dimension the corresponding margin relates to.
# target.data: A list containing the data of the target margins. Each
# component of the list is an array storing a margin. The list
# order must follow the one defined in target.list. Note that
# the cells of the arrays must be non-negative.
#
# Returns: A list whose elements are:
# marginal.matrix: The marginal matrix.
# margins: A vector containing the margins associated with A.
# df: The degree of freedom of the problem.
# checking if a seed is provided
if (is.null(dim.arr) == TRUE) {
stop('Error: dimension of the seed not provided!')
}
# checking if target.list is provided
if (is.null(target.list) == TRUE) {
stop('Error: target.list not provided!')
}
# checking if target.data is provided
if (is.null(target.data) == TRUE) {
stop('Error: target.data not provided!')
}
# checking if NA in target cells
if (is.na(min(sapply(target.data, min))) == TRUE) {
stop('Error: NA values present for margins contained in target.data!')
}
# generating some useful variables
n.cells <- prod(dim.arr)
# generating A such that A' * pi = target.data
margins.vector <- c(1)
A.t <- matrix(1, nrow = 1, ncol = n.cells)
# ... generating the marginal matrix row by row
for (k in 1:length(target.list)) {
# convert k-th margin to an array
temp.margins <- Array2Vector(target.data[[k]])
# remove first condition as it is redundant information
temp.margins <- temp.margins[-1] / sum(temp.margins)
margins.vector <- c(margins.vector, temp.margins)
# construct the current row of the marginal matrix
A.row <- cmm::MarginalMatrix(var = 1:length(dim.arr),
marg = target.list[[k]],
dim = dim.arr)[-1, ]
# remove first entry as it is always redundant
A.t <- rbind(A.t, A.row)
}
A.li <- GetLinInd(t(A.t))
A <- A.li$mat.li
margins.vector <- margins.vector[A.li$idx]
# degrees of freedom of the estimates
deg.free <- dim(A)[1] - dim(A)[2]
# gathering the results and return
results <- list("marginal.matrix" = A, "margins" = margins.vector,
"df" = deg.free)
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.