Nothing
# File mipfp/R/s3_mipfp.R
# by Johan Barthelemy and Thomas Suesse
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 or 3 of the License
# (at your option).
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
# ------------------------------------------------------------------------------
# This file provides S3 methods for mipfp objects, including a constructor,
# print, summary, error.margins and confint.
# ------------------------------------------------------------------------------
coef.mipfp <- function(object, prop = FALSE, ...) {
# S3 function which extracts estimates from mipfp objects.
#
# Author: J. Barthelemy
#
# Args:
# object: The mipfp object whose estimates should be retrieved.
# prop: If set to TRUE the probabilities are retrieved, otherwise the counts
# will be considered (default: FALSE).
# ...: Not used.
#
# Returns: Estimates extracted from the mipfp object object.
# checking whether the user wants probabilities or counts
if (prop == TRUE) {
tab <- flat(object$p.hat, label = "Probability")
} else {
tab <- flat(object$x.hat, label = "Estimate")
}
# creating and returning the resulting vector
result <- as.vector(tab)
names(result) <- rownames(tab)
return(result)
}
print.mipfp <- function(x, prop = FALSE, ...) {
# S3 print method for class mipfp. This method prints a brief description
# of the input argument.
#
# Author: J. Barthelemy
#
# Args:
# x: The mipfp object to be printed.
# prop: Indicates whether the print shows counts (FALSE) or probabilities
# (TRUE).
# ...: Further arguments passed to or from other methods.
cat("\nCall:\n")
print(x$call)
cat("\nMethod: ", x$method,"- convergence: ", x$conv, "\n")
cat("\nEstimates:\n")
if (prop == TRUE) {
x <- x$p.hat
lab <- "Probability"
} else {
x <- x$x.hat
lab <- "Estimate"
}
if (length(dim(x)) < 3 ) {
print(x, ...)
} else {
print(flat(x, label = lab, ...), ...)
}
invisible(x)
}
summary.mipfp <- function(object, cov.method = "delta", prop = FALSE,
target.list = NULL, l.names = 0, ...) {
# S3 summary method for class mipfp. This method returns a summary object
# containing various information about the estimates contained in the
# firts argument.
#
# Author: J. Barthelemy
#
# Args:
# object: The mipfp object to be summarized.
# cov.method: Indicates which method to use to compute the covariance.
# Possible values are Delta ('delta', default) or Lang ('lang').
# prop: Indicate whether the results should return counts or probabilities.
# target.list: The list of the dimensions of the targets used by the
# estimation process
# ...: Further arguments passed to or from other methods
#
# Returns: An object of class summary.mipfp whose elements are
# call: A call object in which all the specified arguments are given by
# their full names.
# conv: A boolean indicating if the specified method converged to a
# solution (TRUE) or not (FALSE)
# method: The method used to generate estimates.
# df: Degrees of freedom of the estimates.
# estimates: Estimates generated by the selected method with
# standard deviations and associated t- and p-values.
# error.margins: A list returning, for each margin, the absolute maximum
# deviation between the desired and generated margin.
# vcov: A covariance matrix of the estimates (last index move fastest).
# stats.df: Degrees of freedom for the G2, W2 and X2 statistics
# tab.gof: A table containing the G2, W2 and X2 statistics with their
# associated p-values.
# stats.gof: G2, W2 and X2 statistics.
# dim.names: original dimension names of the estimated table.
# gathering some initial data
results <- list("call" = object$call,
"conv" = object$conv,
"method" = object$method,
"dim.names" = dimnames(object$x.hat),
"l.names" = l.names)
# checking whether the user wants probabilities or counts
if (prop == TRUE) {
tab <- flat(object$p.hat, label = "Probability", l.names = l.names, ...)
} else {
tab <- flat(object$x.hat, label = "Estimate", l.names = l.names, ...)
}
# computing covariance matrix
cov.results <- vcov(object, method.cov = cov.method, prop = prop, ...)
if (prop == FALSE){
results$vcov <- cov.results$x.hat.cov
} else {
results$vcov <- cov.results$p.hat.cov
}
# degrees of freedom
results$df <- cov.results$df
# getting the estimates, their standard deviation and p-values
if (prop == TRUE) {
std.dev.arr <- Vector2Array(cov.results$p.hat.se, dim(object$p.hat))
} else {
std.dev.arr <- Vector2Array(cov.results$x.hat.se, dim(object$p.hat))
}
std.dev <- flat(std.dev.arr, label = "StdDev")
tval <- tab / std.dev
dimnames(tval)[2] <- "t.value"
pval <- 2 * pt(-abs(tval), df = results$df)
dimnames(pval)[2] <- "p.value"
tab <- cbind(tab, StdDev = std.dev, t.value = tval, p.value = pval)
results$estimates <- tab
# margins errors
# ... checking if target.list is provided
if (is.null(target.list) == TRUE) {
if (is.null(object$target.list) == TRUE) {
target.list <- eval(object$call$target.list, parent.frame(2))
} else {
target.list <- object$target.list
}
}
# ... retrieving / generatings names
if (is.null(names(object$error.margins)) == TRUE) {
for (d in 1:length(target.list)) {
if (is.null(names(target.list[[d]])) == TRUE) {
names(object$error.margins)[d] <- paste0("V",target.list[[d]],
collapse = ".")
} else {
names(object$error.margins)[d] <- paste0(names(target.list[[d]]),
collapse = ".")
}
}
}
# ... retrieving the errors
results$error.margins <- object$error.margins
# gathering the goodness of fit statistics
gof.results <- gof.estimates(object, ...)
df.gof <- gof.results$stats.df
tab.gof <- rbind(G2 = c(gof.results$G2, 1 - pchisq(gof.results$G2,
df = df.gof)),
W2 = c(gof.results$W2, 1 - pchisq(gof.results$W2,
df = df.gof)),
X2 = c(gof.results$X2, 1 - pchisq(gof.results$X2,
df = df.gof)))
dimnames(tab.gof)[[2]] <- c("Stat","p.value")
results$stats.df <- df.gof
results$stats.gof <- tab.gof
# returning the results
class(results) <- "summary.mipfp"
return(results)
}
print.summary.mipfp <- function(x, ...) {
# S3 print method for class mipfp. This method prints a brief description
# of the input argument.
#
# Author: J. Barthelemy
#
# Args:
# x: The summary.mipfp object to be printed.
# ...: Further arguments passed to or from other methods.
cat("\nCall:\n")
print(x$call)
cat("\nMethod:", x$method," - convergence:", x$conv, "\n\n")
printCoefmat(x$estimates, P.values = TRUE, has.Pvalue = TRUE)
cat("Degrees of freedom:", x$df,"\n")
# printing the levels of the variables
cat("\nVariables:\n")
dn <- x$dim.names
if (is.null(dn) == FALSE) {
for (i in 1:length(x$error.margins)) {
#cat("*", names(dn[i]),":", dn[[i]], "\n")
cat("*", names(dn[i]),":")
for (j in 1:length(dn[[i]])) {
cat("",dn[[i]][j])
if (x$l.names > 0) {
cat(" (",substr(dn[[i]][j],1, x$l.names),")", sep = "")
}
}
cat("\n")
}
}
# margins errors
cat("\nMargins errors:\n")
print(x$error.margins, ...)
# goodness of fit statistics
cat("\nGoodness of fit statistics:\n")
printCoefmat(x$stats.gof, P.values = TRUE, has.Pvalue = TRUE)
cat("Degrees of freedom:", x$stats.df,"\n\n")
invisible(x)
}
error.margins <- function(object, ...) {
# Generic method to compute the margins errors its argument. The error
# is defined as the deviation between the expected (true) and resulting
# margins.
#
# Author: J. Barthelemy
#
# Args:
# x: An object to be flattened.
# ...: Further arguments passed to or from other methods.
UseMethod("error.margins", object)
}
error.margins.default <- function(object, ...) {
# Default method to compute the margins errors its argument. The error
# is defined as the deviation between the expected (true) and resulting
# margins. A specific method has not been implemented yet and the method
# returns the original object.
#
# Author: J. Barthelemy
#
# Args:
# x: An object.
# ...: Further arguments passed to or from other methods.
#
# Returns: The original, unmodified object.
warning('The input argument is not an object of class mipfp!')
invisible(object)
}
error.margins.mipfp <- function(object, ...) {
# S3 error.margins method for class mipfp.
#
# This method returns the maximum deviation between each generated and
# desired margins of the input argument. It corresponds to the absolute
# maximum deviation between each target margin used to generate the estimates
# in the mipfp object and the generated one.
#
# Author: J. Barthelemy
#
# Args:
# object: The summary.mipfp object to be printed.
# ...: Further arguments passed to or from other methods.
#
# Returns: An array containing the absolute maximum deviations for each
# margin.
res <- CompareMaxDev(list(object), echo = FALSE, ...)
return(res)
}
confint.mipfp <- function(object, parm, level = 0.95, prop = FALSE, ...) {
# S3 confint method for class mipfp.
#
# This methods returns the confidence intervals for the estimates in the
# mipfp object provided that the covariance of the estimates have also been
# estimated.
#
# Author: J. Barthelemy
#
# Args:
# object: The mipfp object containing the estimates
# level: The confidence level
# prop: A boolean indicating if the results should be using counts (FALSE)
# or proportion (TRUE). Default is FALSE.
# ...: Further arguments passed to or from other methods (for instance
# vcov.mipf).
#
# Returns: The confidence intervals of the estimates in object in an array.
# checking significance level alpha
alpha <- 1 - level
if (alpha < 0 || alpha > 1) {
warning('Warning: alpha not in the range [0,1], setting to 0.05!')
alpha <- 0.05
}
# getting the estimates and their standart deviation
x <- coef(object, prop = prop, ...)
if (prop == FALSE) {
x.se <- vcov(object, prop = prop, ...)$x.hat.se
} else {
x.se <- vcov(object, prop = prop, ...)$p.hat.se
}
# selection of the estimates
pnames <- names(x)
if (missing(parm)) {
parm <- pnames
}
else if (is.numeric(parm)) {
parm <- pnames[parm]
}
# computing the confidence interval for the selected estimates
l <- qnorm(1 - alpha * 0.5)
label.up <- paste0((1 - alpha * 0.5) * 100, "%")
label.lo <- paste0((alpha * 0.5) * 100, "%")
ci.lo <- x[parm] - l * x.se[parm]
ci.up <- x[parm] + l * x.se[parm]
# returning the result
ci <- cbind(ci.lo, ci.up)
dimnames(ci)[[2]] <- c(label.lo, label.up)
return(ci)
}
vcov.mipfp <- function(object, method.cov = "delta", seed = NULL,
target.data = NULL, target.list = NULL,
replace.zeros = 1e-10, ...) {
# Compute the covariance matrix of the estimators produced by Estimate.
#
# This function determines the covariance matrix of the estimated proportions
# using either
# - the Delta given in the paper "Models for Contingency Tables With Known
# Margins When Target and Sampled Populations Differ" written by Little and
# Wu (1991). This is the default.
# - the Lang method in "Multinomial-Poisson homogeneous models for contingency
# tables".
#
# Author: J. Barthelemy
#
# Args:
# object: an object of class mipfp.
# seed: The initial multi-dimensional array updated by Estimate (optional).
# target.list: A list of the target margins used by the Ipfp function. Each
# component of the list is an array whose cells indicates
# which dimension the corresponding margin relates to
# (optional).
# target.data: A list containing the data of the target margins. Each
# component of the list is an array storing a margin.
# The list order must follow the one defined in target.list.
# Note that the cells of the arrays must be non-negative
# (optional).
# replace.zeros: If 0-cells are to be found in either the seed or the
# estimate arrays, then their values are replaced with this
# value.
# method.cov: Select the method to use for the computation of the covariance.
# The available methods are Delta and Lang.
# ...: Not used.
#
# Returns: A list whose elements are
# x.hat.cov: A covariance matrix of the estimated probabilities (last index
# move fastest).
# p.hat.cov: A covariance matrix of the estimated probabilities (last index
# move fastest).
# x.hat.se: A covariance matrix of the estimated probabilities (last index
# move fastest).
# p.hat.se: A covariance matrix of the estimated probabilities (last index
# move fastest).
# df: The degrees of freedom of the estimates.
# method.cov: The method used to compute the covariance matrix.
# checking if a seed is provided
if (is.null(seed) == TRUE) {
if (is.null(object$seed)) {
seed <- eval(object$call$seed, parent.frame(2), parent.frame(2))
} else {
seed <- object$seed
}
}
# checking if target.list is provided
if (is.null(target.list) == TRUE) {
if (is.null(object$target.list) == TRUE) {
target.list <- eval(object$call$target.list, parent.frame(2))
} else {
target.list <- object$target.list
}
}
# checking if target.data is provided
if (is.null(target.data) == TRUE) {
if (is.null(object$target.data) == TRUE) {
target.data <- eval(object$call$target.data, parent.frame(2))
} else {
target.data <- object$target.data
}
}
# checking if NA in target cells
if (is.na(min(sapply(target.data, min))) == TRUE) {
stop('Error: NA values present for margins contained in target.data!')
}
# selection the method for the covariance computation
method.num <- switch(method.cov, delta = 1, Lang = 2, 3)
if (method.num == 3) {
warning("'method must be 'delta' or 'Lang'! Switching to 'delta'")
method.cov <- "delta"
}
# determining the estimation method used
method.mipfp.num <- switch(object$method, ipfp = 1, ml = 2, chi2 = 3, lsq = 4)
# generating some useful variables
n <- sum(seed)
seed.prob <- Array2Vector(seed / sum(seed))
estimate.prob <- Array2Vector(object$p.hat)
# checking for 0-cells values and replace them with a small value
seed.prob <- ifelse(seed.prob == 0, replace.zeros, seed.prob)
estimate.prob <- ifelse(estimate.prob == 0, replace.zeros, estimate.prob)
# compute marginal matrix A such that A' * x = margins if not given
marg.list <- ComputeA(dim.arr = dim(seed), target.data = target.data,
target.list = target.list)
A <- marg.list$marginal.matrix
margins.vector <- marg.list$margins
# degrees of freedom of the estimates
deg.free <- dim(A)[1] - dim(A)[2]
# Delta method
if (method.num == 1) {
# ... generating inv(D1) and inv(D2)
switch(method.mipfp.num, {
# ipfp
D1.inv <- diag(1 / estimate.prob)
D2.inv <- diag(1 / seed.prob)
}, {
# ml
D1.inv <- diag(1 / (estimate.prob^2 / seed.prob))
D2.inv <- D1.inv
}, {
# chi2
D1.inv <- diag(1 / (estimate.prob^4 / seed.prob^3))
D2.inv <- D1.inv
}, {
# lsq
D1.inv <- diag(1 / seed.prob)
D2.inv <- diag(1 / (seed.prob^3 / estimate.prob^2))
})
# ... obtain orthogonal complement of A using QR decomposition
K <- qr.Q(qr(A), complete = TRUE)[, (dim(A)[2] + 1):dim(A)[1]]
if (is.null(dim(K)) == TRUE) {
K <- t(K)
}
if (dim(K)[1] == 1) {
K <- t(K)
}
# ... computing the covariance of p.hat
p.hat.cov <- (1 / n) * K %*% solve(t(K) %*% D1.inv %*% K) %*%
t(K) %*% D2.inv %*% K %*% solve(t(K) %*% D1.inv %*% K) %*% t(K)
}
# Lang's method
else {
# ... constraint function h(pi) = A'[-1] * pi - margins[-1]
h.fct <- function(m) {
return(t(A)[-1, ] %*% (m / sum(m)) - margins.vector[-1])
}
# ... generating H and D
H.pi <- t(numDeriv::jacobian(h.fct, estimate.prob))
D.pi <- diag(estimate.prob)
# ... computing the covariance
p.hat.cov <- 1 / n * (D.pi - estimate.prob %*% t(estimate.prob) -
D.pi %*% H.pi %*% solve(t(H.pi) %*% D.pi %*% H.pi)
%*% t(H.pi) %*% D.pi)
}
# adding names
rownames(p.hat.cov) <- names(coef(object))
colnames(p.hat.cov) <- names(coef(object))
# counts' covariance
x.hat.cov <- p.hat.cov * sum(object$x.hat)^2
# estimates' standart deviation
p.hat.se <- sqrt(diag(p.hat.cov))
x.hat.se <- sqrt(diag(x.hat.cov))
# gathering and returning the results
results <- list("p.hat.se" = p.hat.se, "p.hat.cov" = p.hat.cov,
"x.hat.se" = x.hat.se, "x.hat.cov" = x.hat.cov,
"df" = deg.free, "method.cov" = method.cov)
return(results)
}
gof.estimates <- function(object, ...) {
# Generic method to compute the goodness of fit of the estimates.
#
# Author: J. Barthelemy
#
# Args:
# object: An object containing estimates.
# ...: Further arguments passed to or from other methods.
UseMethod("gof.estimates", object)
}
gof.estimates.default <- function(object, ...) {
# Default method of the genereric gof.estimate function. A specific method to
# compute the goodness of fit statistics has not been implemented yet and the
# method returns the original object.
#
# Author: J. Barthelemy
#
# Args:
# object: An object.
# ...: Further arguments passed to or from other methods.
#
# Returns: The original object.
warning('Can not conmpute the covariance of the estimates for this object!')
invisible(object)
}
gof.estimates.mipfp <- function(object, seed = NULL,
target.data = NULL, target.list = NULL,
replace.zeros = 1e-10, ...) {
# This function computes statistics to test wheter the target constraints
# are met for the estimates contains in an object of class mipfp.
#
# Note that if the seed, target.data and target.list are not provided
# then the function test whether this information is present in the object
# or can be retrieved from the Estimate call used generate the object.
#
# Author: J. Barthelemy
#
# Args:
# object: The object of class mipfp containing the estimates.
# seed: The seed used to compute the estimates (optional).
# target.data: A list containing the data of the target margins. Each
# component of the list is an array storing a margin.
# The list order must follow the one defined in target.list.
# Note that the cells of the arrays must be non-negative (and
# can even be NA if method = ipfp) (optional).
# target.list A list of the target margins provided in target.data. Each
# component of the list is an array whose cells indicates
# which dimension the corresponding margin relates to
# (optional).
#
# Returns: A list whose elements are:
# G2: Log-likelihood statistic.
# W2: Wald statistic.
# X2: Person chi-squared statistic.
# stats.df: Degrees of freedom for the G2, W2 and X2 statistics.
# checking if a seed is provided
if (is.null(seed) == TRUE) {
if (is.null(object$seed)) {
seed <- eval(object$call$seed, parent.frame(2))
} else {
seed <- object$seed
}
}
# checking if target.list is provided
if (is.null(target.list) == TRUE) {
if (is.null(object$target.list) == TRUE) {
target.list <- eval(object$call$target.list, parent.frame(2))
} else {
target.list <- object$target.list
}
}
# checking if target.data is provided
if (is.null(target.data) == TRUE) {
if (is.null(object$target.data) == TRUE) {
target.data <- eval(object$call$target.data, parent.frame(2))
} else {
target.data <- object$target.data
}
}
# checking if NA in target cells
if (is.na(min(sapply(target.data, min))) == TRUE) {
stop('Error: NA values present for margins contained in target.data!')
}
# compute marginal matrix A such that A' * x = margins
marg.list <- ComputeA(dim.arr = dim(seed), target.data = target.data,
target.list = target.list)
A <- marg.list$marginal.matrix
margins.vector <- marg.list$margins
# gather some necessary variables
n <- sum(seed)
seed.vector <- Array2Vector(seed)
seed.vector[seed.vector == 0] <- replace.zeros * n
seed.prop.vector <- seed.vector / n
# constraint function h(pi) = A'[-1, ] * pi - margins[-1]
h.fct <- function(m) {
return(t(A)[-1, ] %*% (m / sum(m)) - margins.vector[-1])
}
H.seed <- t(numDeriv::jacobian(h.fct, seed.vector))
h.Y <- h.fct(seed.vector)
# compute statistics for testing if constraints are met and appending to
# the results
# ... log-likelihood ratio
G2 <- 2 * sum(seed.vector * log(seed.prop.vector /
Array2Vector(object$p.hat)))
# ... Wald test statistic
W2 <- t(h.Y) %*% solve(t(H.seed) %*% diag(seed.vector) %*% H.seed) %*% h.Y
# ... Pearson Chi-square
X2 <- t(seed.vector - n * Array2Vector(object$p.hat)) %*%
diag(1 / (n * Array2Vector(object$p.hat))) %*%
(seed.vector - n * Array2Vector(object$p.hat))
# ... associated degree of freedoms (dim of constraint function h)
stats.df <- dim(t(A))[1] - 1
# gathering the results and return
results <- list("G2" = G2, "W2" = W2, "X2" = X2, "stats.df" = stats.df)
return(results)
}
Estimate <- function(seed, target.list, target.data, method = "ipfp",
keep.input = FALSE, ...) {
# Update an N-way table given target margins.
#
# This function provides several alternative estimating methods to
# estimate multiway table subject to known constrains/totals: Iterative
# Proportionnal Fitting Procedure (IPFP), Maximum Likelihood method (ML),
# Minimum Chi-squared (CHI2) and Weighted Least squares (LSQ).
#
# The covariance matrix of the estimate as well as several other statistics
# are also computed.
#
# As this function is an interface to the functions Ipfp and
# ObtainModelEstimates, please see their respective documentation for more
# details.
#
# The outcome of the function is an object of class mipfp.
#
# Author: J. Barthelemy
#
# Args:
# seed: The initial multi-dimensional array to be updated. Each cell must
# be non-negative.
# target.list: A list of the target margins provided in target.data. Each
# component of the list is an array whose cells indicates
# which dimension the corresponding margin relates to.
# target.data: A list containing the data of the target margins. Each
# component of the list is an array storing a margin.
# The list order must follow the one defined in target.list.
# Note that the cells of the arrays must be non-negative (and
# can even be NA if method = ipfp).
# method: The method used for estimation. Possible value are
# - "ipfp" (default): iterative proportional fitting procedure
# - "ml": maximum likelihood method
# - "chi2": minimum chi-squared method
# - "lsq": least squares method.
# keep.input: If set to TRUE, then the function will add seed, target.data
# and target.list to the returned object.
# ...: Further arguments passed to or from other methods. See documentation
# of Ipfp and ObtainModelEstimate for a list of parameters to control
# the estimation process.
# Returns: A mipfp object consisting of a list whose elements are
# call: A call object in which all the specified arguments are given by
# their full names.
# method: The selected method for estimation.
# pi.hat: Array of the estimated table probabilities.
# xi.hat: Array of the estimated table frequencies.
# stp.crit: The final value of the stopping criterion (if method = ipfp).
# evol.stp.crit: Evolution of the stopping criterion over the iterations
# (if method = ipfp).
# solnp.res: For optimisation it uses the R package Rsolnp and solnp is the
# corresponding object returned by Rsolnp (if method is NOT
# ipfp).
# conv: A boolean indicating whether the algorithm converged to a solution.
# error.margins: A list returning, for each margin, the absolute maximum
# deviation between the target and generated margin.
# checking if a seed is provided
if (is.null(seed) == TRUE) {
stop('Error: no seed specified!')
}
# checking if targets are provided
if (is.null(target.data) == TRUE | is.null(target.list) == TRUE) {
stop('Error: target.data and/or target.list not specified!')
}
# switching to the desired user method
method.num <- switch(method, ipfp = 1, ml = 2, chi2 = 3, lsq = 4, 5)
if (method.num == 5) {
warning("'method' must be 'ipfp', 'ml', 'chi2' or 'lsq', switching to
'ipfp'!")
method <- "ipfp"
}
# calling the corresponding function to update the seed
if (method == "ipfp") {
result <- Ipfp(seed, target.list, target.data, ...)
} else {
result <- ObtainModelEstimates(seed, target.list, target.data,
method = method, ...)
}
result$call <- match.call()
# saving the input if required
if (keep.input == TRUE) {
result$seed <- seed
result$target.list <- target.list
result$target.data <- target.data
}
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.