#' Rounds an object by ensuring its totals remains equal before and after rounding
#'
#' Freely inspired by \url{https://stackoverflow.com/questions/32544646/round-vector-of-numerics-to-integer-while-preserving-their-sum}
#'
#' @param x the object to round
#' @param ... ignored
#' @return the rounded version of the object
#'
#' @export
#'
round_sum <- function (x, ...) {
UseMethod("round_sum", x)
}
round_sum.numeric <- function(x, ...) {
y <- floor(x)
indices <- tail(order(x-y), round(sum(x)) - sum(y))
y[indices] <- y[indices] + 1
y
}
round_sum.matrix <- function(x, ...) {
# convert the matrix to a vector (by columns)
y <- unlist(x)
# round it
r <- round_sum.numeric(y)
# convert it to a matrix
mat <- matrix(r, ncol=ncol(x), nrow=nrow(x))
# convert it back to a matrix
res <- as.data.frame(mat, row.names=row.names(x), col.names=colnames(x))
colnames(res) <- colnames(x)
res
}
round_sum.data.frame <- round_sum.matrix
# for a distribution of degree,
# sums the total degree, on the basis of
# 0
sum_degrees <- function(pdx) {
# ... sum the weight we are playing with
power <- rep(0, ncol(pdx))
for (i in 1:nrow(pdx)) {
power <- power + (i-1)*pdx[i,]
}
power
}
#' Update a given distribution column of degrees probabilities
#' so its matching a target average degree for this column.
#'
#' It works by finding a pivot and then increasing values on one
#' side and removing probabilities on the other side.
#'
#' @param pdx the distribution of degrees
#' @param t.target the vector of the expected average degrees
#' @param verbose more text if TRUE (FALSE by default)
#' @param precision for comparisons
#'
#' @author samuel.thiriot@res-ear.ch
#'
#' @keywords internal
#'
update_degree_distribution.col <- function(pdx, t.target, verbose=FALSE, precision=1e-10) {
np.orig <- pdx * 0:(length(pdx)-1)
t.orig <- sum(np.orig)
# cat("t.orig", t.orig, "\n")
# quick exit if there is nothing to do
if (abs(t.target - t.orig) <= precision) {
return(pdx)
}
# quick case: if the target is 0, then the solution is (1,0,.....,0)
if (t.target <= precision) {
pdx[0] <- 1
for (i in 2:length(pdx)) {
pdx[i] <- 0
}
return(pdx)
}
if (verbose) {
cat("should update a column of degree distribution pdx=",
paste(pdx, collapse=","), "such as it sums up to ", t.target,
"instead of the current sum ", t.orig, "\n")
}
p.potential <- rep(0,times=length(pdx))
p.potential.sum.neg <- 0
# what are the free margins?
for (i in 1:length(pdx)) {
if (pdx[i] == 0) {
# nothing
} else if (
( (t.orig < t.target ) & (i-1 < t.target) )
|
( (t.orig > t.target ) & (i-1 > t.target) )
) {
p.potential[i] <- -pdx[i]
p.potential.sum.neg <- p.potential.sum.neg - pdx[i]
} else {
p.potential[i] <- 1 - pdx[i]
}
}
#
# print("potential")
# print(p.potential)
# what can we gain ?
np.potential <- pmin(p.potential,-p.potential.sum.neg) * 0:(length(pdx)-1)
#print(np.potential)
np.min <- min(np.potential)
np.max <- max(np.potential)
np.potential.positive.max <- max(np.potential) # max(p.potential[which(np.potential == np.max)])
# cat("np.potential.positive.max",np.potential.positive.max,"\n")
np.potential.negative.sum <- sum(np.potential[which(np.potential < 0)])
# cat("np.potential.negative.sum",np.potential.negative.sum,"\n")
np.potential.cumulated <- np.potential.negative.sum + np.potential.positive.max
# print("np.potential.cumulated")
# print(np.potential.cumulated)
# create factors, 0 or 1
factors <- pmin( (np.potential == np.max) + (p.potential < 0), 1)
# print("factors")
# print(factors)
rat <- (t.target - t.orig) / np.potential.cumulated
# cat("rat",rat,"\n")
p.add <- factors * rat * pmin(p.potential, -p.potential.sum.neg)
# cat("p.add", p.add,"summing up to ", abs(sum(p.add)),"\n")
# cat("p.add",p.add,"=",sum(p.add),"\n")
if (abs(sum(p.add)) >= precision) {
stop("The case is too constrained to be solved (cannot adapt the degree probabilities that far)")
}
# print("p.modified ------------------------------")
p.modified <- p.add + pdx
# cat("p.modified",p.modified,"=",sum(p.modified),"\n")
if (length(which(p.modified < 0)) > 0) {
stop("The case is too constrained to be solved (cannot adapt the degree probabilities that far - would have probabilities below 0)")
}
if (length(which(p.modified > 1)) > 0) {
stop("The case is too constrained to be solved (cannot adapt the degree probabilities that far - would have probabilities greater than 1)")
}
# cat("p.modified", p.modified, abs(sum(p.modified)-1), "\n")
if (abs(sum(p.modified)-1) > precision) {
stop("The case is too constrained to be solved (cannot adapt the degree probabilities that far)")
}
# now ensure this leads to the expected result
np.res.cumulated <- sum(p.modified * 0:(length(pdx)-1))
# cat("np.res.cumulated",np.res.cumulated,"for target", t.target,"\n")
p.modified
}
#' Update a given distribution of degrees probabilities so its matching target degrees
#'
#' This process is done column by column by calling \link{update_degree_distribution.col}.
#' '
#' @param pdx the distribution of degrees
#' @param dx the vector of the expected average degrees
#' @param precision the precision for comparison with 0; change only if suggested and relevant
#' @param verbose more messages if TRUE
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
update_degree_distribution <- function(pdx, dx, precision=.Machine$double.eps, verbose=F) {
if (class(pdx) != "data.frame") {
stop("pdx should be a data.frame")
}
if (class(dx) != "numeric") {
stop("dx should be numeric")
}
if (ncol(pdx) != length(dx)) {
stop("length of dx (",length(dx),") should match the columns count of pdx (",ncol(pdx),")")
}
# ... sum the weight we are playing with
power <- sum_degrees(pdx)
# print("working on:")
# print(pdx)
# print("original average degree: ")
# print(power)
# print("yet we are supposed to reach")
# print(dx)
factord <- dx/power
pdx.reweighted <- pdx
for (c in 1:ncol(pdx)) {
pdx.reweighted[,c] <- update_degree_distribution.col(pdx[,c],dx[c], verbose=verbose, precision=precision)
}
colnames(pdx.reweighted) <- colnames(pdx)
pdx.reweighted
}
#' Patches a degree distribution contingencies so its totals fit expectations
#'
#' Adapts a distribution of degrees so that each column times n sums to expected nn
#' (basically solves rounding issues for this specific case)
#'
#' @param pdn the contigencies of degrees
#' @param nn the expected total degree (ni) that should be reached
#' @param cn the expected total count (ci) that should be reached
#' @param verbose print detailed information during the process
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
rectify.degree.counts <- function(pdn, nn, cn, verbose=FALSE) {
if (verbose) {
cat("should rectify\n")
print(pdn)
cat("to reach\n")
print(nn)
}
for (col in 1:ncol(pdn)) {
# if (verbose) cat("col ", col, "\n")
total.current <- sum(pdn[,col]*0:(nrow(pdn)-1))
total.expected <- nn[col]
if (total.current > total.expected) {
to.remove <- total.current - total.expected
if (verbose) cat("col ", col, ": should remove ", to.remove, "\n")
# no more warning: we anyway check at the end how well we fixed it
# if (to.remove > 1) {
# warning("/!\\ the solution to fix rounding errors below 1 slot (here:",to.remove,") is not managed well\n.",sep="")
# }
# we are creating more slots than expected
# ... we have to remove it somewhere
if ( (pdn[2,col] >= to.remove) ) { # && (pdn[1,col] > 0)
pdn[2,col] <- pdn[2,col] - to.remove
pdn[1,col] <- pdn[1,col] + to.remove
} else if ( ( nrow(pdn) >= 3) && (pdn[3,col] >= to.remove/2) ) { # && (pdn[1,col] > 0)
# cat("removing 2 and adding 1\n")
# to remove -1, we also might remove 2 and add 1 !
# print(pdn[,col])
pdn[3,col] <- pdn[3,col] - to.remove
pdn[2,col] <- pdn[2,col] + to.remove
# print(pdn[,col])
} else {
if (verbose)
warning("/!\\ found no good solution to fix this rounding error by removing ",to.remove,"\n")
}
} else if (total.current < total.expected) {
to.add <- total.expected - total.current
if (verbose) cat("col ", col, ": should add ", to.add, "\n")
# no more warning: we anyway check at the end how well we fixed it
# if (to.add > 1) {
# warning("/!\\ the solution to fix rounding errors above 1 slot (here:",to.add,") is not managed well\n", sep="")
# }
# we are creating more slots than expected
# ... we have to remove it somewhere
if (pdn[1,col] >= to.add) { # (pdn[2,col] > 0
pdn[1,col] <- pdn[1,col] - to.add
pdn[2,col] <- pdn[2,col] + to.add
} else if ( (nrow(pdn) >= 3) && (pdn[3,col] >= to.add) ) { # (pdn[2,col] > 0
pdn[2,col] <- pdn[2,col] - to.add
pdn[3,col] <- pdn[3,col] + to.add
} else {
if (verbose) {
warning("/!\\ found no good solution to fix this rounding error of an additional ",to.add,"\n")
print(pdn[,col])
}
}
}
}
# adapt the cn s we did not checked until now
to.add <- unname(cn) - unname(colSums(pdn))
if (!all(to.add == 0) && (sum(to.add) == 0)) {
# we can solve the problem by playing on the first raw (the one which induces no degree)
pdn[1,] <- pdn[1,] + to.add
# nota: this solution might lead to inconsistent result;
# typically if degree 0 is impossible, we are adding negative values here !
# it will be checked later.
}
# ensure we did it well, that is we achieve to reach the expected result
if (all.equal(
unname(colSums(pdn*(0:(nrow(pdn)-1)))),
unname(nn)
) != TRUE) {
# print(pdn)
# print(nn)
# print(cn)
stop("was unable to fix the rounding of degree distributions ",
"so the total slots ",
paste(unname(colSums(pdn*(0:(nrow(pdn)-1)))), collapse=","),
" sums up to ",
paste(nn, collapse=",")
)
}
if ((all.equal(unname(cn), unname(colSums(pdn))) != TRUE) || any(pdn[1,] < 0)) {
stop("was unable to fix the rounding of degree distributions ",
"so the total count ", paste(unname(colSums(pdn)), collapse=","),
" matches ",
paste(unname(cn), collapse=",")
)
}
pdn
}
#' Normalise an object
#'
#' takes a vector, matrix or list and updates its content
#' by dividing it by its sum.
#'
#' @param d any data to normalise
#' @param ... additional parameters are likely to be ignored
#' @return the same object normalised
#'
#' @export
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
normalise <- function(d, ...) {
UseMethod("normalise", d)
}
normalise.data.frame <- function(d, ...) {
d / sum(d)
}
normalise.numeric <- function(d, ...) {
d / sum(d)
}
normalise.list <- function(d, ...) {
total <- sum.list(d)
lapply(d, "/", total)
}
normalise.dpp_degree_cpt <- function(d, ...) {
for (i in 1:ncol(d$data)) {
d$data[i] <- normalise(d$data[i])
}
d
}
#' Sums applied on a list
#'
#' sums the content of the list (required for recent R versions)
#'
#' @param l the list to sum
#' @return the same object summed with operator "+"
#'
#' @keywords internal
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
sum.list <- function(l) {
Reduce("+", l)
}
#' Replaces NaN by zeros
#'
#' Replaces NaN by zeros. Used when we might divide 0
#' by 0 and know the result should be 0.
#'
#' @param x a vector, list or matrix
#' @param ... ignored
#' @return the same type without NaNs
#'
#' @keywords internal
#'
nan_to_zeros <- function (x, ...) {
UseMethod("nan_to_zeros", x)
}
nan_to_zeros.list <- function (x, ...) {
x[which(is.nan(x))] <- 0
x
}
nan_to_zeros.numeric <- nan_to_zeros.list
nan_to_zeros.matrix <- function (x, ...) {
for (i in 1:ncol(x)) {
x[,i] <- nan_to_zeros(x[,i])
}
x
}
nan_to_zeros.data.frame <- nan_to_zeros.matrix
#' Replaces Infinite by zeros
#'
#' Replaces Infinite by zeros. Used when we divide
#' by a value and know the result should be 0
#' when the diviser is 0.
#'
#' @param vv a vector or list
#' @return the same vector without Infinite
#'
#' @keywords internal
#'
inf_to_zeros <- function(vv) {
vv[which(is.infinite(vv))] <- 0
vv
}
#' A simple Iterated Proportional Fitting implementation
#'
#' taken from https://stats.stackexchange.com/questions/59115/iterative-proportional-fitting-in-r
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @seealso The entry point for IPF 2D: \code{\link{ipf.2d}}
#'
#' @keywords internal
#'
ipf.2d.stackoverflow <- function(Margins_, seedAry, maxiter=100, closure=0.001, verbose=FALSE) {
#Check to see if the sum of each margin is equal
MarginSums. <- unlist(lapply(Margins_, sum))
if(any(MarginSums. != MarginSums.[1])) warning("IPF: sum of each margin not equal")
#Replace margin values of zero with 0.001
Margins_ <- lapply(Margins_, function(x) {
# TODO ???
if(any(x == 0)) warning("IPF: zeros in marginsMtx replaced with 0.001")
x[x == 0] <- 0.001
x
})
#Check to see if number of dimensions in seed array equals the number of
#margins specified in the marginsMtx
numMargins <- length(dim(seedAry))
if(length(Margins_) != numMargins) {
stop("IPF: number of margins in marginsMtx not equal to number of margins in seedAry")
}
#Set initial values
resultAry <- seedAry
iter <- 0
marginChecks <- rep(1, numMargins)
margins <- seq(1, numMargins)
#Iteratively proportion margins until closure or iteration criteria are met
while((any(marginChecks > closure)) & (iter < maxiter)) {
for(margin in margins) {
marginTotal <- apply(resultAry, margin, sum)
marginCoeff <- Margins_[[margin]]/marginTotal
marginCoeff[is.infinite(marginCoeff)] <- 0
resultAry <- sweep(resultAry, margin, marginCoeff, "*")
marginChecks[margin] <- sum(abs(1 - marginCoeff))
}
iter <- iter + 1
}
#If IPF stopped due to number of iterations then output info
if(verbose && (iter == maxiter)) cat("IPF stopped due to number of iterations\n")
#Return balanced array
resultAry
}
#' Applies Iterative Proportional Fitting on a 2d dataframe
#'
#' Takes as inputs a dataframe representing a 2d matrix,
#' and target marings for cols and rows.
#' Returns a table reweighted so its sums fit margins.
#'
#' Either falls back to a local implementation \code{\link{ipf.2d.stackoverflow}}, or to an
#' higher quality one from the mipfp package if it is installed.
#'
#' @param df the dataframe to reweight
#' @param margins_cols a vector containing the target marginals for cols
#' @param margins_rows a vector containing the target marginals for rows
#' @param verbose displays more messages if TRUE (FALSE by default)
#' @param max.iterations the maximum iterations before stopping (1000 by default)
#' @param precision the target precision, will stop once reached (defaults to 1e-6)
#' @return the dataframe reweighted
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @export
#'
ipf.2d <- function(df, margins_cols, margins_rows, max.iterations=1000, precision=1e-10, verbose=FALSE) {
if (class(df) != "data.frame")
stop("df should be a data frame")
if (class(margins_cols) != "numeric")
stop("margins_cols should be a numeric vector")
if (class(margins_rows) != "numeric")
stop("margins_rows should be a numeric vector")
if (length(margins_rows) != nrow(df))
stop(paste("the length of margins_rows (", length(margins_rows) , ") should match the count of rows (", nrow(df), ") from df", sep=""))
if (length(margins_cols) != ncol(df))
stop(paste("the length of margins_cols (", length(margins_cols) , ") should match the count of columns (", ncol(df), ") from df", sep=""))
if (FALSE && verbose) {
cat("should reweight the matrix\n")
print(df)
cat("in order to reach column marginals: ", paste(margins_cols, collapse=","), "\n")
cat("in order to reach row marginals: ", paste(margins_rows, collapse=","), "\n")
}
if (requireNamespace("mipfp", quietly = TRUE)) {
# the mipfp package is available, let's use it, its a reference implementation
if (verbose) cat("using for IPF the implementation from package mipfp\n")
res <- mipfp::Ipfp(
seed=as.matrix(df),
target.list=list(2,1),
target.data=list(margins_cols, margins_rows),
print = verbose,
iter = max.iterations,
tol = 1e-10, tol.margins = precision , na.target = FALSE
)
if (!res$conv) {
# detect failure
print(res)
stop("IPF did not converged properly. That's a bit unexpected. Stopping.")
}
# extract the result of interest
as.data.frame(res$p.hat)
} else {
# fallback to a local, less powerful implementation
res <- ipf.2d.stackoverflow(
Margins_=list(margins_rows, margins_cols),
seedAry=as.matrix(df),
maxiter=max.iterations,
closure=precision,
verbose=verbose
)
normalise(as.data.frame(res))
}
}
#' Completes a partial solution by inference
#'
#' Takes an incomplete solution and enriches by applying the equations.
#' For instance if an equation links \code{hat.di * hat.ci = hat.ni},
#' then this equation will be applied if two of these variables are available.
#'
#'
#' @param sol the current solution (a named list)
#' @param case the case to solve
#' @param precision.pd the precision to be used for probabilistic distributions of degrees comparisons
#' @param verbose if TRUE, will display detailed information on the console
#' @param indent the level of indentation for verbose messages
#' @return a solution with possibly more variables known
#'
#' @seealso this function relies on \code{\link{round_sum}} to round values
#' so their sums is preserved;
#' \code{\link{normalise}} to normalise variables;
#' \code{\link{update_degree_distribution}} to compute the detail
#' of degree distributions from average degrees.
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
propagate.direct <- function(sol, case, precision.pd=.Machine$double.eps, verbose=FALSE, indent=1) {
info.rule <- function(name, sol, verbose=FALSE, indent=3) {
if (verbose) {
cat(paste(rep("\t",times=indent),collapse=""),name,"\n",sep="")
}
sol$inference <- list(sol$inference, list(name))
sol
}
if (verbose) {
cat(paste(rep("\t",times=indent),collapse=""),"solving equations based on known variables: ",paste.known(sol),"\n",sep="")
}
if (is.null(sol$inference)) {
sol$inference <- list()
}
while (TRUE) {
changed <- FALSE
# pdi -> di
if (
("hat.pdi" %in% names(sol)) && (!"hat.di" %in% names(sol))
) {
sol <- info.rule("hat.pdi -> hat.di", sol, verbose=verbose, indent=indent+1)
sol$hat.di <- colSums(sol$hat.pdi*(0:(nrow(sol$hat.pdi)-1)))
changed <- TRUE
}
if (
("hat.pdj" %in% names(sol)) && (!"hat.dj" %in% names(sol))
) {
sol <- info.rule("hat.pdj -> hat.dj", sol, verbose=verbose, indent=indent+1)
sol$hat.dj <- colSums(sol$hat.pdj*(0:(nrow(sol$hat.pdj)-1)))
changed <- TRUE
}
# forward nA + fi -> ci
if (
("hat.nA" %in% names(sol)) && ("hat.fi" %in% names(sol)) && (!"hat.ci" %in% names(sol))
) {
sol$hat.ci <- round_sum(sol$hat.nA * sol$hat.fi)
sol$hat.fi <- sol$hat.ci / sol$hat.nA
sol <- info.rule("hat.nA, hat.fi -> hat.ci", sol, verbose=verbose, indent=indent+1)
#print("sol$hat.ci <- round(sol$hat.nA * sol$hat.fi)")
changed <- TRUE
}
# forward nB + fj -> cj
if (
("hat.nB" %in% names(sol)) && ("hat.fj" %in% names(sol)) && (!"hat.cj" %in% names(sol))
) {
sol$hat.cj <- round_sum(sol$hat.nB * sol$hat.fj)
# rounding might introduce a bias; update this proba
sol$hat.fj <- sol$hat.cj / sol$hat.nB
sol <- info.rule("hat.nB, hat.fj -> hat.cj", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# forward ci + di -> ni
if (
("hat.ci" %in% names(sol)) && ("hat.di" %in% names(sol)) && (!"hat.ni" %in% names(sol))
) {
# print(sol$hat.ci)
# print(sol$hat.di)
# print(sol$hat.ci * sol$hat.di)
sol$hat.ni <- round_sum(sol$hat.ci * sol$hat.di)
# adapt rounding
sol$hat.di <- nan_to_zeros(sol$hat.ni / sol$hat.ci)
# adapt because of rounding
sol$hat.pi <- normalise(sol$hat.ni)
sol <- info.rule("hat.ci, hat.di -> hat.ni [hat.pi]", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# forward cj + dj -> nj
if (
("hat.cj" %in% names(sol)) && ("hat.dj" %in% names(sol)) && (!"hat.nj" %in% names(sol))
) {
sol$hat.nj <- round_sum(sol$hat.cj * sol$hat.dj)
sol$hat.dj <- nan_to_zeros(sol$hat.nj / sol$hat.cj)
# adapt because of rounding
sol$hat.pj <- normalise(sol$hat.nj)
sol <- info.rule("hat.cj, hat.dj -> hat.nj [hat.pj]", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.ni" %in% names(sol)) && (!"hat.nL" %in% names(sol))
) {
sol$hat.nL <- sum(sol$hat.ni)
sol <- info.rule("hat.ni -> hat.nL", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.nj" %in% names(sol)) && (!"hat.nL" %in% names(sol))
) {
sol$hat.nL <- sum(sol$hat.nj)
sol <- info.rule("hat.nj -> hat.nL", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# forward sum(ci) -> nA
if (
("hat.ci" %in% names(sol)) && (!"hat.nA" %in% names(sol))
) {
sol$hat.nA <- sum(sol$hat.ci)
sol <- info.rule("hat.ci -> hat.nA", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.cj" %in% names(sol)) && (!"hat.nB" %in% names(sol))
) {
sol$hat.nB <- sum(sol$hat.cj)
sol <- info.rule("hat.cj -> hat.nB", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# based on ni and pij (either defined, or from inputs)
if (
("hat.ni" %in% names(sol))
&& ("hat.pij" %in% names(sol))
&& (!"hat.nij" %in% names(sol))
) {
pij <- if ("hat.pij" %in% names(sol)) sol$hat.pij else case$inputs$pij$data
sol$hat.nij <- nan_to_zeros( t(sol$hat.ni * t(pij) / colSums(pij)) )
# round per column (to keep the sums consistent)
for (i in 1:ncol(sol$hat.nij)) {
sol$hat.nij[,i] <- round_sum(sol$hat.nij[,i])
}
# adapt th based on rounded values
sol$hat.pij <- sol$hat.nij / sum(sol$hat.nij)
sol <- info.rule("hat.ni, hat.pij -> hat.nij [hat.pij]", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.nj" %in% names(sol))
&& ("hat.pij" %in% names(sol))
&& (!"hat.nij" %in% names(sol))
) {
pij <- if ("hat.pij" %in% names(sol)) sol$hat.pij else case$inputs$pij$data
sol$hat.nij <- nan_to_zeros( sol$hat.nj * pij / rowSums(pij) )
# round per line (to keep the sums consistent)
for (i in 1:nrow(sol$hat.nij)) {
sol$hat.nij[i,] <- round_sum(sol$hat.nij[i,])
}
sol$hat.pij <- sol$hat.nij / sum(sol$hat.nij)
sol <- info.rule("hat.nj, hat.pij -> hat.nij [hat.pij]", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.nL" %in% names(sol))
&& ("hat.pij" %in% names(sol))
&& (!"hat.nij" %in% names(sol))
) {
sol$hat.nij <- round_sum(sol$hat.pij * sol$hat.nL)
# round.
# if the totals of ci are known, then round per column
# (same for cj and rows)
# if ("hat.ci" %in% names(sol)) {
# for (i in 1:ncol(sol$hat.nij)) {
# sol$hat.nij[,i] <- round_sum(sol$hat.nij[,i])
# }
# } else if ("hat.cj" %in% names(sol)) {
# for (i in 1:nrow(sol$hat.nij)) {
# sol$hat.nij[i,] <- round_sum(sol$hat.nij[i,])
# }
# }
# TODO what if both ci and cj ?
# recompute pij, in case rounding would make things inconsistent
sol$hat.pij <- normalise(sol$hat.nij)
sol <- info.rule("hat.nL, hat.pij -> hat.nij [hat.pij]", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.nij" %in% names(sol)) && (!"hat.ni" %in% names(sol))
) {
sol$hat.ni <- colSums(sol$hat.nij)
if ("hat.pi" %in% names(sol)) {
sol <- info.rule("hat.nij -> hat.ni [hat.pi]", sol, verbose=verbose, indent=indent+1)
sol$hat.pi <- normalise(sol$hat.ni)
} else {
sol <- info.rule("hat.nij -> hat.ni", sol, verbose=verbose, indent=indent+1)
}
changed <- TRUE
}
if (
("hat.nij" %in% names(sol)) && (!"hat.nj" %in% names(sol))
) {
sol$hat.nj <- rowSums(sol$hat.nij)
if ("hat.pj" %in% names(sol)) {
sol <- info.rule("hat.nij -> hat.nj [hat.pj]", sol, verbose=verbose, indent=indent+1)
sol$hat.pj <- normalise(sol$hat.nj)
} else {
sol <- info.rule("hat.nij -> hat.nj", sol, verbose=verbose, indent=indent+1)
}
changed <- TRUE
}
# forward ni + di -> ci
if (
("hat.ni" %in% names(sol)) && ("hat.di" %in% names(sol)) && (!"hat.ci" %in% names(sol))
) {
candidate_ci <- sol$hat.ni / sol$hat.di
# if di = 0, then we cannot extrapolate ci based on it...
# ... yet we might just assume it might be any other value
# let's say we try to keep its relative frequency.
fi_for_missing <- if ("hat.fi" %in% names(sol)) sol$hat.fi else case$stats$fi
indices_not_inf <- which((!is.infinite(candidate_ci)) && (!is.nan(candidate_ci)))
average_ratio <- mean(candidate_ci[indices_not_inf] / fi_for_missing[indices_not_inf])
indices_wrong <- which(is.infinite(candidate_ci) | is.nan(candidate_ci))
candidate_ci[indices_wrong] <- fi_for_missing[indices_wrong] * average_ratio
sol$hat.ci <- round_sum(candidate_ci)
sol$hat.di[indices_not_inf] <- sol$hat.ni[indices_not_inf] / sol$hat.ci[indices_not_inf]
sol <- info.rule("hat.ni, hat.di -> hat.ci", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.nj" %in% names(sol)) && ("hat.dj" %in% names(sol)) && (!"hat.cj" %in% names(sol))
) {
candidate_cj <- sol$hat.nj / sol$hat.dj
# if di = 0, then we cannot extrapolate ci based on it...
# ... yet we might just assume it might be any other value
# let's say we try to keep its relative frequency.
fj_for_missing <- if ("hat.fj" %in% names(sol)) sol$hat.fj else case$stats$fj
indices_not_inf <- which((!is.infinite(candidate_cj)) && (!is.nan(candidate_cj)))
average_ratio <- mean(candidate_cj[indices_not_inf] / fj_for_missing[indices_not_inf])
indices_wrong <- which(is.infinite(candidate_cj) | is.nan(candidate_cj))
candidate_cj[indices_wrong] <- fj_for_missing[indices_wrong] * average_ratio
sol$hat.cj <- round_sum(candidate_cj)
sol$hat.dj[indices_not_inf] <- sol$hat.nj[indices_not_inf] / sol$hat.cj[indices_not_inf]
sol <- info.rule("hat.nj, hat.dj -> hat.cj", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# if we have card and nx, then we can assess degree
if (
("hat.ni" %in% names(sol)) && ("hat.ci" %in% names(sol)) && (!"hat.di" %in% names(sol))
) {
sol$hat.di <- pmax(
case$inputs$min.di,
pmin(
case$inputs$max.di,
nan_to_zeros(sol$hat.ni / sol$hat.ci)
)
)
sol <- info.rule("hat.ni, hat.ci -> hat.di", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.nj" %in% names(sol)) && ("hat.cj" %in% names(sol)) && (!"hat.dj" %in% names(sol))
) {
sol$hat.dj <- pmax(
case$inputs$min.dj,
pmin(
case$inputs$max.dj,
nan_to_zeros(sol$hat.nj / sol$hat.cj)
)
)
sol <- info.rule("hat.nj, hat.cj -> hat.dj", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# pij -> pi
if (
("hat.pij" %in% names(sol)) && (!"hat.pi" %in% names(sol))
) {
sol$hat.pi <- colSums(sol$hat.pij)
sol <- info.rule("hat.pij -> hat.pi", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.pij" %in% names(sol)) && (!"hat.pj" %in% names(sol))
) {
sol$hat.pj <- rowSums(sol$hat.pij)
sol <- info.rule("hat.pij -> hat.pj", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# TODO IPF
# pi, pj -> pij
# (IPF)
if (
("hat.pi" %in% names(sol)) && ("hat.pj" %in% names(sol)) && (!"hat.pij" %in% names(sol))
) {
sol <- info.rule("hat.pi, hat.pj -> hat.pij (IPF)", sol, verbose=verbose, indent=indent+1)
reweighted <- ipf.2d(
df=case$inputs$pij$data,
margins_cols=sol$hat.pi, margins_rows=sol$hat.pj,
max.iterations=1000, precision=1e-10, verbose=F) # verbose
sol$hat.pij <- reweighted
changed <- TRUE
}
# fi * pi -> di
if (
("hat.pi" %in% names(sol)) && ("hat.fi" %in% names(sol)) && (!"hat.di" %in% names(sol))
) {
# print("current hat.pi")
# print(sol$hat.pi)
# print("current hat.fi")
# print(sol$hat.fi)
sol$hat.di <- nan_to_zeros(sol$hat.pi / sol$hat.fi)
# fix potential NaNs due to di=0 by their native di counterparts (they do not matter anyway)
# print("current hat.di")
# print(sol$hat.di)
# the degrees higher than the possible one are set to the maximum
ids_too_high <- which(sol$hat.di > case$inputs$max.di)
sol$hat.di[ids_too_high] <- case$inputs$max.di[ids_too_high]
ids_too_low <- which(sol$hat.di < case$inputs$min.di)
sol$hat.di[ids_too_low] <- case$inputs$min.di[ids_too_low]
# now, maybe we cannot reach the expected values...
if (length(ids_too_high) + length(ids_too_low) > 0) {
if (case$inputs$phi.A > 0) { # case$inputs$gamma
# the user prefers to report the errors on fi !
sol <- info.rule("hat.pi, hat.fi -> hat.di [hat.fi]", sol, verbose=verbose, indent=indent+1)
sol$hat.fi <- normalise(nan_to_zeros(sol$hat.pi / sol$hat.di))
#print("hat.fi is now")
#print(sol$hat.fi)
}
# else if (case$inputs$phi.A < case$inputs$gamma) {
# # the user prefers to report the errors on pi
# sol <- info.rule("hat.pi, hat.fi -> hat.di [+ hat.pi + !hat.pij]", sol, verbose=verbose, indent=indent+1)
# sol$hat.pi <- normalise(nan_to_zeros(sol$hat.pi * sol$hat.di))
# sol$hat.pij <- NULL
# print("hat.pi")
# print(sol$hat.pi)
# }
else {
stop("not enough freedom on pdi to respect both fi and pi. Try relaxing phi.A, or add more potential degrees to pdi")
}
} else {
sol <- info.rule("hat.pi, hat.fi -> hat.di", sol, verbose=verbose, indent=indent+1)
}
# print("updated hat.di")
# print(sol$hat.di)
changed <- TRUE
}
if (
("hat.pj" %in% names(sol)) && ("hat.fj" %in% names(sol)) && (!"hat.dj" %in% names(sol))
) {
sol$hat.dj <- nan_to_zeros(sol$hat.pj / sol$hat.fj)
# the degrees higher than the possible one are set to the maximum
ids_too_high <- which(sol$hat.dj > case$inputs$max.dj)
sol$hat.dj[ids_too_high] <- case$inputs$max.dj[ids_too_high]
ids_too_low <- which(sol$hat.dj < case$inputs$min.dj)
sol$hat.dj[ids_too_low] <- case$inputs$min.dj[ids_too_low]
# now, maybe we cannot reach the expected values...
if (length(ids_too_high) + length(ids_too_low) > 0) {
if (case$inputs$phi.B > 0) { # case$inputs$gamma
# the user prefers to report the errors on fi !
sol <- info.rule("hat.pj, hat.fj -> hat.dj [hat.fj]", sol, verbose=verbose, indent=indent+1)
sol$hat.fj <- normalise(nan_to_zeros(sol$hat.pj / sol$hat.dj))
} else {
stop("not enough freedom on pdj to respect both fj and pj. Try relaxing phi.B, or add more potential degrees to pdj")
}
} else {
sol <- info.rule("hat.pj, hat.fj -> hat.dj", sol, verbose=verbose, indent=indent+1)
}
changed <- TRUE
}
# fj + dj -> pj
if (
("hat.fi" %in% names(sol)) && ("hat.di" %in% names(sol)) && (!"hat.pi" %in% names(sol))
) {
sol$hat.pi <- normalise(sol$hat.fi * sol$hat.di)
sol <- info.rule("hat.fi, hat.di -> hat.pi", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.fj" %in% names(sol)) && ("hat.dj" %in% names(sol)) && (!"hat.pj" %in% names(sol))
) {
sol$hat.pj <- normalise(sol$hat.fj * sol$hat.dj)
sol <- info.rule("hat.fj, hat.dj -> hat.pj", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# pj + dj -> fj
# TODO this should exist, but it leads to diffult problems. If a degree is close to 0, then dividing by it leads to weird frequencies.
# if (
# ("hat.pi" %in% names(sol)) && ("hat.di" %in% names(sol)) && (!"hat.fi" %in% names(sol))
# ) {
# print(sol$hat.pi)
# print(sol$hat.di)
# print(sol$hat.pi / sol$hat.di)
# sol$hat.fi <- normalise(sol$hat.pi / sol$hat.di)
# sol <- info.rule("hat.pi, hat.di -> hat.fi", sol, verbose=verbose, indent=indent+1)
# print(sol$hat.fi)
# print(sum(sol$hat.fi))
# changed <- TRUE
# }
# if (
# ("hat.pj" %in% names(sol)) && ("hat.dj" %in% names(sol)) && (!"hat.fj" %in% names(sol))
# ) {
# sol$hat.fj <- normalise(sol$hat.pj / sol$hat.dj)
# sol <- info.rule("hat.pj, hat.dj -> hat.fj", sol, verbose=verbose, indent=indent+1)
# print(sol$hat.fj)
# print(sum(sol$hat.fj))
# changed <- TRUE
# }
# pj + pij -> pij
# if (
# ("hat.pi" %in% names(sol)) && (!"hat.pij" %in% names(sol))
# ) {
# sol$hat.pij <- t(t(case$inputs$pij$data) / colSums(case$inputs$pij$data)) * sol$hat.pi
# sol <- info.rule("hat.pi -> hat.pij", sol, verbose=verbose, indent=indent+1)
# changed <- TRUE
# }
# if (
# ("hat.pj" %in% names(sol)) && (!"hat.pij" %in% names(sol))
# ) {
# sol$hat.pij <- case$inputs$pij$data / rowSums(case$inputs$pij$data) * sol$hat.pj
# sol <- info.rule("hat.pj -> hat.pij", sol, verbose=verbose, indent=indent+1)
# changed <- TRUE
# }
# ci -> fi
if (
("hat.ci" %in% names(sol)) && (!"hat.fi" %in% names(sol))
) {
sol$hat.fi <- sol$hat.ci / sum(sol$hat.ci)
sol <- info.rule("hat.ci -> hat.fi", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
if (
("hat.cj" %in% names(sol)) && (!"hat.fj" %in% names(sol))
) {
sol$hat.fj <- sol$hat.cj / sum(sol$hat.cj)
sol <- info.rule("hat.cj -> hat.fj", sol, verbose=verbose, indent=indent+1)
changed <- TRUE
}
# ndi -> ci
if (
("hat.ndi" %in% names(sol)) && (!"hat.ci" %in% names(sol))
) {
sol <- info.rule("hat.ndi -> hat.ci", sol, verbose=verbose, indent=indent+1)
sol$hat.ci <- colSums(sol$hat.ndi)
changed <- TRUE
}
if (
("hat.ndj" %in% names(sol)) && (!"hat.cj" %in% names(sol))
) {
sol <- info.rule("hat.ndj -> hat.cj", sol, verbose=verbose, indent=indent+1)
sol$hat.cj <- colSums(sol$hat.ndj)
changed <- TRUE
}
# pdi, ci -> ndi
if (
all(c("hat.pdi", "hat.ci", "hat.ni") %in% names(sol)) && (!"hat.ndi" %in% names(sol))
) {
sol <- info.rule("hat.pdi, hat.ci, hat.ni -> hat.ndi", sol, verbose=verbose, indent=indent+1)
sol$hat.ndi <- round_sum(t(t(sol$hat.pdi) * sol$hat.ci))
sol$hat.ndi <- rectify.degree.counts(sol$hat.ndi, sol$hat.ni, sol$hat.ci, verbose=F)
# update pdi after rounding (when divisible, else we keep the theorical version)
indices <- which(sol$hat.ci!=0)
sol$hat.pdi[indices] <- t(t(sol$hat.ndi[indices]) / sol$hat.ci[indices])
# update our past knowledge according to rounding
if ("hat.di" %in% names(sol)) {
sol$hat.di <- colSums(sol$hat.pdi * (0:(nrow(sol$hat.pdi)-1)))
#print("now hat.di")
#print(sol$hat.di)
}
changed <- TRUE
}
if (
all(c("hat.pdj", "hat.cj", "hat.nj") %in% names(sol)) && (!"hat.ndj" %in% names(sol))
) {
sol <- info.rule("hat.pdj, hat.cj, hat.nj -> hat.ndj", sol, verbose=verbose, indent=indent+1)
sol$hat.ndj <- round_sum(t(t(sol$hat.pdj) * sol$hat.cj))
sol$hat.ndj <- rectify.degree.counts(sol$hat.ndj, sol$hat.nj, sol$hat.cj, verbose=F)
# update pdi after rounding (when divisible, else we keep the theorical version)
indices <- which(sol$hat.cj!=0)
sol$hat.pdj[indices] <- as.data.frame(
t(t(sol$hat.ndj[indices]) / sol$hat.cj[indices]),
optional=FALSE)
# update our past knowledge according to rounding
if ("hat.dj" %in% names(sol)) {
sol$hat.dj <- colSums(sol$hat.pdj * (0:(nrow(sol$hat.pdj)-1)))
}
changed <- TRUE
}
# di -> pdi
if (
("hat.di" %in% names(sol)) && (!"hat.pdi" %in% names(sol))
) {
sol <- info.rule("hat.di -> hat.pdi", sol, verbose=verbose, indent=indent+1)
sol$hat.pdi <- tryCatch({
update_degree_distribution(case$inputs$pdi$data, sol$hat.di, precision=precision.pd, verbose=F)
}, error = function(e) {
if (verbose)
cat(rep("\t",times=indent+2),
"unable to update hat.pdi to fit hat.di=",paste(sol$hat.di,collapse=","),": ",e$message,"\n",sep="")
stop("The case is too constrained to be solved (hat.di are not compliant with the original pdi)")
})
changed <- TRUE
}
if (
("hat.dj" %in% names(sol)) && (!"hat.pdj" %in% names(sol))
) {
sol <- info.rule("hat.dj -> hat.pdj", sol, verbose=verbose, indent=indent+1)
sol$hat.pdj <- tryCatch({
update_degree_distribution(case$inputs$pdj$data, sol$hat.dj)
}, error = function(e) {
if (verbose)
cat(rep("\t",times=indent+2),"unable to update hat.pdj to fit hat.dj=",paste(sol$hat.dj,collapse=","),": ",e$message,,"\n",sep="")
stop("The case is too constrained to be solved (probabilities hat.dj are not compliant with the original pdj)")
})
changed <- TRUE
}
# stop looping when we changed nothing
# (so we are sure we cannot apply any novel equation)
if (!changed) {
break
}
}
sol
}
#' Asserts two values are equal.
#'
#' In the context of checking the consistency of a solution,
#' ensures the content are similar.
#'
#' @param v1 a scalar, vector, matrix or data frame
#' @param v2 a scalar, vector, matrix or data frame
#' @param msg the message to display in case of failure
#' @param indent the indentation (count of tabs) for verbose display
#' @param verbose if TRUE, assertion errors will be displayed in the console
#' @param tolerance the numeric tolerance to define two numeric are equal
#' @return 1 in case of error else 0
#'
#' @keywords internal
#'
assert.equal <- function(v1,v2,msg, verbose=FALSE, indent=3, tolerance=1.5e-8) {
if (!identical(v1,v2) && !(all.equal(v1,v2,tolerance=tolerance)==TRUE)) { # && ((length(v1) > 1) & sum((v1-v2)^2) > 0)
if (verbose) {
if (is.numeric(v1) && is.numeric(v2)) {
cat(rep("\t",times=indent),
"assert error on '",msg,"': ",
paste(v1,collapse=",")," != ",paste(v2,collapse=","),
"\n",sep="")
} else {
cat(rep("\t",times=indent),
"assert error on '",msg,"' ",
"\n",sep="")
cat(rep("\t",times=indent),all.equal(v1,v2),"\n",sep="")
}
# print(v1)
# print(v2)
#print( all.equal(v1,v2) )
}
return(1)
}
0
}
#' Checks the consistency of a solution
#'
#' Takes a temptative solution and the initial case to resolve.
#' Tests wether the equations all lead to consistent results of
#' do contradict each other. Returns an error with \code{stop()}
#' in case of error if \code{fail=TRUE}; else just returns the
#' count of problems.
#'
#' @param sol the current solution (a named list)
#' @param case the case to be solved
#' @param fail if TRUE, an error will call stop()
#' @param verbose if TRUE, will ddetect.problemsisplay detailed information on the console
#' @param indent the indentation (count of tabs) for verbose display
#' @param tolerance.pd the tolerance for probability distributions
#' @param tolerance.degree.max the tolerance for min/max average degrees
#' @param tolerance.pij the tolerance for pij probabilities
#' @return the count of problems
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
detect.problems <- function(sol, case, fail=TRUE, verbose=FALSE, indent=1, tolerance.pd=1.5e-8, tolerance.degree.max=1.5e-8, tolerance.pij=1e-4) {
if (verbose) {
cat(rep("\t",times=indent),"checking the consistency of the current solution with ",paste.known(sol),"\n",sep="")
}
problems <- 0
# nA = sum(ni)
if (!is.null(sol$hat.nA) && !is.null(sol$hat.ci)) {
problems <- problems + assert.equal(
sol$hat.nA, sum(sol$hat.ci),
"hat.nA = sum(hat.ci)",
verbose=verbose,
indent=indent+1)
}
if (!is.null(sol$hat.nB) && !is.null(sol$hat.cj)) {
problems <- problems + assert.equal(
sol$hat.nB, sum(sol$hat.cj),
"hat.nB = sum(hat.cj)",
verbose=verbose,
indent=indent+1)
}
# ni / nA = fi
if (!is.null(sol$hat.nA) && !is.null(sol$hat.ci) && !is.null(sol$hat.fi)) {
problems <- problems + assert.equal(
as.vector(sol$hat.ci/sol$hat.nA),
as.vector(sol$hat.fi),
"hat.ci/hat.nA = hat.fi",
verbose=verbose, indent=indent+1, tolerance=1)
}
if (!is.null(sol$hat.nB) && !is.null(sol$hat.cj) && !is.null(sol$hat.fj)) {
problems <- problems + assert.equal(
as.vector(sol$hat.cj/sol$hat.nB),
as.vector(sol$hat.fj),
"hat.cj/hat.nB = hat.fj",
verbose=verbose, indent=indent+1, tolerance=1)
}
# sum(fi) = 1
if (!is.null(sol$hat.fi)) {
problems <- problems + assert.equal(
1, sum(sol$hat.fi),
"sum(hat.fi)=1",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.fj)) {
problems <- problems + assert.equal(
1, sum(sol$hat.fj),
"sum(hat.fj)=1",
verbose=verbose, indent=indent+1)
}
# ni = ci*di
if (!is.null(sol$hat.ci) && !is.null(sol$hat.di) && !is.null(sol$hat.ni)) {
problems <- problems + assert.equal(
as.vector(sol$hat.ni),
as.vector(sol$hat.ci*sol$hat.di),
"hat.ni = hat.ci*hat.di",
verbose=verbose, indent=indent+1,
tolerance=1)
}
if (!is.null(sol$hat.cj) && !is.null(sol$hat.dj) && !is.null(sol$hat.nj)) {
problems <- problems + assert.equal(
as.vector(sol$hat.nj),
as.vector(sol$hat.cj*sol$hat.dj),
"hat.nj = hat.cj*hat.dj",
verbose=verbose, indent=indent+1,
tolerance=1)
}
# nL = sum(ni)
if (!is.null(sol$hat.nL) && !is.null(sol[["hat.ni", exact=TRUE]])) {
problems <- problems + assert.equal(
sol$hat.nL, sum(sol$hat.ni),
"hat.nL = sum(hat.ni)",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.nL) && !is.null(sol$hat.nj)) {
problems <- problems + assert.equal(
sol$hat.nL, sum(sol$hat.nj),
"hat.nL = sum(hat.nj)",
verbose=verbose, indent=indent+1)
}
# nL = sum(nij)
if (!is.null(sol$hat.nL) && !is.null(sol$hat.nij)) {
problems <- problems + assert.equal(
sol$hat.nL, sum(sol$hat.nij),
"hat.nL = sum(hat.nij)",
verbose=verbose, indent=indent+1)
}
# sum(hat.pij) = 1
if (!is.null(sol$hat.pij)) {
problems <- problems + assert.equal(
1, sum(sol$hat.pij),
"sum(hat.pij) = 1",
verbose=verbose, indent=indent+1)
}
# hat.pij = hat.nij/sum(hat.nij)
if (!is.null(sol$hat.nij) && !is.null(sol$hat.pij)) {
problems <- problems + assert.equal(
sol$hat.nij/sum(sol$hat.nij), sol$hat.pij,
"hat.pij = hat.nij/sum(hat.nij)",
verbose=verbose, indent=indent+1)
}
# colSum(hat.nij) = hat.ni
if (!is.null(sol$hat.nij) && !is.null(sol[["hat.ni", exact=TRUE]])) {
problems <- problems + assert.equal(
as.vector(colSums(sol$hat.nij)), as.vector(sol$hat.ni),
"hat.ni = colSums(hat.nij)",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.nij) && !is.null(sol$hat.nj)) {
problems <- problems + assert.equal(
as.vector(rowSums(sol$hat.nij)), as.vector(sol$hat.nj),
"hat.nj = rowSums(hat.nij)",
verbose=verbose, indent=indent+1)
}
# colSum(hat.pij) = hat.pi
if (!is.null(sol$hat.pij) && !is.null(sol[["hat.pi", exact=TRUE]])) {
problems <- problems + assert.equal(
as.vector(colSums(sol$hat.pij)), as.vector(sol$hat.pi),
"hat.pi = colSums(hat.pij)",
verbose=verbose, indent=indent+1,
tolerance=tolerance.pij)
}
if (!is.null(sol$hat.pij) && !is.null(sol$hat.pj)) {
problems <- problems + assert.equal(
as.vector(rowSums(sol$hat.pij)), as.vector(sol$hat.pj),
"hat.pj = rowSums(hat.pij)",
verbose=verbose, indent=indent+1,
tolerance=tolerance.pij)
}
# vsum(pdi) = 1
if (!is.null(sol$hat.pdi)) {
for (i in 1:ncol(sol$hat.pdi)) {
problems <- problems + assert.equal(
1,
sum(sol$hat.pdi[,i]),
paste("sum(hat.pdi[",i,"])=1 (you can tune tolerance.pd)",sep=""),
verbose=verbose, indent=indent+1,
tolerance=tolerance.pd)
}
}
if (!is.null(sol$hat.pdj)) {
for (i in 1:ncol(sol$hat.pdj)) {
problems <- problems + assert.equal(
1,
sum(sol$hat.pdj[,i]),
paste("sum(hat.pdj[",i,"])=1 (you can tune tolerance.pd)",sep=""),
verbose=verbose, indent=indent+1,
tolerance=tolerance.pd)
}
}
# sum(hat.ndi) = hat.ci
if (!is.null(sol$hat.ndi) && !is.null(sol$hat.ci)) {
problems <- problems + assert.equal(
as.vector(sol$hat.ci),
as.vector(colSums(sol$hat.ndi)),
"hat.ci = sum(hat.ndi)",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.ndj) && !is.null(sol$hat.cj)) {
problems <- problems + assert.equal(
as.vector(sol$hat.cj),
as.vector(colSums(sol$hat.ndj)),
"hat.cj = sum(hat.ndj)",
verbose=verbose, indent=indent+1)
}
# sum(hat.ndi * n) = hat.ni
if (!is.null(sol$hat.ndi) && !is.null(sol[["hat.ni", exact=TRUE]])) {
problems <- problems + assert.equal(
as.vector(colSums(sol$hat.ndi * 0:(nrow(sol$hat.ndi)-1))),
as.vector(sol$hat.ni),
"hat.ni = sum( n * ndi[n])",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.ndj) && !is.null(sol$hat.nj)) {
problems <- problems + assert.equal(
as.vector(colSums(sol$hat.ndj * 0:(nrow(sol$hat.ndj)-1))),
as.vector(sol$hat.nj),
"hat.nj = sum( n * ndj[n])",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.di)) {
problems <- problems + assert.equal(
FALSE,
any(case$inputs$min.di - sol$hat.di > tolerance.degree.max),
"hat.di >= min.di (try increasing tolerance.degree.max)",
verbose=verbose, indent=indent+1)
problems <- problems + assert.equal(
FALSE,
any(sol$hat.di - case$inputs$max.di > tolerance.degree.max),
"hat.di <= max.di (try increasing tolerance.degree.max)",
verbose=verbose, indent=indent+1)
}
if (!is.null(sol$hat.dj)) {
problems <- problems + assert.equal(
FALSE,
any(case$inputs$min.dj - sol$hat.dj > tolerance.degree.max),
"hat.dj >= min.dj (try increasing tolerance.degree.max)",
verbose=verbose, indent=indent+1)
problems <- problems + assert.equal(
FALSE,
any(sol$hat.dj - case$inputs$max.dj > tolerance.degree.max),
"hat.dj <= max.dj (try increasing tolerance.degree.max)",
verbose=verbose, indent=indent+1)
}
if (verbose) {
if (problems > 0) {
#print(sol)
cat(rep("\t",times=indent),"=> the solution is NOT consistent: ",problems," problems detected\n", sep="")
} else {
cat(rep("\t",times=indent),"=> the solution is consistent\n", sep="")
}
}
if (fail && (problems > 0)) {
stop("The case is too constrained to be solved (led to incoherent state)")
}
problems
}
#' Detects the missing chains of variables
#'
#' For a given temptative solution supposed to be consistent,
#' checks the successive variables which depend on each other and
#' are not yet solved. These chains of variables probably require
#' using hypothesis for actual resolution.
#'
#' @param sol the current solution (a named list)
#' @param verbose if TRUE, will display detailed information on the console
#' @return a list of vectors (the chains) of strings
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
detect.missing.chains <- function(sol, verbose=FALSE) {
if (verbose) {
cat("we know: ",paste.known(sol),"\n",sep="")
}
missing.chains <- list()
current.chain <- NULL
# "hat.ni", "hat.nj",
chain.n <- c("hat.nA", "hat.ci", "hat.fi", "hat.di", "hat.pij", "hat.dj", "hat.fj", "hat.cj", "hat.nB")
for (i in 1:length(chain.n)) {
var.name <- chain.n[[i]]
#print(var.name)
if (!(var.name %in% names(sol))) {
#cat("missing",var.name,"\n")
if (is.null(current.chain)) {
# new chain
current.chain <- c(var.name)
} else {
# continue with the same chain
current.chain <- c(current.chain, var.name)
}
} else {
# not missing
if (!is.null(current.chain)) {
missing.chains[[length(missing.chains)+1]] <- current.chain
current.chain <- NULL
}
}
}
if (!is.null(current.chain)) {
missing.chains[[length(missing.chains)+1]] <- current.chain
current.chain <- NULL
}
if (verbose) {
if (length(missing.chains) > 0) {
cat("\tthere are ",length(missing.chains)," missing chains:\n")
for (c in missing.chains) {
cat("\t\t", paste(c,collapse=","))
}
} else {
cat("\tthere is no missing chain in this solution.")
}
}
missing.chains
}
#' Concatenates the known values
#'
#' Only keeps in a solution (a list) the variables names which correspondond to
#' the chain of variables we need to solve
#'
#' @param sol the current solution (a list)
#' @return a string
#'
#' @keywords internal
#'
paste.known <- function(sol) {
paste(
intersect(
names(sol),
c("hat.nA", "hat.ci", "hat.fi", "hat.di", "hat.pij", "hat.dj", "hat.fj", "hat.cj", "hat.nB")
),
collapse=", "
)
}
#' generates the hypothesis to explore a given solution
#'
#' Explores all the combinations of variables which are missing
#' and might be set to user inputs.
#'
#' @param sol a current solution
#'Â @return a list of lists of strings
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @importFrom utils combn
#'
#' @keywords internal
#'
generate.hypothesis <- function(sol) {
required_variables <- c("hat.nA","hat.fi",
"hat.di", # "hat.pdi",
"hat.pij",
"hat.dj", #"hat.pdj",
"hat.fj","hat.nB")
missing_variables <- base::setdiff(required_variables, names(sol))
#Â generate the combinations
hypothesis <- unlist(lapply(1:length(required_variables), function(x) combn(required_variables,x,simplify=FALSE)), recursive=FALSE)
hypothesis
}
#' A string representing an hypothesis
#'
#' For a given hypothesis l("hat.di","hat.fi"),
#' returns a string like "hat.di=di, hat.fi=fi"
#'
#' @param hypothesis a list of strings representing variables, as generated by \code{\link{generate.hypothesis}}
#'Â @return a string
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
name.hypothesis <- function(hypothesis) {
var_names <- lapply(hypothesis, function (x) substr(x, 5, nchar(x)))
paste(
paste(
hypothesis,
var_names,
sep="="
),
collapse=","
)
}
#' Asserts a given hypothesis on a given solution for a given case
#'
#' For an hypothesis such as l("hat.fi","hat.pdi"), sets the corresponding
#' variables in the solution such as sol$hat.fi=case$fi and sol$hat.pdi=case$pdi.
#'
#' @param hypothesis a list of strings representing variables, as generated by \code{\link{generate.hypothesis}}
#' @param sol a current solution
#' @param case the case containing the initial values
#' @param verbose FALSE by default
#'Â @return the solution with more variables
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @importFrom utils combn
#'
#' @keywords internal
#'
assert.hypothesis <- function(hypothesis, sol, case, nA, nB, verbose=FALSE) {
# find the initial variable names (without the prefix "hat.")
sol.hyp <- sol
for (var.hat.name in hypothesis) {
if (var.hat.name == "hat.di") { sol.hyp[[var.hat.name]] <- case$inputs$di * case$masks$mask.di.all }
else if (var.hat.name == "hat.dj") { sol.hyp[[var.hat.name]] <- case$inputs$dj * case$masks$mask.dj.all }
else if (var.hat.name == "hat.pdi") { sol.hyp[[var.hat.name]] <- case$inputs$pdi$data * case$masks$mask.di.all }
else if (var.hat.name == "hat.pdj") { sol.hyp[[var.hat.name]] <- case$inputs$pdj$data * case$masks$mask.dj.all }
else if (var.hat.name == "hat.fi") { sol.hyp[[var.hat.name]] <- normalise(case$stats$fi * case$masks$mask.fi.all) }
else if (var.hat.name == "hat.fj") { sol.hyp[[var.hat.name]] <- normalise(case$stats$fj * case$masks$mask.fj.all)}
else if (var.hat.name == "hat.pij") { sol.hyp[[var.hat.name]] <- normalise(case$inputs$pij$data * case$masks$mask.pij.all) }
else if (var.hat.name == "hat.nA") { sol.hyp[[var.hat.name]] <- nA }
else if (var.hat.name == "hat.nB") { sol.hyp[[var.hat.name]] <- nB }
else {
# we should never reach this step.
# we screwed up on the list of variables at the head of the for loop
stop("/!\\ cannot create such an hypothesis\n")
}
}
sol.hyp
}
#' Solves a chain of missing variables
#'
#' For a given incomplete solution, a set of dependant variables to solve,
#' a generation case, and targets defined by the user, tries to identify
#' the best solution (if any).
#'
#' It works by (i) trying to define hypothesis such as "let's try to respect
#' the ideal value for this variable". It then (ii) relies on inference to
#' determine the consequences of this hypothesis, and checks (iii) whether
#' this solution is consistent.
#'
#' At the end of this process, we might have no solution, one unique solution or
#' several ones. If several solutions are available, the best one is taken, with best
#' being defined as the solution which minimizes the cumulated errors weighted by the
#' user weights.
#'
#' @param sol the current solution (a named list)
#' @param chain the chain to be solved (a list of strings containing variable names)
#' @param case the case to solve
#' @param nA the target population size for A
#' @param nB the target population size for B
#' @param nu.A control for nA: 0 means "respect nA", non-null "adapt it to solve the case"
#' @param phi.A control for frequencies: 0 means "respect the original frequencies as detected in the sample", non-null "adapt it to solve the case"
#' @param delta.A control for degree A: 0 means "respect the input parameters pdi", non-null "adapt them to solve the case"
#' @param gamma control for pij: 0 means "respect the matching probabilities pij", non-null "adapt them to solve the case"
#' @param delta.B control for degree B: 0 means "respect the input parameters pdj", non-null "adapt them to solve the case"
#' @param phi.B control for frequencies: 0 means "respect the original frequencies as detected in the sample", non-null "adapt it to solve the case"
#' @param nu.B control for nB: 0 means "respect nB", non-null "adapt it to solve the case"
#' @param verbose if TRUE, will display detailed information on the console
#' @param tolerance.pd the tolerance for probability distributions
#' @param tolerance.degree.max the tolerance for min/max average degrees
#' @param tolerance.pij the tolerance for pij probabilities
#' @return a list of vectors (the chains) of strings
#'
#' @seealso \code{\link{propagate.direct}} for the inference of the consequences of the hypothesis,
#' \code{\link{detect.problems}} to ensure potential solutions are consistent
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @importFrom utils combn
#'
#' @keywords internal
#'
resolve.missing.chain <- function(sol, chain, case,
nA, nB,
nu.A, phi.A, delta.A, nu.B, phi.B, delta.B, gamma,
verbose=FALSE,
tolerance.pd=1.5e-8, tolerance.degree.max=1.5e-8, tolerance.pij=1e-6) {
if (verbose)
cat("\tstarting the investigation of the missing chain: ",paste(chain,collapse=","),"\n",sep="")
solutions <- list()
testable_variables <- intersect(
chain,
c("hat.di","hat.dj","hat.fi","hat.fj","hat.pij","hat.nA","hat.nB")
)
hypothesis <- unlist(lapply(1:length(testable_variables), function(x) combn(testable_variables,x,simplify=FALSE)), recursive=FALSE)
if (verbose)
cat("\t\twill test ",length(hypothesis)," hypothesis\n",sep="")
# iterate all the missing elements of the chain that we might put hypothesis on
for (tested_variables in hypothesis) {
var_names <- lapply(tested_variables, function (x) substr(x, 5, nchar(x)))
hypothesis_name <- paste(
paste(
tested_variables,
var_names,
sep="="
),
collapse=","
)
if (verbose) {
cat("\t\ttrying with hypothesis: ", hypothesis_name, "\n", sep="")
}
sol.hyp <- sol
explored <- list()
explored$investigated.var.name <- hypothesis_name
explored$hypothesis <- setdiff(chain,tested_variables)
for (var.hat.name in tested_variables) {
if (var.hat.name == "hat.di") { sol.hyp[[var.hat.name]] <- case$inputs$di * case$masks$mask.di.all }
else if (var.hat.name == "hat.dj") { sol.hyp[[var.hat.name]] <- case$inputs$dj * case$masks$mask.dj.all }
else if (var.hat.name == "hat.fi") { sol.hyp[[var.hat.name]] <- normalise(case$stats$fi * case$masks$mask.fi.all) }
else if (var.hat.name == "hat.fj") { sol.hyp[[var.hat.name]] <- normalise(case$stats$fj * case$masks$mask.fj.all)}
else if (var.hat.name == "hat.pij") { sol.hyp[[var.hat.name]] <- normalise(case$inputs$pij$data * case$masks$mask.pij.all) }
else if (var.hat.name == "hat.nA") { sol.hyp[[var.hat.name]] <- nA }
else if (var.hat.name == "hat.nB") { sol.hyp[[var.hat.name]] <- nB }
else {
# we should never reach this step.
# we screwed up on the list of variables at the head of the for loop
stop("/!\\ cannot create such an hypothesis\n")
}
}
if (is.null(sol.hyp$inference)) {
sol.hyp$inference <- list()
}
sol.hyp$inference <- list(sol.hyp$inference, list(paste("hypothesis on ",hypothesis_name,sep="")))
# propagate on this basis
sol.hyp <- tryCatch({
propagate.direct(sol.hyp, case, verbose=verbose, indent=3)
}, error = function(e) {
if (verbose) {
cat(paste("\t\t\tfound an invalid solution (inference failed: ", e, ")\n", sep=""))
}
NULL
})
# so know we have an estimation
if (!is.null(sol.hyp)) {
if (!all(chain %in% names(sol.hyp))) {
# we cannot infer anything from this hypothesis; let's ignore it
if (verbose) {
cat("\t\t\tthese hypothesis do not enable the resolution of all the variables.\n")
}
} else {
# do we achieve to solve the problem on this basis ?
nb.problems <- detect.problems(
sol.hyp, case,
fail=FALSE, verbose=verbose, indent=3,
tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
if (nb.problems > 0) {
#cat("this variable does not provides a valid solution to our problem","\n")
if (verbose) {
cat(paste("\t\t\twe found an invalid solution (",nb.problems," inconsistencies found)\n", sep=""))
}
} else {
if (verbose) {
cat("\t\t\twe found a valid solution which provides: ", paste.known(sol.hyp),"\n")
}
sol.hyp <- quantify.errors(sol.hyp, case, nA, nB)
if (verbose) {
cat(paste("\t\t\there are the NRMSE errors of this solution: ",
"nA=",formatC(sol.hyp$nrmse.nA, format="G"), ", fi=",formatC(sol.hyp$nrmse.fi, format="G"),
", di=",formatC(sol.hyp$nrmse.di, format="G"), ", pij=",formatC(sol.hyp$nrmse.pij, format="G"),
", dj=",formatC(sol.hyp$nrmse.dj, format="G"), ", fj=",formatC(sol.hyp$nrmse.fj, format="G"),
", nB=",formatC(sol.hyp$nrmse.nB, format="G"), "\n",
sep=""
))
}
explored$sol <- sol.hyp
solutions[[length(solutions)+1]] <- explored
}
}
}
}
# we did everything we could.
# now "solutions" contains the valid solutions which were found
res <- NULL
if (length(solutions) == 0) {
if (verbose)
cat("\t\t\tfound no solution for this chain :-(\n")
} else if (length(solutions) == 1) {
# return the unique solution
res <- solutions[[1]]$sol
if (verbose)
cat("\t\t\tfound one valid solution\n")
} else {
# create a vector with errors
val.or.0 <- function(x) {
if (is.null(x) || is.nan(x)) {
return(0)
} else {
return(x)
}
}
cumulated.errors <- rep(0, times=length(solutions))
cumulated.errors.constrainsts <- rep(0, times=length(solutions))
weights <- c(nu.A, phi.A, delta.A, gamma, delta.B, phi.B, nu.B)
weights.names <- c("nu.A", "phi.A", "delta.A", "gamma", "delta.B", "phi.B", "nu.B")
indices_weights_not_null <- which(weights > 0)
indices_weights_null <- which(weights == 0)
rnmse.names <- c("RNMSE.nA", "RNMSE.fi", "RNMSE.pdi", "RNMSE.pij", "RNMSE.pdj", "RNMSE.fj", "RNMSE.nB")
if (verbose)
cat("\t\tfound ",length(solutions)," solutions, we have to select the best according to weights",
paste(weights.names, weights, collapse=", ", sep="="),"\n")
for (i in 1:length(solutions)) {
s <- solutions[[i]]
errors <- c(
val.or.0(s$sol$nrmse.nA),
val.or.0(s$sol$nrmse.fi),
val.or.0(s$sol$nrmse.pdi),
val.or.0(s$sol$nrmse.pij),
val.or.0(s$sol$nrmse.pdj),
val.or.0(s$sol$nrmse.fj),
val.or.0(s$sol$nrmse.nB)
)
weighted <- errors[indices_weights_not_null] / weights[indices_weights_not_null]
cumulated.errors[i] <- sum(weighted)
cumulated.errors.constrainsts[i] <- sum(errors[indices_weights_null])
if (verbose)
cat(paste(
"\t\t\tsolution (",i,") (", formatC(cumulated.errors[i],format="G"), ") => ", s$investigated.var.name, "\n",
"\t\t\t\t\t\t", paste(rnmse.names[indices_weights_not_null], formatC(errors[indices_weights_not_null],format="G"),collapse=", ",sep="="), "\n",
"\t\t\t\t weighted: \t",paste(rnmse.names[indices_weights_not_null], formatC(weighted,format="G"), collapse=", ",sep="="),"\n",
"\t\t\t\t constrainsts: \t",paste(rnmse.names[indices_weights_null], formatC(errors[indices_weights_null],format="G"), collapse=", ",sep="="),"\n",
sep=""))
}
# the best solution is either the solution which minimizes the errors on constrainsts (if there is any difference),
# or the solution minimizing cumulated errors on relaxed parameters.
weight.by.constrainsts <- length(unique(cumulated.errors.constrainsts))>1
best.solutions <- if (weight.by.constrainsts)
which(cumulated.errors.constrainsts == min(cumulated.errors.constrainsts))
else
which(cumulated.errors == min(cumulated.errors))
best.solution <- NULL
if (weight.by.constrainsts && (length(best.solutions) > 1)) {
# the solutions are equivalent by constraints; why one is the best according to weights ?
best.solutions <- which(
(cumulated.errors.constrainsts == min(cumulated.errors.constrainsts)) &
(cumulated.errors == min(cumulated.errors)))
}
if (length(best.solutions) > 1) {
best.solution <- sample(best.solutions, 1)
# TODO warning ? systematic message ?
if (verbose) {
cat("\t\t\tthe best solutions are solutions ",paste(best.solutions,collapse=",")," based on hypothesis:\n",sep="")
for (idx in best.solutions)
cat("\t\t\t\t* solution ",idx," solved with:\t",solutions[[idx]]$investigated.var.name,"\n",sep="")
#cat("\t\t\tthere are multiple best solutions (that is: ",length(best.solutions),") ; just selecting one randomly\n",sep="")
cat("\t\t=> selected ",best.solution, " which is one of the ",length(best.solutions)," solutions having ", sep="")
if (weight.by.constrainsts)
cat("lowest error for constrainsts\n",sep="")
else
cat("lowest weighted error\n",sep="")
}
} else {
best.solution <- best.solutions[[1]]
if (verbose) {
cat("\t\t=> will keep solution ",best.solution," which is the one with the ",sep="")
if (weight.by.constrainsts)
cat("lowest error for constrainsts\n",sep="")
else
cat("lowest weighted error\n",sep="")
}
}
if (verbose) {
cat("\t\t\tthis solution is based on hypothesis: ",solutions[[best.solution]]$investigated.var.name,"\n",sep="")
}
res <- solutions[[best.solution]]$sol
#print(names(solutions[[best.solution]]$sol))
}
res
}
#' Selects one best solution among a list of possible solutions
#'
#' Relying on the weigthed error of each solution, selects one of the best ones.
#'
#' @param solutions a list of solutions such as generated inside \code{\link{resolve}}
#' @inheritParams resolve
#' @return a solution solved
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
best.solution <- function(solutions,
nu.A, phi.A, delta.A,
gamma,
nu.B, phi.B, delta.B,
verbose=FALSE) {
# no solution ? => no solution !
if (length(solutions) == 0) {
if (verbose)
cat("\t\t\tfound no solution for this chain :-(\n")
stop("no solution found; the case seems overconstrained. You might try to relax relaxation parameters, or relax.zeros your input distributions, or relax the tolerance margins. Keep trying!")
}
# one solution
if (length(solutions) == 1) {
# return the unique solution
if (verbose)
cat("\t\t\tfound one valid solution\n")
return(solutions[[1]]$sol)
}
# several solutions
weights <- c(nu.A, phi.A, delta.A, gamma, delta.B, phi.B, nu.B)
weights.names <- c("nu.A", "phi.A", "delta.A", "gamma", "delta.B", "phi.B", "nu.B")
indices_weights_not_null <- which(weights > 0)
indices_weights_null <- which(weights == 0)
rnmse.names <- c("RNMSE.nA", "RNMSE.fi", "RNMSE.pdi", "RNMSE.pij", "RNMSE.pdj", "RNMSE.fj", "RNMSE.nB")
if (verbose)
cat("\t\tfound ",length(solutions)," solutions, we have to select the best according to weights",
paste(weights.names, weights, collapse=", ", sep="="),"\n")
# mesure the weighted error for each solution
cumulated.errors <- rep(0, times=length(solutions))
for (i in 1:length(solutions)) {
s <- solutions[[i]]
rnmse <- c(s$sol$nrmse.nA, s$sol$nrmse.fi, s$sol$nrmse.pdi, s$sol$nrmse.pij, s$sol$nrmse.pdj, s$sol$nrmse.fj, s$sol$nrmse.nB)
#Â The weighted sum is made of the errors of the relaxed parameters divided by relaxation (the more relaxed, the less important the error is).
# We add the errors on parameters supposed to be relaxed times 10000, that is they count a lot !
cumulated.errors[i] <- sum(rnmse[indices_weights_not_null] / weights[indices_weights_not_null]) +
sum(rnmse[indices_weights_null] * 10000)
if (verbose)
cat(paste(
"\t\t\tsolution (",i,") (", formatC(cumulated.errors[i],format="G"), ") => ", s$hypothesis, "\n",
"\t\t\t\t errors: \t ", paste(rnmse.names, formatC(rnmse,format="G"),collapse=", ",sep="="), "\n",
#"\t\t\t\t weighted: \t",paste(rnmse.names[indices_weights_not_null], formatC(weighted,format="G"), collapse=", ",sep="="),"\n",
#rnmse[indices_weights_null]
sep=""))
}
# select the solution(s) having the lowest error
best.solutions <- which(cumulated.errors == min(cumulated.errors))
best <- NULL
if (length(best.solutions) == 1) {
if (verbose)
cat("\t\t=> will keep solution ",best," which is the one with the lowest weighted error\n",sep="")
best <- best.solutions[[1]]
} else {
best <- sample(best.solutions, 1)
# TODO warning ? systematic message ?
if (verbose) {
cat("\t\t\tthe best solutions are solutions ",paste(best.solutions,collapse=",")," based on hypothesis:\n",sep="")
for (idx in best.solutions)
cat("\t\t\t\t* solution ",idx," solved with:\t",solutions[[idx]]$hypothesis,"\n",sep="")
#cat("\t\t\tthere are multiple best solutions (that is: ",length(best.solutions),") ; just selecting one randomly\n",sep="")
cat("\t\t=> randomly selected ",best, " which is one of the ",length(best.solutions)," solutions having lowest weighted error\n", sep="")
}
}
if (verbose) {
cat("\t\t\tthis solution is based on hypothesis: ",solutions[[best]]$hypothesis,"\n",sep="")
}
solutions[[best]]$sol
}
#' Resolves a partial solution by inference and hypothesis
#'
#' Resolves a given case starting with the base solution
#' according to user parameters.
#' It first uses \code{\link{propagate.direct}} to infer
#' results from available elements; then it detects the
#' chains of variable which are not constrained enough to be solved this way
#' using \code{\link{detect.missing.chains}}. Then it solves each chain
#' iteratively until the entire problem is solved.
#'
#' @param sol the current solution (a named list)
#' @param case the case to solve
#' @param nA the target population size for A
#' @param nB the target population size for B
#' @param nu.A control for nA: 0 means "respect nA", non-null "adapt it to solve the case"
#' @param phi.A control for frequencies: 0 means "respect the original frequencies as detected in the sample", non-null "adapt it to solve the case"
#' @param delta.A control for degree A: 0 means "respect the input parameters pdi", non-null "adapt them to solve the case"
#' @param gamma control for pij: 0 means "respect the matching probabilities pij", non-null "adapt them to solve the case"
#' @param delta.B control for degree B: 0 means "respect the input parameters pdj", non-null "adapt them to solve the case"
#' @param phi.B control for frequencies: 0 means "respect the original frequencies as detected in the sample", non-null "adapt it to solve the case"
#' @param nu.B control for nB: 0 means "respect nB", non-null "adapt it to solve the case"
#' @param verbose if TRUE, will display detailed information on the console
#' @param tolerance.pd the tolerance for probability distributions
#' @param tolerance.degree.max the tolerance for min/max average degrees
#' @param tolerance.pij the tolerance for pij probabilities
#'
#' @return a list of vectors (the chains) of strings
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
resolve <- function(sol, case,
nA, nB,
nu.A, phi.A, delta.A,
gamma,
nu.B, phi.B, delta.B,
verbose=FALSE,
tolerance.pd=1.5e-8, tolerance.degree.max=1.5e-8, tolerance.pij=1e-6) {
# direct propagation of what we know
sol.infered <- propagate.direct(sol, case, verbose=verbose)
detect.problems(sol.infered, case,
verbose=verbose,
tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
solutions <- list()
hypothesis <- generate.hypothesis(sol.infered)
blacklist <- list() # nothing blacklisted so far
for (h in hypothesis) {
# skip blacklisted hypothesis which are known not to work
skipped <- F
for (b in blacklist) {
if (all(b %in% h)) {
skipped <- T
if (verbose) {
cat("\t\tskipping ", name.hypothesis(h), " because ", name.hypothesis(b), "failed in the past\n", sep="")
}
break
}
}
if (skipped & verbose) {
#cat("\t\tskipping ", name.hypothesis(h), "\n", sep="")
next
}
# the current explored solution, which will host the error rates as well
explored <- list()
explored$hypothesis <- name.hypothesis(h)
# add the hypothesis to the solution
if (verbose) {
cat("\t\ttrying with hypothesis: ", name.hypothesis(h), "\n", sep="")
}
sol.hyp <- assert.hypothesis(h, sol.infered, case, nA, nB, verbose=verbose)
# inference on this basis
sol.hyp <- tryCatch({
propagate.direct(sol.hyp, case, verbose=verbose, indent=3)
}, error = function(e) {
if (verbose) {
cat(paste("\t\t\tfound an invalid solution (inference failed: ", e, ")\n", sep=""))
}
NULL
})
if (is.null(sol.hyp)) {
# we have no solution
# do not try more complex combinations
blacklist[[length(blacklist)+1]] <- h
# continue
next
}
if (!all(c("hat.nA","hat.ci","hat.ndi","hat.ni","hat.nij","hat.ni","hat.ndj","hat.cj","hat.nB") %in% names(sol.hyp))) {
# we cannot infer everything from this hypothesis; let's ignore it
if (verbose)
cat("\t\t\tthese hypothesis do not enable the resolution of all the required variables.\n")
# do not explore this solution more
next
}
# do we achieve to solve the problem on this basis ?
nb.problems <- detect.problems(
sol.hyp, case,
fail=FALSE, verbose=verbose, indent=3,
tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
if (nb.problems > 0) {
if (verbose)
cat(paste("\t\t\twe found an invalid solution (",nb.problems," inconsistencies found)\n", sep=""))
# do not try more complex combinations
blacklist[[length(blacklist)+1]] <- h
# stop there
next
}
if (verbose)
cat("\t\t\twe found a valid solution which provides: ", paste.known(sol.hyp),"\n")
sol.hyp <- quantify.errors(sol.hyp, case, nA, nB)
if (verbose) {
cat(paste("\t\t\there are the NRMSE errors of this solution: ",
"nA=",formatC(sol.hyp$nrmse.nA, format="G"), ", fi=",formatC(sol.hyp$nrmse.fi, format="G"),
", di=",formatC(sol.hyp$nrmse.di, format="G"), ", pij=",formatC(sol.hyp$nrmse.pij, format="G"),
", dj=",formatC(sol.hyp$nrmse.dj, format="G"), ", fj=",formatC(sol.hyp$nrmse.fj, format="G"),
", nB=",formatC(sol.hyp$nrmse.nB, format="G"), "\n",
sep=""
))
}
explored$sol <- sol.hyp
solutions[[length(solutions)+1]] <- explored
}
best.solution(solutions, nu.A, phi.A, delta.A,
gamma,
nu.B, phi.B, delta.B,
verbose=verbose)
# missing.chains.orig <- detect.missing.chains(sol.tmp)
# permutations <- function(l)Â {
# if (length(l) < 2) return (list(l))
# else if (length(l) == 2) return (list(list(l[0],l[1]), list(l[1],l[0]) )
# }
# for (missing.chains in length(missing.chains.orig)) {
# #Â will try by starting with the ith element
# # TODO TODO TODO
# missing.chains <- missing.chains.orig
# while (length(missing.chains) > 0) {
# chain <- missing.chains[[1]]
# missing.chains[[1]] <- NULL
# found <- resolve.missing.chain(sol.tmp, chain, case,
# nA=nA, nB=nB, nu.A=nu.A, phi.A=phi.A, delta.A=delta.A, nu.B=nu.B, phi.B=phi.B, delta.B=delta.B, gamma=gamma,
# verbose=verbose,
# tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
# if (!is.null(found)) {
# if (verbose) {
# cat("\tthis missing chain was solved\n", sep="")
# }
# sol.tmp <- found
# missing.chains <- detect.missing.chains(sol.tmp)
# }
# }
# if (!is.null(sol.tmp)) return(sol.tmp)
# }
# # print("propagated")
# # print(sol)
# return(sol.tmp)
}
#' Quantifies the goodness of fit between two dataframes
#'
#' Computes the squared of each, then the difference between them,
#' then elevated to power 2, then summed and mult 4.
#'
#' @param df1 data ref
#' @param df2 data gen
#' @return a scalar
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @export
#'
gof.freeman_tukey <- function(df1, df2) {
4*sum((sqrt(df1) - sqrt(df2))^2)
}
#' Quantifies the Fisher goodness of fit between two dataframes
#'
#' Computes fisher test using \code{\link{fisher.test}};
#' in case of error, returns NA
#'
#' @param x data ref
#' @param y data gen
#' @return a scalar or NA if not possible
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @importFrom stats fisher.test
#'
#' @export
#'
fisher.pvalue.or.NA <- function(x, y) {
tryCatch({
fisher.test(x, y, simulate.p.value=T)$p.value
}, error = function(e) {
NA
})
}
#' Quantifies the chi2 goodness of fit between two dataframes
#'
#' Computes fisher test using \code{\link{chisq.test}};
#' in case of error, returns NA
#'
#' @param x data ref
#' @param y data gen
#' @return a scalar or NA if not possible
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @importFrom stats chisq.test
#'
#' @export
#'
chisq.test.or.NA <- function(x, y) {
tryCatch({
chisq.test(x, B=y)$p.value
}, error = function(e) {
NA
})
}
#' Quantifies the errors of a solution
#'
#' Computes the errors in a (possibly partial) solution
#' for the variables which are defined and adds them
#' to these solutions. For instance if \code{sol$hat.fi}
#' is available, \code{sol$mse.fi} will be computed.
#'
#' @param sol the solution to evaluate
#' @param case the reference case
#' @param nA the expected size of population A
#' @param nB the expected size of population B
#' @param compute.fisher if TRUE, the compute the Fisher p value (slow)
#' @param compute.chi2 if TRUE, the compute the Chi2
#' @return the solution with additional variables for errors
#'
#' @keywords internal
#'
quantify.errors <- function(sol, case, nA, nB, compute.fisher=F, compute.chi2=F) {
# measure errors
# MSE fi
if (!is.null(sol$hat.fi)) {
sol$mse.fi <- mean( ( sol$hat.fi - case$stats$fi)^2 )
sol$rmse.fi <- sqrt(sol$mse.fi)
sol$nrmse.fi <- sol$rmse.fi
sol$ft.fi <- gof.freeman_tukey(sol$hat.fi, case$stats$fi)
}
if (!is.null(sol$hat.fj)) {
sol$mse.fj <- mean( ( sol$hat.fj - case$stats$fj)^2 )
sol$rmse.fj <- sqrt(sol$mse.fj)
sol$nrmse.fj <- sol$rmse.fj
sol$ft.fj <- gof.freeman_tukey(sol$hat.fj, case$stats$fj)
}
# Pearson ci, cj
if (!is.null(sol$hat.ci)) {
sol$xi2.p.fi <- chisq.test.or.NA(sol$hat.ci, case$stats$fi)
if (compute.fisher) sol$fisher.p.fi <- fisher.pvalue.or.NA(sol$hat.ci, case$stats$fi)
}
if (!is.null(sol$hat.cj)) {
sol$xi2.p.fj <- chisq.test.or.NA(sol$hat.cj, case$stats$fj)
if (compute.fisher) sol$fisher.p.fj <- fisher.pvalue.or.NA(sol$hat.cj, case$stats$fj)
}
# MSE di
if (!is.null(sol$hat.di)) {
sol$mse.di <- mean( ( sol$hat.di - case$inputs$di)^2 )
sol$rmse.di <- sqrt(sol$mse.di)
sol$nrmse.di <- sol$rmse.di / max(case$inputs$di)
# non sense? sol$ft.di <- gof.freeman_tukey(sol$hat.di, case$inputs$di)
}
if (!is.null(sol$hat.dj)) {
sol$mse.dj <- mean( ( sol$hat.dj - case$inputs$dj)^2 )
sol$rmse.dj <- sqrt(sol$mse.dj) / max(case$inputs$dj)
sol$nrmse.dj <- sol$rmse.dj
# non sense? sol$ft.dj <- gof.freeman_tukey(sol$hat.dj, case$inputs$dj)
}
# MSE pdi
if (!is.null(sol$hat.pdi)) {
sol$mse.pdi <- mean( ( sol$hat.pdi - case$inputs$pdi$data)^2 )
sol$rmse.pdi <- sqrt(sol$mse.pdi)
sol$nrmse.pdi <- sol$rmse.pdi #Â / ncol(case$inputs$pdi$data)
sol$ft.pdi <- gof.freeman_tukey(sol$hat.pdi, case$inputs$pdi$data)
}
if (!is.null(sol$hat.pdj)) {
sol$mse.pdj <- mean( ( sol$hat.pdj - case$inputs$pdj$data)^2 )
sol$rmse.pdj <- sqrt(sol$mse.pdj)
sol$nrmse.pdj <- sol$rmse.pdj #Â / ncol(case$inputs$pdj$data)
sol$ft.pdj <- gof.freeman_tukey(sol$hat.pdj, case$inputs$pdj$data)
}
# Pearson ndi, ndj
if (!is.null(sol$hat.ndi)) {
sol$xi2.p.pdi <- chisq.test.or.NA(sol$hat.ndi, case$inputs$pdi_fixed$data)
if (compute.fisher) sol$fisher.p.pdi <- fisher.pvalue.or.NA(sol$hat.ndi, case$inputs$pdi_fixed$data)
}
if (!is.null(sol$hat.ndj)) {
sol$xi2.p.pdj <- chisq.test.or.NA(sol$hat.ndj, case$inputs$pdi_fixed$data)
if (compute.fisher) sol$fisher.p.pdj <- fisher.pvalue.or.NA(sol$hat.ndj, case$inputs$pdj_fixed$data)
}
# MSE pij
if (!is.null(sol$hat.pij)) {
# if (verbose) {
# print("computing the NRMSE of pij, for hat.pij=")
# print(sol$hat.pij)
# print("original being")
# print(case$inputs$pij$data)
# print("difference is ")
# print(sol$hat.pij - case$inputs$pij$data)
# print("square is")
# print(( sol$hat.pij - case$inputs$pij$data )^2)
# print("so mean is ")
# print(mean.data.frame( ( sol$hat.pij - case$inputs$pij$data )^2 ))
# print("and thus")
# print(sqrt(mean.data.frame( ( sol$hat.pij - case$inputs$pij$data )^2 )))
# }
sol$mse.pij <- mean.data.frame( ( sol$hat.pij - case$inputs$pij$data )^2 )
sol$rmse.pij <- sqrt(sol$mse.pij)
sol$nrmse.pij <- sol$rmse.pij
sol$ft.pij <- gof.freeman_tukey(sol$hat.pij, case$inputs$pij$data)
}
# Pearson ndi, ndj
if (!is.null(sol$hat.nij)) {
sol$xi2.p.pij <- chisq.test.or.NA(sol$hat.nij, case$inputs$pij$data)
if (compute.fisher) sol$fisher.p.pij <- fisher.pvalue.or.NA(sol$hat.nij, case$inputs$pij_fixed$data)
}
# error n
if (!is.null(sol$hat.nA)) {
sol$error.nA <- abs( sol$hat.nA - nA )
sol$rmse.nA <- sol$error.nA
sol$nrmse.nA <- abs(sol$hat.nA - nA)/nA
}
if (!is.null(sol$hat.nB)) {
sol$error.nB <- abs( sol$hat.nB - nB )
sol$rmse.nB <- sol$error.nB
sol$nrmse.nB <- abs(sol$hat.nB - nB)/nB
}
sol
}
#' Ensures the content of a solution is in the expect format
#'
#' Ensures a solution is formatted well: notably guarantees
#' the lists or data frames which have to be names are named
#' as expected.
#'
#' @param sol the solution (might be partial)
#' @param case the case
#' @return the same solution fixed formats
#'
#' @keywords internal
#'
ensure.form <- function(sol, case) {
if (!is.null(sol$hat.ni)) {
names(sol$hat.ni) <- colnames(case$inputs$pij$data)
}
if (!is.null(sol$hat.ci)) {
names(sol$hat.ci) <- colnames(case$inputs$pij$data)
}
if (!is.null(sol$hat.fi)) {
names(sol$hat.fi) <- colnames(case$inputs$pij$data)
}
if (!is.null(sol$hat.di)) {
names(sol$hat.di) <- colnames(case$inputs$pij$data)
}
if (!is.null(sol$hat.nj)) {
names(sol$hat.nj) <- rownames(case$inputs$pij$data)
}
if (!is.null(sol$hat.cj)) {
names(sol$hat.cj) <- rownames(case$inputs$pij$data)
}
if (!is.null(sol$hat.fj)) {
names(sol$hat.fj) <- rownames(case$inputs$pij$data)
}
if (!is.null(sol$hat.dj)) {
names(sol$hat.dj) <- rownames(case$inputs$pij$data)
}
sol
}
#' Computes the mean of all the values inside a dataframe
#'
#' Sometimes the standard mean() function works on matrices,
#' but it does not on dataframes. In case the parameter is a dataframe,
#' the mean will be computed as the sum of all the values divided by the
#' count of elements. Else another standard mean will be called.
#'
#' @param x the dataframe
#' @param ... additional parameters are quietly ignored
#' @return a double value
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
#' @keywords internal
#'
mean.data.frame <- function(x, ...) {
if (class(x) != "data.frame")
mean(x)
else
sum(x) / (nrow(x) * ncol(x))
}
#' Solves the equations by arbitrating.
#'
#' Solves the underlying equations based on data, the inputs parameters,
#' and the control parameters. It tries to distribute the errors in the places
#' accepted by the user.
#'
#' This function is deterministic as long as only one solution is found.
#' In case several solutions with equivalent quality are found, one of them
#' is randomly chosen.
#'
#' @param case a case prepared with \code{\link{matching.prepare}}
#' @param nA the count of entities expected for population A
#' @param nB the count of entities expected for population B
#' @param nu.A control for nA: 0 means "respect nA", non-null "adapt it to solve the case"
#' @param phi.A control for frequencies: 0 means "respect the original frequencies as detected in the sample", non-null "adapt it to solve the case"
#' @param delta.A control for degree A: 0 means "respect the input parameters pdi", non-null "adapt them to solve the case"
#' @param gamma control for pij: 0 means "respect the matching probabilities pij", non-null "adapt them to solve the case"
#' @param delta.B control for degree B: 0 means "respect the input parameters pdj", non-null "adapt them to solve the case"
#' @param phi.B control for frequencies: 0 means "respect the original frequencies as detected in the sample", non-null "adapt it to solve the case"
#' @param nu.B control for nB: 0 means "respect nB", non-null "adapt it to solve the case"
#' @param verbose if TRUE, the resolution will emit messages for debug
#' @param tolerance.pd the tolerance for probability distributions
#' @param tolerance.degree.max the tolerance for min/max average degrees
#' @param tolerance.pij the tolerance for pij probabilities
#'
#' @return a case ready for generation
#'
#' @export
#'
#' @seealso \code{\link{matching.prepare}} to prepare a case for this function,
#' \code{\link{matching.generate}} to use the result for actual generation,
#' \code{\link{plot.dpp_resolved}} to plot the quality of the solution
#'
#' @examples
#'
#' # load sample data
#' data(dwellings_households)
#' # prepare the case
#' case.prepared <- matching.prepare(
#' dwellings_households$sample.A, dwellings_households$sample.B,
#' dwellings_households$pdi, dwellings_households$pdj,
#' dwellings_households$pij)
#' # resolve tbe case
#' solved <- matching.solve(case.prepared,
#' nA=50000,nB=40000,
#' nu.A=0, phi.A=0, delta.A=1,
#' gamma=1,
#' delta.B=0, phi.B=0, nu.B=0)
#' # print the resolution information
#' print(solved)
#' # access the solved frequencies, distribution of degrees and matching probabilities
#' print(solved$gen$hat.fi)
#' print(solved$gen$hat.pdi)
#' print(solved$gen$hat.pij)
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
matching.solve <- function(case,
nA, nB,
nu.A, phi.A, delta.A,
gamma,
delta.B, phi.B, nu.B,
verbose = FALSE,
tolerance.pd = 1.5e-6, tolerance.degree.max=1.5e-8, tolerance.pij=1e-6) {
if (class(case) != "dpp_prepared")
stop("case should be the result of a preparation of data by matching.prepare")
mixx <- list()
# start with masks
# TODO reuse
case$masks <- list()
case$masks$mask.fi <- as.integer(case$stats$fi > 0)
case$masks$mask.fj <- as.integer(case$stats$fj > 0)
case$masks$mask.di <- as.integer(case$inputs$di > 0)
case$masks$mask.dj <- as.integer(case$inputs$dj > 0)
case$masks$mask.pij <- (case$inputs$pij$data > 0)*1
case$masks$mask.pij.all <- t(t(case$masks$mask.pij) * case$masks$mask.fi * case$masks$mask.di) * case$masks$mask.fj * case$masks$mask.dj
case$masks$mask.fi.all <- as.integer(colSums(case$masks$mask.pij.all) > 0)
case$masks$mask.fj.all <- as.integer(rowSums(case$masks$mask.pij.all) > 0)
case$masks$mask.di.all <- case$masks$mask.di * case$masks$mask.fi.all
case$masks$mask.dj.all <- case$masks$mask.dj * case$masks$mask.fj.all
case$inputs$nA <- nA
case$inputs$nB <- nB
case$inputs$nu.A <- nu.A
case$inputs$nu.B <- nu.B
case$inputs$phi.A <- phi.A
case$inputs$phi.B <- phi.B
case$inputs$delta.A <- delta.A
case$inputs$delta.B <- delta.B
case$inputs$gamma <- gamma
case$inputs$tolerance.pd <- tolerance.pd
case$inputs$tolerance.degree.max <- tolerance.degree.max
#print(case$masks)
if (verbose)
cat("\nstarting the resolution of the case\n")
sol <- list()
if (nu.A == 0) { sol$hat.nA <- nA }
if (phi.A == 0) { sol$hat.fi <- normalise(case$stats$fi * case$masks$mask.fi.all) }
if (delta.A == 0) {
sol$hat.di <- case$inputs$di * case$masks$mask.di.all
sol$hat.pdi <- case$inputs$pdi$data #* case$masks$mask.di.all
}
if (nu.B == 0) { sol$hat.nB <- nB }
if (phi.B == 0) { sol$hat.fj <- normalise(case$stats$fj * case$masks$mask.fj.all) }
if (delta.B == 0) {
sol$hat.dj <- case$inputs$dj * case$masks$mask.dj.all
sol$hat.pdj <- case$inputs$pdj$data
}
if (gamma == 0) { sol$hat.pij <- normalise(case$inputs$pij$data * case$masks$mask.pij.all) }
if (verbose) {
cat("\taccording to user weights, we already know: ",paste(names(sol),collapse=","),"\n",sep="")
}
# detect issues right now; maybe the problem is overconstrained or badly constrainted
detect.problems(sol, case,
verbose=verbose,
tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
sol <- resolve(sol, case,
nA=nA, nB=nB,
nu.A=nu.A, phi.A=phi.A, delta.A=delta.A, nu.B=nu.B, phi.B=phi.B, delta.B=delta.B, gamma=gamma,
verbose=verbose,
tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
# ensure all the variables found a solution during the process
if (!all(c(
"hat.pdi","hat.pdj",
"hat.ci","hat.cj",
"hat.nij",
"hat.nA","hat.nB") %in% names(sol))) {
stop("The case is too constrained to be solved: some of the variables were not solved (",
paste(setdiff(c(
"hat.pdi","hat.pdj",
"hat.ci","hat.cj",
"hat.nij",
"hat.nA","hat.nB"),
names(sol)),
collapse=","),
")")
}
detect.problems(sol, case,
verbose=verbose,
tolerance.pd=tolerance.pd, tolerance.degree.max=tolerance.degree.max, tolerance.pij=tolerance.pij)
if (verbose) {
cat("\ncase solved.\n")
}
# measure errors
sol <- quantify.errors(sol, case, nA, nB)
# ensure the form is ok
sol <- ensure.form(sol, case)
# prepare the result
res <- case
res$gen <- sol
class(res) <- "dpp_resolved"
res
}
#' Display the result of a resolution
#'
#' @param x the case to print
#' @param ... ignored
#'
#' @export
#'
#' @author Samuel Thiriot <samuel.thiriot@res-ear.ch>
#'
print.dpp_resolved <- function(x,...) {
cat("case prepared and ready for generation\n")
cat("control parameters: nu.A=",x$inputs$nu.A,
", phi.A=",x$inputs$phi.A,", delta.A=",x$inputs$delta.A,
", gamma=",x$inputs$gamma,
", delta.B=",x$inputs$delta.B,", phi.B=",x$inputs$phi.B,", nu.B=",x$inputs$nu.B,
"\n\n",
sep="")
disp <- function(name,x) {
cat("$",name,":\n",sep="")
print(x$gen[[name]])
cat("\n")
}
disperr <- function(name,x) {
subname <- substr(name,5,nchar(name))
msename <- paste("rmse.",subname,sep="")
cat("$",name," [RMSE ",x$gen[[msename]],"]:\n",sep="")
print(x$gen[[name]])
cat("\n")
}
cat("$hat.nA: ",x$gen$hat.nA," [err ",x$gen$error.nA,"]\n\n",sep="")
disperr("hat.fi",x)
disp("hat.ci",x)
disperr("hat.di",x)
disperr("hat.pdi",x)
disp("hat.ni",x)
cat("$hat.nL: ",x$gen$hat.nL,"\n",sep="")
disp("hat.nij",x)
disperr("hat.pij",x)
disp("hat.nj",x)
disperr("hat.dj",x)
disperr("hat.pdj",x)
disp("hat.cj",x)
disperr("hat.fj",x)
cat("$hat.nB: ",x$gen$hat.nB," [err ",x$gen$error.nB,"]\n",sep="")
cat("The problem was resolved with the following process:\n")
cat("\t",paste(x$gen$inference,collapse="\n\t"),sep="")
cat("\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.