Nothing
resample <- function(x, ...) {
if (length(x) <= 1) x
else sample(x)
}
#' Scapegoat algorithm
#'
#' @keywords internal
scapegoat <- function(R0, a0, x,krit=NULL) {
r0 <- nrow(R0)
v <- ncol(R0)
if (v < r0){
return(x)
}
#corner case when there is one variable and one restriction
if (v == 1 && r0 == 1){
return(a0/R0)
}
if (!is.null(krit)){
#krit <- colnames(R0) %in% krit
krit <- logical(ncol(R0))
}
else krit <- logical(ncol(R0))
p <- 1:v
perm <- c(resample(p[!krit]),resample(p[krit]))
R0t <- R0[, perm, drop=FALSE]
xt <- x[perm]
ks <- qr(R0t)$pivot;
p1 <- ks[1:r0]
p2 <- ks[(r0+1):v]
R1 <- R0t[,p1, drop=FALSE]
#x1 <- x[p1]
R2 <- R0t[,p2, drop=FALSE]
x2 <- xt[p2]
c <- a0 - (R2 %*% x2)
x1 <- solve(R1, c)[,1]
sol <- c(x1, x2)
#restore original order
m <- match(names(x), names(sol))
sol[m]
}
#' Correct records under linear restrictions for rounding errors
#'
#' This algorithm tries to detect and repair records that violate linear (in)equality constraints by correcting possible rounding errors as described by Scholtus(2008).
#' Typically data is constrainted by \eqn{Rx=a} and \eqn{Qx \ge b}.
#'
#' The algorithm first finds violated constraints
#' \eqn{|r'_{i}x-a_i| > 0} , and selects edits that may be due to a rounding error \eqn{0 < |r'_{i}x-a_i| \leq \delta}.
#' The algorithm then makes a correction suggestion where the errors are attributed to randomly selected variables under the lineair equality constraints.
#' It checks if the suggested correction
#' does not violate the inequality matrix \eqn{Q}. If it does, it will try to generate a different solution up till \code{K} times.
#'
#' @export
#' @example ../examples/correctRounding.R
#' @references
#' Scholtus S (2008). Algorithms for correcting some obvious
#' inconsistencies and rounding errors in business survey data. Technical
#' Report 08015, Statistics Netherlands.
#'
#' @param E \code{editmatrix} or \code{editset} as generated by the \code{editrules} package.
#' @param dat \code{data.frame} with the data to be corrected
#' @param ... arguments to be passed to other methods.
#' @param fixate \code{character} with variable names that should not be changed.
#' @param delta tolerance on checking for rounding error
#' @param K number of trials per record. See details
#' @param round should the solution be rounded, default TRUE
#' @param assumeUnimodularity If \code{FALSE}, a test is performed before corrections are computed (expensive).
#' @return A \code{\link[=deducorrect-object]{deducorrrect}} object.
#' @seealso \code{\link{deducorrect-object}} \code{\link{status}}
#'
#'
correctRounding <- function(E, dat, ...){
UseMethod('correctRounding')
}
#' @method correctRounding editset
#' @rdname correctRounding
#' @export
correctRounding.editset <- function(E, dat, ...){
correctAndRevert(correctRounding.editmatrix,E,dat,...)
}
#' @method correctRounding editmatrix
#' @rdname correctRounding
#' @export
correctRounding.editmatrix <- function(E, dat, fixate=NULL, delta=2, K=10, round=TRUE, assumeUnimodularity=FALSE,...){
stopifnot(is.data.frame(dat))
R <- E
krit <- character(0)
vars <- getVars(R)
fixate <- if (is.null(fixate)){FALSE}
else vars %in% fixate
# if (!missing(Q)) stop("Q parameter is deprecated, please add inequalities to R")
Q <- NULL
eq <- getOps(R) == "=="
if (!all(eq)){
Q <- R[!eq,]
R <- R[eq,]
#use reduced Q
krit <- getVars(Q)
b <- getb(Q)
}
a <- getb(R)
R <- getA(R)
if (!assumeUnimodularity){
if (!isTotallyUnimodular(R)) stop("The system of equalities must be totally unimodular. ", R)
}
m <- as.matrix(dat[vars])
n <- nrow(m)
status <- status(n)
attempts <- integer(n)
corrections <- NULL
cc <- which(complete.cases(m))
for (i in cc){
x <- m[i,]
E0 <- abs(a - (R %*% x)) <= delta
R0 <- R[E0,,drop=FALSE]
a0 <- a[E0]
if (all((R0 %*% x) == a0)){
status[i] <- if (all(E0)) "valid"
else "invalid"
next
}
k <- 0
if (!is.null(Q)){
violatedIneq <- which(violatedEdits(Q, data.frame(t(x))))
}
while (k < K){
k <- k + 1
#TODO check if R0 has one variable...
sol <- scapegoat(R0, a0, x, krit=krit)
if (round)
sol <- round(sol,0)
#TODO make this step more generic (so Q can be any inequality matrix)
changed <- (sol-x) != 0
if ( all(R0 %*% sol == a0)
&& !any(fixate & changed)
&& ( is.null(Q)
|| all(which(violatedEdits(Q, data.frame(t(sol)))) %in% violatedIneq)
)
){
break
}
}
if (k < K){
# detect if vars are changed
vars <- which(changed)
m[i,] <- sol
cor <- data.frame( row=i
, variable=colnames(R)[vars]
, old=x[vars]
, new=sol[vars]
)
corrections <- rbind(corrections, cor)
status[i] <- if (all(E0)) "corrected"
else "partial"
}
else {
status[i] <- "invalid"
}
attempts[i] <- k
}
corrected <- dat
corrected[colnames(m)] <- as.data.frame(m)[]
if ( is.null(corrections) ){
corrections <- data.frame(
row=integer(0),
variable=factor(levels=colnames(R)),
old=numeric(0), new=numeric(0)
)
}
return(
newdeducorrect(
corrected = corrected,
corrections = as.data.frame(corrections),
status = data.frame(status=status, attempts=attempts),
...
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.