R/correctRounding.R

resample <- function(x, ...) {
   if (length(x) <= 1) x
   else sample(x)
}

#' Scapegoat algorithm
#'
#' @keywords internal
scapegoat <- function(R0, a0, x,krit=NULL) {
	 r0 <- nrow(R0)
    v <- ncol(R0)
    
    if (v < r0){
       return(x)
    }
    
    #corner case when there is one variable and one restriction 
    if (v == 1 && r0 == 1){
       return(a0/R0)
    }
    if (!is.null(krit)){
        #krit <- colnames(R0) %in% krit
        krit <- logical(ncol(R0))
    }
    else krit <- logical(ncol(R0))
    p <- 1:v
    perm <- c(resample(p[!krit]),resample(p[krit]))

    
    R0t <- R0[, perm, drop=FALSE]
    xt <- x[perm]
    
    ks <- qr(R0t)$pivot;
    p1 <- ks[1:r0]
    p2 <- ks[(r0+1):v]
    
    R1 <- R0t[,p1, drop=FALSE]
    #x1 <- x[p1]
    R2 <- R0t[,p2, drop=FALSE]
    x2 <- xt[p2]
    
	 c <- a0 - (R2 %*% x2)
    x1 <- solve(R1, c)[,1]
	 sol <- c(x1, x2)
    #restore original order
    m <- match(names(x), names(sol))
    sol[m]
}

#' Correct records under linear restrictions for rounding errors
#'
#' This algorithm tries to detect and repair records that violate linear (in)equality constraints by correcting possible rounding errors as described by Scholtus(2008).
#' Typically data is constrainted by \eqn{Rx=a} and \eqn{Qx \ge b}.
#' 
#' The algorithm first finds violated constraints
#' \eqn{|r'_{i}x-a_i| > 0} , and selects edits that may be due to a rounding error \eqn{0 < |r'_{i}x-a_i| \leq \delta}. 
#' The algorithm then makes a correction suggestion where the errors are attributed to randomly selected variables under the lineair equality constraints. 
#' It checks if the suggested correction 
#' does not violate the inequality matrix \eqn{Q}. If it does, it will try to generate a different solution up till \code{K} times.
#'
#' @export
#' @example ../examples/correctRounding.R
#' @references 
#' Scholtus S (2008). Algorithms for correcting some obvious
#' inconsistencies and rounding errors in business survey data. Technical
#' Report 08015, Statistics Netherlands.
#'
#' @param E \code{editmatrix} or \code{editset} as generated by the \code{editrules} package.
#' @param dat \code{data.frame} with the data to be corrected
#' @param ... arguments to be passed to other methods.
#' @param fixate \code{character} with variable names that should not be changed.
#' @param delta tolerance on checking for rounding error
#' @param K number of trials per record. See details
#' @param round should the solution be rounded, default TRUE
#' @param assumeUnimodularity If \code{FALSE}, a test is performed before corrections are computed (expensive).
#' @return A \code{\link[=deducorrect-object]{deducorrrect}} object.
#' @seealso \code{\link{deducorrect-object}} \code{\link{status}}
#'
#'
correctRounding <- function(E, dat, ...){
    UseMethod('correctRounding')
}



#' @method correctRounding editset
#' @rdname correctRounding
#' @export
correctRounding.editset <- function(E, dat, ...){
    correctAndRevert(correctRounding.editmatrix,E,dat,...)
}   

#' @method correctRounding editmatrix
#' @rdname correctRounding
#' @export
correctRounding.editmatrix <- function(E, dat, fixate=NULL, delta=2, K=10, round=TRUE, assumeUnimodularity=FALSE,...){
   stopifnot(is.data.frame(dat))
   R <- E
   krit <- character(0)
   vars <- getVars(R)
   fixate <- if (is.null(fixate)){FALSE}
             else vars %in% fixate
             
#   if (!missing(Q)) stop("Q parameter is deprecated, please add inequalities to R")
   Q <- NULL
   eq <- getOps(R) == "=="
   if (!all(eq)){
       
       Q <- R[!eq,]
       R <- R[eq,]
       
       #use reduced Q
       krit <- getVars(Q)
       b <- getb(Q)
   }
   a <- getb(R)
   R <- getA(R)
  
   if (!assumeUnimodularity){
      if (!isTotallyUnimodular(R)) stop("The system of equalities must be totally unimodular. ", R)
   }
   
   m <- as.matrix(dat[vars])
   n <- nrow(m)
   status <- status(n)
   attempts <- integer(n)
   
   corrections <- NULL
   cc <- which(complete.cases(m))
   for (i in cc){
      x <- m[i,]
      E0 <- abs(a - (R %*% x)) <= delta
      R0 <- R[E0,,drop=FALSE]
      a0 <- a[E0]
      
      if (all((R0 %*% x) == a0)){
         status[i] <- if (all(E0)) "valid"
                      else "invalid"
         next
      }
      k <- 0
      
      if (!is.null(Q)){
         violatedIneq <- which(violatedEdits(Q, data.frame(t(x))))
      }
      while (k < K){
        k <- k + 1
        #TODO check if R0 has one variable...
        sol <- scapegoat(R0, a0, x, krit=krit)
        if (round) 
            sol <- round(sol,0)
        #TODO make this step more generic (so Q can be any inequality matrix)
        changed <- (sol-x) != 0
        if ( all(R0 %*% sol == a0)
          && !any(fixate & changed)
          && ( is.null(Q)
             || all(which(violatedEdits(Q, data.frame(t(sol)))) %in%  violatedIneq)
             )
           ){
           break
        }
      }
      
      if (k < K){
        # detect if vars are changed
        vars <- which(changed)
        m[i,] <- sol
        cor <- data.frame( row=i
                    , variable=colnames(R)[vars]
                    , old=x[vars]
                    , new=sol[vars]
                    )
        corrections <- rbind(corrections, cor)
        status[i] <- if (all(E0)) "corrected"
                     else "partial"
      }
      else {
        status[i] <- "invalid"
      }
      attempts[i] <- k
   }
   
   corrected <- dat
   corrected[colnames(m)] <- as.data.frame(m)[]
    
   if ( is.null(corrections) ){
      corrections <- data.frame(
            row=integer(0), 
            variable=factor(levels=colnames(R)),
            old=numeric(0), new=numeric(0)
        )   
   }
   return(
      newdeducorrect(
         corrected   = corrected, 
         corrections = as.data.frame(corrections), 
         status      = data.frame(status=status, attempts=attempts),
         ...
      )
   )
}

Try the deducorrect package in your browser

Any scripts or data that you put into this service are public.

deducorrect documentation built on May 2, 2019, 3:47 p.m.