R/impreciseEstimation.R

# Copyright (C) 2018  Paul Fink, Eva Endres
#
# This file is part of impimp.
#
# impimp is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# impimp is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with impimp.  If not, see <https://www.gnu.org/licenses/>.

#' @title Imprecise Estimation
#'
#' @description Estimate the probability of some events
#' based on data obtained by imprecise imputation
#'
#' @param data a data.frame obtained as result from an
#' imprecise imputation e.g. by a call to
#' \code{\link{impimp}}.
#' @param event a list of objects of class \code{"impimp_event"},
#' specifying the event of interest. See 'Details'.
#' @param constraints a list of so-called logical constraints or
#' fixed zeros. Each element must be an object of class
#' \code{"impimp_event"}. See 'Details'.
#'
#' @return A numeric vector of length 2, where the
#' first component contains the lower and the second component
#' the upper probability of the event of interest.
#'
#' @details
#' \code{event} should be a list of objects of class
#' \code{"impimp_event"}, where the set union of impimp_events is the
#' actual event of interest.
#'
#' By specifying \code{constraints} one can exclude combinations of
#' imputed values which are deemed impossible, so-called
#' \sQuote{logical constraints} or \sQuote{fixed zeros}.
#' \code{constraints} should be a list of objects of class
#' \code{"impimp_event"}.
#'
#' An object of class \code{"impimp_event"} is obtained as a result
#' of a call to \code{\link{impimp_event}}.
#'
#' For both \code{event} and \code{constraints} holds that overlapping
#' in the resulting events generated by the individual impimp_events
#' does not have any side effects, besides a potential decrease
#' in performance.
#'
#' @keywords robust
#'
#' @seealso \code{\link{impimp}}, \code{\link{impimp_event}} for
#' specifying constraints and events; \code{\link{impestcond}} for
#' the estimation of conditional probabilities
#'
#' @references Endres, E., Fink, P. and Augustin, T. (2018),
#' Imprecise Imputation: A Nonparametric Micro Approach Reflecting
#' the Natural Uncertainty of Statistical Matching with Categorical
#' Data, \emph{Department of Statistics (LMU Munich): Technical Reports},
#' No. 214
#'
#' @examples
#' A <- data.frame(x1 = c(1,0), x2 = c(0,0),
#'                 y1 = c(1,0), y2 = c(2,2))
#' B <- data.frame(x1 = c(1,1,0), x2 = c(0,0,0),
#'                 z1 = c(0,1,1), z2 = c(0,1,2))
#' AimpB <- impimp(A, B, method = "variable_wise")
#' BimpA <- impimp(B, A, method = "variable_wise")
#' AB <- rbindimpimp(AimpB, BimpA)
#'
#' ## P(Z1=1, Z2=0)
#' myevent1 <- list(impimp_event(z1 = 1, z2 = 0))
#' impest(AB, event = myevent1)
#'
#' ## P[(Z1,Z2) in {(1,0),(0,1),(1,1)}]
#' myevent2 <- list(impimp_event(z1 = 1,z2 = 0),
#'                  impimp_event(z1 = c(0,1), z2 = 1))
#' impest(AB, event = myevent2)
#'
#' @export
impest <- function(data, event, constraints = NULL) {

  # sanity checks: 'data' must be the result of an imprecise
  # imputation and 'event' must pass the package's event validation
  if(!is.impimp(data)) {
    stop(gettextf("imprecise estimation is only meaningful for objects of class %s",
                  dQuote("impimp"), domain = "R-impimp"))
  }
  eventcheck(event)

  # make all events equal by unlisting them (one level only)
  event <- unlist(event, recursive = FALSE)
  # generate the tupel data, including the constraints
  tupelData <- generateTupelData(data, constraints)

  # number of tupels compatible with the event
  # within each expanded observation
  compatibleSubset <- vapply(tupelData, function(tupels) {
    sum(evalConditions(event, tupels))
  }, FUN.VALUE = numeric(1L))

  # overall number of tupels within each expanded observation;
  # vapply() instead of sapply() keeps the result a numeric vector,
  # even if 'tupelData' is empty (sapply() would return a list then)
  overallTupels <- vapply(tupelData, NROW, FUN.VALUE = numeric(1L))
  nrowData <- length(tupelData)

  # compute belief
  # the observation is an exact match, if all expanded match;
  # observations without any tupel left must not be counted
  lower <- sum((compatibleSubset == overallTupels) &
                 (overallTupels > 0)) / nrowData

  # compute plausibility
  # observation is compatible if there is at least one match
  upper <- sum(compatibleSubset > 0) / nrowData

  # belief and plausibility, in that order
  c(lower, upper)
}

Try the impimp package in your browser

Any scripts or data that you put into this service are public.

impimp documentation built on May 1, 2019, 10:13 p.m.