R/m_estimator.R

m_estimator <- function(x,
  y = NULL,
  z = NULL,
  m = 2,
  prior = "data",
  coercion = FALSE
) {
    
    if(coercion) {
      x <- as.factor(x)
      
      if(!is.null(y)) {
        y <- as.factor(y)
      }
      
      if(!is.null(z)) {
        z <- as.factor(z)
      }
      warning("variables were coerced to factors.")
    }
    
    condition <- is.null(y) & is.null(z)
    if(condition) {
      
      condition <- is.factor(x)
      if(condition) {
        
        # compute frequency (fx) and sample size (nx)
        fx <- table(x)
        nx <- sum(fx)
        
        kx <- nlevels(x)
        
        switch(
          EXPR = prior, 
          data = {
            px <- fx / nx
          },
          uniform = {
            px <- rep(1 / kx, kx)
          },
          {
            stop("'prior' must be set to 'data' or 'uniform'.")
          }
        )
        
        # m-estimate
        dx <- (fx + (m * px)) / (nx + m) 
        dx.names <- names(dx)
        dx <- as.vector(dx)
        names(dx) <- dx.names
        # return
        rout <- dx
        return(rout)
        
      } else {
        stop("x must be a factor.")
      }
    }
    
    condition <- !is.null(y) & is.null(z)
    if(condition) {
      
      condition <- is.factor(x) & is.factor(y)
      if(condition) {
        
        condition <- length(x) == length(y)
        if(condition) {
          
          # compute frequency (fxy) and sample size (nxy)
          fxy <- table(x, y)
          nxy <- sum(fxy)
          
          kx <- nlevels(x)
          ky <- nlevels(y)
          
          switch(
            EXPR = prior, 
            data = {
              pxy <- fxy / nxy
            },
            uniform = {
              pxy <- matrix(1 / (kx * ky), nrow = kx, ncol = ky)
            },
            {
              stop("'prior' must be set to 'data' or 'uniform'.")
            }
          )
          
          # m-estimate
          dx <- (fxy + (m * pxy)) / (nxy+ m) 
          
          # return
          rout <- dx
          return(rout)
          
        } else {
          stop("number of elements in x AND y do not match.")
        }
        
      } else {
        stop("x AND y must be factors.")
      }
      
    }
    
    condition <- !is.null(y) & !is.null(z)
    if(condition) {
      
      condition <- is.factor(x) & is.factor(y) & is.factor(z)
      if(condition) {
        
        condition <- 
          (length(x) == length(y)) |
          (length(x) == length(z)) |
          (length(y) == length(z))
        if(condition) {
          
          # compute frequency (fxyz) and sample size (nxyz)
          fxyz <- table(x, y, z)
          nxyz <- sum(fxyz)
          
          kx <- nlevels(x)
          ky <- nlevels(y)
          kz <- nlevels(z)
          
          switch(
            EXPR = prior, 
            data = {
              pxyz <- fxyz / nxyz
              
              # m-estimate
              dx <- fxyz
              for(i in seq_len(kz)) {
                dx[,, i] <- (fxyz[,, i] + (m * pxyz[,, i])) / (nxyz + m) 
              }
              
              
            },
            uniform = {
              pxyz <- matrix(1 / (kx * ky * kz), nrow = kx, ncol = ky)

              # m-estimate
              dx <- fxyz
              for(i in kz) {
                dx[,, i] <- (fxyz[,, i] + (m * pxyz)) / (nxyz + m) 
              }
              
            },
            {
              stop("'prior' must be set to 'data' or 'uniform'.")
            }
          )
          
          
        } else {
          stop("Number of elements in x AND y AND z do not match.")
        }
        
      } else {
        stop("x AND y AND z must be factors.")
      } 
      
      # return
      rout <- dx
      return(rout)
    }
    
  }
dsnavega/imputeForest documentation built on May 8, 2019, 2:43 p.m.