R/misclassMLogit.R

#' Mlogit Estimation under a Misclassified Covariate
#'
#' \code{misclassMlogit} computes an estimator for a multinomial logit model with a
#' misclassified covariate, using additional side information on the misclassification process.
#'
#' @param Y a matrix of 0s and 1s, indicating the target class. This is the dependent variable.
#' @param X a matrix containing the independent variables
#' @param setM matrix, each row containing a potential pattern of the misclassified (latent)
#'        covariate M, in any coding for a categorical independent variable, e.g. dummy coding.
#' @param P matrix of a-posteriori probabilities, one column per potential pattern in \code{setM},
#'        conditional on the other covariates denoted in X.
#' @param na.action how to treat NAs
#' @param control options for the optimization procedure (see \code{\link{optim}},
#'        \code{\link{ucminf}} for options and details).
#' @param par (optional) starting parameter vector
#' @param baseoutcome reference outcome class
#' @param x logical, add covariates matrix to result?
#' @examples
#' ## simulate data
#' \donttest{
#' data <- simulate_mlogit_dataset()
#' }
#'
#' ## estimate model without misclassification error
#' \donttest{
#' library(mlogit)
#' data2 <- mlogit.data(data, varying = NULL, choice = "Y", shape = "wide")
#' summary(mlogit(Y ~ 1 | X + M2, data2, reflevel = "3"))
#' }
#'
#' ## estimate model with misclassification error
#' \donttest{
#' summary(mlogit(Y ~ 1 | X + M, data2, reflevel = "3"))
#' }
#'
#' ## estimate misclassification probabilities
#' \donttest{
#' Pmodel <- glm(M2 ~ M + X, data = data, family = binomial("logit"))
#' summary(Pmodel)
#' }
#'
#' ## construct a-posteriori probabilities from Pmodel
#' \donttest{
#' P <- predict(Pmodel, newdata = data, type = "response")
#' P <- cbind(1 - P, P)
#' dimnames(P)[[2]] <- c("M0", "M1") ## speaking names
#' }
#'
#' ## estimate misclassGLM
#' \donttest{
#' Yneu <- matrix(rep.int(0, nrow(data) * 3), ncol = 3)
#' for (i in 1:nrow(data)) Yneu[i, data$Y[i]] <- 1
#' est <- misclassMlogit(Y = Yneu,
#'                       X = as.matrix(data[, 2, drop = FALSE]),
#'                       setM = matrix(c(0, 1), nrow = 2),
#'                       P = P)
#' summary(est)
#' }
#'
#' ## and bootstrapping the results from dataset
#' \dontrun{
#' summary(boot.misclassMlogit(est,
#'                          Y = Yneu,
#'                          X = data.matrix(data[, 2, drop = FALSE]),
#'                          Pmodel = Pmodel,
#'                          PX = data,
#'                          repetitions = 100))
#' }
#'
#' @export
misclassMlogit <- function(Y, X, setM, P,
                        na.action = na.omit,
                        control = list(),
                        par = NULL,
                        baseoutcome = NULL,
                        x = FALSE) {
  if (!is.matrix(X)) stop("X is not a matrix")
  if (!is.matrix(setM)) stop("setM is not a matrix")
  if (!is.matrix(Y)) stop("Y must be a matrix")
  if (nrow(Y) != nrow(X)) stop("dimensions of Y and X do not match")
  if (is.null(baseoutcome)) baseoutcome <- which.max(colSums(Y))
  if (baseoutcome < ncol(Y)) Y <- cbind(Y[, -baseoutcome], Y[, baseoutcome])

  if (!is.matrix(P)) stop("P is not a matrix")
  if (nrow(setM) != ncol(P)) stop("dimensions of P and setM do not match")
  if (nrow(P) != nrow(X)) stop("dimensions of P and X do not match")

  g <- NULL

  control <- do.call("make.control", control)

  # define the log-likelihood
  f <- fmlogitValidation
  if (control$method %in% c("BFGS", "BFGS2", "CG", "nlm")) {
    g <- gmlogitValidation
  }

  # starting parameters
  length.par <- (ncol(X) + ncol(setM) + 1) * (ncol(Y) - 1)
  if (is.null(par)) {
    cat("set starting parameters to zero...")
    par <- rep.int(0.0, length.par)
  } else {
    if (length(par) != length.par) stop("starting parameter par is not valid.")
  }

  # optimise
  cat("optimize...")
  erg <- fit.algorithm(par, f, g, Y, X, P, setM, control)
  cat("ok.\n")

  # done: assemble the result object
  ret <- list()
  ret$baseoutcome <- baseoutcome
  ret$family <- family
  ret$setM <- setM
  p <- ncol(X) + ncol(setM) + 1

  ret$coefficients <- erg$par[1:(p * (ncol(Y) - 1))]
  for (l in 1:(ncol(Y) - 1)) {
    names(ret$coefficients)[(l - 1) * p + 1:p] <-
      c(paste0("alt", l, ":(Intercept)"),
        unlist(lapply(dimnames(X)[[2]], function(n) paste0("alt", l, ":", n))),
        unlist(lapply(dimnames(P)[[2]][-1], function(pn) paste0("alt", l, ":", pn))))
  }

  ret$y <- Y
  ret$na.action <- na.action
  ret$df.residual <- nrow(X) - length(par)
  ret$optim <- erg
  if (!is.null(g)) ret$optim$gradient <- g(erg$par, Y, X, P, setM)
  if (control$method == "BFGS2") {
    ret$iter <- erg$info[4]
    ret$optim$call <- "ucminf"
  } else {
    ret$optim$call <- "optim"
    #ret$optim$hessian <- hessian(f, erg$par, Y=Y, X=X, W=P, SetM=setM)
    ret$iter <- erg$counts
  }
  if (x) ret$x <- cbind(rep.int(1, nrow(X)), X, P[, -1, drop = FALSE])

  ret$SEtype <- "standard"
  ret$setM <- setM

  class(ret) <- c("misclassMlogit", "mlogit")

  ret$prior.weights <- rep_len(1, nrow(X))
  ret$fitted.values <- predict(ret, X, P, type = "response")
  ret$control <- control

  ret$logLik <- erg$value

  ret$formula <- as.formula(paste0("y ~ ", paste(dimnames(X)[[2]], collapse = " + "), " + M"))

  return(ret)
}


#' Predict Method for \code{misclassMlogit} Fits
#'
#' Obtains predictions
#'
#' @usage ## S3 method for class 'misclassMlogit'
#'        \method{predict}{misclassMlogit}(object, X, P = NULL, type = c("link", "response"),
#'        na.action = na.pass, ...)
#' @param object a fitted object of class inheriting from 'misclassMlogit'.
#' @param X matrix of fixed covariates.
#' @param P a-posteriori probabilities for the true values of the misclassified variable. If provided,
#'        the conditional expectation on X,P is computed, otherwise a set of marginal predictions is
#'        provided, one for each alternative.
#' @param type the type of prediction required. The default is on the scale of the linear
#'        predictors; the alternative "response" is on the scale of the response variable.
#'        Thus the default predictions are log-odds relative to the reference alternative,
#'        and type = "response" gives the predicted probabilities.
#'
#'        The value of this argument can be abbreviated.
#' @param na.action function determining what should be done with missing values in \code{X}.
#'        The default is to predict NA.
#' @param \dots additional arguments (not used at the moment)
#' @seealso \code{\link{misclassMlogit}}
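#' @examples
#' ## Illustrative sketch: marginal and conditional predictions from a fitted
#' ## misclassMlogit object. The objects below (data, Pmodel, P, Yneu, est) are rebuilt
#' ## as in the example of misclassMlogit and are assumptions of this sketch, not part
#' ## of the fitted object.
#' \donttest{
#' data <- simulate_mlogit_dataset()
#' Pmodel <- glm(M2 ~ M + X, data = data, family = binomial("logit"))
#' P <- predict(Pmodel, newdata = data, type = "response")
#' P <- cbind(1 - P, P)
#' colnames(P) <- c("M0", "M1")
#' Yneu <- matrix(rep.int(0, nrow(data) * 3), ncol = 3)
#' for (i in 1:nrow(data)) Yneu[i, data$Y[i]] <- 1
#' est <- misclassMlogit(Y = Yneu,
#'                       X = as.matrix(data[, 2, drop = FALSE]),
#'                       setM = matrix(c(0, 1), nrow = 2),
#'                       P = P)
#'
#' ## without P: a list of response probabilities, one matrix per pattern in setM
#' head(predict(est, X = as.matrix(data[, 2, drop = FALSE]), type = "response")[[1]])
#'
#' ## with P: the conditional expectation given the a-posteriori probabilities
#' head(predict(est, X = as.matrix(data[, 2, drop = FALSE]), P = P, type = "response"))
#' }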
#' @export
predict.misclassMlogit <- function(object, X, P = NULL, type = c("link", "response"),
                                   na.action = na.pass, ...) {

  type <- match.arg(type)

  k <- length(object$coefficients) / (ncol(X) + 1 + ncol(object$setM))
  # one set of predictions per potential pattern of the latent covariate M
  ret <- vector("list", nrow(object$setM))
  for (m in 1:nrow(object$setM)) {
    eta <- get.eta(cbind(X, object$setM[m, ]), object$coefficients, k)
    if (type == "response") {
      sums <- rowSums(exp(eta)) + 1
      ret[[m]] <- cbind(exp(eta), rep.int(1, nrow(eta))) / sums
      dimnames(ret[[m]])[[2]] <- c(paste0("M", 1:k), "M0")
    } else {
      ret[[m]] <- eta
      dimnames(ret[[m]])[[2]] <- paste0("M", 1:k)
    }
  }
  # if a-posteriori probabilities P are supplied, average the per-pattern response
  # probabilities, weighted by P, to obtain the conditional expectation given X and P
  if (type == "response" && !is.null(P)) {
    tmp <- ret
    ret <- tmp[[1]] * P[, 1]
    for (m in 2:nrow(object$setM)) {
      ret <- ret + tmp[[m]] * P[, m]
    }
  }

  return(ret)
}


get.eta <- function(X, beta, k) {
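  # linear predictors for the k non-reference alternatives: coefficients are laid out in
  # blocks of length (ncol(X) + 1) per alternative, ordered as (intercept, slopes)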
  d <- ncol(X)
  ret <- do.call(cbind, lapply(1:k, function(l) {
    X %*% beta[(1:d) + (d + 1)*(l - 1) + 1] + beta[1 + (d + 1)*(l - 1)]
  }))
  return(ret)
}


#' Compute Bootstrapped Standard Errors for \code{misclassMlogit} Fits
#'
#' Obtain bootstrapped standard errors.
#'
#' @param ret a fitted object of class inheriting from 'misclassMlogit'.
#' @param Y a matrix of 0s and 1s, indicating the target class. This is the dependent variable.
#' @param X a matrix containing the independent variables.
#' @param Pmodel a fitted model (e.g. of class 'glm' or 'mlogit') used to implicitly produce
#'        variations of the predicted probabilities of the true values. (Usually conditional
#'        on the observed misclassified values and additional covariates.)
#' @param PX covariates matrix suitable for predicting probabilities from \code{Pmodel},
#'        usually including the mismeasured covariate.
#' @param boot.fraction fraction of the sample to be used for estimating the bootstrapped
#'        standard errors, for speedup.
#' @param repetitions number of bootstrap samples to be drawn.
#' @seealso \code{\link{misclassMlogit}}
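#' @examples
#' ## Sketch (not run): bootstrapped standard errors for a fit 'est', reusing the objects
#' ## Yneu, data and Pmodel constructed in the example of misclassMlogit; boot.fraction
#' ## and repetitions are reduced here purely for speed.
#' \dontrun{
#' bootSE <- boot.misclassMlogit(est,
#'                               Y = Yneu,
#'                               X = data.matrix(data[, 2, drop = FALSE]),
#'                               Pmodel = Pmodel,
#'                               PX = data,
#'                               boot.fraction = 0.5,
#'                               repetitions = 100)
#' summary(bootSE)
#' }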
#' @export
boot.misclassMlogit <- function(ret, Y, X, Pmodel, PX,
                             boot.fraction = 1, repetitions = 1000) {
  if (!("misclassMlogit" %in% class(ret))) stop("object is no misclassMlogit result")
  baseoutcome <- ret$baseoutcome
  if (boot.fraction > 1) stop("boot.fraction must be <= 1")
  if (boot.fraction <= 0) stop("boot.fraction must be > 0")

  if (is.matrix(Y)) {
    k <- ncol(Y) - 1
    if (k == 0) k <- 1
  } else {
    k <- 1
  }

  if (!is.matrix(X)) stop("X is no matrix")

  # the last category serves as the reference
  # therefore: reorder!
  if (!is.matrix(Y)) stop("Y must be a matrix")
  if (nrow(Y) != nrow(X)) stop("dimensions of Y and X do not match")
  if (is.null(baseoutcome)) baseoutcome <- which.max(colSums(Y))
  if (baseoutcome < ncol(Y)) Y <- cbind(Y[, -baseoutcome], Y[, baseoutcome])

  g <- NULL

  # define the log-likelihood
  f <- cfmlogitValidation
  if (ret$control$method %in% c("BFGS", "BFGS2", "CG", "nlm")) {
    g <- cgmlogitValidation
  }

  return(bootstrapping(ret, Y, X, Pmodel, PX, f, g, boot.fraction, repetitions))
}

#' @export
summary.misclassMlogit <- function(object, ...) {
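  # Wald-type coefficient table: estimates, standard errors from vcov(), z-statistics and
  # two-sided normal p-values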
  b <- coef(object, fixed = TRUE)
  std.err <- sqrt(diag(vcov(object)))
  z <- b / std.err
  p <- 2 * (1 - pnorm(abs(z)))
  CoefTable <- cbind(b, std.err, z, p)
  colnames(CoefTable) <- c("Estimate", "Std. Error", "t-value", "Pr(>|t|)")
  object$CoefTable <- CoefTable

  #object$lratio <- lratio(object)
  #object$mfR2 <- mfR2(object)

  #if (!is.null(object$rpar)) {
  #  rpar <- object$rpar
  #  object$summary.rpar <- t(sapply(rpar, summary))
  #}
  class(object) <- c("summary.misclassMlogit", "summary.mlogit", "mlogit")
  return(object)
}


#' @export
print.summary.misclassMlogit <- function(x, digits = max(3L, getOption("digits") - 3L), ...) {
  class(x) <- setdiff(class(x), "summary.misclassMlogit")
  #print(x = x, digits = digits, ...)

    cat("\nCall:\n")
    print(x$call)
    cat("\n")
    cat("Frequencies of alternatives:")
    print(prop.table(x$freq), digits = digits)
    cat("\n")
    print(x$est.stat)
    cat("\nCoefficients :\n")
    printCoefmat(x$CoefTable, digits = digits)
    cat("\n")
    cat(paste("Log-Likelihood: ", signif(x$logLik, digits), "\n",
              sep = ""))
    #if (has.intercept(x$formula)) {
    #  cat("McFadden R^2: ", signif(x$mfR2, digits), "\n")
    #  cat("Likelihood ratio test : ", names(x$lratio$statistic),
    #      " = ", signif(x$lratio$statistic, digits), " (p.value = ",
    #      format.pval(x$lratio$p.value, digits = digits), ")\n",
    #      sep = "")
    #}
    #if (!is.null(x$summary.rpar)) {
    #  cat("\nrandom coefficients\n")
    #  print(x$summary.rpar)
    #}
    #invisible(x)

  if (x$SEtype == "robust") {
    cat("Results show robust standard errors.\n")
  } else if (x$SEtype == "bootstrap") {
    cat("Results based on ")
    cat(nrow(x$boot.beta))
    cat(" bootstrap samples.\n\n")
  }
}


#' @export
vcov.misclassMlogit <- function(object, ...) {
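  # use a stored covariance matrix (e.g. from bootstrapping) if available; otherwise invert
  # the Hessian from the optimization via its Cholesky factorization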
  if (is.null(object$CoVar)) {
    ok <- try(covmat <- chol2inv(chol(object$optim$hessian)), silent = TRUE)
    if (inherits(ok, "try-error")) {
      print(ok[1])
      return(NULL)
    }
  } else {
    covmat <- object$CoVar
  }
  return(covmat)
}


#' Compute Marginal Effects for 'misclassMlogit' Fits
#'
#' Obtain marginal effects.
#'
#' @param w a fitted object of class inheriting from 'misclassMlogit'.
#' @param x.mean logical, if TRUE computes marginal effects at the mean, otherwise average
#'        marginal effects.
#' @param rev.dum logical, if TRUE computes differential effects for a switch from 0 to 1
#'        for dummy covariates.
#' @param outcome the outcome (alternative) for which the marginal effects should be computed.
#' @param baseoutcome base outcome, e.g. reference class of the model.
#' @param digits number of digits to be presented in output.
#' @param \dots further arguments passed to or from other functions.
#' @seealso \code{\link{misclassMlogit}}
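#' @examples
#' ## Sketch (not run): marginal effects at the mean for outcome 2. The fit 'est2' is a
#' ## hypothetical refit of the misclassMlogit example with x = TRUE, which is required so
#' ## that the covariate matrix is stored in the result.
#' \dontrun{
#' est2 <- misclassMlogit(Y = Yneu,
#'                        X = as.matrix(data[, 2, drop = FALSE]),
#'                        setM = matrix(c(0, 1), nrow = 2),
#'                        P = P,
#'                        x = TRUE)
#' mfx.misclassMlogit(est2, outcome = 2)
#' }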
#' @export
mfx.misclassMlogit <- function(w, x.mean = TRUE, rev.dum = TRUE, outcome = 2, baseoutcome = 1,
    digits = 3, ...) {
  if (outcome == baseoutcome) return(NULL)
  if (!("mlogit" %in% class(w))) {
    stop("Please provide an object from 'mlogit()'.\n")
  }
  if (is.null(dim(w$x))) {
    stop("Please specify 'x = TRUE' in misclassMlogit().\n")}

  x.bar <- as.matrix(colMeans(w$x))
  b.est <- as.matrix(coef(w))
  K <- nrow(x.bar)

  alt <- length(coef(w)) / K
  xb <- rep.int(0, alt)
  for (i in 1:alt) {
    xb[i] <- t(x.bar) %*% b.est[(0:(K - 1)) * alt + i,]
  }
  xb <- exp(xb)

  if (!x.mean) stop("only marginal effects at the mean (x.mean = TRUE) are implemented")

  if (outcome < baseoutcome) {
    f.xb <- xb[outcome] / ((1 + sum(xb)))^2
    me <- f.xb * coef(w)[(0:(K - 1)) * alt + outcome]
  } else if (outcome > baseoutcome) {
    f.xb <- xb[outcome - 1] / ((1 + sum(xb)))^2
    me <- f.xb * coef(w)[(0:(K - 1)) * alt + outcome - 1]
  }

  bx <- list()
  for (i in 1:alt) {
    bx[[i]] <- b.est[(0:(K - 1)) * alt + i,] %*% t(x.bar)
  }

  if (baseoutcome > outcome) {
    pg <- xb[outcome] / (1 + sum(xb))
    dr <- diag(1, K, K) + (1 - 2 * pg) * bx[[outcome]]
    va <- (pg - pg^2)^2 * dr %*% vcov(w)[(0:(K - 1)) * alt + outcome,
                                         (0:(K - 1)) * alt + outcome] %*% t(dr)
  } else if (baseoutcome < outcome) {
    pg <- xb[outcome - 1] / (1 + sum(xb))
    dr <- diag(1, K, K) + (1 - 2 * pg) * bx[[outcome - 1]]
    va <- (pg - pg^2)^2 * dr %*% vcov(w)[(0:(K - 1)) * alt + outcome - 1,
                                         (0:(K - 1)) * alt + outcome - 1] %*% t(dr)
  }

  se <- sqrt(diag(va))

  if (rev.dum) {
    for (i in 1:ncol(w$x)) {
      if (identical(sort(unique(w$x[,i])), c(0, 1)) || (i >= ncol(w$x) - dim(w$setM)[2]) ) {
        x.d1 <- x.bar; x.d1[i, 1] <- 1
        x.d0 <- x.bar; x.d0[i, 1] <- 0
        xb0 <- rep.int(0, alt)
        xb1 <- rep.int(0, alt)
        for (j in 1:alt) {
          xb0[j] <- t(x.d0) %*% b.est[(0:(K - 1)) * alt + j,]
          xb1[j] <- t(x.d1) %*% b.est[(0:(K - 1)) * alt + j,]
        }
        xb1 <- exp(xb1)
        xb0 <- exp(xb0)
        if (baseoutcome > outcome) {
          pr1 <- xb1[outcome] / (1 + sum(xb1))
          pr0 <- xb0[outcome] / (1 + sum(xb0))
          dr2 <- (pr1 - pr1^2) %*% t(x.d1) - (pr0 - pr0^2) %*% t(x.d0)
          va2 <- dr2 %*% vcov(w)[(0:(K - 1)) * alt + outcome,
                                 (0:(K - 1)) * alt + outcome] %*% t(dr2)
        } else if (baseoutcome < outcome) {
          pr1 <- xb1[outcome - 1] / (1 + sum(xb1))
          pr0 <- xb0[outcome - 1] / (1 + sum(xb0))
          dr2 <- (pr1 - pr1^2) %*% t(x.d1) - (pr0 - pr0^2) %*% t(x.d0)

          va2 <- dr2 %*% vcov(w)[(0:(K - 1)) * alt + outcome - 1,
                                 (0:(K - 1)) * alt + outcome - 1] %*% t(dr2)
        }
        me[i] <- pr1 - pr0

        se[i] <- sqrt(as.numeric(va2))
      }
    }
  }
  out <- data.frame(effect = me, error = se)
  out$t.value <- out$effect / out$error
  out$p.value <- 2 * (1 - pt(abs(out[, 3]), w$df.residual))
  out <- round(out, digits = digits)
  result <- list(f.xb = f.xb, w = w, out = out)
  class(result) <- c("mfx.misclassMlogit", "mfx")
  return(result)
}


#' Simulate a Data Set to Use With \code{misclassMlogit}
#'
#' Simulates a data set with
#' - one continuous covariate X drawn from a Gaussian distribution,
#' - a binary or three-valued categorical covariate, returned with its true values (M2) and as
#'   a misclassified version (M),
#' - a dependent variable Y drawn from a multinomial distribution depending on X and the true
#'   covariate.
#'
#' This can be used to demonstrate the abilities of misclassMlogit. For an example
#' see \code{misclassMlogit}.
#'
#' @param n number of observations
#' @param const intercept constants
#' @param alpha coefficients for X
#' @param beta coefficients for the first category of M
#' @param beta2 coefficients for the second category of M; if NULL, M is a binary covariate,
#'        otherwise a three-valued categorical one.
#' @seealso \code{\link{misclassMlogit}}
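#' @examples
#' ## Sketch: default simulation with a binary misclassified covariate; M holds the
#' ## misclassified observations and M2 the true values.
#' \donttest{
#' data <- simulate_mlogit_dataset()
#' str(data)
#' table(data$Y, data$M2)
#' }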
#' @export
simulate_mlogit_dataset <- function(n = 1000, const = c(0, 0), alpha = c(1, 2),
                                    beta = -2 * c(1, 2), beta2 = NULL) {
  set.seed(30)

  X <- rnorm(n, 0, 2)

  if (is.null(beta2)) {
    Mmisclass <- rbinom(n, 1, 0.5)

    M <- rep.int(0, n)
    for (i in 1:n) {
      temp <- exp(X[i] + Mmisclass[i])
      M[i] <- rbinom(1, 1, temp / (1 + temp))
    }

    Y1 <- (alpha[1] * X) + beta[1] * M + const[1]
    Y2 <- (alpha[2] * X) + beta[2] * M + const[2]
  } else {
    ## assumption: Mmisclass is the observed (misclassified) covariate in two-column dummy
    ## coding; it is drawn here as independent Bernoulli(0.5) dummies, analogous to the
    ## binary case above, since it is needed inside the loop below
    Mmisclass <- matrix(rbinom(2 * n, 1, 0.5), ncol = 2)
    M <- matrix(rep.int(0, 2 * n), ncol = 2)
    for (i in 1:n) {
      if (sum(Mmisclass[i,]) == 2) Mmisclass[i,] <- c(0, 0)
      temp <- exp(X[i])
      misclass <- rbinom(1, 1, temp / (1 + temp))
      if (rbinom(1, 1, 0.5)) {
        M[i, 1] <- ifelse(misclass, 1 - Mmisclass[i, 1], Mmisclass[i, 1])
      } else {
        M[i, 2] <- ifelse(misclass, 1 - Mmisclass[i, 2], Mmisclass[i, 2])
      }
      if (sum(M[i,]) == 2) M[i,] <- c(0, 0)
    }

    Y1 <- alpha[1] * X + beta[1] * M[, 1] + beta2[1] * M[, 2] + const[1]
    Y2 <- alpha[2] * X + beta[2] * M[, 1] + beta2[2] * M[, 2] + const[2]
  }
  Y1 <- exp(Y1)
  Y2 <- exp(Y2)
  tmp <- rep.int(1, n) + Y1 + Y2
  Y <- matrix(rep.int(0, 3 * n), ncol = 3)
  for (i in 1:n) {
    prob <- c(Y1[i] / tmp[i], Y2[i] / tmp[i], 1 / tmp[i])
    Y[i,] <- t(rmultinom(1, 1, prob))
  }

  return(data.frame(Y = as.factor(Y[, 1] + 2 * Y[, 2] + 3 * (1 - Y[, 1] - Y[, 2])),
                    X = X,
                    M = Mmisclass,
                    M2 = M))
}
