UdBRpackage: Miscellaneous R Functions

Documented in rmvarlogistic

#' Funció per fer LRT de reducció de models logístics
#' 
#' @param model a logistic model (an object of class "glm", usually, a result of a call to glm).
#' @param max.effect.change a threshold to assess changes is betas when a variable is removed (see Hosmer-Lemeshow). Default value is 0.2 (more than 20\% of change in betas is suspicious/confusion).
#' @return A list with complete results: rmvar, proposed variable to be removed from the model (based on LRT); res, a table with complete results (all LRT performed); mod, a table indicating, for each removed variable, variables changing their effect more than max.effect.change; change, smart version of mod.
#' @export
rmvarlogistic <- function(model, max.effect.change = 0.2){
  require(lmtest)

  data <- model$data
  datam <- model$model
  covar <- names(datam)[-1]
  l <- length(covar)
  l.param <- length(names(coef(model))[-1])
  ps <- rep(NA, l)
  # keep.by.confusion <- matrix("-", l, l.param)
  keep.by.confusion <- matrix(0, l, l.param)
  colnames(keep.by.confusion) <- names(coef(model))[-1]
  rownames(keep.by.confusion) <- names(datam)[-1]

  exclude <- c()
  modified <- data.frame(var.candidate = character(0), var.modified = character(0))

  tab <- attributes(terms(formula(model)))$factors

  for(i in 1:l){
    cv <- colnames(tab)[tab[rownames(tab) == covar[i], ] == 0]
    # kk <- covar[substring(covar, first = 1,last = 12) == "interaction("]
    # kk2 <- substring(kk, first = 13)
    # kk3 <- substring(kk2, first = 1, last = nchar(kk2)-1)
    # kk4 <- trim(unlist(strsplit(kk3, ",")))
    covar2<-gdata::trim(
      unlist(
        strsplit(
          substring(
            substring(
              covar[substring(covar,first=1,last=12)=="interaction("],
              first=13),
            first=1,last=nchar(substring(covar[substring(covar,first=1,last=12)=="interaction("],
                                         first=13))-1), ","
          )
        )
      )

    ifelse(is.null(covar2), covar2 <- covar, covar2 <- c(covar[-which(covar %in% covar[substring(covar, first=1, last=12) == "interaction("])], covar2))
    # covar2 <- c(covar[-which(covar %in% covar[substring(covar, first = 1, last = 12) == "interaction("])], covar2)

    ifelse(length(cv)==0, 
           modA <- update(model, as.formula(paste(".~1")), data = data[complete.cases(data[, colnames(data) %in% covar2]), ], family = binomial),
           modA <- update(model, as.formula(paste(".~", paste(cv,collapse = "+"))), data = data[complete.cases(data[, colnames(data) %in% covar2]), ], family = binomial) )
    # modA <- update(model, as.formula(paste(".~.-", covar[i])), data = data[complete.cases(data[, colnames(data) %in% covar2]), ], family = binomial)
    ps[i] <- lrtest(modA,model)$'Pr(>Chisq)'[2]

    if(ps[i] > 0.05){
      cat("\n*", covar[i], " is a candidate to remove from the initial model (p-value = ", roundp(ps[i]), ")", sep="")

      for(j in names(coef(modA))[-1]){
        if(abs(coef(modA)[j]-coef(model)[j])/coef(model)[j] > max.effect.change & coef(summary(model))[j, 4] < 0.05){
          keep.by.confusion[i, j] <- 1
          cat("\nRemoving ", covar[i], " changes the effect of ", j, " more than ", 100 * max.effect.change, "%. Keep it!", sep="")
          modified <- rbind(modified, data.frame(var.candidate = covar[i], var.modified = covar[which(pmatch(covar, j) == 1)]))
        }
      }
    }
    if(sum(keep.by.confusion[i, ]) > 0) exclude <- c(exclude, covar[i])
  }

  exclude2 <- unique(c(exclude, paste(modified$var.modified)))

  ifelse(is.null(exclude2) | length(exclude2)==0, ps.exc <- ps, ps.exc <- ps[-which(is.element(covar, exclude2))])
  ifelse(is.null(exclude2) | length(exclude2)==0, covar.exc <- covar, covar.exc <- covar[-which(is.element(covar, exclude2))])

  if(max(ps.exc) < 0.05){
    cat("\n\nYou have your final model. Congrats!\n\n\n")
    if(any(!is.element(exclude2, exclude))) for(k in 1:nrow(modified)) cat("Try to enter an interaction term between", paste(modified$var.candidate[k]), "and", paste(modified$var.modified[k]), "\n\n")
  }
  if(max(ps.exc) >= 0.05) cat("\n\nRemove: ", covar.exc[which.max(ps.exc)], "\n\n\n")

  psr <- lapply(ps, roundp)
  res <- as.data.frame(cbind(variables = unlist(covar), p.LRT = unlist(psr), p = unlist(ps)))
  res <- res[order(as.numeric(paste(res[, 3]))), 1:2]

  rows <- c(); for(i in 1:nrow(keep.by.confusion)) if(sum(keep.by.confusion[i, ]) == 0) rows <- c(rows, i)
  cols <- c(); for(i in 1:ncol(keep.by.confusion)) if(sum(keep.by.confusion[, i]) == 0) cols <- c(cols, i)
  kbc <- data.frame(keep.by.confusion[-rows, -cols])
  if(nrow(kbc) != length(rownames(keep.by.confusion)[-rows])) kbc <- t(kbc)
  rownames(kbc) <- rownames(keep.by.confusion)[-rows]
  colnames(kbc) <- colnames(keep.by.confusion)[-cols]

  return(list(rmvar = ifelse(max(ps.exc) < 0.05, "You don't need to remove more variables. Try to add some one!", covar.exc[which.max(ps.exc)]), res = res, mod = modified, change = kbc))
}