UdBRpackage: Miscellaneous R Functions

Documented in addvarlogistic

#' Funció per fer LRT d'amplició de models logístics
#' 
#' @param model a logistic model (an object of class "glm", usually, a result of a call to glm).
#' @param max.effect.change a threshold to assess changes is betas when a variable is added (see Hosmer-Lemeshow). Default value is 0.2 (more than 20\% of change in betas is suspicious/confusion).
#' @param candidates a vector with the names of the variables proposed to be added (shold be the same than in the original dataset).
#' @param formulaH0 an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model.
#' @return A list with complete results: addvar, proposed variable to be added into the model (based on LRT); res, a table with complete results (all LRT performed); mod, a table indicating, for each added variable, variables changing their effect more than max.effect.change; change, smart version of mod.
#' @export
addvarlogistic <- function(model, max.effect.change = 0.2, candidates = NULL, formulaH0 = NULL){
  # require(lmtest)

  data <- model$data

  if(any(!is.element(candidates, names(data)))){
    cat("At least one variable is not found in the dataset. We have removed it from the candidates.")
    candidates <- candidates[-which(!is.element(candidates, names(data)))]
  }

  if(is.null(candidates) | length(candidates) == 0) stop("What variables are candidates to enter into the model?")

  covar <- names(model$model)[-1]
  l <- length(candidates)
  l.param <- length(names(coef(model)))
  ps <- rep(NA, l)

  enter.by.confusion <- matrix(0, l, l.param)
  colnames(enter.by.confusion) <- names(coef(model))
  rownames(enter.by.confusion) <- candidates

  additional <- c()
  modified <- data.frame(var.candidate = character(0), var.modified = character(0))

  for(i in 1:l){
    #covar2<-c(covar,candidates[i])
    covar2 <- candidates[i]
    modH0 <- glm(formulaH0, data = data[complete.cases(data[, colnames(data) %in% covar2]), ], family = binomial)
    modHA <- glm(as.formula(paste(c(formulaH0, covar2), collapse="+")), data = data[complete.cases(data[, colnames(data) %in% covar2]), ], family = binomial)
    ps[i] <- lmtest::lrtest(modH0, modHA)$'Pr(>Chisq)'[2]

    if(ps[i] < 0.05) cat("\n*", candidates[i], " is a candidate to enter in the model (p-value = ", roundp(ps[i]), ")", sep="")

    if(!all(names(coef(modH0)) %in% c("(Intercept)"))){
      for(j in names(coef(modH0))[-1]){
        if(abs(coef(modHA)[j]-coef(modH0)[j])/coef(modH0)[j] > max.effect.change & coef(summary(modH0))[j,4] < 0.05){
          enter.by.confusion[i, j] <- 1
          cat("\nAdding ", candidates[i], " changes the effect of ", j," more than ", 100 * max.effect.change, "%. Considerer to add it!", sep="")
          covar3 <- c(names(coef(modH0))[-1], covar2)
          modified <- rbind(modified, data.frame(var.candidate = candidates[i], var.modified = covar3[which(pmatch(covar3, j) == 1)]))
        }
      }
      if(sum(enter.by.confusion[i, ]) > 0) additional <- c(additional, candidates[i])
    }
  }
  # additional2<-unique(c(additional,paste(modified$var.modified)))
  #
  # ifelse(is.null(additional2),ps.add<-ps,ps.add<-ps[-which(is.element(covar,additional2))])
  # ifelse(is.null(additional2),covar.add<-covar,covar.add<-covar[-which(is.element(covar,additional2))])

  if(min(ps) < 0.05) cat("\n\nAdd: ", candidates[which.min(ps)], "\n\n\n")

  if(min(ps) >= 0.05 & !is.null(additional)){
    kk <- c()
    for(k in 1:nrow(modified)){
      cat("\n\nConsider to add: ", paste(modified$var.candidate[k]), ". You can try to enter an interaction term between", paste(modified$var.candidate[k]), "and", paste(modified$var.modified[k]), "\n\n")
      kk <- c(kk,k)
    }
  }

  if(min(ps) >= 0.05 & is.null(additional)) cat("\n\nYou have your final model. Congrats!\n\n\n")

  psr <- lapply(ps, roundp)
  res <- as.data.frame(cbind(candidates = unlist(candidates), p.LRT = unlist(psr), p = unlist(ps)))
  res <- res[order(as.numeric(paste(res[,3]))), 1:2]

  rows <- c(); for(i in 1:nrow(enter.by.confusion)) if(sum(enter.by.confusion[i, ]) == 0) rows <- c(rows, i)
  cols <- c(); for(i in 1:ncol(enter.by.confusion)) if(sum(enter.by.confusion[, i]) == 0) cols <- c(cols, i)
  kbc <- data.frame(enter.by.confusion[-rows, -cols])
  if(nrow(kbc) != length(rownames(enter.by.confusion)[-rows])) kbc <- t(kbc)
  rownames(kbc) <- rownames(enter.by.confusion)[-rows]
  colnames(kbc) <- colnames(enter.by.confusion)[-cols]

  return(list(addvar = ifelse(min(ps) > 0.05 & is.null(additional), "You don't need to add more variables. Try to remove some one!",
                            ifelse(min(ps) > 0.05 & !is.null(additional), candidates[which(candidates %in% modified$var.candidate[kk])][which.min(ps[which(candidates %in% modified$var.candidate[kk])])],
                                   candidates[which.min(ps)])),
              res = res,
              mod = modified,
              change = kbc))
}