R/build.interactions.R

Defines functions build.interactions

Documented in build.interactions

#' Building Interactions
#'
#' Builds interactions found from logic forest fit
#'
#' @importFrom survival coxph
#'
#' @param fit Fitted logic regression tree object containing outcome, model type, and logic tree information.
#' @param test.data Any dataset that contains the variables to create the interactions
#' @param n_ints Max number of interactions to build
#' @param remove_negated Whether to build interactions that consist of only negated PIs (True/False)
#' @param req_frequency Minimum frequency required to build interaction (0-1)
#'
#' @details
#' This function creates the interactions in the data that are found via logic forest.
#'
#' @return A dataframe containing the the input dataframe and the interactions built from logic forest.
#'
#' @references
#' Wolf BJ, Hill EG, Slate EH. Logic Forest: an ensemble classifier for discovering logical combinations of binary markers.
#' \emph{Bioinformatics}. 2010;26(17):2183–2189. \doi{10.1093/bioinformatics/btq354}
#'
#' @author
#' Andrew Gothard \email{andrew.gothard@@osumc.edu}
#'
#' @seealso \code{\link{logforest}}



build.interactions <-
  function(fit, test.data, n_ints = NULL, remove_negated = FALSE, req_frequency = NULL){
    if (is.null(n_ints)){
      topPIs <- fit$PI.importance[order(-fit$PI.importance)]
    }
    else{
      topPIs <- fit$PI.importance[order(-fit$PI.importance)][1:n_ints]
    }
    PIs <- names(topPIs)
    PIs_new <- PIs
    PImat <- matrix(NA, nrow = nrow(test.data), ncol=0)

    for (i in 1:length(PIs))
    {
      nms<-strsplit(PIs[i], " & ")[[1]]
      nms2<-gsub("!", "", nms)
      ntloc<-grep("!", nms)
      pidt<-test.data[,which(colnames(test.data)%in%nms2)]
      tmp <- gsub("!", "n", PIs[i])
      tmp2 <- gsub("&", "_", tmp)
      PIs_new[i] <- gsub(" ", "", tmp2)
      if(length(ntloc)==0){
        # This check added to see if only main effect; pidt becomes vect w/o col indexes
        if(length(nms) == 1){
          if(i==1){
            PImat<- pidt
          }
          else {
            PImat<-cbind(PImat, pidt)
          }
        }
        else{
          if(i==1){
            PImat<-apply(pidt, 1, prod)
          }
          else {
            PImat<-cbind(PImat, apply(pidt, 1, prod))
          }
        }
      }
      if(length(ntloc)>0){
        # Same check on main effect as above
        if(length(nms) == 1){
          pidt<-1-pidt
          PImat<-cbind(PImat, pidt)
        }
        else{
          pidt[,ntloc]<-1-pidt[,ntloc]
          PImat<-cbind(PImat, apply(pidt, 1, prod))
        }
      }
    }
    if (remove_negated == TRUE){
      all_negated <- c()
      for (i in 1:length(PIs)){
        nms<-strsplit(PIs[i], " & ")[[1]]
        ntloc<-grep("!", nms)
        if (length(ntloc) == length(nms)){
          all_negated <- c(all_negated, i)
        }
      }

      PIs_new <- PIs_new[-all_negated]
      PImat <- PImat[,-all_negated]
    }


    colnames(PImat)<- PIs_new

    if (!is.null(req_frequency)){
      PImat <- PImat[, colSums(PImat) > req_frequency * nrow(test.data)]
    }

    test.data.out <- cbind(test.data, PImat)
    return(test.data.out)
  }

Try the LogicForest package in your browser

Any scripts or data that you put into this service are public.

LogicForest documentation built on Feb. 14, 2026, 1:08 a.m.