R/find_interactions_untested.R

Defines functions interaction_search

Documented in interaction_search

#' interaction_search
#'
#' This function runs the XGBfi utility on a trained xgboost model to identify interactions to be included in future model interations.
#' @param mdl         A trained xgboost model of class xgb.Booster
#' @param features    A list of the feature names used in the model (ordering must be consistent with the dmatrix used to train the model)
#' @param XGBfiBin    The file path to the bin of the XGBfi utility
#' @param filterExp   A character expression that tells the process how to filter the XGBfi results. e.g. Gain > 2000
#' @param XGBfiParams A named list that contains the parameters to be passed to the cli of the XGBfi utility.
#' @param intern      Passed to the system command (internalise console output or not)
#' @keywords interaction search
#' @export
#' @importFrom readxl excel_sheets read_excel
#' @importFrom dplyr filter select
#' @examples
#' 

interaction_search <- function(mdl,features,XGBfiBin='~/Kaggle/xgbfi/bin/',filterExp=NULL,XGBfiParams = list(d=3,g=-1,t=100,k=100,h=10),intern=FALSE) {
  
  ## Create the fmap
  featureList <- features
  featureVector <- c() 
  for (i in 1:length(featureList)) { 
    featureVector[i] <- paste(i-1, featureList[i], "q", sep="\t") 
  }
  
  setwd(XGBfiBin) ##TODO find out what error stops using non work dir
  write.table(featureVector, "fmap.txt", row.names=FALSE, quote = FALSE, col.names = FALSE)
  
  ## Save the XGBoost model
  if(class(mdl)!="xgb.Booster") stop('ERROR: Aww Snap - mdl must be class xgb.Booster')
  silent = xgb.dump(model = mdl,fmap = "fmap.txt",with_stats = TRUE,fname = "xgb.dump")
  
  ## Build up the XGBfi Command
  command <- paste0("XgbFeatureInteractions.exe",
                    ' -d ', XGBfiParams$d,
                    ' -g ', XGBfiParams$g,
                    ' -t ', XGBfiParams$t,
                    ' -k ', XGBfiParams$k,
                    ' -h ', XGBfiParams$h)
  
  ## Build a batch file & run the command
  cat(command,file = './runXFGBFI.cmd')
  system(command = './runXFGBFI.cmd',intern = intern)
  
  ## Find out what sheets we've got in the output
  sheet_names = readxl::excel_sheets(path = './XgbFeatureInteractions.xlsx')
  sheet_names = setdiff(sheet_names[grepl(pattern = 'Interaction Depth',x = sheet_names)],'Interaction Depth 0')
  
  ## Preparing to thug
  goodInts = lapply(X = sheet_names,function(s){
    rxl = as.data.frame(readxl::read_excel(path = './XgbFeatureInteractions.xlsx',sheet = s,col_names = TRUE)) 
    names(rxl) = gsub(pattern = ' ',replacement = '_',x = names(rxl))
    #rxl = rxl %>% dplyr::filter(eval(parse(text = filterExp))) %>% dplyr::select(Interaction) %>% unlist() %>% as.character() 
    return(rxl)
  })
  
  return(goodInts)
}
#interactionNames = interaction_search(mdl = tmp$finalModel,features = features,intern = TRUE,filterExp = "Gain_Rank < 11")
gm209/gmtools documentation built on May 22, 2019, 2:39 p.m.