R/fitXGB.R

Defines functions predXGB fitXGB

fitXGB <- function(TD, par0, TDout = NULL){

  if(!is.null(par0$dF) |
     !is.null(par0$trans) |
     !is.null(par0$feats)){ TD <- copy(TD) }

  type = par0$type
  cat("Model:", type, "....  ")
  ##
  PARAM <- readRDS("./input/PARAM.RDS")[[type]]
  ##
  names(TD)[1] <- "y"
  if(!is.null(TDout))names(TDout)[1] <- "y"
  basescr <- TD[, mean(abs(y))]
  ##
  ind0 <- abs(TD$y) < PARAM[["outl"]]
  ## Drop feat
  if(!is.null(par0$dF)){ TD[, par0$dF := NULL] }
  ## Transformation
  if(!is.null(par0$trans)){
    cat(" -- trans:", par0$trans$type, "--")
    if(!is.null(par0$trans$var)) {boolv <- TRUE; strv <- par0$trans$var} # Pass variable (strv) to transformation function
    TD[, y := ftrans(y, t = par0$trans$type, var = `if`(boolv, get(strv), NULL))]
    if(!is.null(TDout)){ TD[, y := ftrans(y, t = par0$trans$type, var = `if`(boolv, get(strv), NULL))] }
  }
  ## Specific features
  if(!is.null(par0$feats)){
    TD <- TD[, c("y", par0$feats), with = FALSE]
    if(!is.null(TDout))TDout <- TDout[, c("y", par0$feats), with = FALSE]
  }
  ## Set parameters
  for( s in dupl_names(par0, PARAM) ){
    PARAM[[s]] <- par0[[s]]
  }
  cat("..", paste(unlist(PARAM), collapse = ";"), "..")
  ##
  NROUNDS <- par0$nrounds
  ##
  if(type == "XGBLOG"){
    TD[, y := ifelse(y > 0, 1, 0)]
    if(!is.null(TDout)){ TDout[, y := ifelse(y > 0, 1, 0)] }
  }
  ##
  if(is.null(par0$eval_metric)){
    eval_metric <- `if`(type == "XGBREG", "mae", "logloss")
  }else{
    eval_metric <- par0$eval_metric
  }
  ##
  cat(" -- ", eval_metric, " -- ", sep = "")
  xgb_param <- list(      objective           = `if`(type == "XGBREG", "reg:linear", "binary:logistic"),
                          booster             = "gbtree",
                          eval_metric         = eval_metric,
                          eta                 = PARAM[["eta"]],
                          max_depth           = PARAM[["max_depth"]],
                          subsample           = PARAM[["subsample"]],
                          colsample_bytree    = PARAM[["colsample_bytree"]],
                          min_child_weight    = PARAM[["min_child_weight"]],
                          gammma              = PARAM[["gamma"]],
                          alpha               = PARAM[["alpha"]],
                          maximize            = FALSE

  )
  ##
  cat("nfeats = ", ncol(TD) - 1, "... ")
  xgb_dtrain <- xgb.DMatrix(data.matrix(TD[ind0])[, -1],
                            label = TD[ind0, y], missing = NA)
  ##
  if(!is.null(TDout)) { xgb_dtest <- xgb.DMatrix(data.matrix(TDout)[, -1],
                                                  label = TDout[[1]], missing = NA)
                         if(is.null(NROUNDS)){ NROUNDS = 10000 }
                         cat("\n")
  }
  ##
  if(is.null(NROUNDS) & is.null(TDout)) {
                          cat(" !! Finding NROUNDS with cw !! (naiv mae = ", basescr, ")\n")
                          mdcv <- xgb.cv(data = xgb_dtrain, params = xgb_param, nfold = 6, nrounds = 5000, verbose =T,
                                         print_every_n = 100, early_stopping_rounds = 10, maximize = FALSE)
                          NROUNDS <- mdcv$best_iteration
                          cat(" NROUNDS =", NROUNDS,  "...\n")
  }
  ###
  ####
  fit <- xgb.train(       params                = xgb_param,
                          data                  = xgb_dtrain,
                          nrounds               = NROUNDS,
                          verbose               = 1,
                          print_every_n         = 100L,
                          early_stopping_rounds = `if`(!is.null(TDout), 10),
                          watchlist             = `if`(!is.null(TDout), list(train = xgb_dtrain, eval = xgb_dtest))
  )
  #gbl[[length(gbl) + 1]] <<- c("fit", names(TD)[-1])
  cat(" :: DONE\n")

  list(fit = fit,
       predf = predXGB,
       editFunc = function(p0, p1)ifelse((p0 > 0) == (p1 > 0.5), p0, 0),
       feature_imp = xgb.importance(feature_names = names(TD)[-1], model = fit),
       trans = par0$trans,
       feats = `if`(is.null(par0$feats), NULL, names(TD)[-1]),
       dF = par0$dF
       )
}


predXGB <- function(l, newX, ...) {
  cat("Predicting xgboost..........")
  if(!is.null(l$dF) |
     !is.null(l$trans) |
     !is.null(l$feats)){ newX <- copy(newX) }

  ## Drop feat
  if(!is.null(l$dF)){ newX[, l$dF := NULL] }
  if(!is.null(l$feats)){ newX <- newX[, l$feats, with = FALSE] }
  #gbl[[length(gbl) + 1]] <<- c("pred", names(newX))

  cat("  using", ncol(newX), "features \n")
  xgb_pred <- xgb.DMatrix(data.matrix(newX), missing = NA)

  pred0 <- predict(l$fit, xgb_pred)

  if(!is.null(l$trans)) {
    if(!is.null(l$trans$var)) {boolv <- TRUE; strv <- l$trans$var} # Pass variable (strv) to transformation function
    pred0 <- ftrans_inv(pred0, l$trans$type, var = `if`(boolv, newX[[strv]], NULL))
  }

  return(pred0)
}
steinarv/k1 documentation built on Oct. 19, 2017, 4:41 a.m.