# R/xgb_train_binary_tree_weighted.R

#' Train repeated, weighted xgboost binary tree models
#'
#' For each of `iter` repetitions the rows of `Xtrain` are split into three
#' disjoint sets: the training rows given by column `i` of `OUTPUT$IDX`
#' (as produced by `init_OUPUT()`), an equally sized early-stopping set
#' sampled from the remaining rows, and a validation set made of whatever is
#' left. A `gbtree` booster with the `binary:logistic` objective is fitted on
#' the training rows, early-stopped on the stopping set, and then used to
#' predict the validation rows and the whole of `Xtest`.
#'
#' @param Xtrain Training feature matrix; row subsets are passed to
#'   `xgb.DMatrix()`.
#' @param Xtest Test feature matrix; passed to `xgb.DMatrix()` once.
#' @param y Labels for the rows of `Xtrain`. `mean(y)` over all rows is used
#'   as `base_score`.
#' @param iter Number of models to build.
#' @param pct_train Forwarded to `init_OUPUT()`; presumably the fraction of
#'   rows used for training in each repetition (confirm against
#'   `init_OUPUT()`).
#' @param w Observation weights indexed like `y`. They are applied to the
#'   training set only; the stopping and validation sets are unweighted.
#' @return The object created by `init_OUPUT()` with, for every repetition
#'   `i`, `PCV[idx_valid, i]` set to the validation predictions, `PT[, i]` set
#'   to the test predictions and `DATA[[i]]` set to a list holding the
#'   parameters, the best iteration and the best early-stopping score.
xgb_train_binary_tree_weighted <- function(Xtrain, Xtest, y, iter, pct_train, w) {

  n_train <- nrow(Xtrain)
  n_test <- nrow(Xtest)

  OUTPUT <- init_OUPUT(n_train, n_test, iter, pct_train)

  # Value that xgboost should treat as missing in the feature matrices.
  # (Not called `missing`, to avoid shadowing base::missing().)
  missing_value <- NA
  dtest <- xgb.DMatrix(Xtest, missing = missing_value)

  all_rows <- seq_len(n_train)

  # seq_len() instead of 1:iter so that iter == 0 runs zero repetitions.
  for (i in seq_len(iter)) {
    cat('building model',i, '\n\n')
    idx <- OUTPUT$IDX[, i]

    # Rows not used for training; the stopping set is drawn from these.
    holdout <- all_rows[-idx]

    # sample(x, size) would treat a length-1 `x` as 1:x and could return rows
    # outside the hold-out pool. Indexing through sample.int() is always
    # correct and yields the same draws as sample() for longer pools.
    idx_stop <- holdout[sample.int(length(holdout), length(idx))]
    idx_valid <- all_rows[-c(idx, idx_stop)]

    # drop = FALSE keeps a matrix even for a single row or a single column.
    dtrain <- xgb.DMatrix(
      Xtrain[idx, , drop = FALSE],
      weight  = w[idx],
      missing = missing_value,
      label   = y[idx]
    )
    dstop <- xgb.DMatrix(
      Xtrain[idx_stop, , drop = FALSE],
      missing = missing_value,
      label   = y[idx_stop]
    )
    dvalid <- xgb.DMatrix(
      Xtrain[idx_valid, , drop = FALSE],
      missing = missing_value,
      label   = y[idx_valid]
    )

    # Regularisation and subsampling settings are drawn at random for every
    # repetition. The order of the runif() calls is kept as-is so that seeded
    # runs stay reproducible.
    param <- list(
      booster          = 'gbtree',
      objective        = 'binary:logistic',
      eval_metric      = 'logloss',
      max.depth        = 3,
      eta              = .001,
      gamma            = runif(1, 4, 10),
      min_child_weight = runif(1, 4, 10),
      subsample        = runif(1, .4, .6),
      colsample_bytree = runif(1, .3, .7),
      nrounds          = 1000,
      lambda           = runif(1, 2, 5),  # L2 penalty on weights (xgboost default 1)
      alpha            = 0,               # L1 penalty on weights
      base_score       = mean(y),
      nthread          = 12
    )

    # `params` is spelled out in full rather than relying on partial matching.
    model <- xgb.train(
      params           = param,
      data             = dtrain,
      nrounds          = param$nrounds,
      watchlist        = list(stop_err = dstop),
      early.stop.round = 20,
      print.every.n    = 25,
      maximize         = FALSE,
      verbose          = 1
    )

    # Predict with the number of trees that was best on the stopping set.
    OUTPUT$PCV[idx_valid, i] <- predict(model, newdata = dvalid, ntreelimit = model$bestInd)
    OUTPUT$PT[, i] <- predict(model, newdata = dtest, ntreelimit = model$bestInd)

    OUTPUT$DATA[[i]] <- list(
      param    = param,
      rounds   = model$bestInd,
      cv_score = model$bestScore
    )
  }

  return(OUTPUT)
}
# tim-scharf/binary documentation built on May 31, 2019, 1:47 p.m.