cw_models/xgb_stack.R

options(scipen = 10, digits = 5)
dtrain <- fread("zcat ./input/train.gz")

#define_cat_feats(dtrain, saveEnv = FALSE, minInst = 500)

ord_feats <- names(dtrain)[ !(names(dtrain) %in% pkg_env$cat_vars) ][ -(1:3) ]

#########
prepeare_features(dtrain)
prepeare_special_features(dtrain)


dtrain  <- dtrain[, IDW := readRDS("./input/IDW.rds")]
n <- nrow(dtrain)

feats <- names(dtrain)[!( names(dtrain) %in% c("parcelid", "logerror", "transactiondate", pkg_env$cat_vars) )]


xgb_stack <- list(
  xgbrmse     = list( fitfunc = fitXGB,
                      par0 = list(eval_metric = "rmse",
                                      type = "XGBREG",
                                      colsample_bytree = 0.75,
                                      eta = 0.006,
                                      min_child_weight = 50,
                                  nrounds = 1000)
              ),
  xgbregmae   = list( fitfunc = fitXGB,
                      par0 = list(eval_metric = "mae",
                                      type = "XGBREG",
                                      subsample = .5,
                                      eta = 0.0065,
                                      min_child_weight = 50,
                                      nrounds = 1000)
              ),
  xgbregmae2   = list( fitfunc = fitXGB,
                      par0 = list(eval_metric = "mae",
                                  type = "XGBREG",
                                  subsample = .5,
                                  eta = 0.0065,
                                  min_child_weight = 50,
                                  nrounds = 1000,
                                  dF = "IDW")
              ),
  xgbregmae3   = list( fitfunc = fitXGB,
                       par0 = list(eval_metric = "mae",
                                   type = "XGBREG",
                                   subsample = .5,
                                   eta = 0.0065,
                                   min_child_weight = 50,
                                   nrounds = 1000,
                                   dF = c("trend", paste0("m", 1:12)))
            )
)


models_0 <- xgb_stack


# Make features for stacking model
mF_stac <- c(
  lapply(names(xgb_stack), function(s)list(feat = paste0(s, "_SQ"), func = function(a, b)a^(2), v1 = s, v2 = NULL))
)


models_0[["stack"]] <- list(fitfunc = fitTwoStage,
                          par0 = list(
                            stage1 = xgb_stack,
                            stage2 = list( fitfunc = fitLASSO,
                                           par0 = list( #mF = mF_stac,
                                                        feats = names(xgb_stack),
                                                        printMAE = TRUE)
                                          ),
                            firstSmp = 1/3
                           )
)


models_1 <- NULL

set.seed(2)
iin <- sample(1:n, floor(2 * n / 3))
iin <- sample(1:n, floor(n * 0.5))

# for(i in seq_along(models_0$stack$par0$stage1))models_0$stack$par0$stage1[[i]]$par0$nrounds <- NULL
# (l_cw <- doIt(dtrain[iin], sX = feats, sY = "logerror", seed = 12,
#              models0 = list(test = models_0$stack), models1 = models_1))

l_cw <- doIt(dtrain[iin], sX = feats, sY = "logerror", seed = 12,
             models0 = models_0, models1 = models_1)


l_cw


# rlm        s2 xgbregmae     lasso
# 0.007132  0.014290  0.017073  0.011390
#
# > l_cw$basemodels$mp.prc
# [1] 0.016015


######################################### save model??? ######################################################################
# saveRDS(list(xgb_stack = models_0$stack), "./models/xgb_stack_list.RDS")
steinarv/k1 documentation built on Oct. 19, 2017, 4:41 a.m.