# inst/doc/xgb2sql.R

## ----setup, include = FALSE----------------------------------------------
# set the knitr chunk options `collapse` and `comment` for the chunks below
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- message=FALSE, warning=FALSE---------------------------------------
library(data.table)
library(xgboost)
library(xgb2sql)
# plain data.frame built from the diamonds data shipped with ggplot2
df <- data.frame(ggplot2::diamonds)
head(df)

## ------------------------------------------------------------------------
# run onehot2sql() on the raw data; the elements read from its result in
# this script are `meta`, `model.matrix` and `sql`
out <- onehot2sql(df)
print(out[["meta"]])
head(out[["model.matrix"]])

## ------------------------------------------------------------------------
# write the generated query text to the console as-is
cat(out[["sql"]])

## ------------------------------------------------------------------------
d2 <- data.table(ggplot2::diamonds)
# retype two columns and append a constant IDate column in one assignment:
#   cut     -> unordered factor
#   clarity -> character
#   tsdt    -> new IDate column
d2[, `:=`(
  cut = factor(cut, ordered = FALSE),
  clarity = as.character(clarity),
  tsdt = as.IDate("2017-01-05")
)]
# move the date of the first three rows back by one day
d2[1:3, tsdt := tsdt - 1]
# inject missing values: one in `clarity`, one in `depth`
d2[1, clarity := NA_character_]
d2[2, depth := NA_real_]
head(d2)
# encode the modified table; `out2` is reused by the next chunk
out2 <- onehot2sql(d2)
head(out2[["model.matrix"]])

## ------------------------------------------------------------------------
# start from the first five rows of d2
newdata <- d2[1:5]
# existing columns receive elements that are new relative to d2
newdata[5, clarity := "NEW"]
newdata[1, tsdt := as.IDate("2018-05-01")]
# one extra column that d2 does not have ...
newdata[, new_col := 1]
# ... and one of d2's columns removed
newdata[, cut := NULL]
head(newdata)
# encode newdata with the meta recorded when d2 was encoded
onehot2sql(newdata, meta = out2[["meta"]])[["model.matrix"]]

## ------------------------------------------------------------------------
# split the encoded matrix from the first example: every column except
# `price` is a predictor, `price` itself is the label
x <- out$model.matrix[, colnames(out$model.matrix) != "price"]
y <- out$model.matrix[, colnames(out$model.matrix) == "price"]
# fit a small booster (2 rounds, tree depth 2).
# Argument names are written out in full: `nrounds` is the formal argument
# (the former `nround` only worked through partial matching) and
# `max_depth` is the canonical spelling of the tree-depth parameter.
# NOTE(review): "reg:linear" is the legacy objective name; newer xgboost
# releases prefer "reg:squarederror" -- confirm the minimum supported
# xgboost version before switching.
bst <- xgboost(data = x,
               label = y,
               max_depth = 2,
               eta = .3,
               nrounds = 2,
               objective = "reg:linear")
# export the fitted booster through booster2sql() into `xgb.txt`
# (the file is read back by the following chunk)
booster2sql(bst, output_file_name = "xgb.txt")

## ---- warning=FALSE, message=FALSE---------------------------------------
# print the whole of xgb.txt: read as many characters as the file has bytes
cat(readChar("xgb.txt", file.size("xgb.txt")))

## ------------------------------------------------------------------------
# text dump of the same booster, for comparison with the file above
xgb.dump(bst)

## ------------------------------------------------------------------------
# second export, this time also passing the one-hot query from `out`
booster2sql(bst,
            output_file_name = "onehot-xgb.txt",
            input_onehot_query = out[["sql"]])

## ---- warning=FALSE, message=FALSE---------------------------------------
# print the whole of onehot-xgb.txt
cat(readChar("onehot-xgb.txt", file.size("onehot-xgb.txt")))

# Try the xgb2sql package in your browser

# Any scripts or data that you put into this service are public.

# xgb2sql documentation built on May 2, 2019, 1:09 p.m.