# R/prepeare_features.R

# Defines functions: prepeare_special_features, prepeare_features

# Initial data-cleaning step, run first on the raw data ###################################
# Add derived feature columns to a data.table, modifying it by reference.
#
# Two families of features are created, driven by the name vectors stored in
# `pkg_env`:
#   * pkg_env$prop_feats: names of the form "<num>_DivBy_<den>". The new column
#     holds column <num> divided by column <den>.
#   * pkg_env$cat_feats: names of the form "<col>_IS_<value>". The new column
#     is a numeric dummy: 1 where column <col> equals <value>, 0 where it does
#     not, and NA where the comparison itself is NA.
# Only the first two pieces of a name are used when the separator occurs more
# than once.
#
# Args:
#   X: a data.table. Columns are added in place with `:=`, so the caller's
#      object is changed even if the return value is ignored.
#
# Returns:
#   X, invisibly. Progress is reported on the console with cat().
#
# Raises:
#   An error when a feature name does not contain both sides of its separator
#   or refers to a column that is not present in X.
prepeare_features <- function(X) {
  nc0 <- ncol(X)
  cat("Starting with address:", address(X), "and", nc0, "columns...\n")
  cat("prepearing features.......\n")

  # Split "<left><sep><right>" into its first two pieces, failing with a
  # readable message instead of an opaque get() error on malformed names.
  parse_feature <- function(feat, sep) {
    parts <- strsplit(feat, sep, fixed = TRUE)[[1L]]
    if (length(parts) < 2L || !nzchar(parts[1L])) {
      stop(
        "Feature name '", feat, "' is not of the form <left>", sep, "<right>",
        call. = FALSE
      )
    }
    parts[1:2]
  }

  # Check source columns right before use, so that features built on columns
  # created earlier in this call are still accepted.
  require_columns <- function(cols, feat) {
    absent <- setdiff(cols, names(X))
    if (length(absent) > 0L) {
      stop(
        "Cannot build feature '", feat, "': column(s) not found in X: ",
        paste(absent, collapse = ", "),
        call. = FALSE
      )
    }
  }

  # Proportional features: numerator column divided by denominator column.
  for (feat in pkg_env$prop_feats) {
    parts <- parse_feature(feat, "_DivBy_")
    require_columns(parts, feat)
    num_col <- parts[1L]
    den_col <- parts[2L]
    X[, (feat) := get(num_col) / get(den_col)]
  }

  # Categorical features: one-hot (dummy) encoding of a single level.
  for (feat in pkg_env$cat_feats) {
    parts <- parse_feature(feat, "_IS_")
    require_columns(parts[1L], feat)
    src_col <- parts[1L]
    dummy_level <- parts[2L]
    # as.numeric() keeps the column numeric even when every comparison is NA.
    X[, (feat) := as.numeric(get(src_col) == dummy_level)]
  }

  cat(
    ncol(X) - nc0, "features created. Address of data.table is",
    address(X), "\n"
  )
  invisible(X)
}



# Add calendar features derived from the transaction date, by reference.
#
# The function sets (or overwrites) `transactiondate`, then adds:
#   * m1 .. m12: numeric month dummies, 1 in the column whose number matches
#     the month of `transactiondate` and 0 in the others (NA where the date is
#     NA). Dummies are used instead of an integer month because a separate
#     trend column is added.
#   * trend: `transactiondate` minus 2016-01-01, converted to numeric.
#
# Args:
#   X:     a data.table, modified in place. When `dDate` is NULL it must
#          already contain a `transactiondate` column that as.Date() accepts.
#   dDate: optional date value(s) to store in `transactiondate`. Values that
#          do not already inherit from "Date" are coerced with as.Date().
#
# Returns:
#   X, invisibly.
#
# Raises:
#   An error when `dDate` is NULL and X has no `transactiondate` column.
prepeare_special_features <- function(X, dDate = NULL) {
  if (is.null(dDate)) {
    if (!"transactiondate" %in% names(X)) {
      stop(
        "X has no 'transactiondate' column and no dDate was supplied",
        call. = FALSE
      )
    }
    X[, transactiondate := as.Date(transactiondate)]
  } else {
    # Mirror the NULL branch: the trend arithmetic below needs a Date, so
    # coerce anything that is not one already (e.g. character input).
    if (!inherits(dDate, "Date")) {
      dDate <- as.Date(dDate)
    }
    X[, transactiondate := dDate]
  }

  # Keep the month in a local vector rather than a scratch column, so an
  # existing column of the caller's table is never overwritten and dropped.
  month_of_row <- month(X[["transactiondate"]])
  for (m in 1:12) {
    set(X, j = paste0("m", m), value = as.numeric(month_of_row == m))
  }

  X[, trend := as.numeric(transactiondate - as.Date("2016-01-01"))]
  invisible(X)
}







########## data.table in function is not copied when using <- or set ###
# dt1 <- data.table(a = 1:3, b = 10:12)
#
# f1 <- function(dt0){
#   dt0 <- dt0[1, a := 100]
#   dt0 <- dt0[, balle := a + b]
#   dt0
# }
#
# print(dt1)
# dtX <- f1(dt1)
# print(dtX)
# print(dt1)
#
# f1 <- function(dt0){
#   dt0$a[1] <- 200
#   dt0
# }
#
# print(dt1)
# dtX <- f1(dt1)
# print(dtX)
# print(dt1)
#
# f1 <- function(dt0){
#   set(dt0, 1L, "a", 300)
#   set(dt0, 1L, 2L, 400)
#   dt0
# }
#
# print(dt1)
# dtX <- f1(dt1)
# print(dtX)
# print(dt1)
# steinarv/k1 documentation built on Oct. 19, 2017, 4:41 a.m.