R/fct_utils.R

# Global parameters -----------------------------------------------------------
.onLoad <- function(libname, pkgname) {

  # Set new options
  op <- options()
  op.mlprepr <- list(
    mlprepr.debug_prints = F,
    mlprepr.debug_logs = F,
    mlprepr.default_Kappa_max = 0.5,
    mlprepr.default_RMSE_min  = 0.2,
    mlprepr.default_em_max = 0.2,
    mlprepr.default_kl_max = 0.01,
    mlprepr.logfile = data.table::data.table(
      time = character(0),
      type = character(0),
      ctxt = character(0),
      mesg = character(0))
  )
  toset <- !(names(op.mlprepr) %in% names(op))
  if(any(toset)) options(op.mlprepr[toset])

  invisible()
}

# Utils functions -------------------------------------------------------------

print_log <- function() getOption("mlprepr.logfile")

log_set <- function(newlog) {
  options(list(mlprepr.logfile = newlog))
  invisible()
}

my_log <- function(ctxt, mesg, type = "message", time = NA,
                   silent = getOption("mlprepr.debug_prints")) {
  if(!silent) {
    if(is.na(time)) time <- Sys.time()
    new_row <- data.table(
      type = as.character(time),
      type = as.character(type),
      ctxt = as.character(ctxt),
      mesg = as.character(mesg)
    )
    old_log <- getOption("mlprepr.logfile")
    new_log <- rbindlist(list(old_log, new_row))
    log_set(new_log)
  }
  invisible()
}

my_log_reset <- function() {
  log_set(data.table(
    time = character(0),
    type = character(0),
    ctxt = character(0),
    mesg = character(0)))
}

my_print <- function(ctxt, mesg, silent = getOption("mlprepr.debug_prints")) {
  my_time <- Sys.time()
  my_log(ctxt, mesg, "message", my_time)
  my_mesg <- paste(my_time, ctxt, ":", mesg, "...")
  if(!silent) {
    message(my_mesg)
  }
}

# Functions to ease data.tables operations ------------------------------------

# dt <- tmp(); dt2 <- tmp(); setnames(dt2, LETTERS[1:4])
# dt2[, V2 := 1:4]
# cbind_by_reference(dt, dt2)
cbind_by_reference <- function(dt, dt2, allow.substitute = T) {
  if(is.null(dt2)) {
    my_log(ctxt = "cbind_by_reference", mesg = "dt2 is null")
    warning(paste("cbind_by_reference : data.table dt2 is null, ignoring this one."))
    return(NULL)
  }
  if(!(is.data.table(dt) & is.data.table(dt2))) {
    my_log(ctxt = "cbind_by_reference", mesg = "dt1 and/or dt2 are/is not data.table(s)")
    warning(paste("cbind_by_reference : expected data.table, ignoring this one."))
    return(NULL)
  }
  is_common_name <- names(dt2) %in% names(dt)
  if(any(is_common_name)) {
    if(allow.substitute) {
      my_print(ctxt = "cbind_by_reference",
               mesg = paste("replacing",
                            paste(names(dt2)[is_common_name],
                                  collapse = ", ")))
    } else {
      stop("Trying to insert existing column")
    }
  }
  new_cols_names <- names(dt2)

  # This syntax looks good but triggers error : Invalid .internal.selfref
  # detected and fixed by taking a (shallow) copy of the data.table
  # dt[, (new_cols_names) := dt2]

  # Pre-allocate enough memory for the table dt
  # This is only required if ncol(dt) + ncol(dt2) > nb col already available
  # By default, data.table seems to allocate 1026 columns, so that should
  # rarely be useful.
  # WARNING: truelength is an experimental function and  might change syntax
  total_nb_cols <- length(dt) + length(new_cols_names)
  if(total_nb_cols > truelength(dt)) {
    alloc.col(dt, total_nb_cols)
  }

  for(ncn in new_cols_names) {
    set(x = dt, i = NULL, j = ncn, value = dt2[[ncn]])
  }
  return(dt)
}

# Iterate on column + target in a data.table
column_iterator <- function(dt_source, target_colname = "target") {
  not_target <- function(x) return(x != target_colname)
  colname_iter <- iter(copy(names(dt_source)), checkFunc = not_target)
  nextEl <- function() {
    next_colname <- nextElem(colname_iter)
    next_cols <- c(next_colname, target_colname)
    my_log("column_iterator (iter)", mesg = next_colname, type = "message")
    return(dt_source[, (next_cols), with=F])
  }
  obj <- list(nextElem=nextEl)
  class(obj) <- c('iforever','abstractiter','iter')
  obj
}
desstatsutiles/prepare.data documentation built on May 15, 2019, 5:05 a.m.