# inst/examples/new/old/preprocessing.history.R

# Build the polished data frame `df.preprocessed` from the raw data `df.orig`.
#
# Expects two objects to exist already (neither is defined in this script):
#   df.orig       - the raw data frame; its column names are the fields that
#                   get preprocessed.
#   polish_field  - called as polish_field(df.orig, field, verbose = TRUE).
#                   Presumably returns a data.frame with one row per row of
#                   df.orig - confirm against its definition.
#
# Produces:
#   df.preprocessed - row.index, original_row and every polished column.
#   conversions     - list mapping each raw field name to the names of the
#                     polished columns derived from it.

# Initialize preprocessed data
print("Start collecting variables to a polished data frame")
# seq_len() instead of 1:nrow(): a zero-row df.orig must give a zero-row
# frame, not the two rows that 1:0 would produce.
df.preprocessed <- data.frame(row.index = seq_len(nrow(df.orig)))

print("Entry identifier to match back to the originals")
df.preprocessed$original_row <- df.orig$original_row

print("List raw data fields to be preprocessed")
update.fields <- sort(names(df.orig)) # Update all

# List how raw data fields will be converted into
# preprocessed data fields
conversions <- list()

# Preprocess every field listed in update.fields
for (field in update.fields) {

  # NOTE(review): start.time is not read anywhere in this script; it is kept
  # in case code that sources this script relies on it.
  start.time <- Sys.time()
  print(paste("Preprocessing", field, "(", match(field, update.fields), "/", length(update.fields), ")"))

  # Polish the given field
  df.tmp <- polish_field(df.orig, field, verbose = TRUE)

  # List the output fields for this input field
  conversions[[field]] <- names(df.tmp)

  # Remove columns that the new output is about to replace.
  # drop = FALSE keeps df.preprocessed a data frame even when only one
  # column remains; without it the frame collapses to a bare vector and the
  # cbind() below would rename that column to "df.preprocessed".
  inds <- which(names(df.preprocessed) %in% names(df.tmp))
  if (length(inds) > 0) {
    df.preprocessed <- df.preprocessed[, -inds, drop = FALSE]
  }

  # Add the newly preprocessed field
  df.preprocessed <- cbind(df.preprocessed, df.tmp)

  # Remove the temporary data.frame
  rm(df.tmp)

}
# COMHIS/estc documentation built on April 7, 2022, 4:53 p.m.