# Make sure data.table knows we know we're using it
.datatable.aware <- TRUE
# Prevent R CMD check from complaining about the use of pipe expressions
# standard data.table variables
if (getRversion() >= "2.15.1") {
utils::globalVariables(c(":=", ".", ".I", ".N", ".SD"), utils::packageName())
utils::globalVariables(c(".grp", ".anomaly", "ds", "value", ".tag"))
}
#' Get dataframe/datatable list from calling environment
#'
#' This returns a list of all dataframes / datatables in the calling environment.
#'
#' (Internal function)
#'
#' @return char vector of dt list
get_dt_from_env <- function() {
dt_list <- ls(envir = .GlobalEnv)[sapply(ls(envir = .GlobalEnv),
function(t)
is.data.frame(get(t)))]
if (length(dt_list) == 0) {
dt_list <- list("None")
}
dt_list
}
#' Process dataframe selected from environment
#'
#' This internal function is responsible for pre-processing a selected df from
#' the env. The shiny app relies on this func to check all req of the df (cols,
#' col types, data quality etc).
#'
#' (Internal function)
#'
#' @param df_full (data.table/data.frame/tibble) raw dataframe from env
#' @param picker_select_datecol (char) name of respective column selected by user
#' @param picker_select_grpcol (char) name of respective column selected by user
#' @param picker_select_valuecol (char) name of respective column selected by user
#' @param picker_select_tagcol (char) name of respective column selected by user
#' @param picker_select_anomalycol (char) name of respective column selected by user
#'
#' @return list containing preprocessed DT & info about preprocessing steps
checkprep_dt_from_env <- function(df_full,
picker_select_datecol,
picker_select_grpcol,
picker_select_valuecol,
picker_select_tagcol,
picker_select_anomalycol) {
preprocess_steps <- list(
"df_class" = list(
"initial_class" = NULL,
"convert_to_datatable" = FALSE,
"msg" = NULL
),
"datetime_col" = list(
"initial_data_type_check_successful" = FALSE,
# Date or POSIXct
"conversion_attempted" = FALSE,
"conversion_successful" = FALSE,
"msg" = NULL
),
"tag_col" = list(
"add_tag_col" = FALSE,
"count_NA" = 0,
"NA_replaced" = FALSE,
"msg" = NULL
),
"anomaly_col" = list(
"add_anomaly_col" = FALSE,
"only_contains_T_or_F" = TRUE,
"msg" = NULL
),
"other_col" = list(
"other_cols_exist" = FALSE,
"msg" = NULL
),
"grp_col" = list(
"initial_data_type_check_successful" = FALSE,
"converted_to_char" = FALSE,
"msg" = NULL
),
"value_col" = list(
"initial_data_type_check_successful" = FALSE,
"msg" = NULL
),
"quality_checks" = list(
"anomaly1_tag_empty" = FALSE,
"anomaly1_tag_empty_msg" = NULL,
"anomaly0_tag_nonempty" = FALSE,
"anomaly0_tag_nonempty_msg" = NULL
),
"col_list" = list(
"datecol" = NULL,
"grpcol" = NULL,
"valuecol" = NULL,
"tagcol" = NULL,
"anomalycol" = NULL,
"othercols" = NULL
),
"metadata" = list("custom_tags" = NULL),
"sorted_by_key_index" = FALSE,
"go_nogo" = FALSE
)
# Class of input dataframe
preprocess_steps$df_class$initial_class <- class(df_full)[1]
if (!data.table::is.data.table(df_full)) {
preprocess_steps$df_class$convert_to_datatable <- TRUE
preprocess_steps$df_class$msg <-
"Converting input dataframe to data.table. On saving back to your environment, you will be returned an object of your original class (data.frame or tibble)"
df_full <- data.table::as.data.table(df_full)
} else {
preprocess_steps$df_class$initial_class <- "data.table"
}
# .columns
if (picker_select_anomalycol == "No anomaly column") {
preprocess_steps$anomaly_col$add_anomaly_col <- TRUE
preprocess_steps$anomaly_col$msg <- "Adding .anomaly column"
picker_select_anomalycol <- ".anomaly"
df_full[, .anomaly := FALSE]
}
if (picker_select_tagcol == "No tag column") {
preprocess_steps$tag_col$add_tag_col <- TRUE
preprocess_steps$tag_col$msg <- "Adding .tag column"
picker_select_tagcol <- ".tag"
df_full[, .tag := ""]
}
# Col data types
## Is the datecol of class Date or POSIXct?
if (!any(lubridate::is.Date(df_full[, get(picker_select_datecol)]),
lubridate::is.POSIXct(df_full[, get(picker_select_datecol)]))) {
preprocess_steps$datetime_col$initial_data_type_check_successful <-
FALSE
# Attempting conversion to POSIXct
df_full[, c(picker_select_datecol) := lubridate::ymd_hms(get(picker_select_datecol),
truncated = 3,
tz = "UTC")]
if (any(is.na(df_full[, get(picker_select_datecol)]))) {
preprocess_steps$datetime_col$conversion_attempted <- TRUE
preprocess_steps$datetime_col$conversion_successful <- FALSE
preprocess_steps$datetime_col$msg <-
paste0(
"Could not parse date-time column `",
picker_select_datecol,
"`. Format expected - For date-time: %Y-%m-%dT%H:%M:%SZ or similar. For date: %Y-%m-%d."
)
} else {
preprocess_steps$datetime_col$conversion_attempted <- TRUE
preprocess_steps$datetime_col$conversion_successful <- TRUE
preprocess_steps$datetime_col$msg <-
paste0(
"Date-time column`",
picker_select_datecol,
"` converted to POSIXct."
)
}
} else {
preprocess_steps$datetime_col$initial_data_type_check_successful <-
TRUE
preprocess_steps$datetime_col$msg <-
paste0("Date/Time column `",
picker_select_datecol,
"` checked: OK")
}
## Grp Column
if (picker_select_grpcol != "No group column") {
if (class(df_full[, get(picker_select_grpcol)]) %in% c("character", "factor")) {
preprocess_steps$grp_col$initial_data_type_check_successful <- TRUE
preprocess_steps$grp_col$msg <-
paste0(
"Class of group column `",
picker_select_grpcol,
"` checked: OK"
)
} else {
preprocess_steps$grp_col$initial_data_type_check_successful <- FALSE
preprocess_steps$grp_col$msg <-
paste0(
"Converting group column `",
picker_select_grpcol,
"` to class: character"
)
df_full[, c(picker_select_grpcol) := as.character(get(picker_select_grpcol))]
}
total_grps <-
df_full[, length(unique(get(picker_select_grpcol)))]
grp_unique_list <- df_full[, unique(get(picker_select_grpcol))]
} else {
total_grps <- 0
grp_unique_list <- NA
}
## Value Column
if (class(df_full[, get(picker_select_valuecol)]) != "numeric") {
preprocess_steps$value_col$initial_data_type_check_successful <-
FALSE
preprocess_steps$value_col$msg <-
paste0(
"Value column `",
picker_select_valuecol,
"` needs to be of class: numeric or integer"
)
} else {
preprocess_steps$value_col$initial_data_type_check_successful <-
TRUE
preprocess_steps$value_col$msg <-
paste0("Value column `",
picker_select_valuecol,
"` checked: OK")
}
## Remaining Cols, if any
standard_cols <- c(picker_select_datecol,
picker_select_valuecol,
picker_select_tagcol,
picker_select_anomalycol)
if(picker_select_grpcol != "No group column")
standard_cols <- c(standard_cols, picker_select_grpcol)
other_user_cols <- colnames(df_full)[!(colnames(df_full) %in% standard_cols)]
if(length(other_user_cols)==0){
preprocess_steps$other_col$other_cols_exist <- FALSE
preprocess_steps$other_col$msg <- "No user columns found"
} else {
preprocess_steps$other_col$other_cols_exist <- TRUE
preprocess_steps$other_col$msg <- paste0(length(other_user_cols),
" other user columns found. These will be preserved.")
}
# Data Quality Checks
if (!all(df_full[, unique(get(picker_select_anomalycol))] %in% c(FALSE, TRUE))) {
preprocess_steps$anomaly_col$only_contains_T_or_F <- FALSE
preprocess_steps$anomaly_col$msg <-
paste0(
"Anomaly column `",
picker_select_anomalycol,
"` contains values not [FALSE, TRUE]"
)
}
# Test for NA values in tag. If present, replace by ""
if ((count_ <-
df_full[is.na(get(picker_select_tagcol)), .N]) != 0) {
preprocess_steps$tag_col$count_NA <- count_
preprocess_steps$tag_col$NA_replaced <- TRUE
preprocess_steps$tag_col$msg <-
paste0(
"Found ",
count_,
" NA values in tag column `",
picker_select_tagcol,
"`. Replacing by empty string ('')"
)
df_full[is.na(get(picker_select_tagcol)), c(picker_select_tagcol) := ""]
}
# Any anomaly=1 where tag = ""?
count_ <-
df_full[get(picker_select_anomalycol) == 1 &
get(picker_select_tagcol) == "", .N]
if (count_ != 0) {
preprocess_steps$quality_checks$anomaly1_tag_empty <- TRUE
preprocess_steps$quality_checks$anomaly1_tag_empty_msg <-
paste0(
"Warning: Found ",
count_,
" rows where anomaly is TRUE, yet tag is empty ('')"
)
}
# Any anomaly=0 where tag != ""?
count_ <-
df_full[get(picker_select_anomalycol) == 0 &
get(picker_select_tagcol) != "", .N]
if (count_ != 0) {
preprocess_steps$quality_checks$anomaly0_tag_nonempty <- TRUE
preprocess_steps$quality_checks$anomaly0_tag_nonempty_msg <-
paste0(
"Warning: Found ",
count_,
" rows where anomaly is FALSE, yet tag is not empty"
)
}
#Sort by key-index
if (picker_select_grpcol == "No group column") {
data.table::setkeyv(df_full, picker_select_datecol)
preprocess_steps$sorted_by_key_index <- TRUE
} else {
data.table::setkeyv(df_full,
c(picker_select_datecol, picker_select_grpcol))
preprocess_steps$sorted_by_key_index <- TRUE
}
return_list <- list()
# Process tag values
tag_choices <- tag_values <- default_tag_values()
tag_choices[tag_choices == ""] <- "remove tag"
tags_in_file <- df_full[get(picker_select_anomalycol) == TRUE, unique(get(picker_select_tagcol))]
custom_tags <- tags_in_file[!(tags_in_file %in% tag_choices)]
if (length(custom_tags) > 0) {
tag_values <- c(tag_values[tag_values != ""], custom_tags, "")
tag_choices <- c(tag_choices[tag_choices != "remove tag"], custom_tags, "remove tag")
}
# Save everything to return back to caller
## Metadata
preprocess_steps$metadata$custom_tags <- custom_tags
preprocess_steps$metadata$tag_values <- tag_values
preprocess_steps$metadata$tag_choices <- tag_choices
preprocess_steps$metadata$total_pts <- df_full[, .N]
preprocess_steps$metadata$total_grps <- total_grps
preprocess_steps$metadata$grp_unique_list <- grp_unique_list
preprocess_steps$metadata$count_existing_anomalies <-
df_full[get(picker_select_anomalycol) == TRUE, .N]
## Data
return_list$preprocessed_df <- df_full
## Final Go-NoGo
if (all(
any(
preprocess_steps$datetime_col$initial_data_type_check_successful,
preprocess_steps$datetime_col$conversion_successful
),
preprocess_steps$anomaly_col$only_contains_T_or_F,
preprocess_steps$value_col$initial_data_type_check_successful
)) {
preprocess_steps$go_nogo <- TRUE
}
## Cols
preprocess_steps$col_list$datecol <- picker_select_datecol
preprocess_steps$col_list$grpcol <- picker_select_grpcol
preprocess_steps$col_list$valuecol <- picker_select_valuecol
preprocess_steps$col_list$tagcol <- picker_select_tagcol
preprocess_steps$col_list$anomalycol <- picker_select_anomalycol
preprocess_steps$col_list$othercols <- other_user_cols
## Process Steps
return_list$preprocess_steps <- preprocess_steps
return_list
}
#' Default tag values
#'
#' @return char vector of default tag values
default_tag_values <- function() {
c(
"spike",
"trend-change",
"level-shift",
"variance-shift",
"outlier",
""
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.