#' @title ggcc_preprocess
#'
#' @description Function to parse data for use by ggcc.
#'
#' @param df Dataframe to parse.
#' @param filter_str String to filter df.
#' @param clre Bool to indicate if CC is for typical CLRE. Defaults to TRUE.
#' @return Returns parsed dataframe.
#'
#' @export
ggcc_preprocess <- function(
df,
filter_str,
clre = TRUE
) {
ccdf <- df %>%
dplyr::filter_(filter_str) %>%
dplyr::mutate(
monyr = as.character(YYYYMM)
) %>%
{if (clre) add_clre_tech(., write = FALSE) else .} %>%
{if (clre) join_clre_limits(.) else .} %>%
{if (clre) dplyr::group_by(., monyr, alert_limit) else dplyr::group_by(., monyr)} %>%
dplyr::summarise(complaints = n()) %>%
dplyr::right_join(
y = data.frame(
monyr = strftime(c(Sys.time() - months(13:2), last_month()),
format = '%Y%m'),
stringsAsFactors = FALSE),
by = 'monyr') %>%
dplyr::ungroup() %>%
{if (clre) dplyr::arrange(., alert_limit) else .} %>%
{if (clre) {
dplyr::mutate(
.,
alert_limit = zoo::na.locf(alert_limit),
complaints = replace(complaints, is.na(complaints), 0)
)
} else {
dplyr::mutate(
.,
complaints = replace(complaints, is.na(complaints), 0)
)
}
} %>%
dplyr::arrange(monyr) %>%
dplyr::mutate(range = c(NA, abs(diff(complaints))))
avgr <- round(mean(ccdf$range, na.rm = TRUE), digits = 1)
lclr <- 0
uclr <- round(3.267*avgr, digits = 1)
pseudo_sd <- avgr / 1.128
avg <- round(mean(ccdf$complaints), digits = 1)
lcl <- round(mean(ccdf$complaints) - 3*pseudo_sd, digits = 1)
ucl <- round(mean(ccdf$complaints) + 3*pseudo_sd, digits = 1)
lzb <- round(mean(ccdf$complaints) - 2*pseudo_sd, digits = 1)
uzb <- round(mean(ccdf$complaints) + 2*pseudo_sd, digits = 1)
lzc <- round(mean(ccdf$complaints) - pseudo_sd, digits = 1)
uzc <- round(mean(ccdf$complaints) + pseudo_sd, digits = 1)
# test2_func <- function(x) {
# side <- (x - avg) > 0
# nums <- rle(side)
# if (any(nums$lengths >= 9)) {
# ind <- c(1:length(nums$lengths))[nums$lengths >= 9]
# run <- nums$lengths[ind]
# if (ind == 1) {
# return(rep(c(0, 2, 0), c(8, 1, length(x) - 9)))
# } else {
# before <- sum(nums$lengths[1:(ind - 1)])
# return(rep(c(0, 2, 0), c(run + before - 1, 1, (length(x) - run - before))))
# }
# } else {
# return(rep(0, length(x)))
# }
# }
#
# test3_func <- function(x) {
# diffs <- diff(x) > 0
# nums <- rle(diffs)
# if (any(nums$lengths >= 6)) {
# ind <- c(1:length(nums$lengths))[nums$lengths >= 6]
# run <- nums$lengths[ind]
# if (ind == 1) {
# return(rep(c(0, 3, 0), c(5, 1, length(x) - 6)))
# } else {
# before <- sum(nums$lengths[1:(ind - 1)])
# return(rep(c(0, 3, 0), c(before+run, 1, (length(x) - run - before - 1))))
# }
# } else {
# return(rep(0, length(x)))
# }
# }
#
# test5_func <- function(x) {
# ret <- rep(0, length(x))
# for(i in c(3:length(x))) {
# test5 <- abs(x[(i-2):i] - avg) >= 2*pseudo_sd
# if (sum(test5) >= 2 & (all((x[(i-2):i] - avg)[test5] > 0) | all((x[(i-2):i] - avg)[test5] < 0))) {
# ret[(i-2):i][test5] <- 5
# }
# }
# return(ret)
# }
#
# test6_func <- function(x) {
# ret <- rep(0, length(x))
# for(i in c(5:length(x))) {
# test6 <- abs(x[(i-4):i] - avg) >= pseudo_sd
# if (sum(test6) >= 4 & (all((x[(i-4):i] - avg)[test6] > 0) | all((x[(i-4):i] - avg)[test6] < 0))) {
# ret[(i-4):i][test6] <- 6
# }
# }
# return(ret)
# }
#
# test8_func <- function(x) {
# ret <- rep(0, length(x))
# for (i in c(8:length(x))) {
# test8 <- sum(abs(x[(i-7):i] - avg) >= 2 * pseudo_sd)
# if (test8 == 8) {
# ret[i] <- 8
# }
# }
# return(ret)
# }
#
# get_test <- function(x) {
# start <- ifelse(clre, 5, 4)
# inds <- as.numeric(x[start:length(x)]) > 0
# if (sum(inds) > 0) {
# test <- min(as.numeric(x[4:length(x)][inds]), na.rm = TRUE)
# } else {
# test <- 0
# }
#
# return(test)
# }
#
# red_lines <- function(x) {
# range <- data.frame(test = c(1,2,3,5,6,8), range = c(0, 0, 5, 2, 4, 7))
# reds <- rep(0, length(x))
# if (x[1] > 0) {
# reds[1] <- 1
# }
# for (i in 2:length(x)) {
# if (x[i] > 0) {
# back <- range$range[range$test == x[i]]
# # reds[(i-back):i] <- 1
# reds[i] <- 1
# }
# }
#
# x <- rle(reds)
# for (i in 1:length(x$values)) {
# if (x$values[i] == 1) {
# x$values[i] <- i
# }
# }
# reds <- rep(x$values, x$lengths)
# return(reds)
# }
ccdf %<>%
# dplyr::mutate(
# test1 = ifelse(abs(complaints - avg) >= 3 * pseudo_sd, 1, 0)
# ) %>%
# dplyr::mutate(
# test = apply(., 1, get_test)
# ) %>%
{if (clre) {
dplyr::mutate(
.,
# count_label = ifelse(test > 0, 1, 0),
# reds = red_lines(test),
alert_lim = alert_limit
)
} else {
# dplyr::mutate(
# .,
# count_label = ifelse(test > 0, 1, 0),
# reds = red_lines(test)
# )
.
}
} %>%
{if (clre) {
dplyr::select(
.,
monyr,
complaints,
range,
# test,
# count_label,
# reds,
alert_lim
)
} else {
dplyr::select(
.data = .,
monyr,
complaints,
range
# test,
# count_label,
# reds
)
}}
return(ccdf)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.