Nothing
#' K-Means User Item Tibble
#'
#' @family Kmeans
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description
#' Takes in a data.frame/tibble and transforms it into an aggregated/normalized
#' user-item tibble of proportions. The user will need to input the parameters
#' for the rows/user and the columns/items.
#'
#' @details
#' This function should be used before using a k-mean model. This is
#' commonly referred to as a user-item matrix because "users" tend to be on the
#' rows and "items" (e.g. orders) on the columns. You must supply a column that
#' can be summed for the aggregation and normalization process to occur.
#'
#' @param .data The data that you want to transform
#' @param .row_input The column that is going to be the row (user)
#' @param .col_input The column that is going to be the column (item)
#' @param .record_input The column that is going to be summed up for the aggregation
#' and normalization process.
#'
#' @examples
#' library(healthyR.data)
#' library(dplyr)
#'
#' data_tbl <- healthyR_data %>%
#' filter(ip_op_flag == "I") %>%
#' filter(payer_grouping != "Medicare B") %>%
#' filter(payer_grouping != "?") %>%
#' select(service_line, payer_grouping) %>%
#' mutate(record = 1) %>%
#' as_tibble()
#'
#' hai_kmeans_user_item_tbl(
#' .data = data_tbl,
#' .row_input = service_line,
#' .col_input = payer_grouping,
#' .record_input = record
#' )
#'
#' @return
#' A aggregated/normalized user item tibble
#'
#' @export
#'
hai_kmeans_user_item_tbl <- function(.data, .row_input, .col_input, .record_input) {
# * Tidyeval ----
row_input_var_expr <- rlang::enquo(.row_input)
col_input_var_expr <- rlang::enquo(.col_input)
rec_input_var_expr <- rlang::enquo(.record_input)
# * Checks ----
if (!is.data.frame(.data)) {
stop(call. = FALSE, "(.data) is not a data.frame/tibble. Please supply.")
}
if (rlang::quo_is_missing(row_input_var_expr)) {
stop(call. = FALSE, "You must supply a user/row input.")
}
if (rlang::quo_is_missing(col_input_var_expr)) {
stop(call. = FALSE, "You must supply an item/column input")
}
if (rlang::quo_is_missing(rec_input_var_expr)) {
stop(call. = FALSE, "You must supply a record column that can be summed up
for the aggregation and normalization process.")
}
# * Data ----
data_tbl <- tibble::as_tibble(.data)
# * Manipulate ----
data_summarized_tbl <- data_tbl %>%
dplyr::group_by({{ row_input_var_expr }}, {{ col_input_var_expr }}) %>%
dplyr::summarise(total_records = sum({{ rec_input_var_expr }}, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
# Normalize proportions
dplyr::group_by({{ row_input_var_expr }}) %>%
dplyr::mutate(prop_of_total = total_records / sum(total_records)) %>%
dplyr::ungroup()
# User/Item format
user_item_tbl <- data_summarized_tbl %>%
dplyr::select({{ row_input_var_expr }}, {{ col_input_var_expr }}, prop_of_total) %>%
dplyr::mutate(prop_of_total = base::ifelse(
base::is.na(prop_of_total), 0, prop_of_total
)) %>%
tidyr::pivot_wider(
names_from = {{ col_input_var_expr }},
values_from = prop_of_total,
values_fill = list(prop_of_total = 0)
)
# * Return ----
return(user_item_tbl)
}
#' @rdname hai_kmeans_user_item_tbl
#' @export
kmeans_user_item_tbl <- hai_kmeans_user_item_tbl
#' K-Means Object
#'
#' @family Kmeans
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description
#' Takes the output of the [hai_kmeans_user_item_tbl()] function and applies the
#' k-means algorithm to it using [stats::kmeans()]
#'
#' @details Uses the [stats::kmeans()] function and creates a wrapper around it.
#'
#' @param .data The data that gets passed from [hai_kmeans_user_item_tbl()]
#' @param .centers How many initial centers to start with
#'
#' @examples
#' library(healthyR.data)
#' library(dplyr)
#'
#' data_tbl <- healthyR_data %>%
#' filter(ip_op_flag == "I") %>%
#' filter(payer_grouping != "Medicare B") %>%
#' filter(payer_grouping != "?") %>%
#' select(service_line, payer_grouping) %>%
#' mutate(record = 1) %>%
#' as_tibble()
#'
#' hai_kmeans_user_item_tbl(
#' .data = data_tbl,
#' .row_input = service_line,
#' .col_input = payer_grouping,
#' .record_input = record
#' ) %>%
#' hai_kmeans_obj()
#'
#' @return
#' A stats k-means object
#'
#' @export
#'
hai_kmeans_obj <- function(.data, .centers = 5) {
# * Tidyeval ----
centers_var_expr <- .centers
# * Checks ----
if (!is.data.frame(.data)) {
stop(call. = FALSE("(.data) is missing. Please supply."))
}
# Default to 5
if (is.null(centers_var_expr)) {
centers_var_expr <- 5
}
# * Data ----
data <- tibble::as_tibble(.data)
# * k-means ----
kmeans_tbl <- data %>%
dplyr::select(-1)
kmeans_obj <- kmeans_tbl %>%
stats::kmeans(
centers = centers_var_expr,
nstart = 100
)
return(kmeans_obj)
}
#' @rdname hai_kmeans_obj
#' @export
kmeans_obj <- hai_kmeans_obj
#' K-Means Object Tidy Functions
#'
#' @family Kmeans
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description
#' K-Means tidy functions
#'
#' @details
#' Takes in a k-means object and its associated user item tibble and then
#' returns one of the items asked for. Either: [broom::tidy()], [broom::glance()]
#' or [broom::augment()]. The function defaults to [broom::tidy()].
#'
#' @param .kmeans_obj A [stats::kmeans()] object
#' @param .tidy_type "tidy","glance", or "augment"
#' @param .data The user item tibble created from [hai_kmeans_user_item_tbl()]
#'
#' @examples
#' library(healthyR.data)
#' library(dplyr)
#' library(broom)
#'
#' data_tbl <- healthyR_data %>%
#' filter(ip_op_flag == "I") %>%
#' filter(payer_grouping != "Medicare B") %>%
#' filter(payer_grouping != "?") %>%
#' select(service_line, payer_grouping) %>%
#' mutate(record = 1) %>%
#' as_tibble()
#'
#' uit_tbl <- hai_kmeans_user_item_tbl(
#' .data = data_tbl,
#' .row_input = service_line,
#' .col_input = payer_grouping,
#' .record_input = record
#' )
#'
#' km_obj <- hai_kmeans_obj(uit_tbl)
#'
#' hai_kmeans_tidy_tbl(
#' .kmeans_obj = km_obj,
#' .data = uit_tbl,
#' .tidy_type = "augment"
#' )
#'
#' hai_kmeans_tidy_tbl(
#' .kmeans_obj = km_obj,
#' .data = uit_tbl,
#' .tidy_type = "glance"
#' )
#'
#' hai_kmeans_tidy_tbl(
#' .kmeans_obj = km_obj,
#' .data = uit_tbl,
#' .tidy_type = "tidy"
#' ) %>%
#' glimpse()
#'
#' @return
#' A tibble
#'
#' @export
#'
hai_kmeans_tidy_tbl <- function(.kmeans_obj, .data, .tidy_type = "tidy") {
# * Tidyeval ----
kmeans_obj <- .kmeans_obj
tidy_type <- .tidy_type
# * Checks ----
if (!is.data.frame(.data)) {
stop(call. = FALSE, "(.user_item_data) is not a data.frame/tibble, please supply original user item tibble.")
}
if (!inherits(x = kmeans_obj, what = "kmeans")) {
stop(call. = FALSE, "(.kmeans_obj) is not of class 'kmeans'")
}
if (!tidy_type %in% c("tidy", "augment", "glance")) {
stop(
call. = FALSE,
"(.tidy_type) must be either tidy, glance, or augment"
)
}
# * Manipulate ----
uit_tbl <- tibble::as_tibble(.data)
row_col <- colnames(uit_tbl[1])
if (tidy_type == "tidy") {
km_tbl <- kmeans_obj %>% broom::tidy()
} else if (tidy_type == "glance") {
km_tbl <- kmeans_obj %>% broom::glance()
} else if (tidy_type == "augment") {
km_tbl <- kmeans_obj %>%
broom::augment(uit_tbl) %>%
dplyr::select(row_col, .cluster) %>%
dplyr::rename("cluster" = .cluster)
}
# * Return ----
return(km_tbl)
}
#' @rdname hai_kmeans_tidy_tbl
#' @export
kmeans_tidy_tbl <- hai_kmeans_tidy_tbl
#' K-Means Mapping Function
#'
#' @family Kmeans
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description Create a tibble that maps the [hai_kmeans_obj()] using [purrr::map()]
#' to create a nested data.frame/tibble that holds n centers. This tibble will be
#' used to help create a scree plot.
#'
#' @seealso
#' \url{https://en.wikipedia.org/wiki/Scree_plot}
#'
#' @details Takes in a single parameter of .centers. This is used to create the tibble
#' and map the [hai_kmeans_obj()] function down the list creating a nested tibble.
#'
#' @param .centers How many different centers do you want to try
#' @param .data You must have a tibble in the working environment from the
#' [hai_kmeans_user_item_tbl()]
#'
#' @examples
#' library(healthyR.data)
#' library(dplyr)
#'
#' data_tbl <- healthyR_data %>%
#' filter(ip_op_flag == "I") %>%
#' filter(payer_grouping != "Medicare B") %>%
#' filter(payer_grouping != "?") %>%
#' select(service_line, payer_grouping) %>%
#' mutate(record = 1) %>%
#' as_tibble()
#'
#' ui_tbl <- hai_kmeans_user_item_tbl(
#' .data = data_tbl,
#' .row_input = service_line,
#' .col_input = payer_grouping,
#' .record_input = record
#' )
#'
#' hai_kmeans_mapped_tbl(ui_tbl)
#'
#' @return
#' A nested tibble
#'
#' @export
#'
hai_kmeans_mapped_tbl <- function(.data, .centers = 15) {
# * Tidyeval ----
centers_var_expr <- .centers
# * Checks ----
if (!is.data.frame(.data)) {
stop(call. = FALSE, "(.data) is not a data.frame/tibble. Please supply.")
}
input_data <- tibble::as_tibble(.data)
km_mapper <- function(centers = 3) {
input_data %>%
dplyr::select(-1) %>%
stats::kmeans(
centers = centers,
nstart = 100
)
}
# * Manipulate ----
data_tbl <- tibble::tibble(centers = 1:centers_var_expr) %>%
dplyr::mutate(k_means = centers %>%
purrr::map(km_mapper)) %>%
dplyr::mutate(glance = k_means %>%
purrr::map(broom::glance))
# * Return ----
return(data_tbl)
}
#' @rdname hai_kmeans_mapped_tbl
#' @export
kmeans_mapped_tbl <- hai_kmeans_mapped_tbl
#' K-Means Scree Plot Data Table
#'
#' @family Kmeans
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description Take data from the [hai_kmeans_mapped_tbl()] and unnest it into a
#' tibble for inspection and for use in the [hai_kmeans_scree_plt()] function.
#'
#' @details Takes in a single parameter of .data from [hai_kmeans_mapped_tbl()] and
#' transforms it into a tibble that is used for [hai_kmeans_scree_plt()]. It will
#' show the values (tot.withinss) at each center.
#'
#' @param .data You must have a tibble in the working environment from the
#' [hai_kmeans_mapped_tbl()]
#'
#' @examples
#' library(healthyR.data)
#' library(dplyr)
#'
#' data_tbl <- healthyR_data %>%
#' filter(ip_op_flag == "I") %>%
#' filter(payer_grouping != "Medicare B") %>%
#' filter(payer_grouping != "?") %>%
#' select(service_line, payer_grouping) %>%
#' mutate(record = 1) %>%
#' as_tibble()
#'
#' ui_tbl <- hai_kmeans_user_item_tbl(
#' .data = data_tbl,
#' .row_input = service_line,
#' .col_input = payer_grouping,
#' .record_input = record
#' )
#'
#' kmm_tbl <- hai_kmeans_mapped_tbl(ui_tbl)
#'
#' hai_kmeans_scree_data_tbl(kmm_tbl)
#'
#' @return
#' A nested tibble
#'
#' @export
#'
hai_kmeans_scree_data_tbl <- function(.data) {
# * Checks ----
if (!is.data.frame(.data)) {
stop(call. = FALSE, "(.data) is not a data.frame/tibble. Please supply.")
}
# * Manipulate ----
data_tbl <- tibble::as_tibble(.data)
data_tbl <- data_tbl %>%
tidyr::unnest(glance) %>%
dplyr::select(centers, tot.withinss)
# * Return ----
return(data_tbl)
}
#' @rdname hai_kmeans_scree_data_tbl
#' @export
kmeans_scree_data_tbl <- hai_kmeans_scree_data_tbl
#' K-Means Scree Plot
#'
#' @family Kmeans
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description Create a scree-plot from the [hai_kmeans_mapped_tbl()] function.
#'
#' @details Outputs a scree-plot
#'
#' @seealso
#' \url{https://en.wikipedia.org/wiki/Scree_plot}
#'
#' @param .data The data from the [hai_kmeans_mapped_tbl()] function
#'
#' @examples
#' library(healthyR.data)
#' library(dplyr)
#'
#' data_tbl <- healthyR_data %>%
#' filter(ip_op_flag == "I") %>%
#' filter(payer_grouping != "Medicare B") %>%
#' filter(payer_grouping != "?") %>%
#' select(service_line, payer_grouping) %>%
#' mutate(record = 1) %>%
#' as_tibble()
#'
#' ui_tbl <- hai_kmeans_user_item_tbl(
#' .data = data_tbl,
#' .row_input = service_line,
#' .col_input = payer_grouping,
#' .record_input = record
#' )
#'
#' kmm_tbl <- hai_kmeans_mapped_tbl(ui_tbl)
#'
#' hai_kmeans_scree_plt(.data = kmm_tbl)
#'
#' @return
#' A ggplot2 plot
#'
#' @export
#'
hai_kmeans_scree_plt <- function(.data) {
# * Checks ----
if (!is.data.frame(.data)) {
stop(call. = FALSE, "(.data) is not a data.frame/tibble. Please supply.")
}
# * Manipulate ----
data_tbl <- tibble::as_tibble(.data)
data_tbl <- data_tbl %>%
tidyr::unnest(glance) %>%
dplyr::select(centers, tot.withinss)
# * Plot
p <- data_tbl %>%
ggplot2::ggplot(
mapping = ggplot2::aes(
x = centers,
y = tot.withinss
)
) +
ggplot2::geom_point() +
ggplot2::geom_line() +
# Kaiser Line
ggplot2::geom_hline(yintercept = 1, color = "red", linetype = "dashed") +
ggrepel::geom_label_repel(
mapping = ggplot2::aes(
label = centers
)
) +
ggplot2::theme_minimal() +
ggplot2::labs(
title = "Scree Plot",
subtitle = "Measures the distance each of the users are from the closest k-means cluster",
y = "Total Within Sum of Squares",
x = "Centers"
)
# * Return ----
return(p)
}
#' @rdname hai_kmeans_scree_plt
#' @export
kmeans_scree_plt <- hai_kmeans_scree_plt
#' @rdname hai_kmeans_scree_plt
#' @export
hai_kmeans_scree_plot <- hai_kmeans_scree_plt
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.