#' Segment a dataset along a variable, and optionally a second one.
#'
#' @description Computes the weighted mean of a target variable accross one or two dimensions.
#' The first dimension (variable) is meant to be plotted along the x-axis, while the values of the
#' second variable will be mapped to color aesthetics.
#'
#' @param data The data to segment.
#' @param target Name of the target variable.
#' @param var Character, name of the variable to segment accross.
#' @param var2 Optional, character name of the 2nd variable. This is useful to see interaction between two variables and a target variable.
#' @param weight Optional, name of the variable in `data` to be used as weights in the weighted average calculation.
#' @param exposure Optional, name of the variable in `data` that represents the exposure. This will be shown as bars when plotted.
#' @param quantiles Should `var` be quantilized ? If an integer, represents the number of quantiles to use. If TRUE, 5 quantiles will be used.
#' @param labels Labels to use for `var` quantiles. Default is to show the interval bounds.
#' @param quantiles2 Should `var2` be quantilized ? If an integer, represents the number of quantiles to use. If TRUE, 5 quantiles will be used.
#' @param laels2 Labels to use for `var2` quantiles. Default is to show the interval bounds.
#' @return A summarized dataset.
#' @export
#' @import data.table magrittr
twoway <- function(data, target, var, var2=NULL, weight=NULL, exposure = NULL,
quantiles = FALSE, labels = NULL, quantiles2 = FALSE, labels2 = NULL){
t <- as.data.table(data)
if(isTRUE(quantiles)) quantiles <- 5
if(isTRUE(quantiles2)) quantiles2 <- 5
if(is.numeric(quantiles)){
t[[var]] <-
cut(t[[var]],
unique(quantile(t[[var]], probs = seq(0,quantiles)/quantiles, na.rm = TRUE)),
include.lowest = TRUE,
labels = labels,
right = FALSE)
}
if(is.numeric(quantiles2)){
if(is.null(var2)) stop('var2 should not be NULL if quantiles is not FALSE')
t[[var2]] <-
cut(t[[var2]],
unique(quantile(t[[var2]], probs = seq(0,quantiles2)/quantiles2, na.rm = TRUE)),
include.lowest = TRUE,
labels = labels2,
right = FALSE)
}
if(!is.null(weight)){
if(!is.null(exposure)){
res <-
t[,
list(sum(.SD[[1]] * .SD[[2]], na.rm = TRUE)/sum(.SD[[1]], na.rm = TRUE),
sum(.SD[[3]])),
.SDcols = c(weight, target, exposure),
by = c(var, var2)]
}
else{
res <-
t[,
list(sum(.SD[[1]] * .SD[[2]], na.rm = TRUE)/sum(.SD[[1]], na.rm = TRUE),
.N),
.SDcols = c(weight, target),
by = c(var, var2)]
}
}
else{
if(!is.null(exposure)){
res <-
t[,
list(mean(.SD[[1]], na.rm = TRUE),
sum(.SD[[2]])),
.SDcols = c(target, exposure),
by = c(var, var2)]
}
else{
res <-
t[,
list(mean(.SD[[1]], na.rm = TRUE),
.N),
.SDcols = c(target),
by = c(var, var2)]
}
}
setnames(res, c(var, 'V1'), c('value1', target))
res %<>% mutate(var1 = var)
if(!is.null(exposure)){
setnames(res, 'V2', exposure)
}
if(!is.null(var2)){
setnames(res, var2, 'value2')
res %<>% mutate(var2 = var2)
}
class(res) <- append(class(res), 'twoway')
attr(res, 'target') <- target
attr(res, 'var1') <- var
attr(res, 'var2') <- var2
attr(res, 'exposure') <- if(is.null(exposure)) 'N' else exposure
res
}
#' Plot a twoway object.
#'
#' @param The twoway object.
#' @param pal Name of a palette to use.
#'
#' @return A plotly object.
#' @import plotly
#' @import magrittr
#' @importFrom dplyr arrange
#' @export
show <- function(twoway_df, pal = 'Set1', title = NULL){
if(!is.null(attr(twoway_df, 'var2'))){
twoway_df %<>% dplyr::arrange(value1, value2)
}
else{
twoway_df %<>% dplyr::arrange(value1)
}
p <- plot_ly(twoway_df)
if(!is.null(attr(twoway_df, 'var2'))){
p %<>%
add_trace(x = ~value1,
y = as.formula(paste0('~', attr(twoway_df, 'target'))),
type = 'scatter',
mode = 'lines+markers',
color = as.formula(paste0('~', 'value2')),
colors = pal) %>%
add_bars(
x = ~value1,
y = as.formula(paste0('~', attr(twoway_df, 'exposure'))),
color = as.formula(paste0('~', 'value2')),
colors = pal,
yaxis = 'y2',
opacity = 0.35,
showlegend = FALSE
)
}
else{
p <-
p %>%
add_trace(x = ~value1,
y = as.formula(paste0('~', attr(twoway_df, 'target'))),
type = 'scatter',
mode = 'lines+markers',
colors = pal,
name = attr(twoway_df, 'var1')) %>%
add_bars(
x = ~value1,
y = as.formula(paste0('~', attr(twoway_df, 'exposure'))),
yaxis = 'y2',
opacity = 0.35,
showlegend = FALSE
)
}
p %>%
layout(
xaxis = list(
title = attr(twoway_df, 'var1')
),
yaxis = list(
showgrid = TRUE
),
yaxis2 = list(
side = 'right',
overlaying = 'y',
showgrid = FALSE
),
title = title
)
}
# ggplot_segment <- function(summ_data, pal = 'Set1'){
#
# target <- setdiff(colnames(summ_data), c('var1', 'var2', 'value1', 'value2'))
#
# p <-
# ggplot(data = summ_data,
# aes_string(x = 'value1', y = target))
#
# if('var2' %in% colnames(summ_data)){
# p <-
# p +
# geom_point(aes(color = value2, group = value2)) +
# geom_line(aes(color = value2, group = value2))
# }
# else{
# p <-
# p +
# geom_point() +
# geom_line(aes(group = 1))
# }
#
# p +
# facet_wrap(~ var1, scales = 'free')
# }
#
#
# data_summ1 <- segment(data, 'y', 'g1', 'g2', exposure = 'e')
# data_summ2 <- segment(data, 'y', 'g1', 'g3', exposure = 'e')
#
# data_summ <- bind_rows(data_summ1, data_summ2)
#
# ggplot(data = data_summ, aes(x = value1, y = y, color = value2, group = value2)) +
# geom_point() +
# geom_line() +
#
# facet_wrap(~ var2)
#
# ggplot_segment(data_summ)
#
#
# segments_df <-
# map(
# c('g1', 'g2'),
# ~ segment(data, 'y', .x)
# )
#
# plots <-
# map(
# segments_df,
# plot_segment
# )
#
# subplot(plots)
#
# ggplot_segment(segments_df)
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.