R/twoway.R

#' Segment a dataset along a variable, and optionally a second one.
#'
#' @description Computes the weighted mean of a target variable accross one or two dimensions.
#' The first dimension (variable) is meant to be plotted along the x-axis, while the values of the
#' second variable will be mapped to color aesthetics.
#'
#' @param data The data to segment.
#' @param target Name of the target variable.
#' @param var Character, name of the variable to segment accross.
#' @param var2 Optional, character name of the 2nd variable. This is useful to see interaction between two variables and a target variable.
#' @param weight Optional, name of the variable in `data` to be used as weights in the weighted average calculation.
#' @param exposure Optional, name of the variable in `data` that represents the exposure. This will be shown as bars when plotted.
#' @param quantiles Should `var` be quantilized ? If an integer, represents the number of quantiles to use. If TRUE, 5 quantiles will be used.
#' @param labels Labels to use for `var` quantiles. Default is to show the interval bounds.
#' @param quantiles2 Should `var2` be quantilized ? If an integer, represents the number of quantiles to use. If TRUE, 5 quantiles will be used.
#' @param laels2 Labels to use for `var2` quantiles. Default is to show the interval bounds.
#' @return A summarized dataset.
#' @export
#' @import data.table magrittr
twoway <- function(data, target, var, var2=NULL, weight=NULL, exposure = NULL,
                   quantiles = FALSE, labels = NULL, quantiles2 = FALSE, labels2 = NULL){

  t <- as.data.table(data)

  if(isTRUE(quantiles)) quantiles <- 5
  if(isTRUE(quantiles2)) quantiles2 <- 5

  if(is.numeric(quantiles)){
    t[[var]] <-
      cut(t[[var]],
          unique(quantile(t[[var]], probs = seq(0,quantiles)/quantiles, na.rm = TRUE)),
          include.lowest = TRUE,
          labels = labels,
          right = FALSE)
  }

  if(is.numeric(quantiles2)){
    if(is.null(var2)) stop('var2 should not be NULL if quantiles is not FALSE')
    t[[var2]] <-
      cut(t[[var2]],
          unique(quantile(t[[var2]], probs = seq(0,quantiles2)/quantiles2, na.rm = TRUE)),
          include.lowest = TRUE,
          labels = labels2,
          right = FALSE)
  }

  if(!is.null(weight)){
    if(!is.null(exposure)){
      res <-
        t[,
          list(sum(.SD[[1]] * .SD[[2]], na.rm = TRUE)/sum(.SD[[1]], na.rm = TRUE),
            sum(.SD[[3]])),
          .SDcols = c(weight, target, exposure),
          by = c(var, var2)]
    }
    else{
      res <-
        t[,
          list(sum(.SD[[1]] * .SD[[2]], na.rm = TRUE)/sum(.SD[[1]], na.rm = TRUE),
            .N),
          .SDcols = c(weight, target),
          by = c(var, var2)]
    }
  }
  else{
    if(!is.null(exposure)){
      res <-
        t[,
          list(mean(.SD[[1]], na.rm = TRUE),
            sum(.SD[[2]])),
          .SDcols = c(target, exposure),
          by = c(var, var2)]
    }
    else{
      res <-
        t[,
          list(mean(.SD[[1]], na.rm = TRUE),
            .N),
          .SDcols = c(target),
          by = c(var, var2)]
    }
  }

  setnames(res, c(var, 'V1'), c('value1', target))
  res %<>% mutate(var1 = var)

  if(!is.null(exposure)){
    setnames(res, 'V2', exposure)
  }

  if(!is.null(var2)){
    setnames(res, var2, 'value2')
    res %<>% mutate(var2 = var2)
  }

  class(res) <- append(class(res), 'twoway')
  attr(res, 'target') <- target
  attr(res, 'var1') <- var
  attr(res, 'var2') <- var2
  attr(res, 'exposure') <- if(is.null(exposure)) 'N' else exposure

  res
}



#' Plot a twoway object.
#'
#' @param The twoway object.
#' @param pal Name of a palette to use.
#'
#' @return A plotly object.
#' @import plotly
#' @import magrittr
#' @importFrom dplyr arrange
#' @export
show <- function(twoway_df, pal = 'Set1', title = NULL){

  if(!is.null(attr(twoway_df, 'var2'))){
    twoway_df %<>% dplyr::arrange(value1, value2)
  }
  else{
    twoway_df %<>% dplyr::arrange(value1)
  }

  p <- plot_ly(twoway_df)

  if(!is.null(attr(twoway_df, 'var2'))){
    p %<>%
      add_trace(x = ~value1,
                y = as.formula(paste0('~', attr(twoway_df, 'target'))),
                type = 'scatter',
                mode = 'lines+markers',
                color = as.formula(paste0('~', 'value2')),
                colors = pal) %>%
      add_bars(
        x = ~value1,
        y = as.formula(paste0('~', attr(twoway_df, 'exposure'))),
        color = as.formula(paste0('~', 'value2')),
        colors = pal,
        yaxis = 'y2',
        opacity = 0.35,
        showlegend = FALSE
      )
  }
  else{
    p <-
      p %>%
      add_trace(x = ~value1,
                y = as.formula(paste0('~', attr(twoway_df, 'target'))),
                type = 'scatter',
                mode = 'lines+markers',
                colors = pal,
                name = attr(twoway_df, 'var1')) %>%
      add_bars(
        x = ~value1,
        y = as.formula(paste0('~', attr(twoway_df, 'exposure'))),
        yaxis = 'y2',
        opacity = 0.35,
        showlegend = FALSE
      )
  }

  p %>%
    layout(
      xaxis = list(
        title = attr(twoway_df, 'var1')
      ),
      yaxis = list(
        showgrid = TRUE
      ),
      yaxis2 = list(
        side = 'right',
        overlaying = 'y',
        showgrid = FALSE
      ),
      title = title
    )
}


# ggplot_segment <- function(summ_data, pal = 'Set1'){
#
#   target <- setdiff(colnames(summ_data), c('var1', 'var2', 'value1', 'value2'))
#
#   p <-
#     ggplot(data = summ_data,
#            aes_string(x = 'value1', y = target))
#
#   if('var2' %in% colnames(summ_data)){
#     p <-
#       p +
#       geom_point(aes(color = value2, group = value2)) +
#       geom_line(aes(color = value2, group = value2))
#   }
#   else{
#     p <-
#       p +
#       geom_point() +
#       geom_line(aes(group = 1))
#   }
#
#   p +
#     facet_wrap(~ var1, scales = 'free')
# }
#

#
# data_summ1 <- segment(data, 'y', 'g1', 'g2', exposure = 'e')
# data_summ2 <- segment(data, 'y', 'g1', 'g3', exposure = 'e')
#
# data_summ <- bind_rows(data_summ1, data_summ2)
#
# ggplot(data = data_summ, aes(x = value1, y = y, color = value2, group = value2)) +
#   geom_point() +
#   geom_line() +
#
#   facet_wrap(~ var2)
#
# ggplot_segment(data_summ)
#
#
# segments_df <-
#   map(
#     c('g1', 'g2'),
#     ~ segment(data, 'y', .x)
#   )
#
# plots <-
#   map(
#     segments_df,
#     plot_segment
#   )
#
# subplot(plots)
#
# ggplot_segment(segments_df)
#
artichaud1/twoway documentation built on May 12, 2019, 5:43 a.m.