R/plotly_datahandler.R

#' R6 class for handling plotly data
#'
#' @export
#' @docType class
#' @format An \code{R6::R6Class} object
#' @importFrom R6 R6Class
#' @importFrom assertthat assert_that
#' @importFrom data.table as.data.table setkey
#' @importFrom dplyr %>% case_when filter if_else bind_rows select left_join
#' @importFrom stringr str_replace str_sub str_subset
#' @importFrom stringr str_extract str_remove str_detect
#' @importFrom purrr map map_chr map_dfr compact pmap
#' @importFrom tibble tibble as_tibble
#' @importFrom bit64 as.integer64
#' @importFrom nanotime as.nanotime
#' @importFrom plotly add_trace subplot
#' @importFrom tidyr unnest pivot_wider
#' @importFrom lazyeval f_eval
#' @importFrom DBI dbConnect dbDisconnect dbExecute dbGetQuery
#' @importFrom duckdb duckdb
#' @description
#' A class for handling \code{plotly} data,
#' which defines functions used in the \code{downsampler} class

# class decralation -------------------------------------------------------

plotly_datahandler <- R6::R6Class(
  "plotly_datahandler",

# public members ----------------------------------------------------------
  public = list(
    #' @field figure \code{plotly} object.
    figure = NULL,

    # constructor ---------------------------------------------------------
    #' @description
    #' Constructing an instance.
    #' The data contained in a \code{plotly} object (\code{figure} argument)
    #' will be included in the instance (as a reference).
    #' @param figure \code{plotly} object.
    #' The traces of this object will be down-sampled.
    #' @param srcs,srcs_ext,formula Character and formula, optional.
    #' \code{srcs} is the path of the source data (or directory).
    #' When a directory is specified, \code{srcs_ext} is the extension of the source file.
    #' \code{formula} is the formula to extract the data from the source data.
    #' @param legend_options Named list, optional.
    #' Names of the elements are \code{name_prefix},
    #' \code{name_suffix}, \code{xdiff_prefix},
    #' and \code{xdiff_suffix}.
    #' \code{name_prefix} and \code{name_suffix}
    #' will be added to the name of the trace when the down-sampling is applied.
    #' By default, prefix is a bold orange \code{[S]} and suffix is none.
    #' \code{xdiff_prefix} and \code{xdiff_suffix} are employed to show
    #' the mean aggregation size of the down-sampling.
    #' @param tz Character, optional.
    #' Time zone used to display time-series data.
    #' By default \code{Sys.timezone()}.
    #' @param use_light_build Boolean, optional.
    #' Whether \code{plotly_build_light} is used.
    #' It quickly build scatter-type \code{plotly} data.
    #' By default, \code{TRUE}.
    #'
    initialize = function(
      figure = NULL,
      srcs = NULL,
      formula = NULL,
      srcs_ext = NULL,
      legend_options = list(
        name_prefix  = '<b style="color:sandybrown">[S]</b> ',
        name_suffix  = "",
        xdiff_prefix = '<i style="color:#fc9944"> ~',
        xdiff_suffix = "</i>"
      ),
      tz = Sys.timezone(),
      use_light_build = TRUE
    ) {

      # check classes and lengths of the arguments
      assertthat::assert_that(is.null(figure) || inherits(figure, "plotly"))
      assertthat::assert_that(is.null(srcs) || inherits(srcs, "character"))
      assertthat::assert_that(inherits(legend_options, "list"))
      assertthat::assert_that(inherits(tz, "character"))

      assertthat::assert_that(
        all(purrr::map_lgl(legend_options, ~length(.x) == 1))
      )

      for (opt in names(private$legend_options)) {
        if (!is.null(legend_options[[opt]])) {
          private$legend_options[[opt]] <- legend_options[[opt]]
        }
      }

      if (!is.null(figure) & !is.null(srcs)) {
        warning("Both figure and srcs are given. srcs is neglected.")
      }

      private$tz <- tz

      # before registering the data,
      # build the data of the figure
      if (is.null(figure)) {
        figure <- plotly::plot_ly(type = "scatter", mode = "lines") %>%
          plotly_build() %>%
          plotly::subplot()

        figure$x$data <- list()

      } else {
        if (is.null(figure$x$data)) {
          if (use_light_build) {
            figure <- plotly_build_light(figure)
          } else {
            message("building plotly data for the first time may take much time")
            figure <- plotly::plotly_build(figure)
          }
        }

        x_title <- figure$x$layout$xaxis$title
        y_title <- figure$x$layout$yaxis$title

        # note: this subplot may change the layout
        figure <- plotly::subplot(
          figure,
          titleX = !is.null(x_title) && is.character(x_title),
          titleY = !is.null(y_title) && is.character(y_title)
        )
      }


      # convert the trace data to data frame
      # it will be registered after adjusting figure data,
      # because self$figure must be registered before doing that
      private$set_trace_df_default()
      traces_df <- self$plotly_data_to_df(figure$x$data)

      if (nrow(traces_df) == 0 & !is.null(srcs)) {
        assertthat::assert_that(!is.null(formula) && inherits(formula, "formula"))
        traces_df <- self$srcs_to_df(fml = formula, srcs = srcs, srcs_ext = srcs_ext)
      }

      # change figure data according to the traces_df, if necessary
      figure$x$attrs <- NULL
      if (length(figure$x$data) > 0 && nrow(traces_df) > 0 && "data" %in% colnames(traces_df)) {
        figure$x$data <- purrr::map2(
          figure$x$data, traces_df$data,
          ~.x[setdiff(names(.x), colnames(.y))]
          )
      } else if (length(figure$x$data) == 0) {

        figure$x$data <- purrr::pmap(
          traces_df,
          function(xaxis, ...) {
            plotly::plotly_build(
              plotly::plot_ly(
                type = "scatter", mode = "lines", xaxis = xaxis
              )
            )$x$data[[1]]
          }
        )
      } else if (nrow(traces_df) == 0) {
        figure$x$data <- plotly::plotly_build(
          plotly::plot_ly(type = "scatter", mode = "lines")
          )$x$data
      }

      # setting of the show legend
      if (is.null(figure$x$layout$showlegend) ||
          figure$x$layout$showlegend == FALSE) {
        figure$x$layout$showlegend <- TRUE
      }

      # finally register the figure and the data
      self$figure <- figure
      self$set_trace_data(traces_df = traces_df)

    }, #end of initialization

    #' @description
    #' In the instance, data is contained as a data frame
    #' (see \code{self$orig_data} for detailed information).
    #' Using this method, the data can be added or overwritten.
    #' If a data frame (\code{traces_df} argument) is given, it will be
    #' added to \code{self$orig_data} or reassigned as \code{self$orig_data}.
    #' If attributes to construct a \code{plotly} object (\code{...} argument)
    #' are given, a data frame is constructed and used.
    #' @param ... Arguments to constitute a \code{plotly} attributes, optional.
    #' For instance, \code{x}, \code{y}, \code{type}, and \code{mode}
    #' are applicable. See \code{plotly::plot_ly}.
    #' @param traces_df Data frame, optional.
    #' Data frame whose format is agreed with \code{self$orig_data}.
    #' If \code{traces_df} is given, arguments in \code{...} are neglected.
    #' @param append Boolean, optional.
    #' Whether the data is append or overwrite. By default, \code{FALSE}
    #' (the traces are overwritten).
    set_trace_data = function(
      ..., traces_df = NULL, append = FALSE
      ) {

      # make traces_df, if not given
      if (is.null(traces_df)) {
        message("data is built with plotly_build_light")
        traces_df <- plotly::plot_ly(...) %>%
          plotly_build_light() %>%
          .$x %>% .$data %>%
          self$plotly_data_to_df()
      }

      if (append) {
        private$traces_df <- dplyr::bind_rows(
          private$traces_df,
          traces_df
        )
      } else {
        private$traces_df <- traces_df
      }

      # set figure xaxis
      if (nrow(traces_df) > 0) {
        traces_axis <- private$traces_df %>%
          dplyr::mutate(
            xaxis_type = purrr::map_chr(
              data,
              ~if_else(inherits(.x$x, "nanotime"), "date", "linear")
            )
          ) %>%
          dplyr::distinct(xaxis, xaxis_type) %>%
          dplyr::group_by(xaxis) %>%
          dplyr::mutate(cnt = dplyr::n()) %>%
          dplyr::ungroup() %>%
          dplyr::mutate(xaxis = stringr::str_replace(xaxis, "^x", "xaxis"))

        if (any(traces_axis$cnt > 1)) {
          warning("types of the x values are not consistent")
          traces_axis <- dplyr::distinct(trace_axis, xaxis, .keep_all = TRUE)
        }

        args <- NULL
        for (i in 1:nrow(traces_axis)) {
          args[[traces_axis$xaxis[i]]][["type"]] <- traces_axis$xaxis_type[i]
        }

        self$figure <- do.call(plotly::layout, c(list(self$figure), args))
      }

      invisible()
    },

    #' @description
    #' Covert the data contained in \code{srcs} file(s) to a duck-db.
    #' A minimum data and the path of the database will be returned.
    #' @param fml Formula.
    #' The formula to extract the data from the source data.
    #' @param srcs Character.
    #' The name of the source file (e.g. data.parquet) or the directory can be specified.
    #' @param srcs_ext Character, optional.
    #' The extension of the source file, if \code{srcs} is a directory.
    srcs_to_df = function(fml, srcs, srcs_ext = NULL) {

      assertthat::assert_that(file.exists(srcs) || dir.exists(srcs))
      assertthat::assert_that(
        length(labels(terms(fml))) == 1,
        msg = "only one term is acceptable"
      )

      srcs_name <- basename(srcs) %>% str_remove("\\..*$")

      if (dir.exists(srcs)) {
        srcs_path <- paste0(
          "'", str_remove(srcs, "/?$"),
          "/**/*", srcs_ext, "'"
        )
      } else {
        srcs_path <- paste0("'", srcs, "'")
      }

      db_path <- paste0(tempfile("db"), "/", srcs_name, ".duckdb")
      message("The duck-db is created at ", db_path)

      if (file.exists(db_path)) {
        file.remove(db_path)
      } else if (!dir.exists(dirname(db_path))) {
        dir.create(dirname(db_path))
      }

      x_name <- labels(terms(fml))
      y_name <- all.vars(fml)[1]

      # create a table in the database from the source data
      con <- DBI::dbConnect(duckdb::duckdb(), dbdir = db_path)

        DBI::dbExecute(
          con,
          paste0(
            "create table original_raw_data",
            " as select ", x_name, " as x, ", y_name, " as y from ", srcs_path
          )
        )

        col_info <- DBI::dbGetQuery(con, paste0("PRAGMA table_info(original_raw_data)"))

        x_type <- col_info["type"][col_info["name"] == "x"]
        y_type <- col_info["type"][col_info["name"] == "y"]

        DBI::dbExecute(
          con,
          paste0(
            "create table original_data",
            "(x ", x_type, " primary key, y ", y_type, ")"
          )
        )

        DBI::dbExecute(
          con,
          paste0(
            "create table original_grouped_data",
            " as select x, first(y) as y",
            " from original_raw_data ", "group by x order by x"
          )
        )

        DBI::dbExecute(
          con,
          paste0(
            "insert into original_data(x, y) ",
            "select * from original_grouped_data"
          )
        )

      DBI::dbDisconnect(con)

      if (stringr::str_detect(x_type, "^TIME") | stringr::str_detect(x_type, "^DATE")) {
        x_initval <- nanotime::as.nanotime(Sys.time())
      } else {
        x_initval <- 0.0
      }

      y_initval <- 0.0

      data_init <- data.table::data.table(
        x = x_initval, y = y_initval
      )

      traces_df_output <- data.table::data.table(
        uid = stringr::str_replace(basename(tempfile()), "^file", "ar"),
        name = y_name,
        showlegend = TRUE,
        legendgroup = NA,
        customdatasrc = list(db_path),
        xaxis = "x",
        yaxis = "y",
        data = list(data_init)
      )

      return(traces_df_output)
    },


    #' @description
    #' Covert the data contained in \code{plotly} object to a data frame.
    #' A unique id (\code{uid}) is granted to each data.
    #' The data frame will be returned.
    #' @param plotly_data List.
    #' The list whose elements are named list representing \code{plotly} traces.
    #' All elements must have elements named \code{type}.
    #' @param use_datatable Boolean.
    #' If it is \code{TRUE}, data such as \code{x} and \code{y} are nested
    #' in a \code{data.table}, of which key column is \code{x}.
    #' By default, \code{TRUE}.
    plotly_data_to_df = function(plotly_data, use_datatable = TRUE) {

      # check argument
      assertthat::assert_that(inherits(plotly_data, "list"))
      assertthat::assert_that(
        all(purrr::map_lgl(plotly_data, ~!is.null(.x$type))),
        msg = "All traces must have 'type' attribute"
      )
      assertthat::assert_that(
        all(purrr::map_lgl(
          plotly_data, ~.x$type %in% private$trace_type_accept
          )),
        msg = paste0(
          "The types of the all traces must be ",
          paste(private$trace_type_accept, collapse = " or ")
        )
      )

      # check trace members
      purrr::map(
        plotly_data,
        function(trace) {
          purrr::pmap(
            self$trace_df_default,
            function(name, required, class, ...) {
              if (required) {
                assertthat::assert_that(
                  name %in% names(trace),
                  msg = paste0(name, " is required for all traces")
                )
              }

              if (name %in% names(trace)) {
                assertthat::assert_that(inherits(trace[[name]], class))
              }
            }
          )
          invisible()
        }
      )

      # change the class, if necessary
      tryCatch({
        plotly_data <- purrr::map(
          plotly_data,
          function(trace) {
            if (inherits(trace[["x"]], c("integer64", "POSIXt", "Date"))) {
              trace[["x"]] <- nanotime::as.nanotime(trace[["x"]])
            } else if (inherits(trace[["x"]], "character")) {
              trace[["x"]] <- private$plotlytime_to_nanotime(
                trace[["x"]], private$tz
                )
            }

            if (inherits(trace[["y"]], "character")) {
              trace[["y"]] <- factor(trace[["y"]])
            }
            return(trace)
          }
        )
      },
      error = function(e) {
        stop("x is not numeric or datetime")
      }
      )

      vars_hf <- self$trace_df_default$name[self$trace_df_default$data]

      # make data table and set key at initialization,
      # of which cost is relatively high.
      traces_df <- purrr::map(
        plotly_data,
        function(trace) {

          if (use_datatable) {

            hf_data <- trace[intersect(names(trace), vars_hf)] %>%
              data.table::setDT(key = "x") %>%
              unique(by = "x")

            trace_df <- c(
              trace[setdiff(names(trace), vars_hf)] %>%
                purrr::modify_if(~length(.x) > 1 || is.list(.x), list),
              list(data = list(hf_data))
              ) %>%
              data.table::setDT()
          } else {
            trace_df <- trace %>%
              purrr::modify_if(~length(.x) > 1 || is.list(.x), list) %>%
              data.table::setDT()
          }

          return(trace_df)
        }
      ) %>%
        bind_rows() %>%
        bind_rows(
          self$trace_df_default %>%
            dplyr::filter(purrr::map_lgl(default, ~!is.null(.x))) %>%
            dplyr::select(name, default) %>%
            tidyr::pivot_wider(names_from = "name", values_from = "default") %>%
            tidyr::unnest(everything()) %>%
            .[0,]
        )


      if (nrow(traces_df) > 0) {

        traces_df_output <- traces_df %>%
          dplyr::mutate(
            uid = basename(tempfile(rep("", nrow(.)))),
            name = if_else(
              !is.na(name),
              as.character(name),
              paste("trace", dplyr::row_number())
            ),
            showlegend = dplyr::if_else(
              is.na(legendgroup),
              TRUE,
              !duplicated(legendgroup)
            )
          )
      } else {
        traces_df_output <- traces_df %>%
          bind_rows(
            tibble(uid = "", name = "", showlegend = TRUE) %>%
              .[0,]
          )
      }


      if (use_datatable && nrow(traces_df_output) > 0) {
        message(paste0(
          purrr::map_int(traces_df_output$data, nrow),
          " unique data were obtained from ",
          traces_df_output$name
        ) %>%
          paste(collapse = "\n"))
      }

      return(traces_df_output)

    }
  ), # end of the public member

  active = list(
    #' @field orig_data Data frame representing \code{plotly} traces.
    orig_data = function() private$traces_df,
    #' @field trace_df_default Data frame representing default values
    #' of \code{plotly} traces.
    #' \code{name} column represents the names of the attributes.
    #' \code{required} column represents whether the attributes are necessary
    #' to construct a data frame of a trace.
    #' \code{data} column represents whether the attributes are the data.
    #' \code{default} attributes represents default values of the attributes.
    #' When constructing a data frame of a trace, default values are used
    #' if no values are assigned.
    #' \code{class} column represents the acceptable classes of the attributes.
    trace_df_default = function() private$trace_df_def
  ),

# private members ---------------------------------------------------------

  private = list(

    # traces_df and its default structure
    traces_df = NULL,
    trace_df_def = NULL,
    trace_type_accept = c("scatter", "scattergl", "candlestick"),

    set_trace_df_default = function() {
      x_class <- c("numeric", "integer", "integer64", "POSIXt", "character","Date")
      y_class <- c("numeric", "integer", "character", "factor")
      name_class <- c("character", "factor")
      lgrp_class <- c("character", "factor")

      private$trace_df_def <- tibble::tribble(
        ~name,         ~required, ~data, ~default,     ~class,
        "x",           TRUE,      TRUE,  NULL,         x_class,
        "y",           FALSE,     TRUE,  NULL,         y_class,
        "open",        FALSE,     TRUE,  NULL,         c("numeric", "integer"),
        "high",        FALSE,     TRUE,  NULL,         c("numeric", "integer"),
        "low",         FALSE,     TRUE,  NULL,         c("numeric", "integer"),
        "close",       FALSE,     TRUE,  NULL,         c("numeric", "integer"),
        "text",        FALSE,     TRUE,  NULL,         c("character"),
        "hovertext",   FALSE,     TRUE,  NULL,         c("character"),
        "xaxis",       FALSE,     FALSE, "x",          c("character"),
        "name",        FALSE,     FALSE, character(0), name_class,
        "legendgroup", FALSE,     FALSE, character(0), lgrp_class
      )
    },

    # settings for the trace names
    legend_options = list(
      name_prefix  = '<b style="color:sandybrown">[S]</b> ',
      name_suffix  = "",
      xdiff_prefix = '<i style="color:#fc9944"> ~',
      xdiff_suffix = "</i>"
    ),

    # time zone employed to show time-series data
    tz = Sys.timezone(),

    # compute the name of the trace based on the differences of x values
    cmpt_trace_name = function(name, x) {

      assertthat::assert_that(inherits(name, c("character", "factor")))
      assertthat::assert_that(inherits(x, c("numeric","integer", "integer64")))

      if (length(x) < 2) return(name)

      # mean difference of the x
      x_diff_median <- as.numeric(median(diff(x), na.rm = TRUE))

      x_diff_chr <- dplyr::if_else(
        inherits(x, "nanotime"),
        private$nanosecond_to_label(x_diff_median),
        private$numeric_to_label(x_diff_median)
      )

      name <- paste0(
        private$legend_options$name_prefix,
        name,
        private$legend_options$name_suffix,
        private$legend_options$xdiff_prefix,
        x_diff_chr,
        private$legend_options$xdiff_suffix
      )

      return(name)
    },


    # Generate an aggregation label according to the data interval
    numeric_to_label = function(x) {
      thr <- 0.5
      e3_unit <- 1e3
      e6_unit <- 1e6
      e9_unit <- 1e9
      e12_unit <- 1e12

      output_chr <- dplyr::case_when(
        x > thr * e12_unit ~ sprintf("%.1fT",  x / e12_unit),
        x > thr * e9_unit  ~ sprintf("%.1fG",  x / e9_unit),
        x > thr * e6_unit  ~ sprintf("%.1fM",  x / e6_unit),
        x > thr * e3_unit  ~ sprintf("%.1fK",  x / e3_unit),
        TRUE               ~ sprintf("%.1f",   x)
      )
      return(output_chr)
    },


    # Generate an time-based aggregation label according to nanotime interval
    nanosecond_to_label = function(ns) {
      thr     <- 0.5
      us_unit <- 1e3
      ms_unit <- 1e6
      s_unit  <- 1e9
      m_unit  <- 1e9 * 60
      h_unit  <- 1e9 * 60 * 60
      d_unit  <- 1e9 * 60 * 60 * 24
      w_unit  <- 1e9 * 60 * 60 * 24 * 7
      mo_unit <- 1e9 * 60 * 60 * 24 * 30
      yr_unit <- 1e9 * 60 * 60 * 24 * 365

      output_chr <- dplyr::case_when(
        ns > thr * yr_unit ~ sprintf("%.1fy",  ns / yr_unit),
        ns > thr * mo_unit ~ sprintf("%.1fm",  ns / mo_unit),
        # ns > thr * w_unit  ~ sprintf("%.1fw",  ns / w_unit),
        ns > thr * d_unit  ~ sprintf("%.1fd",  ns / d_unit),
        ns > thr * h_unit  ~ sprintf("%.1fh",  ns / h_unit),
        ns > thr * m_unit  ~ sprintf("%.1fm",  ns / m_unit),
        ns > thr * s_unit  ~ sprintf("%.1fs",  ns / s_unit),
        ns > thr * ms_unit ~ sprintf("%.1fms", ns / ms_unit),
        ns > thr * us_unit ~ sprintf("%.1fus", ns / us_unit),
        TRUE               ~ sprintf("%.1fns", ns)
      )
      return(output_chr)
    },

    # Conversion of nanotime to plotly-style time
    nanotime_to_plotlytime = function(time, tz = private$tz) {
      assertthat::assert_that(
        inherits(time, "nanotime"),
        msg = "Only the nanotime class is acceptable"
      )

      tz_hms <- format(as.POSIXct("1970-01-01", tz), "%z") %>%
        stringr::str_replace("^(\\-?)\\+?([0-9]{1,2})([0-9]{2})", "\\1\\2:\\3:00")

      time_plotly <- (time + nanotime::as.nanoduration(tz_hms)) %>%
        format("%Y-%m-%d %H:%M:%E9S")

      return(time_plotly)
    },

    # Conversion of plotly-style time to nanotime
    # as.POSIXct is necessary because as.nanotime does not accept NA
    plotlytime_to_nanotime = function(time, tz = private$tz) {
      if (inherits(time, "nanotime")) return(time)
      return(nanotime::as.nanotime(as.POSIXct(time, tz = tz)))
    }

  ) # end of the private member
) # end of the R6 class

Try the shinyHugePlot package in your browser

Any scripts or data that you put into this service are public.

shinyHugePlot documentation built on Oct. 1, 2024, 5:08 p.m.