R/data_pooler.R

#' @docType class
#' @name data_pooler
#' @aliases Data-Pooler
#' @import R6
#' @title Indodax Historical Data Generator
#' @description Pooling historical data from Indodax server and clean it up.
#' @author Suberlin Sinaga @2021
#' @return Object of type \code{\link{data_pooler}}
#' @importFrom jsonlite fromJSON
#' @importFrom httr GET
#' @importFrom httr content
#' @importFrom dplyr bind_rows
#' @importFrom dplyr arrange
#' @importFrom dplyr mutate
#' @importFrom dplyr copy_to
#' @importFrom dplyr distinct
#' @importFrom DBI dbConnect
#' @importFrom lubridate as_date
#' @importFrom lubridate floor_date
#' @importFrom lubridate ceiling_date
#' @importFrom lubridate days
#' @importFrom RSQLite SQLite
#' @importFrom dplyr %>%
#' @export
data_pooler <- R6Class(
    "data_pooler",
    private = list(init = NULL),
    public = list(
        #' @description Method to pool historical data from Indodax server
        #' @aliases indodax_crypto_pooler
        #' @param date_range: character vector of start date and end date we want to pool the data out
        #' @param pair: the pair in {coin_idr} format. It must be a single coin for each data.
        #' @param resolution: The time frame we are wanted to pull out. It must be a single time frame.
        #' @param url: The url to fetch data
        #' @param sleep: Since indodax only allow open API to be hit by 18 times per minute, then we need to set sleep to meet this limitation.
        #' @param type: Getter type only used when we are fixing missing data.
        #' @param direct: TRUE is only used when we are fixing missing data.
        #' @return object of type \code{\link{data_pooler}} when it is initial and non direct type. A data frame when it is getter and direct type.
        indodax_crypto_pooler = function(date_range, pair, resolution = c(1, 5, 15, 60, 240, "D"), url, sleep = 4,
                                         type = c("initial", "getter"), direct = FALSE) {
            type <- match.arg(type, c("initial", "getter"))
            resolution = match.arg(as.character(resolution), c(1, 5, 15, 60, 240, "D"))
            if (missing(url)) {
                url <- "https://indodax.com/tradingview/history?symbol="
            }

            get_data <- function(dates, url, pair, resolution) {
                date_1 <- as.numeric(as.POSIXct(dates[1]))
                date_2 <- as.numeric(as.POSIXct(dates[2]))
                data <- tryCatch(GET(paste0(url, pair, "&resolution=", resolution, "&from=", date_1,
                                   "&to=", date_2)), error = function(e) e)

                if (!is.character(data)) {
                    data <- data %>%
                        content(type = "text", encoding = "UTF-8")
                    data <- data %>%
                        fromJSON() %>%
                        bind_rows() %>%
                        arrange(.data[['t']]) %>%
                        mutate(s = pair)
                }

                Sys.sleep(sleep)

                return(data)

            }

            if (direct) {
                # cat(paste0(url, pair, "&resolution=", resolution, "&from=", date_range[[1]],
                #              "&to=", date_range[[2]], "\n"))
                final_data <- tryCatch(GET(paste0(url, pair, "&resolution=", resolution, "&from=", date_range[[1]],
                                                  "&to=", date_range[[2]])), error = function(e){e})

                if (!is.character(final_data)) {
                    final_data <- final_data %>%
                        content(type = "text", encoding = "UTF-8") %>%
                        fromJSON()
                    if (length(final_data$t) < 1) {
                        return(NULL)
                    }
                    final_data <- final_data %>%
                        bind_rows() %>%
                        arrange(.data[['t']]) %>%
                        mutate(s = pair)

                    return(final_data)
                } else {
                    return(NULL)
                }

            } else {
                cat(paste0("Collecting data for ", pair, "...\n"))
                date_1 <- as_date(date_range[[1]])
                date_2 <- as_date(date_range[[2]])
                date_ranges <- seq(floor_date(date_1, "month"), floor_date(date_2, "month"), "months")
                if (length(date_ranges) > 1) {
                    date_ranges[1] <- date_1
                    date_ranges[length(date_ranges)] <- date_2

                    date_ranges <- data.frame(date_1 = date_ranges) %>%
                        mutate(date_2 = ceiling_date(date_1, "months") - days(1))

                    final_data <- apply(date_ranges, 1, get_data, url = url, pair = pair, resolution = resolution) %>%
                        bind_rows() %>%
                        arrange(.data[['t']])
                } else if (length(date_ranges) <= 1) {
                    final_data <- get_data(c(date_1, date_2), url = url, pair, resolution) %>%
                        bind_rows() %>%
                        arrange(.data[['t']])
                }
            }

            cat(paste0("Finish Collecting data for ", pair, "...\n"))
            if(type == "getter") {
                return(final_data)
            } else {
                private$data <- final_data
                return(self)
            }
        },
        #' @description Method to fix historical data fetched from Indodax server
        #' @param asset_name: the pair in {coin_idr} format. It must be a single coin for each data.
        #' @param resolution: The time frame we are wanted to pull out. It must be a single time frame.
        #' @param type: Getter type only used when we are fixing missing data.
        #' @param direct: TRUE is only used when we are fixing missing data.
        #' @param interval: Second unit for time frame.
        #' @param data: data that we want to be fixed
        #' @param ...: Arguments to be implemented to \href{#method-indodax_crypto_pooler}{\code{indodax_crypto_pooler()}}
        #' @return object of type \code{\link{data_pooler}}.
        indodax_data_fixer = function(asset_name, resolution, interval, data,
                                      type = "getter", direct = TRUE, ...) {
            cat(paste0("Fixing data for ", asset_name, "...\n"))
            if (missing(data)) {
                data = private$data
                cat(paste0("Total row initial data: ", nrow(data), "\n"))
            }

            if (!all(c("t", "o", "l", "h", "c", "v") %in% names(data))) {
                stop("Please provide tolhcv data format!")
            }

            incomplete_seq <- which(diff(data$t) != interval)
            cat(paste0("there are ", length(incomplete_seq), " incomplete data.\n"))

            if (length(incomplete_seq) > 0) {
                for (i in incomplete_seq) {
                    date <- c(data$t[i], data$t[i+1])
                    cat(paste0("Processing data index ", i, " with time from ", data$t[i], " to ", data$t[i+1], "\n"))

                    fix_data <- self$indodax_crypto_pooler(date, pair = asset_name, resolution = resolution, type = type, direct = direct, ...)
                    if (inherits(fix_data, "data.frame")) {
                        if (nrow(fix_data) > 0) {
                            if (all(c("t", "o", "l", "h", "c", "v") %in% names(data))) {
                                data <- data %>%
                                    rbind(fix_data) %>%
                                    distinct(.data[['t']], .keep_all = TRUE)
                            }
                        }

                    } else {
                        next
                    }
                }
            }
            private$data <- data %>%
                arrange(.data[['t']]) %>%
                mutate(timeframe = c(interval, diff(.data[["t"]])),
                       interval = interval)

            cat(paste0("Finish fixing data for ", asset_name, "...\n"))
            cat(paste0("New row data must be: ", nrow(data), "\n"))
            return(self)
        },
        #' @description Method to push our data into database
        #' @param db_name: The database name where we want to store our data.
        #' @param table_name: The table name where we want to store our data.
        #' @param .fun: It can be \code{SQLite()} or Any other database driver.
        #' @param name_fun: The name of the database driver we want to use.
        #' @param ...: Additional arguments to be added to our \code{.fun()}
        #' @return object of type \code{\link{data_pooler}}.
        push_data = function(db_name, table_name, .fun, name_fun = deparse(substitute(.fun)), ...){
            "Push historical data to database."
            "@param db_name: database name to store the final data"
            cat(paste0("Pushing data to database :", db_name, " to table: ", table_name, "\n"))
            if (name_fun == "SQLite") {
                private$conn <- dbConnect(SQLite(), db_name)
            } else {
                private$conn <- dbConnect(.fun(), dbname = db_name, host = host, user = user, pass = pass, port = port)
            }
            copy_to(private$conn, private$data, table_name,
                    temporary = FALSE, indexes = list(c("t")),
                    overwrite = TRUE)
            return(self)
        },
        #' @description Method to print out our data_pooler class specifically
        #' @return String to print about the object information.
        print = function() {
            cat("Historical portfolio data pooler!\n\t")
            if (exists("private$data")) {
                print(head(private$data, 1))
            } else {
                cat("No data fetched!")
            }
        }
    ),
    active = list(
        #' @description Active binding method to bind data into object \code{\link{data_pooler}}.
        #' @param data: The data that we want to be inserted into our object.
        #' @return None.
        add_portfolio_data = function(data) {
            if (missing(data)) {
                private$data
            } else {
                private$data <- data
            }
        }
    ),
    lock_objects = FALSE
)
blakcjack/ims documentation built on Dec. 19, 2021, 9:52 a.m.