R/getSliceData.R

Defines functions getSliceData getSliceColumnsData getSliceIndicatorData spreadColumnsToXtsList getSliceDataSharadar getSliceDataQuoteMedia

Documented in getSliceColumnsData getSliceData getSliceDataQuoteMedia getSliceDataSharadar getSliceIndicatorData spreadColumnsToXtsList

#' Get all stock data of specified columns and date range
#'
#' @param dbcon.str Connection string for Sharadar or QuoteMedia.
#' @param columns columns to retrieve (valid columns = open, high, low, close,
#'  adj.open, adj.high, adj.low, adj.close, volume, tover, dividend, roc.pc2tc,
#'  roc.pc2to, roc.to2tc, sma, sd, avg.tover and open.gap.coef)
#' @param start.date start date by "yyyy-mm-dd" format
#' @param end.date end date by "yyyy-mm-dd" format
#' @param symbols Symbol list for subsetting the data
#' @param verbose Output messages.
#' @param sma.lens SMA calculation lengths by integer vector
#' @param sd.lens SD calculation lengths by integer vector
#' @param avg.tover.lens Average turnover calculation lengths by integer vector
#' @param open.gap.coef.lens Open Gap Coef calculation lengths by integer vector
#'
#' @importFrom magrittr %>%
#' @importFrom foreach %do%
#'
#' @return List of all stock data of specified column as data.table format
#' @export
getSliceData <- function(dbcon.str, columns,
  start.date         = Sys.Date() - 365,
  end.date           = Sys.Date(),
  symbols            = NULL,
  verbose            = FALSE,
  sma.lens           = NULL,
  sd.lens            = NULL,
  avg.tover.lens     = NULL,
  open.gap.coef.lens = NULL)
{
  # Functions
  buildColumns <- function(columns) {

    valid.columns <- c("open", "high", "low", "close", "volume",
                       "adj.open", "adj.high", "adj.low", "adj.close",
                       "adj.volume", "dividend")

    req.columns <- columns[columns %in% valid.columns]

    if ("tover" %in% columns) {
      req.columns <- c(req.columns, "adj.close", "adj.volume")
    }
    if ("roc.pc2tc" %in% columns) {
      req.columns <- c(req.columns, "adj.close")
    }
    if ("roc.pc2to" %in% columns) {
      req.columns <- c(req.columns, "adj.close", "adj.open")
    }
    if ("roc.to2tc" %in% columns) {
      req.columns <- c(req.columns, "adj.close", "adj.open")
    }
    if ("sma" %in% columns) {
      req.columns <- c(req.columns, "adj.close")
    }
    if ("sd" %in% columns) {
      req.columns <- c(req.columns, "adj.close")
    }
    if ("avg.tover" %in% columns) {
      req.columns <- c(req.columns, "adj.close", "adj.volume", "close")
    }
    if ("open.gap.coef" %in% columns) {
      req.columns <- c(req.columns, "adj.close", "adj.open")
    }
    unique(req.columns)
  }

  calcOpenGapCoefMany <- function(roc.pc2to, roc.to2tc,
                                  windows, start.date) {

    # Indicator window must be greater than 1
    windows <- windows[windows > 1]

    # Skip
    if (length(windows) == 0) {
      return (NULL)

    } else {
      result <- foreach(window = windows) %do% {
        subset.str <- paste0(start.date - (window * 1.5), "::")
        ogc <- calcOpenGapCoefParallel(window,
                                       roc.pc2to[subset.str, ],
                                       roc.to2tc[subset.str, ],
                                       TRUE)
      }
      names(result) <- windows
      return (result)
    }
  }

  # Check Input -------------------------------------------------------------

  # Date values
  start.date <- as.Date(start.date)
  end.date   <- as.Date(end.date)
  range      <- paste(start.date, end.date, sep = "::")

  # Columns
  db.columns        <- c("open", "high", "low", "close", "volume",
                         "adj.open", "adj.high", "adj.low", "adj.close",
                         "adj.volume", "dividend")
  calc.columns      <- c("tover", "roc.pc2tc", "roc.pc2to", "roc.to2tc")
  indicator.columns <- c("sma", "sd", "avg.tover", "open.gap.coef")
  valid.columns     <- c(db.columns, calc.columns, indicator.columns)

  lapply(columns, function(x) {
    if (!x %in% valid.columns) stop(paste0(x, " is not supported column."))
  })

  # Filter length
  valid.lens         <- seq(10, 250, 10)
  sma.lens           <- intersect(sma.lens, valid.lens)
  sd.lens            <- intersect(sd.lens, valid.lens)
  avg.tover.lens     <- intersect(avg.tover.lens, valid.lens)
  open.gap.coef.lens <- intersect(open.gap.coef.lens, valid.lens)

  # Check length
  if (("sma" %in% columns) & (length(sma.lens) == 0)) {
    columns <- columns[!columns == "sma"]
    cat("sma is specified, but lens is not valid. omitted.\n")
  }
  if (("sd" %in% columns) & (length(sd.lens) == 0)) {
    columns <- columns[!columns == "sd"]
    cat("sd is specified, but lens is not valid. omitted.\n")
  }
  if (("avg.tover" %in% columns) & (length(avg.tover.lens) == 0)) {
    columns <- columns[!columns == "avg.tover"]
    cat("avg.tover is specified, but lens is not valid. omitted.\n")
  }
  if (("open.gap.coef" %in% columns) & (length(open.gap.coef.lens) == 0)) {
    columns <- columns[!columns == "open.gap.coef"]
    cat("open.gap.coef is specified, but lens is not valid. omitted.\n")
  }

  # DB Access ------------------------------------------------------------------

  # Load more data if indicator needed and DB is not "plus db"
  if (!(grepl("plus", dbcon.str)) &
      (((columns %in% indicator.columns) %>% sum()) > 0)) {

    max.len <- max(c(sma.lens, sd.lens, avg.tover.lens, open.gap.coef.lens))
    sql.start.date <- start.date - (max.len * 1.5)

  } else {
    sql.start.date <- start.date
  }

  # Load data from Sharadar
  if (grepl("sharadar", dbcon.str)) {
    data <- getSliceDataSharadar(dbcon.str, buildColumns(columns),
                                 sql.start.date, end.date, verbose)

  # Load data from QuoteMedia
  } else if (grepl("quotemedia", dbcon.str)) {
    data <- getSliceDataQuoteMedia(dbcon.str, buildColumns(columns),
                                   sql.start.date, end.date, verbose)

  } else {
    stop("Correct DB connection string must be specified.")
  }

  # Calculate Additional Columns --------------------------------------------

  # Turnover
  if ("tover" %in% columns) {
    data[["tover"]] <- data$adj.close * data$adj.volume
  }

  # ROC: prior close to today close (= Log Return)
  if ("roc.pc2tc" %in% columns) {
    data[["roc.pc2tc"]] <- xts::diff.xts(data$adj.close, log = TRUE)
  }

  # ROC: prior close to today open (= Open Gap)
  if ("roc.pc2to" %in% columns) {
    data[["roc.pc2to"]] <- log(data$adj.open) - xts::lag.xts(log(data$adj.close))
  }

  # ROC: today close to today open (= Open to Close Return)
  if ("roc.to2tc" %in% columns) {
    data[["roc.to2tc"]] <- log(data$adj.close) - log(data$adj.open)
  }

  # SMA
  if ("sma" %in% columns) {

    # Use "Plus DB"
    if (grepl("plus", dbcon.str)) {
      if (verbose) cat("Retrieving SMA... ")
      sma <- getSliceIndicatorData(dbcon.str, "sma", start.date,
                                   end.date, sma.lens)
      data[["sma"]] <- spreadColumnsToXtsList(sma)
      if (verbose) cat("Done\n")

    # Calc on demand
    } else {
      if (verbose) cat("Calculating SMA... ")
      data[["sma"]] <- calcIndicatorParallel(data$adj.close, sma.lens,
                                             mean, na.rm = TRUE)
      if (verbose) cat("Done\n")
    }
  }

  # SD
  if ("sd" %in% columns) {

    # Use "Plus DB"
    if (grepl("plus", dbcon.str)) {
      if (verbose) cat("Retrieving SD... ")
      sd <- getSliceIndicatorData(dbcon.str, "sd", start.date,
                                  end.date, sd.lens)
      data[["sd"]] <- spreadColumnsToXtsList(sd)
      if (verbose) cat("Done\n")

    # Calc on demand
    } else {

      if (verbose) cat("Calculating SD... ")
      if ("roc.pc2tc" %in% columns) {
        data[["sd"]] <- calcIndicatorParallel(data$roc.pc2tc, sd.lens,
                                              stats::sd, na.rm = TRUE)
      } else {
        roc <- log(data$adj.close) - xts::lag.xts(log(data$adj.close))
        data[["sd"]] <- calcIndicatorParallel(roc, sd.lens,
                                              stats::sd, na.rm = TRUE)
      }
      if (verbose) cat("Done\n")
    }
  }

  # Average turnover
  if ("avg.tover" %in% columns) {

    # Use "Plus DB"
    if (grepl("plus", dbcon.str)) {
      if (verbose) cat("Retrieving Average Turnover... ")
      ato <- getSliceIndicatorData(dbcon.str, "ato", start.date,
                                   end.date, avg.tover.lens)
      data[["avg.tover"]] <- spreadColumnsToXtsList(ato)
      if (verbose) cat("Done\n")

    # Calc on demand
    } else {

      if (verbose) cat("Calculating Average Turnover... ")
      if ("tover" %in% columns) {
        data[["avg.tover"]] <- calcIndicatorParallel(data$tover, avg.tover.lens,
                                                     mean, na.rm = TRUE)
      } else {
        tover <- data$adj.close * data$adj.volume
        data[["avg.tover"]] <- calcIndicatorParallel(tover, avg.tover.lens,
                                                     mean, na.rm = TRUE)
      }
      if (verbose) cat("Done\n")
    }
  }

  # Open Gap Coeficient
  if ("open.gap.coef" %in% columns) {

    # Use "Plus DB"
    if (grepl("plus", dbcon.str)) {
      if (verbose) cat("Retrieving Open Gap Coeficient... ")
      ogc <- getSliceIndicatorData(dbcon.str, "ogc", start.date, end.date,
                                   open.gap.coef.lens)
      data[["open.gap.coef"]] <- spreadColumnsToXtsList(ogc)
      if (verbose) cat("Done\n")

    # Calc on demand
    } else {

      if (verbose) cat("Calculating Open Gap Coeficient... ")
      if ("roc.pc2to" %in% columns & "roc.to2tc" %in% columns) {
        data[["open.gap.coef"]] <- calcOpenGapCoefMany(data[["roc.pc2to"]],
                                                      data[["roc.to2tc"]],
                                                      open.gap.coef.lens,
                                                      start.date)

      } else {
        roc.pc2to <- log(data$adj.open) - xts::lag.xts(log(data$adj.close))
        roc.to2tc <- log(data$adj.close) - log(data$adj.open)
        data[["open.gap.coef"]] <- calcOpenGapCoefMany(roc.pc2to, roc.to2tc,
                                                      open.gap.coef.lens,
                                                      start.date)
      }
      if (verbose) cat("Done\n\n")
    }
  }

  # Universe for subset data ---------------------------------------------------

  # All names in data
  universes <- lapply(names(data), function(name) {
    if (!name %in% indicator.columns) {
      names(data[[name]])
    } else {
      names(data[[name]][[1]])
    }
  })

  # Add symbol list
  if (!is.null(symbols)) {
    universes <- c(universes, list(symbols))
  }

  # Setdiff of all names
  universe <- universes[[1]]
  for (i in 2:length(universes)) {
    universe <- intersect(universe, universes[[i]])
  }

  # Subset Result --------------------------------------------------------------
  data <- data[columns]
  for (name in names(data)) {
    if (!name %in% indicator.columns) {
      data[[name]] <- data[[name]][range, universe]
    } else {
      data[[name]] <- lapply(data[[name]], "[", range, universe)
    }
  }

  return (data)
}

# ------------------------------------------------------------------------------
#' Get all stock data of specified columns and date range
#'
#' @inheritParams getSliceData
#'
#' @return List of all stock data of specified column as data.table format
getSliceColumnsData <- function(dbcon.str, columns, start.date, end.date,
                                verbose) {

  # Functions
  buildSharadarSqlStr <- function(columns, start.date, end.date) {

    # Change to DB name (adj.open -> adj_open) and omit OHL
    db.columns <- purrr::map_chr(columns, convertColumnName)
    db.columns <- db.columns[!db.columns %in% c("open", "high", "low", "volume")]

    # Build columns for SQL select
    req.columns <- c("ticker", "date", db.columns)

    if ("open" %in% columns) {
      req.columns <- c(req.columns, "adj_open", "adj_close", "close")
    }
    if ("high" %in% columns) {
      req.columns <- c(req.columns, "adj_high", "adj_close", "close")
    }
    if ("low" %in% columns) {
      req.columns <- c(req.columns, "adj_low", "adj_close", "close")
    }
    if ("volume" %in% columns) {
      req.columns <- c(req.columns, "adj_volume", "adj_close", "close")
    }

    req.columns <- unique(req.columns)

    # Build SQL string
    columns.str <- paste(req.columns, collapse = "],[")
    columns.str <- paste0("[", columns.str, "]")

    paste0("SELECT ", columns.str, " FROM [equity_prices]
            WHERE [date] BETWEEN '", start.date, "' AND '", end.date, "' AND
                  [ticker] NOT LIKE '%-%' AND [ticker] != 'TRUE' 
            ORDER BY [ticker],[date]")
  }

  buildQuotemediaSqlStr <- function(columns, start.date, end.date) {

    # Build SQL string
    db.columns  <- purrr::map_chr(columns, convertColumnName)
    columns.str <- paste(c("ticker", "date", db.columns), collapse = "],[")
    columns.str <- paste0("[", columns.str, "]")

    paste0("SELECT ", columns.str, " FROM [eod_prices]
           WHERE [date] BETWEEN '", start.date, "' AND '", end.date, "' AND
                 [ticker] != 'TRUE'
           ORDER BY [ticker],[date]")
  }

  # Check date values
  start.date <- as.Date(start.date)
  end.date   <- as.Date(end.date)

  # Switch by data source
  if (stringr::str_detect(dbcon.str, "sharadar")) {
    ds <- "Sharadar"
    sql <- buildSharadarSqlStr(columns, start.date, end.date)
  } else{
    ds <- "Quotemedia"
    sql <- buildQuotemediaSqlStr(columns, start.date, end.date)
  }

  # Use SQLite (start with local file path like "/home..")
  if (startsWith(dbcon.str, "/")) {
    # Connect to SQLite DB
    channel <- DBI::dbConnect(RSQLite::SQLite(), dbcon.str)

    if (verbose) cat(paste("\nRetrieving", ds, "data from SQLite DB... "))
    data <- DBI::dbGetQuery(channel, sql)
    if (verbose) cat("Done\n")

    # Convert date to POSIXct
    data$date <- as.POSIXct(data$date)

    # Disconnect from SQL Server
    DBI::dbDisconnect(channel)

  # Use MSSQL
  } else {
    # Connect to SQL Server
    channel <- RODBC::odbcDriverConnect(dbcon.str)

    if (verbose) cat(paste("\nRetrieving", ds, "data from SQL Server... "))
    data <- RODBC::sqlQuery(channel, sql, stringsAsFactors = FALSE)
    if (verbose) cat("Done\n")

    # Disconnect from SQL Server
    RODBC::odbcClose(channel)
  }

  # Warn if no data
  if (nrow(data) == 0) {
    stop(paste0("No data found for ", columns, ". (", start.date, " - ",
                end.date, ")"))
  } else {
    data <- data.table::data.table(data)
    names(data) <- purrr::map_chr(names(data), convertColumnName)
  }

  return (data)
}

# ------------------------------------------------------------------------------
#' Get all indicator data of specified type and date range
#'
#' @inheritParams getSliceData
#' @param type Indicator type.
#'
#' @return List of all indicator data of specified type as data.table format
getSliceIndicatorData <- function(dbcon_str, type, start_date, end_date, lens) {

  # Check date values
  start_date <- as.Date(start_date)
  end_date   <- as.Date(end_date)

  # Check DB
  if (!stringr::str_detect(dbcon_str, "sharadar_plus")) {
    stop("Use \"sharadar_plus DB\" to get indicator values.")
  }

  # Check indicator type
  valid_types <- c("sma", "sd", "ato", "ogc")
  lapply(type, function(x) {
    if (!x %in% valid_types) stop(paste0(x, " is not supported type."))
  })

  # Filter by valid length
  # valid_lens <- seq(10, 250, 10)
  # lens <- intersect(lens, valid_lens)

  # Build SQL
  cols <- paste0("[", paste(c("date", "ticker", lens), collapse = "],["), "]")
  sql <- paste0("SELECT ", cols, " FROM [", type, "] WHERE [date] BETWEEN '",
                start_date, "' AND '", end_date, "'")

  # Connect to SQLite DB
  channel <- DBI::dbConnect(RSQLite::SQLite(), dbcon_str)
  data <- DBI::dbGetQuery(channel, sql)
  DBI::dbDisconnect(channel)

  # Warn if no data
  if (nrow(data) == 0) {
    stop(paste0("No data found for ", type, ". (", start_date, " - ",
                end_date, ")"))
  } else {
    data <- data.table::data.table(data)
    data.table::setnames(data, "ticker", "symbol")
    data$date <- as.POSIXct(data$date)
  }

  return (data)
}

# ------------------------------------------------------------------------------
#' Spread columns to xts list
#'
#' @param data data.table data containing "symbol", "date" and other columns
#' @importFrom data.table dcast
#' @importFrom data.table as.xts.data.table
#'
#' @return List of xts.
spreadColumnsToXtsList <- function(data) {

  dcast.columns <- names(data)[!names(data) %in% c("symbol", "date")]

  data <- lapply(dcast.columns, function(column) {
    data[, .SD, .SDcol = c("symbol", "date", column)] %>%
      data.table::dcast(date ~ symbol, value.var = column) %>%
      data.table::as.xts.data.table()
  })

  names(data) <- dcast.columns
  return (data)
}

# ------------------------------------------------------------------------------
#' Get all stock data of specified columns and date range from Sharadar
#'
#' @inheritParams getSliceData
#'
#' @return List of all stock data of specified column as data.table format
getSliceDataSharadar <- function(dbcon.str, columns,
                                 start.date = Sys.Date() - 365,
                                 end.date = Sys.Date(), verbose = FALSE)
{
  # Validate columns
  original.columns <- c("adj.open", "adj.high", "adj.low", "adj.close",
                        "adj.volume", "dividend", "close")
  calc.columns     <- c("open", "high", "low", "volume")
  valid.columns    <- c(original.columns, calc.columns)

  lapply(columns, function(x) {
    if (!x %in% valid.columns) stop(paste0(x, " is not supported column."))
  })

  # Get data from DB and spread columns
  data <- getSliceColumnsData(dbcon.str, columns, start.date, end.date, verbose)
  data <- spreadColumnsToXtsList(data)

  ## Calculate Addtional Columns

  # Unadjusted open
  if ("open" %in% columns) {
    ratio <- data$adj.open / data$adj.close
    data[["open"]] <- data$close * ratio
  }

  # Unadjusted high
  if ("high" %in% columns) {
    ratio <- data$adj.high / data$adj.close
    data[["high"]] <- data$close * ratio
  }

  # Unadjusted low
  if ("low" %in% columns) {
    ratio <- data$adj.low / data$adj.close
    data[["low"]] <- data$close * ratio
  }

  # Unadjusted volume
  if ("volume" %in% columns) {
    ratio <- data$close / data$adj.close
    data[["volume"]] <- data$adj.volume / ratio
  }

  # Subset result
  data <- data[columns]

  return(data)
}

# ------------------------------------------------------------------------------
#' Get all stock data of specified columns and date range from QuoteMedia
#'
#' @inheritParams getSliceData
#'
#' @return List of all stock data of specified column as data.table format
getSliceDataQuoteMedia <- function(dbcon.str, columns,
                                   start.date = Sys.Date() - 365,
                                   end.date = Sys.Date(), verbose = FALSE)
{
  # Validate columns
  valid.columns <- c("open", "high", "low", "close", "volume", "dividend",
                     "adj.open", "adj.high", "adj.low", "adj.close",
                     "adj.volume")

  lapply(columns, function(x) {
    if (!x %in% valid.columns)
      stop(paste0(x, " is not supported column."))
    })

  # Get data from DB and spread columns
  data <- getSliceColumnsData(dbcon.str, columns, start.date, end.date, verbose)
  data <- spreadColumnsToXtsList(data)

  # Subset result
  data <- data[columns]

  return(data)
}
tmk-c/myrlib documentation built on May 29, 2019, 1:44 p.m.