Nothing
###############################################################################
# R (http://r-project.org/) Instrument Class Model
#
# Copyright (c) 2009-2012
# Peter Carl, Dirk Eddelbuettel, Jeffrey Ryan,
# Joshua Ulrich, Brian G. Peterson, and Garrett See
#
# This library is distributed under the terms of the GNU Public License (GPL)
# for full details see the file COPYING
#
# $Id$
#
###############################################################################
#' load instrument metadata into the .instrument environment
#'
#' This function will load instrument metadata (data about the data)
#' either from a file specified by the \code{file} argument or
#' from a \code{data.frame} specified by the \code{metadata} argument.
#'
#' The function will attempt to make reasonable assumptions about what you're trying to do, but this isn't magic.
#'
#' You will typically need to specify the \code{type} of instrument to be loaded, failure to do so will generate a Warning and \code{default_type} will be used.
#'
#' You will need to specify a \code{primary_id}, or define a \code{id_col} that contains the data to be used as the primary_id of the instrument.
#'
#' You will need to specify a \code{currency}, unless the instrument \code{type} is 'currency'
#'
#' Use the \code{identifier_cols} argument to specify which fields (if any) in the CSV are to be passed to \code{\link{instrument}} as the \code{identifiers} argument
#'
#' Typically, columns will exist for \code{multiplier} and \code{tick_size}.
#'
#' Any other columns necessary to define the specified instrument type will also be required to avoid fatal Errors.
#'
#' Additional columns will be processed, either as additional identifiers for recognized identifier names, or as custom fields. See \code{\link{instrument}} for more information on custom fields.
#'
#' @param file string identifying file to load, default NULL, see Details
#' @param ... any other passthru parameters
#' @param metadata optional, data.frame containing metadata, default NULL, see Details
#' @param id_col numeric column containing id if primary_id isn't defined, default 1
#' @param default_type character string to use as instrument type fallback, see Details
#' @param identifier_cols character vector of field names to be passed as identifiers, see Details
#' @param overwrite TRUE/FALSE. See \code{\link{instrument}}.
#' @seealso
#' \code{\link{loadInstruments}},
#' \code{\link{instrument}},
#' \code{\link{setSymbolLookup.FI}},
#' \code{\link[quantmod]{getSymbols}},
#' \code{\link{getSymbols.FI}}
#' @examples
#' \dontrun{
#' load.instruments(system.file('data/currencies.csv.gz',package='FinancialInstrument'))
#' load.instruments(system.file('data/root_contracts.csv.gz',package='FinancialInstrument'))
#' load.instruments(system.file('data/future_series.csv.gz',package='FinancialInstrument'))
#'
#' }
#' @export
load.instruments <- function (file=NULL, ..., metadata=NULL, id_col=1, default_type='stock', identifier_cols=NULL, overwrite=TRUE) {
if(is.null(file) && is.null(metadata)) stop("You must pass either a file identifier string or a metadata object to be converted.")
if(is.null(metadata)){
if (file.exists(file)){
filedata<-read.csv(file,stringsAsFactors=FALSE, ...=...)
} else {
stop("The specified file ",file," does not seem to exist, maybe specify the full path?")
}
} else {
filedata<-metadata
rm(metadata)
}
# check required column headers
if(!any(grepl('primary_id',colnames(filedata)))) {
#no primary_id column name, use id_col as the name
set_primary<-TRUE
} else {
set_primary<-FALSE
}
dotargs<-list(...)
if(!any(grepl('type',colnames(filedata)))) {
if (is.null(dotargs$type)) {
warning("metadata does not appear to contain instrument type, using ",default_type,". This may produce incorrect valuations.")
filedata$type<-rep(default_type,nrow(filedata))
} else {
filedata$type <- rep(dotargs$type, nrow(filedata))
dotargs$type <- NULL
}
}
if (!is.null(dotargs$currency) && !is.currency.name(dotargs$currency)) currency(dotargs$currency)
#now process the data
for(rn in 1:nrow(filedata)){
type <- as.character(filedata[rn,'type'])
arg <- as.list(filedata[rn,])
if(type=='spread' || type=='guaranteed_spread'){
if(!is.null(arg$members)){
arg$members<-unlist(strsplit(arg$members,','))
}
if(!is.null(arg$memberratio)){
arg$memberratio<-unlist(strsplit(arg$memberratio,','))
}
if(!is.null(arg$ratio)){
arg$memberratio<-unlist(strsplit(arg$ratio,','))
}
}
arg$type <- NULL
arg <- arg[!is.na(arg)]
arg <- arg[!arg==""]
if (set_primary) {
arg$primary_id<-filedata[rn,id_col]
}
#do some name cleanup to make up for Reuters silliness
if(substr(arg$primary_id,1,1)==1) arg$primary_id <- substr(arg$primary_id,2,nchar(arg$primary_id))
arg$primary_id<-make.names(arg$primary_id)
if(!is.null(arg$X.RIC)){
if(substr(arg$X.RIC,1,1)==1) arg$X.RIC <- substr(arg$X.RIC,2,nchar(arg$X.RIC))
}
if(!is.null(arg$RIC)){
if(substr(arg$RIC,1,1)==1) arg$RIC <- substr(arg$RIC,2,nchar(arg$RIC))
}
if(length(dotargs)) arg<-c(arg,dotargs)
if(!is.null(identifier_cols) && any(identifier_cols %in% names(arg))){
arg$identifiers <- arg[names(arg) %in% identifier_cols]
arg[identifier_cols] <- NULL
}
arg$overwrite <- overwrite
if(is.function(try(match.fun(type),silent=TRUE))){
out <- try(do.call(type,arg))
#TODO recover gracefully?
} else {
# the call for a function named for type didn't work, so we'll try calling instrument as a generic
type=c(type,"instrument")
arg$type<-type # set the type
arg$assign_i<-TRUE # assign to the environment
try(do.call("instrument",arg))
}
} # end loop on rows
}
#' set quantmod-style SymbolLookup for instruments
#'
#' This function exists to tell \code{\link[quantmod]{getSymbols}} where to look for your repository of market data.
#'
#' The \code{base_dir} parameter \emph{must} be set or the function will fail.
#' This will vary by your local environment and operating system. For mixed-OS environments,
#' we recommend doing some OS-detection and setting the network share to your data to a common
#' location by operating system. For example, all Windows machines may use \dQuote{M:/}
#' and all *nix-style (linux, Mac) machines may use \dQuote{/mnt/mktdata/}.
#'
#' The \code{split_method} currently allows either \sQuote{days} or \sQuote{common}, and expects the
#' file or files to be in sub-directories named for the symbol. In high frequency data, it is standard practice to split
#' the data by days, which is why that option is the default.
#'
#' @param base_dir string specifying the base directory where data is stored, see Details
#' @param Symbols character vector of names of instruments for which to \code{setSymbolLookup}
#' @param \dots any other passthru parameters
#' @param storage_method currently only \sQuote{rda}, but we will eventually support \sQuote{indexing} at least, and maybe others
#' @param split_method string specifying the method files are split, currently \sQuote{days} or \sQuote{common}, see Details
#' @param use_identifier string identifying which column should be use to construct the \code{primary_id} of the instrument, default 'primary_id'
#' @param extension file extension, default "rda"
#' @param src which \code{\link[quantmod]{getSymbols}} sub-type to use, default \code{\link{getSymbols.FI}} by setting 'FI'
#' @seealso
#' \code{\link{getSymbols.FI}},
#' \code{\link{instrument_attr}},
#' \code{\link{load.instruments}}, \code{\link{loadInstruments}},
#' \code{\link[quantmod]{setSymbolLookup}}
#' @importFrom zoo as.Date
#' @export
setSymbolLookup.FI<-function(base_dir, Symbols, ..., split_method=c("days","common"), storage_method='rda', use_identifier='primary_id', extension='rda', src='FI'){
# check that base_dir exists
if(!file.exists(base_dir)) stop('base_dir ',base_dir,' does not seem to specify a valid path' )
# take split
split_method<-split_method[1] # only use the first value
#load all instrument names
instr_names <- if(missing(Symbols)) {
ls_non_currencies(ls(pos=.instrument)) #if roots begin with a dot, this will filter out roots and currencies
} else Symbols
#TODO add check to make sure that src is actually the name of a getSymbols function
#initialize list
params<-list()
params$storage_method<-storage_method
params$extension<-extension
params$split_method<-split_method
params$src<-src
if(length(list(...))>=1){
dlist<-list(...)
params<-c(params,dlist)
}
new.symbols<-list()
ndc<-nchar(base_dir)
if(substr(base_dir,ndc,ndc)=='/') sepch='' else sepch='/'
for (instr in instr_names){
tmp_instr<-getInstrument(instr)
if(!use_identifier=='primary_id'){
instr_str<-make.names(tmp_instr$identifiers[[use_identifier]])
} else {
instr_str<-make.names(tmp_instr[[use_identifier]])
}
if(!is.null(instr_str)) instr<-instr_str
symbol<-list()
symbol[[1]]<-params
# construct $dir
symbol[[1]]$dir<-base_dir
names(symbol)[1]<-instr
new.symbols<-c(new.symbols,symbol)
}
setSymbolLookup(new.symbols)
}
#' getSymbols method for loading data from split files
#'
#' This function should probably get folded back into getSymbols.rda in
#' quantmod.
#'
#' Meant to be called internally by \code{\link[quantmod]{getSymbols}} .
#'
#' The symbol lookup table will most likely be loaded by
#' \code{\link{setSymbolLookup.FI}}
#'
#' If date_format is NULL (the Default), we will assume an ISO date as changed
#' by \code{\link{make.names}}, for example, 2010-12-01 would be assumed to be a
#' file containing 2010.12.01
#'
#' If \code{indexTZ} is provided, the data will be converted to that timezone
#'
#' If auto.assign is FALSE, \code{Symbols} should be of length 1. Otherwise,
#' \code{\link[quantmod]{getSymbols}} will give you an error that says
#' \dQuote{must use auto.assign=TRUE for multiple Symbols requests}
#' However, if you were to call \code{getSymbols.FI} directly (which is
#' \emph{NOT} recommended) with \code{auto.assign=FALSE} and more than one
#' Symbol, a list would be returned.
#'
#' Argument matching for this function is as follows. If the user provides a
#' value for an argument, that value will be used. If the user did not provide
#' a value for an argument, but there is a value for that argument for the
#' given \code{Symbol} in the Symbol Lookup Table (see
#' \code{\link{setSymbolLookup.FI}}), that value will be used. Otherwise,
#' the formal defaults will be used.
#'
#' @param Symbols a character vector specifying the names of each symbol to be
#' loaded
#' @param from Retrieve data no earlier than this date. Default '2010-01-01'.
#' @param to Retrieve data through this date. Default Sys.Date().
#' @param ... any other passthru parameters
#' @param dir if not specified in getSymbolLookup, directory string to use.
#' default ""
#' @param return.class only "xts" is currently supported
#' @param extension file extension, default "rda"
#' @param split_method string specifying the method used to split the files,
#' currently \sQuote{days} or \sQuote{common}, see
#' \code{\link{setSymbolLookup.FI}}
#' @param use_identifier optional. identifier used to construct the
#' \code{primary_id} of the instrument. If you use this, you must have
#' previously defined the \code{\link{instrument}}
#' @param date_format format as per the \code{\link{strptime}}, see Details
#' @param verbose TRUE/FALSE
#' @param days_to_omit character vector of names of weekdays that should not be
#' loaded. Default is \code{c("Saturday", "Sunday")}. Use \code{NULL} to
#' attempt to load data for all days of the week.
#' @param indexTZ valid TZ string. (e.g. \dQuote{America/Chicago} or
#' \dQuote{America/New_York}) See \code{\link[xts]{indexTZ}}.
#' @seealso
#' \code{\link{saveSymbols.days}}
#' \code{\link{instrument}}
#' \code{\link{setSymbolLookup.FI}}
#' \code{\link{loadInstruments}}
#' \code{\link[quantmod]{getSymbols}}
#' @examples
#' \dontrun{
#' getSymbols("SPY", src='yahoo')
#' dir.create("tmpdata")
#' saveSymbols.common("SPY", base_dir="tmpdata")
#' rm("SPY")
#' getSymbols("SPY", src='FI', dir="tmpdata", split_method='common')
#' unlink("tmpdata/SPY", recursive=TRUE)
#' }
#' @export
getSymbols.FI <- function(Symbols,
from=getOption("getSymbols.FI.from", "2010-01-01"),
to=getOption("getSymbols.FI.to", Sys.Date()),
...,
dir=getOption("getSymbols.FI.dir", ""),
return.class=getOption("getSymbols.FI.return.class",
"xts"),
extension=getOption("getSymbols.FI.extension", "rda"),
split_method=getOption("getSymbols.FI.split_method",
c("days", "common")),
use_identifier=getOption("getSymbols.FI.use_identifier",
NA),
date_format=getOption("getSymbols.FI.date_format"),
verbose=getOption("getSymbols.FI.verbose", TRUE),
days_to_omit=getOption("getSymbols.FI.days_to_omit",
c("Saturday", "Sunday")),
indexTZ=getOption("getSymbols.FI.indexTZ", NA)
)
{
if (is.null(date_format)) date_format <- "%Y.%m.%d"
if (is.null(days_to_omit)) days_to_omit <- 'NULL'
this.env <- environment()
for(var in names(list(...))) {
assign(var,list(...)[[var]], this.env)
}
#The body of the following function comes from Dominik's answer here:
#browseURL("http://stackoverflow.com/questions/7224938/can-i-rbind-be-parallelized-in-r")
#it does what do.call(rbind, lst) would do, but faster and with less memory usage
do.call.rbind <- function(lst) {
while(length(lst) > 1) {
idxlst <- seq(from=1, to=length(lst), by=2)
lst <- lapply(idxlst, function(i) {
if(i==length(lst)) { return(lst[[i]]) }
return(rbind(lst[[i]], lst[[i+1]]))
})
}
lst[[1]]
}
# Find out if user provided a value for each formal
if (hasArg.from <- hasArg(from)) .from <- from
if (hasArg.to <- hasArg(to)) .to <- to
if (hasArg.dir <- hasArg(dir)) .dir <- dir
if (hasArg.return.class <- hasArg(return.class))
.return.class <- return.class
if (hasArg.extension <- hasArg(extension)) .extension <- extension
if (hasArg.split_method <- hasArg(split_method))
.split_method <- split_method
if (hasArg.use_identifier <- hasArg(use_identifier))
.use_identifier <- use_identifier
if (hasArg.date_format <- hasArg(date_format)) .date_format <- date_format
if (hasArg.verbose <- hasArg(verbose)) .verbose <- verbose
if (hasArg.days_to_omit <- hasArg(days_to_omit))
.days_to_omit <- days_to_omit
if (hasArg.indexTZ <- hasArg(indexTZ)) .indexTZ <- indexTZ
importDefaults("getSymbols.FI")
# Now get the values for each formal that we'll use if not provided
# by the user and not found in the SymbolLookup table
default.from <- from
default.to <- to
default.dir <- dir
default.return.class <- return.class
default.extension <- extension
default.split_method <- split_method[1]
default.use_identifier <- use_identifier
default.date_format <- date_format
default.verbose <- verbose
default.days_to_omit <- days_to_omit
default.indexTZ <- indexTZ
# quantmod:::getSymbols will provide auto.assign and env
# so the next 2 if statements should always be TRUE
auto.assign <- if(hasArg(auto.assign)) {auto.assign} else TRUE
env <- if(hasArg(env)) {env} else .GlobalEnv
# make an argument matching function to sort out which values to use for each arg
pickArg <- function(x, Symbol) {
if(get(paste('hasArg', x, sep="."))) {
get(paste(".", x, sep=""))
} else if(!is.null(SymbolLookup[[Symbol]][[x]])) {
SymbolLookup[[Symbol]][[x]]
} else get(paste("default", x, sep="."))
}
SymbolLookup <- getSymbolLookup()
fr <- NULL
datl <- lapply(1:length(Symbols), function(i) {
#FIXME? Should nothing be saved if there are errors with any of
# the Symbols (current behavior)? Or, if auto.assign == TRUE, should
# we assign the data as we get it instead of making a list of data and
# assigning at the end.
from <- pickArg("from", Symbols[[i]])
to <- pickArg("to", Symbols[[i]])
dir <- pickArg("dir", Symbols[[i]])
return.class <- pickArg("return.class", Symbols[[i]])
extension <- pickArg('extension', Symbols[[i]])
split_method <- pickArg('split_method', Symbols[[i]])
use_identifier <- pickArg('use_identifier', Symbols[[i]])
date_format <- pickArg('date_format', Symbols[[i]])
verbose <- pickArg('verbose', Symbols[[i]])
days_to_omit <- pickArg('days_to_omit', Symbols[[i]])
indexTZ <- pickArg('indexTZ', Symbols[[i]])
# if 'dir' is actually the 'base_dir' then we'll paste the instrument name (Symbol) to the end of it.
# First, find out what the instrument name is
instr_str <- NA
if(!is.na(use_identifier[1])) {
tmp_instr <- try(getInstrument(Symbols[[i]], silent=FALSE))
if (inherits(tmp_instr,'try-error') || !is.instrument(tmp_instr))
stop("must define instrument first to call with 'use_identifier'")
if (!use_identifier[1]=='primary_id') {
instr_str <- make.names(unlist(tmp_instr$identifiers[use_identifier]))
} else instr_str <- make.names(tmp_instr[use_identifier])
if (length(instr_str) == 0L) stop("Could not find instrument. Try with use_identifier=NA")
}
Symbol <- ifelse(is.na(instr_str), make.names(Symbols[[i]]), instr_str)
ndc<-nchar(dir)
if(substr(dir,ndc,ndc)=='/') dir <- substr(dir,1,ndc-1) #remove trailing forward slash
dirs <- paste(dir, Symbol, sep="/")
tmp <- list()
dirstr<-paste(dirs, collapse=' ')
if(!length(dirs)==1) warning(paste0('multiple directories ',dirstr,' referenced, merge may interleave dissimilar data.'))
for(dir in dirs) {
if(!dir=="" && !file.exists(dir)) {
if (verbose) cat("\ndirectory ",dir," does not exist, skipping\n")
} else {
if(verbose) cat("loading ",Symbols[[i]],".....\n")
switch(split_method[1],
days={
StartDate <- as.Date(from)
EndDate <- as.Date(to)
date.vec <- as.Date(StartDate:EndDate)
date.vec <- date.vec[!weekdays(date.vec) %in% days_to_omit]
date.vec <- format(date.vec, format=date_format)
sym.files <- paste(date.vec, basename(dir), extension, sep=".")
if (dir != "") sym.files <- file.path(dir, sym.files)
dl <- lapply(sym.files, function(fp) {
sf <- strsplit(fp, "/")[[1]]
sf <- sf[length(sf)]
if (verbose) cat("Reading ", sf, "...")
if(!file.exists(fp)) {
if (verbose) cat(" failed. File not found in ", dir, " ... skipping\n")
} else {
if (verbose) cat(' done.\n')
local.name <- load(fp)
dat <- get(local.name)
if (!is.na(indexTZ) && !is.null(dat)) indexTZ(dat) <- indexTZ
dat
}
})
if (verbose) cat('rbinding data ... ')
fr <- do.call.rbind(dl)
},
common = , {
sym.file <- paste(Symbol,extension,sep=".")
if(dir != "") sym.file <- file.path(dir, sym.file)
if(!file.exists(sym.file)) {
if (verbose) cat("file ",paste(Symbol,extension,sep='.')," does not exist in ",dir,"....skipping\n")
} else {
#fr <- read.csv(sym.file)
local.name <- load(sym.file)
dat <- get(local.name)
if (!is.na(indexTZ) && !is.null(dat)) indexTZ(dat) <- indexTZ
assign('fr', dat)
if(verbose) cat("done.\n")
#if(!is.xts(fr)) fr <- xts(fr[,-1],as.Date(fr[,1],origin='1970-01-01'),src='rda',updated=Sys.time())
}
} # end 'common'/default method (same as getSymbols.rda)
) # end split_method switch
fr <- convert.time.series(fr=fr,return.class=return.class)
Symbols[[i]] <-make.names(Symbols[[i]])
if(dir==dirs[1]) tmp[[Symbols[[i]]]] <- fr
else tmp[[Symbols[[i]]]] <- rbind(tmp[[Symbols[[i]]]],fr)
} # end Symbols else
}
if(verbose) cat("done.\n")
tmp
}) #end lapply loop over Symbols
if (length(Filter("+", lapply(datl, length))) == 0) {
warning("No data found.")
return(NULL)
}
datl.names <- do.call(c, lapply(datl, names))
missing <- Symbols[!Symbols %in% datl.names]
if (length(missing) > 0) warning('No data found for ', paste(missing, collapse=" "))
if(auto.assign) {
#invisible(lapply(datl, function(x) if (length(x) > 0) assign(names(x), x[[1]], pos=env)))
out <- Filter(function(x) length(x) > 0, datl)
invisible(lapply(out, function(x) assign(names(x), x[[1]], pos=env)))
return(datl.names)
} else {
#NOTE: Currently, NULLs aren't filtered out. If there are data for any Symbol,
# the returned list will have an element for each symbol requested even if some don't contain data.
out <- lapply(datl, function(x) {
if (length(x) > 0) x[[1]]
})
if (length(out) == 1)
return(out[[1]])
else {
names(out) <- Symbols
return(out)
}
}
}
#' currency metadata to be used by \code{\link{load.instruments}}
#'
#' @name currencies
#' @docType data
#' @keywords data
NULL
#' future metadata to be used by \code{\link{load.instruments}}
#'
#' @name root_contracts
#' @docType data
#' @keywords data
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.