R/scorecard_check.R

Defines functions scorecard_check

Documented in scorecard_check

#' Check scorecard transaction file before use
#'
#' Determines whether a scorecard transaction file contains expected
#' structure and valid content.  Content includes valid activated models,
#' and valid tickers for those activated models, along with valid dates
#' and date orders.
#' @param scorecard_file the path to the scorecard configuration file (YAML)
#' @param transaction_file the path to the transaction file (CSV)
#' @return list containing validation checks and final determination
#' @note assumes precondition \code{scorecard_precheck} performed and valid
#' @seealso scorecard_update, scorecard_produce, scorecard_precheck
#' @export
scorecard_check <- function( scorecard_file=NA, transaction_file=NA ) {

  rv <- list(file=scorecard_file,
             transactions=transaction_file,
             date=lubridate::today(),
             messages=c(),
             has_activated=FALSE,
             has_model_files=FALSE,
             has_transactions=FALSE,
             has_active_transactions=FALSE,
             has_valid_dates=FALSE,
             has_valid_actions=FALSE,
             has_valid_active_tickers=FALSE,
             has_sequential_dates=FALSE,
             has_prices=FALSE,
             has_quantities=FALSE,
             valid=FALSE)

  require(yaml)
  require(magrittr)
  `%nin%` <- Negate(`%in%`)


  # options for many functions
  o1 <- options("stringsAsFactors" = FALSE)
  on.exit(options(o1), add=TRUE)

  if ( is.na(scorecard_file) ) {
    rv$messages <- c(rv$messages,"Scorecard file name not provided")
    return(rv)
  }

  if ( !file.exists(scorecard_file) ) {
    rv$messages <- c(rv$messages,"Scorecard input file does not exist")
    return(rv)
  }

  if ( is.na(transaction_file) ) {
    rv$messages <- c(rv$messages,"Transaction file name not provided")
    return(rv)
  }

  if ( !file.exists(transaction_file) ) {
    rv$messages <- c(rv$messages,"Transaction file does not exist")
    return(rv)
  }

  scorecard <- yaml::yaml.load_file(scorecard_file)

  scorecard$table$activated <- lapply(scorecard$table$activated, function(i){
    i$status <- "activated"
    rv$has_activated <<- TRUE
    return(i)
  })

  scorecard_table <- c(
    scorecard$table$activated
    # skipping other sections for this check
  )

  rv$model_files <-
    unlist(lapply(scorecard_table, function(x)
      return(x$config)))

  rv$has_model_files <- TRUE
  rv$model_check <- lapply(scorecard_table, function( scorecard_row ) {
    if ( scorecard_row$status %in% c('activated') ) { # skipping 'candidate'
      fp <- file.path("models", scorecard_row$config)
      fa <- file.access( fp, mode=4 )
      if ( fa != 0 ) {
        rv$has_model_files <<- FALSE
      } else {
        mf <- yaml::yaml.load_file(fp)
      }
      return( list(path=fp, readable=as.numeric( fa )) )
    }
    return ( list() )
  })


  active_models <- NULL
  if ( rv$has_model_files ) {
    active_models <- lapply(scorecard_table, function( scorecard_row ) {
      if ( scorecard_row$status %in% c('activated') ) { # skipping 'candidate'
        fp <- file.path("models", scorecard_row$config)
        mf <- yaml::yaml.load_file(fp)
        return(mf)
      }
    })
  }


  # read transactions
  transactions <- read.csv(transaction_file,
                           colClasses = c(rep("character",4),
                                          rep("numeric",4),
                                          "character"),
                           header = TRUE,
                           stringsAsFactors = FALSE)
  transactions$Date <- as.Date(transactions$Date,format="%m/%d/%y")
  rv$has_transactions <- TRUE

  # find active models
  active_model_ids <- sapply(active_models,function(am) { am$model })
  active_transactions <- transactions %>% dplyr::filter(Model %in% active_model_ids)
  rv$has_active_transactions <- (nrow(active_transactions) > 0)

  # remove cash
  active_transactions <- active_transactions %>% dplyr::filter(Ticker != "#CASH")

  # check actions and dates
  rv$has_valid_actions <- all(active_transactions$Action %in% c("Buy","Sell") )
  rv$has_valid_dates <- all(is.na(active_transactions$Date)==FALSE)
  rv$has_sequential_dates <- all(sign(diff(active_transactions$Date))>=0)

  # check ticker usage
  rv$has_valid_active_tickers <- TRUE
  ignore <- sapply(active_models,function(am) {
    id <- am$model
    amss <- active_transactions %>% dplyr::filter(Model == id)
    transaction_tickers <- unique(amss$Ticker)
    model_tickers <- am$config$basket
    if ( any(transaction_tickers %nin% model_tickers) ) {
      rv$has_valid_active_tickers <- FALSE
      rv$messages <- c(rv$messages,
                       paste("Non-model tickers found in",id))
      rv$messages <- c(rv$messages,
                       unique(transaction_tickers[transaction_tickers %nin% model_tickers]))
    }
  })

  # check prices and shares/quantities
  rv$has_prices <- all(is.na(transactions$Price)==FALSE)
  rv$has_quantities <- all(is.na(transactions$Shares)==FALSE)

  # final validation of structure and content
  rv$valid <- all(rv$has_activated,
                  rv$has_model_files,
                  rv$has_transactions,
                  rv$has_active_transactions,
                  rv$has_valid_actions,
                  rv$has_valid_dates,
                  rv$has_sequential_dates,
                  rv$has_valid_active_tickers,
                  rv$has_prices,
                  rv$has_quantities,
                  rv$has_model_files)

  rv$messages <- c(rv$messages,
                   ifelse(rv$valid,
                          "Transaction file validated",
                          "Transaction file invalid"))

  return(rv)
}
greatgray/scorecard documentation built on May 17, 2019, 8:34 a.m.