R/wrdsr.R

Defines functions connect query qry listLibraries listTables listColumns

Documented in connect listColumns listLibraries listTables qry query

#' Connect to the WRDS database
#'
#' If no arguments provided, authentication details are loaded from ~/.wrdsrc
#'
#' @param user=NA WRDS user name
#' @param pass=NA Hashed WRDS password
#' @export
#' @examples
#' \dontrun{
#' wrds = connect()
#' query(wrds, "SELECT * FROM COMP.NAMES (OBS=10)")
#' }
connect = function(user=NA, pass=NA) {
  auth = tryCatch(
    {
      readLines(path.expand('~/.wrdsrc'))
    },
    warning = function(cond) {
      return(NA)
    }
  )
  if (any(is.na(auth)) & (is.na(user) | is.na(pass)))
    stop("Error opening WRDS authentication (.wrdsrc) in default location: ",
         path.expand('~/.wrdsrc'),
         "\n Need to provide WRDS username (user) and SAS hashed password (pass)")

  else if (!any(is.na(auth))) {
    user = auth[1]
    pass = auth[2]
  }

  jars = c(system.file("java", "sas.intrnet.javatools.jar", package="wrdsr"),
           system.file("java", "sas.core.jar", package="wrdsr"))

  if (!all(sapply(X = jars, FUN = file.exists)))
    stop("Error opening jar drivers")

  driver = RJDBC::JDBC("com.sas.net.sharenet.ShareNetDriver",
                       jars,
                       identifier.quote="`")

  RJDBC::dbConnect(driver,
                   "jdbc:sharenet://wrds-cloud.wharton.upenn.edu:8551/",
                   user,
                   pass)
}

#' Query WRDS and return result as a tibble
#'
#' white space stripped from character columns - simple wrapper around dbGetQuery
#'
#' @param conn, the JDBC connection
#' @param statement, the SQL query (can span multiple lines)
#' @export
query = function(conn, statement) {
  df = RJDBC::dbGetQuery(conn, qry(statement))

  if (any(duplicated(names(df)))) {
    ix = which(duplicated(names(df)))
    names(df)[ix] = paste(names(df)[ix], ix, sep = "")
  }

 # Strip potential white space from character columns
 charix = sapply(df, function(col) class(col) == 'character')
 df[charix] = lapply(df[charix], trimws)
 # Set empty strings to NA
 is.na(df) = df == ""

  tibble::as_tibble(df)
}

#' Turn a multiline string into a valid SQL query
#'
#' @param  str The SQL query
#' @export
#' @examples
#' \dontrun{
#' query = qry("SELECT * FROM
#'           COMP.NAMES
#'           (OBS=10)")
#' dbGetQuery(wrds, query)
#' }
qry = function(str)
  strwrap(str, width = 1000, simplify = TRUE)

#' List the available libraries
#'
#' @param wrds WRDS connection object
#' @export
#' @examples
#' \dontrun{
#' listLibraries(wrds)
#' }
listLibraries = function(wrds) {
  RJDBC::dbGetQuery(wrds, "SELECT DISTINCT libname FROM dictionary.tables")
}

#' List the tables in a library
#'
#' @param wrds WRDS connection object
#' @param library WRDS library
#' @export
#' @examples
#' \dontrun{
#' listTables(wrds, "CRSP")
#' }
listTables = function(wrds, library) {
  q = paste("SELECT DISTINCT MEMNAME FROM dictionary.columns WHERE libname =",
             shQuote(library))
  RJDBC::dbGetQuery(wrds, q)
}

#' List the columns in a SAS table
#'
#' @param wrds WRDS connection object
#' @param library WRDS library
#' @param table WRDS table name
#' @export
#' @examples
#' \dontrun{
#' listColumns(wrds, "CRSP", "DSF")
#' }
listColumns = function(wrds, library, table) {
  q = paste("SELECT NAME FROM dictionary.columns WHERE libname =",
            shQuote(library),
            "AND memname = ",
            shQuote(table))
  RJDBC::dbGetQuery(wrds, q)
}
nuffe/wrdsr documentation built on May 24, 2019, 9:57 a.m.