R/getBBAG.R

Defines functions getBBAG

Documented in getBBAG

#' getBBAG - Download historic sale results data from BBAG.
#'
#' \code{getBBAG} downloads historic sale results data from the BBAG website in
#' xls format.
#'
#' \code{getBBAG} downloads historic sale results data from the Baden-Badener
#' Auktionsgesellschaft \url{http://bbag-sales.de/} website, in xls format,
#' based on the supplied URL. Various options may be specified such as a
#' \code{filename} and output format. Valid output formats are \code{csv},
#' \code{Rds} and \code{sqlite}. Only an \code{Rds} is generated by default.
#' Valid URLs for BBAG sales have been tested as far back as 2009. The demo
#' directory contains a complete set of URLs and function calls to generate
#' results data for all sales back to 2009.
#'
#' @param url A string containing the universal resource locator for an xls file
#'   of historic bloodstock sale results data. Required, no default set.
#' @param catalogue A string containing the universal resource locator for a
#'   sale catalogue, usually containing pedigree information in PDF format.
#'   Optional, no default set.
#' @param auctioneer A string containing the name of the company conducting the
#'   auction sale. Required, no default set.
#' @param country A string containing the abbreviated country code for the
#'   location of the sale. e.g. DE. Required, no default set.
#' @param currency A string containing the abbreviated currency code for the
#'   currency of sale bids and payments. e.g. EUR. Required, no default set.
#' @param date A string containing the date of the sale. Multi-day sales should
#'   only have the first day's date entered. The date should be entered in the
#'   format yyyy-mm-dd. Required, no default set.
#' @param csv A Boolean defining the data output format, in this case a CSV
#'   file. Required. Defaults to FALSE. May be changed to TRUE. Multiple output
#'   formats are possible.
#' @param rds A Boolean defining the data output format, in this case an Rds
#'   file. Required. Defaults to TRUE. May be changed to FALSE. Multiple output
#'   formats are possible.
#' @param sqlite A Boolean defining the data output format, in this case an
#'   SQLite file. Required. Defaults to FALSE. May be changed to TRUE. Multiple
#'   output formats are possible. Requires the RSQLite library to be installed,
#'   which is only optional for pinhooker package installation. Prior to
#'   attempting SQLite output, please ensure the RSQLite package is installed.
#' @param sale A string containing the name of the sale. e.g. Breeding Stock
#'   Sale. Required, no default set.
#' @param filename A string containing the output file name, without file
#'   extension. Required. Defaults to 'bloodstockSalesData'. Files are output to
#'   the current working directory.
#'
#' @return If all parameters are valid, xls data will be downloaded from the
#'   BBAG website, normalised and output, as the specified file fomats, in the
#'   current working directory.
#'
#' @examples
#'   getBBAG(url =
#'   "http://www.bbag-sales.de/en/Catalogues-and-statistics.html?auctionType=Sales%20%26%20Racing%20Festival&auctionYear=2015&page=csvExport",
#'    catalogue =
#'   "http://www.bbag-sales.de/download/public/share/public/BBAG/PDF-Dokumente/Kataloge/SuR_Inhalt_2015_LR.pdf",
#'    csv = FALSE, rds = TRUE, sqlite = FALSE, auctioneer = "BBAG", country =
#'   "DE", currency = "EUR", date = "2015-10-16", sale = "October Mixed Sales",
#'   filename = "bbagSaleData")
#'
#' @export

getBBAG <-
  function(url, catalogue = "", auctioneer, country, currency, date, csv = FALSE, rds = TRUE, sqlite = FALSE, sale = "", filename = "bloodstockSalesData") {
    # Read in CSV file and remove any additional columns
    saleData <-
      read.csv(
        url, colClasses = "character", blank.lines.skip = TRUE, encoding = "latin1", sep = ";"
      )

    # Normalise column names
    saleData <-
      plyr::rename(
        saleData,c(
          "Lot.No." = "Lot", "Date.of.birth" = "Foaled", "Vendor" = "Consignor", "Result" = "Price"
        )
      )

    # Rename sexes from German to English
    saleData$Sex[saleData$Sex == "Stute"] <- "Filly"
    saleData$Sex[saleData$Sex == "Hengst"] <- "Colt"
    saleData$Sex[saleData$Sex == "Wallach"] <- "Gelding"

    # Standardise Withdrawn lots
    saleData$Purchaser[saleData$Price == "zurückgezogen"] <-
      "Withdrawn"
    saleData$Price[saleData$Price == "zurückgezogen"] <- "0"

    # Standardise Vendor re-purchases and Not Sold lots
    saleData$Purchaser[which(grepl("zurückgekauft", saleData$Price) == TRUE)] <-
      "Vendor"
    saleData$Purchaser[which(grepl("nicht verkauft", saleData$Price) == TRUE)] <-
      "Not Sold"

    saleData$Price <-
      gsub("\\.", "", saleData$Price) # Remove full stops from Price column
    saleData$Price <-
      gsub(",.*", "", saleData$Price) # Remove everything in Price column after the comma

    saleData$Purchaser[saleData$Purchaser == "Not Sold"] <-
      paste("Not Sold (",saleData$Price[saleData$Purchaser == 'Not Sold'],")", sep = "")
    saleData$Purchaser[saleData$Purchaser == "Vendor"] <-
      paste("Vendor (",saleData$Price[saleData$Purchaser == 'Vendor'],")", sep = "")

    saleData$Price[which(grepl("Vendor", saleData$Purchaser) == TRUE)] <-
      "0"
    saleData$Price[which(grepl("Not Sold", saleData$Purchaser) == TRUE)] <-
      "0"

    # Create empty dataframe with correct column names. Not all XLS files initially contain all column names.
    allCols <-
      data.frame(
        Lot = integer(), Name = character(), Foaled = character(), Sex = character(), Type = character(), Colour = character(), Sire = character(), Dam = character(), Consignor = character(), Stabling = character(), Purchaser = character(), coveringSire = character(), Catalogue = character(), Price = integer(), stringsAsFactors =
          FALSE
      )

    # Bind empty dataframe with XLS data
    saleData <- plyr::rbind.fill(allCols, saleData)

    # Create new columns with data input from function options
    saleData$Auctioneer <- auctioneer
    saleData$Country <- country
    saleData$Currency <- currency
    saleData$saleDate <- date
    saleData$Catalogue <- catalogue
    saleData$Sale <- sale

    # Reset column data types
    saleData$Price <- as.integer(saleData$Price)
    saleData$saleDate <- as.Date(saleData$saleDate, "%Y-%m-%d")

    # Check to see if CSV file exists. Then write CSV.
    if (isTRUE(csv)) {
      if (!isTRUE(file.exists(paste(filename,".csv", sep = "")))) {
        write.csv(
          saleData, paste(filename,".csv", sep = ""), row.names = FALSE, na =
            ""
        )
      } else {
        saleDataSaved <-
          read.csv(
            paste(filename,".csv", sep = ""), sep = ",", stringsAsFactors =
              FALSE, as.is = TRUE
          )
        saleData$saleDate <- as.character(saleData$saleDate)
        saleDataFinal <- rbind(saleDataSaved, saleData)
        write.csv(
          saleDataFinal, paste(filename,".csv", sep = ""), row.names = FALSE, na =
            ""
        )
      }
    }

    # Check to see if RDS file exists. Then write RDS.
    if (isTRUE(rds)) {
      if (!isTRUE(file.exists(paste(filename,".rds", sep = "")))) {
        saleData[is.na(saleData)] <- ""
        saveRDS(saleData, paste(filename,".rds", sep = ""))
      } else {
        saleDataSaved <- readRDS(paste(filename,".rds", sep = ""))
        saleData[is.na(saleData)] <- ""
        saleDataFinal <- rbind(saleDataSaved, saleData)
        saveRDS(saleDataFinal, paste(filename,".rds", sep = ""))
      }
    }

    # Check to see if SQLite file exists. Then write SQLite file.
    if (isTRUE(sqlite)) {
      if (!requireNamespace("RSQLite", quietly = TRUE)) {
        stop(
          "The package RSQlite is required to generate the SQLite data file. Please install it and run the script again.",
          call. = FALSE
        )
      }
      if (!isTRUE(file.exists(paste(filename,".sqlite", sep = "")))) {
        saleData[is.na(saleData)] <- ""
        saleData$saleDate <- as.character(saleData$saleDate)
        con <-
          dbConnect(SQLite(), paste(filename,".sqlite", sep = ""))
        dbWriteTable(
          con, name = filename, value = transform(saleData, saleDate), row.names =
            FALSE, append = TRUE
        )
        dbDisconnect(con)
      } else {
        con <-
          dbConnect(SQLite(), paste(filename,".sqlite", sep = ""))
        sql1 <- paste("SELECT * FROM ",filename, sep = "")
        saleDataSaved <- dbGetQuery(con, sql1)
        saleData[is.na(saleData)] <- ""
        saleData$saleDate <- as.character(saleData$saleDate)
        saleDataFinal <- rbind(saleDataSaved, saleData)
        dbWriteTable(
          con, name = filename, value = transform(saleDataFinal, saleDate), row.names =
            FALSE, overwrite = TRUE
        )
        dbDisconnect(con)
      }
    }
  }
phillc73/pinhooker documentation built on Feb. 18, 2021, 9:21 p.m.