#' getArqana - Download historic sale results data from Arqana.
#'
#' \code{getArqana} downloads historic sale results data from the Arqana website
#' in xls format.
#'
#' \code{getArqana} downloads historic sale results data from the Arqana
#' \url{http://arqana.com} website, in xls format, based on the supplied URL.
#' Various options may be specified such as a \code{filename} and output format.
#' Valid output formats are \code{csv}, \code{Rds} and \code{sqlite}. Only an
#' \code{Rds} file is generated by default. Valid URLs for Arqana sales have
#' been tested as far back as 2009. The demo directory contains a complete set
#' of URLs and function calls to generate results data for all sales back to
#' 2009.
#'
#' @param url A string containing the universal resource locator for an xls file
#' of historic bloodstock sale results data. Required, no default set.
#' @param catalogue A string containing the universal resource locator for a
#' sale catalogue, usually containing pedigree information in PDF format.
#' Optional, no default set.
#' @param auctioneer A string containing the name of the company conducting the
#' auction sale. Required, no default set.
#' @param country A string containing the abbreviated country code for the
#' location of the sale. e.g. FR. Required, no default set.
#' @param currency A string containing the abbreviated currency code for the
#' currency of sale bids and payments. e.g. EUR. Required, no default set.
#' @param date A string containing the date of the sale. Multi-day sales should
#' only have the first day's date entered. The date should be entered in the
#' format yyyy-mm-dd. Required, no default set.
#' @param csv A Boolean defining the data output format, in this case a CSV
#' file. Required. Defaults to FALSE. May be changed to TRUE. Multiple output
#' formats are possible.
#' @param rds A Boolean defining the data output format, in this case an Rds
#' file. Required. Defaults to TRUE. May be changed to FALSE. Multiple output
#' formats are possible.
#' @param sqlite A Boolean defining the data output format, in this case an
#' SQLite file. Required. Defaults to FALSE. May be changed to TRUE. Multiple
#' output formats are possible. Requires the RSQLite library to be installed,
#' which is only optional for pinhooker package installation. Prior to
#' attempting SQLite output, please ensure the RSQLite package is installed.
#' @param sale A string containing the name of the sale. e.g. Breeding Stock
#' Sale. Required, no default set.
#' @param filename A string containing the output file name, without file
#' extension. Required. Defaults to 'bloodstockSalesData'. Files are output to
#' the current working directory.
#'
#' @return If all parameters are valid, xls data will be downloaded from the
#' Arqana website, normalised and output, as the specified file fomats, in the
#' current working directory.
#'
#' @examples
#' getArqana(url =
#' "http://www.arqana.com/web/vente/vente_actions.php?mode=get_csv&venid=205",
#' catalogue =
#' "http://www.arqana.com/upload/pedigrees/vente205/complet_eng.pdf", csv =
#' FALSE, rds = TRUE, sqlite = FALSE, auctioneer = "Arqana", country = "FR",
#' currency = "EUR", date = "2015-12-05", sale = "Breeding Stock Sale",
#' filename = "arqanaSaleData")
#'
#' @export
getArqana <- function(url, catalogue = "", auctioneer, country, currency, date,
csv = FALSE, rds = TRUE, sqlite = FALSE, sale = "",
filename = "bloodstockSalesData") {
# Read in XLS file and remove any additional columns
saleData <-
gdata::read.xls(
url, sheet = 1, method = "csv", colClasses = "character", skip = 1, blank.lines.skip = TRUE, encoding = "latin1"
)
# Rename columns to English, check if Pleine.de col exists because it doesn't always. If not, insert it in correct position
if("Pleine.de" %in% colnames(saleData))
{
names(saleData) <- c("Lot", "Sex", "Foaled", "Type", "Name", "Sire", "Dam", "Consignor", "Stabling", "coveringSire", "Issue", "Purchaser", "Price")
} else {
saleData$Pleine.de <- ""
saleData <- saleData[,c(1:9, 13, 10:12)]
names(saleData) <- c("Lot", "Sex", "Foaled", "Type", "Name", "Sire", "Dam", "Consignor", "Stabling", "coveringSire", "Issue", "Purchaser", "Price")
}
# Translate French to English
saleData$Sex[saleData$Sex == "F." &
saleData$Type == "Foal"] <- "Filly"
saleData$Sex[saleData$Sex == "M." &
saleData$Type == "Foal"] <- "Colt"
saleData$Sex[saleData$Sex == "F." &
saleData$Type == "Jument"] <- "Mare"
saleData$Sex[saleData$Sex == "M." &
saleData$Type == "Etalon"] <- "Stallion"
saleData$Sex[saleData$Sex == "M." &
saleData$Type == "Parts d'étalon"] <- "Stallion Shares"
saleData$Sex[saleData$Sex == "F."] <- "Filly"
saleData$Sex[saleData$Sex == "M."] <- "Colt"
saleData$Sex[saleData$Sex == "H."] <- "Gelding"
saleData$Type[saleData$Type == "Jument"] <- "Mare"
saleData$Type[saleData$Type == "Pouliche"] <- "Filly"
saleData$Type[saleData$Type == "Etalon"] <- "Stallion"
saleData$Type[saleData$Type == "prospect étalon"] <- "Prospective Stallion"
saleData$Type[saleData$Type == "Parts d'étalon"] <- "Stallion Shares"
saleData$Type[saleData$Type == "Cheval à l'entrainement"] <-
"Horse in Training"
saleData$Type[saleData$Type == "2 ans"] <- "2 years"
saleData$Type[saleData$Type == "3 ans"] <- "3 years"
saleData$Type[saleData$Type == "Store 2 ans"] <- "Store 2"
saleData$Type[saleData$Type == "Store 3 ans"] <- "Store 3"
saleData$Issue[saleData$Issue == "Absent"] <- "Withdrawn"
saleData$Issue[saleData$Issue == "Racheté"] <- "Not Sold"
saleData$Issue[saleData$Issue == "Vendu"] <- "Sold"
saleData$Consignor[saleData$Consignor == "Inconnu"] <- "Unknown"
saleData$Name[saleData$Name == "INCONNU"] <- "UNKNOWN"
saleData$Sire[saleData$Sire == "INCONNU"] <- "UNKNOWN"
saleData$Consignor[grepl("Page Blanche", saleData$Consignor) == TRUE] <- "Blank Page"
saleData$Name[grepl("PAGE BLANCHE", saleData$Name) == TRUE] <- "BLANK PAGE"
# Normalise Not Sold lots to match Goffs
saleData$Purchaser[saleData$Issue == "Withdrawn"] <- "Withdrawn"
saleData$Purchaser[saleData$Issue == "Not Sold"] <-
paste("Not Sold (",saleData$Price[saleData$Issue == "Not Sold"],")", sep = "")
saleData$Price[saleData$Issue == "Not Sold"] <- "0"
saleData$Purchaser[saleData$Issue == "Amiable"] <-
paste(saleData$Purchaser[saleData$Issue == "Amiable"],"(PS)", sep = " ")
saleData$Issue <- NULL
# Create empty dataframe with correct column names. Not all XLS files initially contain all column names.
allCols <-
data.frame(
Lot = integer(), Name = character(), Foaled = character(), Sex = character(),
Type = character(), Colour = character(), Sire = character(), Dam = character(),
Consignor = character(), Stabling = character(), Purchaser = character(),
coveringSire = character(), Catalogue = character(), Price = integer(),
stringsAsFactors = FALSE
)
# Bind empty dataframe with XLS data
saleData <- plyr::rbind.fill(allCols, saleData)
# Create new columns with data input from function options
saleData$Auctioneer <- auctioneer
saleData$Country <- country
saleData$Currency <- currency
saleData$saleDate <- date
saleData$Catalogue <- catalogue
saleData$Sale <- sale
# Reset column data types
saleData$Price <- as.integer(saleData$Price)
saleData$saleDate <- as.Date(saleData$saleDate, "%Y-%m-%d")
# Check to see if CSV file exists. Then write CSV.
if (isTRUE(csv)) {
if (!isTRUE(file.exists(paste(filename,".csv", sep = "")))) {
write.csv(
saleData, paste(filename,".csv", sep = ""), row.names = FALSE, na =
""
)
} else {
saleDataSaved <-
read.csv(
paste(filename,".csv", sep = ""), sep = ",", stringsAsFactors =
FALSE, as.is = TRUE
)
saleData$saleDate <- as.character(saleData$saleDate)
saleDataFinal <- rbind(saleDataSaved, saleData)
write.csv(
saleDataFinal, paste(filename,".csv", sep = ""), row.names = FALSE, na =
""
)
}
}
# Check to see if RDS file exists. Then write RDS.
if (isTRUE(rds)) {
if (!isTRUE(file.exists(paste(filename,".rds", sep = "")))) {
saleData[is.na(saleData)] <- ""
saveRDS(saleData, paste(filename,".rds", sep = ""))
} else {
saleDataSaved <- readRDS(paste(filename,".rds", sep = ""))
saleData[is.na(saleData)] <- ""
saleDataFinal <- rbind(saleDataSaved, saleData)
saveRDS(saleDataFinal, paste(filename,".rds", sep = ""))
}
}
# Check to see if SQLite file exists. Then write SQLite file.
if (isTRUE(sqlite)) {
if (!requireNamespace("RSQLite", quietly = TRUE)) {
stop(
"The package RSQlite is required to generate the SQLite data file. Please install it and run the script again.",
call. = FALSE
)
}
if (!isTRUE(file.exists(paste(filename,".sqlite", sep = "")))) {
saleData[is.na(saleData)] <- ""
saleData$saleDate <- as.character(saleData$saleDate)
con <-
dbConnect(SQLite(), paste(filename,".sqlite", sep = ""))
dbWriteTable(
con, name = filename, value = transform(saleData, saleDate), row.names =
FALSE, append = FALSE
)
dbDisconnect(con)
} else {
con <-
dbConnect(SQLite(), paste(filename,".sqlite", sep = ""))
sql1 <- paste("SELECT * FROM ",filename, sep = "")
saleDataSaved <- dbGetQuery(con, sql1)
saleData[is.na(saleData)] <- ""
saleData$saleDate <- as.character(saleData$saleDate)
saleDataFinal <- rbind(saleDataSaved, saleData)
dbWriteTable(
con, name = filename, value = transform(saleDataFinal, saleDate), row.names =
FALSE, overwrite = TRUE
)
dbDisconnect(con)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.