#' @demoTitle baseball-pyramid
#'
#' Demo population pyramids
#'
#' To install and use baseball demo dataset in Aster:
#'
#' 1. download baseball.zip from
#' https://bitbucket.org/grigory/toaster/downloads/baseball.zip
#' 2. run script to create data set in Aster
#' sh load_baseball_data.sh -d mydbname -U username -w mypassword
#' 3. create Aster ODBC DSN on your desktop
#' see https://bitbucket.org/grigory/toaster/wiki/Home#markdown-header-odbc-driver-and-dns
library(toaster)
## Utility input function: ask the user for a value on the console, falling
## back to a default answer when nothing is typed.
##
## prompt  - text shown to the user; a suffix is appended to it (see below).
## default - value returned when the input is empty; may be NULL.
##
## Returns the text entered by the user, or `default` when the entered text is
## the empty string.
readlineDef <- function(prompt, default) {
  # Advertise the default in square brackets only when there is one; with no
  # default the prompt simply ends in ": ".
  if (!is.null(default)) {
    prompt <- paste0(prompt, "[", default, "]: ")
  } else {
    prompt <- paste0(prompt, ": ")
  }

  result <- readline(prompt)

  # Empty input means "accept the default".
  if (nzchar(result)) result else default
}
## Utility connection function: ask for an ODBC DSN and open a connection.
##
## dsn - DSN offered to the user as the default answer; NULL for no default.
##
## Returns the object produced by odbcConnect() for the chosen DSN.
## Stops with an error when no DSN is supplied or the connection cannot be used.
connectWithDSNToAster <- function(dsn = NULL) {
  # readlineDef() appends the "[default]: " / ": " suffix, so the prompt text
  # itself carries no trailing colon.
  dsn <- readlineDef("Enter Aster ODBC DSN ", dsn)
  if (is.null(dsn) || !nzchar(dsn)) {
    stop("Can't connect to Aster - no DSN was provided", call. = FALSE)
  }

  # Best-effort close of a `conn` left over from an earlier run. There is no
  # local `conn` in this function, so the name resolves through the enclosing
  # environment; any failure (including "object not found") is ignored.
  tryCatch(close(conn), error = function(err) NULL)

  tryCatch({
    channel <- odbcConnect(dsn)
    # Presumably a probe: querying the channel raises an error if it is not
    # usable, which routes control to the handler below.
    odbcGetInfo(channel)
    channel
  }, error = function(err) {
    stop(paste0("Can't connect to Aster - check DSN '", dsn, "': ",
                conditionMessage(err)),
         call. = FALSE)
  })
}
## connect to Aster first
conn <- connectWithDSNToAster()

## must be connected to baseball dataset: every table used below has to exist.
## isTRUE() keeps the condition a plain TRUE/FALSE even if isTable() reports NA
## for some name (NOTE(review): confirm isTable()'s return values against the
## toaster documentation); a bare `!all(...)` would then fail inside `if` with
## an unrelated "missing value" error instead of the message below.
if (!isTRUE(all(isTable(conn, c('salaries', 'pitching_enh', 'pitching', 'batting_enh'))))) {
  stop("Must connect to baseball dataset and tables must exist.")
}
# Compare salaries by league: histogram of the salary column in bins of
# 200000 starting at 0, computed per lgid for years 2000 through 2013.
salaryHistAll <- computeHistogram(
  conn,
  tableName = "salaries",
  columnName = "salary",
  binsize = 200000,
  startvalue = 0,
  by = "lgid",
  where = "yearID between 2000 and 2013"
)

# Pyramid of the histogram above, divided by lgid into NL and AL; the bin and
# count columns are named explicitly here.
createPopPyramid(
  data = salaryHistAll,
  bin = "bin_start",
  count = "bin_count",
  divideBy = "lgid",
  values = c("NL", "AL"),
  title = "Salary Pyramid by MLB Leagues",
  xlab = "Salary",
  ylab = "Player Count"
)
# Same salary pyramid restricted to the 0 to 5 million range, with finer bins
# of 100000.
salaryHist5Mil <- computeHistogram(
  conn,
  tableName = "salaries",
  columnName = "salary",
  binsize = 100000,
  startvalue = 0,
  endvalue = 5000000,
  by = "lgid",
  where = "yearID between 2000 and 2013"
)

# Bin and count column names are left to createPopPyramid's defaults.
createPopPyramid(
  data = salaryHist5Mil,
  divideBy = "lgid",
  values = c("NL", "AL"),
  title = "Salary Pyramid by MLB Leagues (less 5M only)",
  xlab = "Salary",
  ylab = "Player Count"
)
# ERA pyramid by leagues: histogram of the era column from the pitching table,
# bins of 0.1 between 0 and 10, per lgid for years 2000 through 2013.
eraHist <- computeHistogram(
  conn,
  tableName = "pitching",
  columnName = "era",
  binsize = 0.1,
  startvalue = 0,
  endvalue = 10,
  by = "lgid",
  where = "yearid between 2000 and 2013"
)

createPopPyramid(
  data = eraHist,
  divideBy = "lgid",
  values = c("NL", "AL"),
  title = "ERA Pyramid by MLB Leagues",
  xlab = "ERA",
  ylab = "Player Count"
)
# Log ERA: histogram of the era_log column from pitching_enh in bins of 0.02
# over the fixed range given below; rows with era of 0 or less are filtered
# out by the where clause.
eraLogHist <- computeHistogram(
  conn,
  tableName = "pitching_enh",
  columnName = "era_log",
  binsize = 0.02,
  startvalue = -0.42021640338318984325,
  endvalue = 2.2764618041732441,
  by = "lgid",
  where = "yearid between 2000 and 2013 and era > 0"
)

createPopPyramid(
  data = eraLogHist,
  divideBy = "lgid",
  values = c("NL", "AL"),
  title = "log(ERA) Pyramid by MLB Leagues",
  xlab = "log(ERA)",
  ylab = "Player Count"
)
# Batting (BA) pyramid by leagues: histogram of the ba column from batting_enh
# in bins of 0.01 between 0.01 and 0.51, per lgid for years 2000 through 2013.
battingHist <- computeHistogram(
  conn,
  tableName = "batting_enh",
  columnName = "ba",
  binsize = 0.01,
  startvalue = 0.01,
  endvalue = 0.51,
  by = "lgid",
  where = "yearid between 2000 and 2013"
)

# Title spelling corrected ("Leages" -> "Leagues") to match the other plots.
createPopPyramid(
  data = battingHist,
  divideBy = "lgid",
  values = c("NL", "AL"),
  title = "Batting BA Pyramid by MLB Leagues",
  xlab = "BA",
  ylab = "Player Count"
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.