tests/testthat/test-get_retrosheet.R

test_that("Caching works", {

    skip_if_offline(host = "retrosheet.org")

    # With caching
    schedule_1 <- get_retrosheet("schedule", 1995, cache = "testdata")
    schedule_1a <- get_retrosheet("schedule", 1995, cache = "testdata/") # Test with trailing slash
    roster_1 <- get_retrosheet("roster", 1995, cache = "testdata")
    game_1 <- get_retrosheet("game", 2012, cache = "testdata")
    play_1 <- get_retrosheet("play", 2012, "SFN", cache = "testdata")

    # Without caching
    schedule_2 <- get_retrosheet("schedule", 1995)
    roster_2 <- get_retrosheet("roster", 1995)
    game_2 <- get_retrosheet("game", 2012)
    play_2 <- get_retrosheet("play", 2012, "SFN")

    # With and without caching should always give you the same thing
    expect_equal(schedule_1, schedule_2)
    expect_equal(schedule_1, schedule_1a)
    expect_equal(roster_1, roster_2)
    expect_equal(game_1, game_2)
    expect_equal(play_1, play_2)

    # Re-using cached data should give a message about using a local cache
    expect_message(get_retrosheet("schedule", 1995, cache = "testdata"), "Using local cache: testdata/schedule/1995SKED.zip")

})

test_that("Schedule downloading works", {

    skip_if_offline(host = "retrosheet.org")

    schedule <- get_retrosheet(type = "schedule", year = 1995, cache = "testdata")
    schedule_unnamed <- get_retrosheet("schedule", 1995, cache = "testdata")
    schedule_splits <- get_retrosheet(type = "schedule", year = 1995, schedSplit = "TimeOfDay")
    schedule_splits_some_named <- get_retrosheet("schedule", 1995, schedSplit = "TimeOfDay")

    # In 1995, each of the 28 teams played 72 games
    expect_equal(nrow(schedule), 28 * 72)

    # Schedules should be the same, regardless of whether we used named params
    expect_equal(schedule_splits, schedule_splits_some_named)

    # There are three different splits, (D)ay, (E)vening, and (Night)
    expect_equal(length(schedule_splits), 3)

    # In 1995, there were 1355 (N)ight games
    expect_equal(nrow(schedule_splits$N), 1355)

    # There should be the same number of games in the split and unsplit schedules
    expect_equal(sum(unlist(lapply(schedule_splits, nrow), recursive = TRUE)), nrow(schedule))

    # Confirm that using named and unnamed arguments returns the same thing
    expect_equal(schedule, schedule_unnamed)

})

test_that("Roster downloading works", {

    skip_if_offline(host = "retrosheet.org")

    roster <- get_retrosheet("roster", 1995, cache = "testdata")

    # In 1995, there should be 28 regular teams
    # The two All Star teams (NLS + ALS) are no longer included in these data
    expect_equal(length(roster), 28)

    # In 1995, 39 different players played for TOR
    expect_equal(nrow(roster$TOR), 39)

})

test_that("Game downloading works", {

    skip_if_offline(host = "retrosheet.org")

    game <- get_retrosheet("game", 2012, cache = "testdata")

    # Confirm the returned dataframe has the correct dimensions
    expect_equal(dim(game), c(2430, 161))

})

test_that("Play downloading works", {

    skip_if_offline(host = "retrosheet.org")

    play <- get_retrosheet("play", 2012, "SFN", cache = "testdata")

    # There were 81 different SFN home games in 2012
    expect_equal(length(play), 81)

    # There were 68 different plays in the first game of the year
    expect_equal(nrow(play[[1]]$play), 68)

    # There were 4 different substitutions in the first game of the year
    expect_equal(nrow(play[[1]]$sub), 4)

    # There were 18 different starters in the first game of the year
    # (because it's a pre-DL NL game, there are always 18 different starters)
    expect_equal(nrow(play[[1]]$start), 18)

    # There are 26 different bits of "info" associated with that game00
    expect_equal(nrow(play[[1]]$info), 26)

})

test_that("Data is cleaned up as expected", {

    skip_if_offline(host = "retrosheet.org")

    game <- get_retrosheet("game", 2012, cache = "testdata")

    expect_true("data.frame" %in% class(game))
    expect_equal(class(game$Date), "Date")  # Dates are correct data type
    expect_false(any(game$Completion == "", na.rm = TRUE))  # Confirm that empty strings are converted to NAs
    expect_false(any(game$Forfeit == "", na.rm = TRUE))  # Confirm that empty strings are converted to NAs
    expect_false(any(game$Protest == "", na.rm = TRUE))  # Confirm that empty strings are converted to NAs

    schedule <- get_retrosheet(type = "schedule", year = 1995, cache = "testdata")
    expect_true("data.frame" %in% class(schedule))
    expect_equal(class(schedule$Date), "Date")  # Confirm that dates are Dates
    expect_equal(class(schedule$GameNo), "integer")  # Confirm that simple data (esp game numbers) are integers

    # Warn if trying to use stringsAsFactors
    expect_warning(
        get_retrosheet("schedule", 1995, schedSplit = "TimeOfDay", stringsAsFactors = TRUE, cache = "testdata")
        )
})

# Delete any previously cached data
unlink("testdata", recursive = TRUE)
colindouglas/retrosheet documentation built on March 2, 2024, 4:51 p.m.