## Unit Tests for rf1 package
# 1. rf1 model runs successfully for mosquitoes
# 2. rf1 model runs successfully for humans
# 3. rf1 model final results are correctly formatted for export to dfmip
#devtools::load_all()
test_that("rf1 core model runs successfully for mosquitoes", {
#**# NOTE: non-quantile regression is an option, but is not explicitly tested in the unit tests.
# Set up other inputs
set.seed(20200304)
forecast.targets = c('seasonal.mosquito.MLE')
weekinquestion = as.Date("2015-07-26", "%Y-%m-%d") #**# Is the as.Date part necessary?
analysis.counties = unique(rf1::human.data$location)
analysis.years = seq(2011, 2015)
rf1.inputs = list(NA, NA, analysis.counties, analysis.years, NA, NA, NA)
#week.id = sprintf("test:%s", weekinquestion)
id.string = 'test'
results.path = "RF1TESTS/"
break.type = "seasonal"
response.type = "continuous"
quantile.model = 1
out = FormatDataForRF1(rf1::human.data, rf1::mosq.data, weekinquestion, rf1::weather.data, rf1.inputs, results.path, break.type)
my.data = out[[1]]
independent.vars = out[[2]]
# Split the data set into the forecast year and the historical data
forecast.year = as.numeric(substr(as.character(weekinquestion), 1, 4))
forecast.data = my.data[my.data$year == forecast.year, ]
historical.data = my.data[my.data$year < forecast.year, ] #**# This will prevent later years from informing hindcasts of earlier years
dep.var = "IR"
# drop IR variable from independent.vars
m.independent.vars = independent.vars[independent.vars != "IR"]
mosquito.results.path = sprintf("%s/mosquitoes", results.path)
m.label = "mosquitoes"
mosquito.results = do.rf(historical.data, dep.var, m.independent.vars, mosquito.results.path,
response.type = response.type, label = m.label, quantile.model = quantile.model) #do.spatial = 0, create.test.set = 0, create.ci.null = 0
# Test that mosquito.results came out as expected
kept.vars = as.character(mosquito.results[[6]])
#expect_equal(kept.vars, c('RMEAN_2', 'TMEANC_2', "TMINC_1", 'abundance', 'density'))
#expect_equal(kept.vars, c('RMEAN_2', 'TMEANC_2', 'PR_1')) #**# Unclear why this changed - could be a change in the randomization procedures leading to a different result.
#**# And now it's changed back. Very Very odd.
#**# changed again - but we changed the underlying data in the quarterly summaries, so that could change variable relationships
# Did not change predictions, so clearly some of the minor variables are not contributing much.
expect_equal(kept.vars, c("RMEAN_2", "TMEANC_2", "VPD_2", "TMINC_1"))
new.df = forecast.data[ ,kept.vars]
predictions = predict(mosquito.results$MODEL, new.df) # Does 0.1, 0.5, 0.9 by default
expect_equal(unname(predictions[1,1]), 0)
expect_equal(round(unname(predictions[1,2]),3), 0.014)
expect_equal(round(unname(predictions[4,3]),3), 0.014)
# Save mosquito.results as testing object (only done once. Update if updating unit test results)
#usethis::use_data(mosquito.results)
unlink(results.path)
})
test_that("rf1 model runs successfully for humans", {
#**# NOTE: non-quantile regression is an option, but is not explicitly tested in the unit tests.
# Set up other inputs
set.seed(20200304)
forecast.targets = c('annual.human.cases')
weekinquestion = as.Date("2015-07-26", "%Y-%m-%d") #**# Is the as.Date part necessary?
analysis.counties = unique(rf1::human.data$location)
analysis.years = seq(2011, 2015)
rf1.inputs = list(NA, NA, analysis.counties, analysis.years, NA, NA, NA)
#week.id = sprintf("test:%s", weekinquestion)
id.string = 'test'
results.path = "RF1TESTS/"
break.type = "seasonal"
response.type = "continuous"
quantile.model = 1
out = FormatDataForRF1(rf1::human.data, rf1::mosq.data, weekinquestion, rf1::weather.data, rf1.inputs, results.path, break.type)
my.data = out[[1]]
independent.vars = out[[2]]
# Split the data set into the forecast year and the historical data
forecast.year = as.numeric(substr(as.character(weekinquestion), 1, 4))
forecast.data = my.data[my.data$year == forecast.year, ]
historical.data = my.data[my.data$year < forecast.year, ] #**# This will prevent later years from informing hindcasts of earlier years
dep.var = "Cases"
human.results.path = sprintf("%s/humans", results.path)
h.label = "humans"
human.results = do.rf(historical.data, dep.var, independent.vars, human.results.path,
response.type = response.type, label = h.label,
quantile.model = quantile.model) #do.spatial = 0, create.test.set = 0, create.ci.null = 0
# Test that mosquito.results came out as expected
kept.vars = as.character(human.results[[6]])
#expect_equal(kept.vars, c('PR_1', 'RMEAN_2'))
expect_equal(kept.vars, c("PR_1")) # Does not change the test predictions, interesting enough.
new.df = forecast.data[ ,kept.vars]
new.df = matrix(new.df, ncol = length(kept.vars))
predictions = predict(human.results$MODEL, new.df) # Does 0.1, 0.5, 0.9 by default
expect_equal(unname(predictions[1,1]), 1)
expect_equal(round(unname(predictions[1,2]),0), 1)
expect_equal(round(unname(predictions[4,3]),0), 1)
expect_equal(round(unname(predictions[2,2]),0), 0)
# Save human.results as testing object (only done once. Update if updating unit test results)
#usethis::use_data(human.results)
unlink(results.path)
})
test_that("rf1 model final results are correctly formatted for export to dfmip", {
# Set up other inputs
set.seed(20200304)
forecast.targets = c('annual.human.cases', 'seasonal.mosquito.MLE')
weekinquestion = as.Date("2015-07-26", "%Y-%m-%d") #**# Is the as.Date part necessary?
analysis.counties = unique(rf1::human.data$location)
analysis.years = seq(2011, 2015)
rf1.inputs = list(NA, NA, analysis.counties, analysis.years, NA, NA, NA)
#week.id = sprintf("test:%s", weekinquestion)
id.string = 'test'
results.path = "RF1TESTS/"
out.results = rf1(forecast.targets, rf1::human.data, rf1::mosq.data, rf1::weather.data,
weekinquestion, rf1.inputs, results.path, id.string, break.type = "seasonal", response.type = "continuous",
quantile.model = 1, n.draws = 1000, bins = c(0,seq(1,51,1),101,151,201,1000), use.testing.objects = TRUE)
# Check that 4 outputs are generated: The Results dataframe, the Distributions dataframe, the Bins dataframe, and the model object results
expect_equal(length(out.results), 4)
# Check Results data frame
RF1.results = out.results[[1]]
expect_equal(nrow(RF1.results), 10)
expect_equal(RF1.results$forecast.target[1], 'seasonal.mosquito.MLE')
expect_equal(RF1.results$location[4], 'district4')
expect_equal(RF1.results$location[5], 'test-STATEWIDE')
expect_equal(round(RF1.results$value[1], 3), 0.014)
expect_equal(round(RF1.results$value[5], 3), 0.007)
expect_equal(RF1.results$forecast.target[6], 'annual.human.cases')
expect_equal(RF1.results$value[10], 3)
# Check the distributions data frame
RF1.distributions = out.results[[2]]
expect_equal(nrow(RF1.distributions), 10)
expect_equal(ncol(RF1.distributions), 1002)
expect_equal(RF1.distributions$forecast.target[1], 'seasonal.mosquito.MLE')
expect_equal(RF1.distributions$location[4], 'district4')
expect_equal(RF1.distributions$location[5], 'test-STATEWIDE')
expect_equal(round(RF1.distributions$DRAW1[1], 3), 0.014)
expect_equal(round(RF1.distributions$DRAW999[5], 3), 0.000)
expect_equal(RF1.distributions$forecast.target[6], 'annual.human.cases')
expect_equal(RF1.distributions$DRAW500[10], 3)
# Check the Bins data frame
#**# NOT SCRIPTED
# Check the other outputs
#**# NOT SCRIPTED
# Remove testing directory
unlink(results.path)
})
# Test DOY.to.day function
test_that("DOY.to.date function works", {
expect_equal(DOY.to.date(1, 2001), "2001-01-01")
expect_equal(DOY.to.date(31, 2001), "2001-01-31")
expect_equal(DOY.to.date(32, 2001), "2001-02-01")
expect_equal(DOY.to.date(59, 2001), '2001-02-28')
expect_equal(DOY.to.date(60, 2001), '2001-03-01')
expect_equal(DOY.to.date(60, 2000), '2000-02-29')
expect_equal(DOY.to.date(365, 2001), '2001-12-31')
expect_equal(DOY.to.date(366, 2000), '2000-12-31')
})
# Test that various errors have useful error messages
test_that("Assorted problems give useful information", {
# Test bad values in mosq.data file virus field
set.seed(20200304)
forecast.targets = c('seasonal.mosquito.MLE')
weekinquestion = as.Date("2015-07-26", "%Y-%m-%d") #**# Is the as.Date part necessary?
analysis.counties = unique(rf1::human.data$location)
analysis.years = seq(2011, 2015)
rf1.inputs = list(NA, NA, analysis.counties, analysis.years, NA, NA, NA)
#week.id = sprintf("test:%s", weekinquestion)
id.string = 'test'
results.path = "RF1TESTS/"
mosq.data = rf1::mosq.data
mosq.data$wnv_result[2] = 'none'
# Error message does not match according to the code, but copy and pasting
# the output error messages for Expected and Actual leads to a perfect match.
# Must be some difference in invisible formatting.
#m1 = "The VIRUS field (wnv_result) can only contain 0, 1, or NA values."
#m2 = "Entered values were: 1, none, 0"
#err.message = as.character(sprintf("%s %s", m1, m2))
expect_error(rf1(forecast.targets, rf1::human.data, mosq.data,
rf1::weather.data, weekinquestion, rf1.inputs,
results.path, id.string, break.type = "seasonal",
response.type = "continuous", quantile.model = 1,
n.draws = 1000,
bins = c(0,seq(1,51,1),101,151,201,1000),
use.testing.objects = FALSE))
#, err.message)
})
# Test district.to.location function
test_that("district.to.location function works", {
test.data = rf1::weather.data
colnames(test.data)[1] = 'district' # Change from location - data set has been updated to new name
data.label = "weather.data"
out = district.to.location(test.data, data.label, old.name = 'district', new.name = 'location')
expect_equal(paste(colnames(out), collapse = " "), "location doy year tminc tmeanc tmaxc pr rmean vpd date location_year")
colnames(test.data)[1] = "location"
out = district.to.location(test.data, data.label, old.name = 'district', new.name = 'location')
expect_equal(paste(colnames(out), collapse = " "), "location doy year tminc tmeanc tmaxc pr rmean vpd date location_year")
colnames(test.data)[1] = "notlocation"
expect_error(district.to.location(test.data, data.label, old.name = 'district', new.name = 'location'),
"Required 'location' field is missing. Field names are notlocation, doy, year, tminc, tmeanc, tmaxc, pr, rmean, vpd, date")
colnames(test.data)[1] = 'location'
test.data$district_year = sprintf("%s_%s", test.data$location, test.data$year)
expect_warning(district.to.location(test.data, data.label, old.name = 'district', new.name = 'location'),
"location_year field missing. Subsituting values from district_year field")
})
test_that("convert.env.data function works properly", {
weather.data = rf1::weather.data
all.locations = unique(weather.data$location)
season.breaks = c(31,59, 90) # End of Jan, Feb, and March
env.data1 = convert.env.data(weather.data, all.locations, season.breaks, leap.correct = 1)
#**# NEED TO DO MANUAL CALCULATIONS TO ENSURE THE CORRECT RESULTS ARE BEING PRODUCED.
# CURRENTLY, WE JUST CHECK THAT THEY ARE CONSISTENT AND THE CODE RUNS
expect_equal(nrow(env.data1), 20)
expect_equal(ncol(env.data1), 21)
# [1] is not a leap year, [2] is.
# Recall, these are simulated data with mean 0 and sd1 - so there is no actual weather here
expect_equal(round(env.data1$TMINC_1[1], 2), -0.01)
expect_equal(round(env.data1$TMINC_1[2], 2), 0.17)
expect_equal(round(env.data1$TMINC_2[1], 2), 0.13)
expect_equal(round(env.data1$TMINC_2[2], 2), 0.10)
expect_equal(round(env.data1$TMINC_3[1], 2), -0.07)
expect_equal(round(env.data1$TMINC_3[2], 2), -0.10)
env.data2 = convert.env.data(weather.data, all.locations, season.breaks, leap.correct = 0)
expect_equal(nrow(env.data2), 20)
expect_equal(ncol(env.data2), 21)
expect_equal(round(env.data2$TMINC_1[1], 2), -0.01)
expect_equal(round(env.data2$TMINC_1[2], 2), 0.17) # Should not be different, as leap year should only affect breaks AFTER doy 59
expect_equal(round(env.data2$TMINC_2[1], 2), 0.13)
expect_equal(round(env.data2$TMINC_2[2], 2), 0.07) # Should be different
expect_equal(round(env.data2$TMINC_3[1], 2), -0.07)
expect_equal(round(env.data2$TMINC_3[2], 2), -0.06) # Should be different from above - leap year
#**# Would be good to manually check the math on the average function, but at least now the behavior is correct.
})
# # Test that RF1 model correctly handles districts with no human cases included in the analysis are handled properly.
# test_that("Districts missing human cases are handled properly", {
#
# # Test when all districts have values, a correct output is generated
# stop("Unit test not scripted")
#
# # Test when one or more districts have no human cases, a correct output is generated
# stop("Unit test not scripted")
# })
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.