examples/large-summarize_rainfall-test.R

# Test summarize_rainfall function large data sets (> 88 MB)
# Jeff Oliver
# jcoliver@email.arizona.edu
# 2019-11-17

rm(list = ls())

################################################################################
library(wxsumR)

infile <- "../large-data-files/NPSY4_x1_rf1_daily.csv"
outfile <- NULL

test_data <- read.csv(file = infile)

message("Don't run this, it takes ALL the RAM")
stop()

########################################
# Test 1, season includes new year
test_num <- 1
message(paste0("Running test ", test_num, " of summarize_rainfall"))
start_month <- 11
end_month <- 02
start_day <- 15
end_day <- 25

# Test the summarize_rainfall function by performing one of the calculations
# (mean_season) on original data. Need to do some wrangling on these test data
# to perform the calculations
# Create vector of the column names we want for 1983 season data
end_date <- as.Date(paste0("1984-", end_month, "-", end_day))
current_date <- as.Date(paste0("1983-", start_month, "-", start_day))
test_col_names <- character(as.numeric(end_date - current_date))
counter <- 1
while (current_date <= end_date) {
  test_col_names[counter] <- paste0("rf_", format(current_date, "%Y%m%d"))
  current_date <- current_date + 1
  counter <- counter + 1
}

# Calculate the row mean for this subset of data; results should correspond to
# values in the mean_season column in output of summarize_temperature
season_means <- rowMeans(x = test_data[order(test_data$y4_hhid), test_col_names])

test_start <- Sys.time()
rain_summary <- summarize_rainfall(inputfile = infile,
                                   start_month = start_month,
                                   end_month = end_month,
                                   start_day = start_day,
                                   end_day = end_day,
                                   wide = FALSE)
test_end <- Sys.time()
if (all(season_means == rain_summary$mean_season[rain_summary$season_year == 1983])) {
  message("Test ", test_num, " PASS")
} else {
  message("Test ", test_num, " FAIL")
}
test_time <- difftime(time1 = test_end, time2 = test_start, units = "mins")
test_time <- round(x = test_time, digits = 3)
message(paste0("Test ", test_num, " time: ", test_time, " minutes"))

stop()

########################################
# Test 2, season excludes new year
test_num <- 2
message(paste0("Running test ", test_num, " of summarize_rainfall"))
start_month <- 02
end_month <- 11
start_day <- 15
end_day <- 25

# Create vector of the column names we want for 1983 season data
end_date <- as.Date(paste0("1983-", end_month, "-", end_day))
current_date <- as.Date(paste0("1983-", start_month, "-", start_day))
test_col_names <- character(as.numeric(end_date - current_date))
counter <- 1
while (current_date <= end_date) {
  test_col_names[counter] <- paste0("rf_", format(current_date, "%Y%m%d"))
  current_date <- current_date + 1
  counter <- counter + 1
}

# Calculate the row mean for this subset of data; results should correspond to
# values in the mean_season column in output of summarize_temperature
season_means <- rowMeans(x = test_data[order(test_data$y4_hhid), test_col_names])

test_start <- Sys.time()
rain_summary <- summarize_rainfall(inputfile = infile,
                                   start_month = start_month,
                                   end_month = end_month,
                                   start_day = start_day,
                                   end_day = end_day,
                                   wide = FALSE)
test_end <- Sys.time()
if (all(season_means == rain_summary$mean_season[rain_summary$season_year == 1983])) {
  message("Test ", test_num, " PASS")
} else {
  message("Test ", test_num, " FAIL")
}
test_time <- difftime(time1 = test_end, time2 = test_start, units = "mins")
test_time <- round(x = test_time, digits = 3)
message(paste0("Test ", test_num, " time: ", test_time, " minutes"))

########################################
# Test 3, season includes new year (same as test 1) but output wide format data
test_num <- 3
message(paste0("Running test ", test_num, " of summarize_rainfall"))
start_month <- 11
end_month <- 02
start_day <- 15
end_day <- 25

# Test the summarize_rainfall function by performing one of the calculations
# (mean_season) on original data. Need to do some wrangling on these test data
# to perform the calculations
# Create vector of the column names we want for 1983 season data
end_date <- as.Date(paste0("1984-", end_month, "-", end_day))
current_date <- as.Date(paste0("1983-", start_month, "-", start_day))
test_col_names <- character(as.numeric(end_date - current_date))
counter <- 1
while (current_date <= end_date) {
  test_col_names[counter] <- paste0("rf_", format(current_date, "%Y%m%d"))
  current_date <- current_date + 1
  counter <- counter + 1
}

# Calculate the row mean for this subset of data; results should correspond to
# values in the mean_season column in output of summarize_temperature
season_means <- rowMeans(x = test_data[order(test_data$y4_hhid), test_col_names])

test_start <- Sys.time()
rain_summary <- summarize_rainfall(inputfile = infile,
                                   start_month = start_month,
                                   end_month = end_month,
                                   start_day = start_day,
                                   end_day = end_day,
                                   wide = TRUE)
test_end <- Sys.time()
if (all(season_means == rain_summary$mean_season_1983)) {
  message("Test ", test_num, " PASS")
} else {
  message("Test ", test_num, " FAIL")
}
test_time <- difftime(time1 = test_end, time2 = test_start, units = "mins")
test_time <- round(x = test_time, digits = 3)
message(paste0("Test ", test_num, " time: ", test_time, " minutes"))
jcoliver/weathercommand documentation built on Sept. 12, 2021, 3:28 a.m.