tests/testthat/test-knn_space_time_disagg_simple.R

# small scale check -----------------------------------
# small scale, but full disagg with current data, and for a subset of the gages
# disagg based on total flow for bluff, green river ut, and cisco, and int for
# mead
library(xts)

load(file = "nf_test_data.rda")

index_flow <- nf_index_flow
mon_flow <- nf_mon_flow

lf <- cbind(2019:2021, c(9000000, 15000000, 12500000))
nsim <- 5
ym <- zoo::as.yearmon("2019-01") + 0:35/12
ym <- paste(format(ym, "%Y"), format(ym, "%m"), sep = "-")

# ** check specifying index years, and make sure values match exactly
# ** check specifiying 1, and no scale_sites
setup(dir.create("tmp_disagg"))
teardown(unlink("tmp_disagg", recursive = TRUE))

ann_sum <- function(x)
{
  do.call(
    cbind,
    lapply(
      seq_len(ncol(x)),
      function(xx) apply(matrix(x[,xx], ncol = 12, byrow = TRUE), 1, sum)
    )
  )
}

# test output ---------------------------
test_that("`knn_space_time_disagg()` output is properly created for nsim = 5", {
  expect_is(
    tmp <- knn_space_time_disagg(
      lf,
      index_flow,
      mon_flow,
      start_month = 1,
      scale_sites = 1:20,
      nsim = nsim
    ),
    "knnst"
  )
  expect_identical(write_knnst(tmp, "tmp_disagg"), tmp)

  for (i in seq_len(nsim)) {
    f1 <- file.path("tmp_disagg", paste0("disagg_", i, ".csv"))
    expect_true(file.exists(f1))
    t1 <- read.csv(f1)
    expect_identical(dim(t1), as.integer(c(36, 30)))

    # all 5 files should not be the same at the monthly level
    j <- ifelse(i == nsim, 1, i + 1)
    expect_false(
      identical(knnst_get_disagg_data(tmp, i), knnst_get_disagg_data(tmp, j)),
      info = paste(i, "compared to", j)
    )

    # but they should all sum to the same annual value for lees ferry (not LB)
    t1 <- knnst_get_disagg_data(tmp, i)
    t1 <- ann_sum(t1)
    t2 <- knnst_get_disagg_data(tmp, j)
    t2 <- ann_sum(t2)
    expect_equal(
      apply(t1[,1:20], 1, sum),
      apply(t2[,1:20], 1, sum),
      info = paste(i, "compared to", j)
    )
    # and those should sum to the input
    temp_ann <- cbind(
      c(2019:2021),
      round(apply(t1[,1:20], 1, sum), 0)
    )
    expect_identical(lf, temp_ann, info = paste("sim:", i))

    # and LB should match the natural flow data exactly
    lb <- rbind(
      as.matrix(
        mon_flow[as.character(tmp$disagg_sims[[i]]$index_years[1]), 21:29]
      ),
      as.matrix(
        mon_flow[as.character(tmp$disagg_sims[[i]]$index_years[2]), 21:29]
      ),
      as.matrix(
        mon_flow[as.character(tmp$disagg_sims[[i]]$index_years[3]), 21:29]
      )
    )
    dimnames(lb) <- NULL
    rownames(lb) <- ym
    colnames(lb) <- colnames(mon_flow)[21:29]

    expect_equal(knnst_get_disagg_data(tmp, i)[,21:29], lb)
  }


  # check index_years

  index_out <- as.matrix(read.csv(file.path("tmp_disagg", "index_years.csv")))
  expect_identical(dim(index_out), as.integer(c(3, nsim)))
  expect_true(!anyNA(index_out))
  expect_true(!anyNA(knnst_index_years(tmp)))
  expect_true(all(index_out %in% index_flow[,1]))
  expect_equal(dim(knnst_index_years(tmp)), c(nrow(lf), nsim))
  # sim
  expect_equal(knnst_nsim(tmp), nsim)
  expect_equal(expect_output(print(tmp)), tmp)
})

ind_yrs <- cbind(c(2000, 1906, 1936), c(1999, 1976, 2010), c(2000, 1909, 1954))
nsim <- 3

# specified index_years ---------------------------
test_that("`knn_space_time_disagg()` works for index years for nsim != 1", {
  expect_is(
    expect_message(tmp <- knn_space_time_disagg(
      lf,
      index_flow,
      mon_flow,
      start_month = 1,
      scale_sites = 1:20,
      nsim = nsim,
      index_years = ind_yrs
    )),
    "knnst"
  )

  expect_equal(knnst_index_years(tmp), ind_yrs)
  expect_equal(dim(knnst_index_years(tmp)), c(nrow(lf), nsim))
  # sim
  expect_equal(knnst_nsim(tmp), nsim)
  # print
  expect_equal(expect_output(print(tmp)), tmp)

  for (i in seq_len(nsim)) {
    # all sims should not be the same at the monthly level
    j <- ifelse(i == nsim, 1, i + 1)
    expect_false(
      identical(knnst_get_disagg_data(tmp, i), knnst_get_disagg_data(tmp, j)),
      info = paste(i, "compared to", j)
    )

    # but they should all sum to the same annual value for lees ferry (not LB)
    t1 <- knnst_get_disagg_data(tmp, i)
    t1 <- ann_sum(t1)
    t2 <- knnst_get_disagg_data(tmp, j)
    t2 <- ann_sum(t2)
    expect_equal(
      apply(t1[,1:20], 1, sum),
      apply(t2[,1:20], 1, sum),
      info = paste(i, "compared to", j)
    )

    # and LB should match the natural flow data exactly
    lb <- rbind(
      as.matrix(mon_flow[as.character(ind_yrs[1, i]), 21:29]),
      as.matrix(mon_flow[as.character(ind_yrs[2, i]), 21:29]),
      as.matrix(mon_flow[as.character(ind_yrs[3, i]), 21:29])
    )
    dimnames(lb) <- NULL
    rownames(lb) <- ym
    colnames(lb) <- colnames(mon_flow)[21:29]

    expect_equal(knnst_get_disagg_data(tmp, i)[,21:29], lb)
  }

  expect_equivalent(
    knnst_get_disagg_data(tmp, 1)[1:12, 15] /
      sum(knnst_get_disagg_data(tmp, 1)[1:12, 15]),
    as.vector(mon_flow[as.character(ind_yrs[1,1]), 15] /
      sum(mon_flow[as.character(ind_yrs[1,1]), 15]))
  )

  expect_equivalent(
    knnst_get_disagg_data(tmp, 2)[25:36, 18] /
      sum(knnst_get_disagg_data(tmp, 2)[25:36, 18]),
    as.vector(mon_flow[as.character(ind_yrs[3, 2]), 18] /
      sum(mon_flow[as.character(ind_yrs[3, 2]), 18]))
  )

  expect_equivalent(
    knnst_get_disagg_data(tmp, 3)[13:24, 1] /
      sum(knnst_get_disagg_data(tmp, 3)[13:24, 1]),
    as.vector(mon_flow[as.character(ind_yrs[2, 3]), 1] /
      sum(mon_flow[as.character(ind_yrs[2, 3]), 1]))
  )
})

# knnstdisagg:::full_year -------------------
test_that("knnstdisagg:::full_year() works", {
  expect_identical(knnstdisagg:::full_year(1), 1:12)
  expect_identical(knnstdisagg:::full_year(10), c(10:12, 1:9))
  expect_identical(knnstdisagg:::full_year(12), c(as.integer(12), 1:11))
  expect_identical(knnstdisagg:::full_year(6), c(6:12, 1:5))
})

# knnstdisagg:::ym_labels <- function(years, start_month) ----------------
test_that("knnstdisagg:::ym_labels works", {
  expect_identical(
    knnstdisagg:::ym_labels(2000:2001, 1),
    paste(
      c(rep(2000, 12), rep(2001, 12)),
      sprintf("%02d", rep(1:12, 2)),
      sep = "-"
    )
  )
  expect_identical(
    knnstdisagg:::ym_labels(2000:2001, 10),
    paste(
      c(rep(1999, 3), rep(2000, 12), rep(2001, 9)),
      sprintf("%02d", c(10:12, 1:12, 1:9)),
      sep = "-"
    )
  )
})

# start_year ------------------
# selecting the nearest neighbor, with the same data, but a different start_year
# should result in same output data with different rownames

flow_mat <- cbind(c(2000, 2001, 2002), c(1400, 1567, 1325))
# made up historical data to use as index years
ind_flow <- cbind(1901:1980, rnorm(80, mean = 1500, sd = 300))
# make up monthly flow for two sites
mon_flow <- cbind(
  rnorm(80 * 12, mean = 20, sd = 5),
  rnorm(80 * 12, mean = 120, sd = 45)
)

test_that("changing start_month does not change data", {
  expect_is(
    d1 <- knn_space_time_disagg(
      flow_mat,
      ind_flow,
      mon_flow,
      start_month = 1,
      scale_sites = 1:2,
      k_weights = knn_params(1, 1)
    ),
    "knnst"
  )
  expect_is(
    d2 <- knn_space_time_disagg(
      flow_mat,
      ind_flow,
      mon_flow,
      start_month = 10,
      scale_sites = 1:2,
      k_weights = knn_params(1, 1)
    ),
    "knnst"
  )
  expect_identical(knnst_index_years(d1), knnst_index_years(d2))

  t1 <- d1$disagg_sims[[1]]$disagg_flow
  expect_identical(
    rownames(t1),
    paste(
      c(rep(2000, 12), rep(2001, 12), rep(2002, 12)),
      sprintf("%02d", rep(1:12, 3)),
      sep = "-"
    )
  )

  t2 <- d2$disagg_sims[[1]]$disagg_flow
  expect_identical(
    rownames(t2),
    paste(
      c(rep(1999, 3), rep(2000, 12), rep(2001, 12), rep(2002, 9)),
      sprintf("%02d", c(10:12, 1:12, 1:12, 1:9)),
      sep = "-"
    )
  )

  rownames(t1) <- NULL
  rownames(t2) <- NULL
  expect_identical(t1, t2)
})

# scale_sites ----------------------
test_that("scale_sites works with different specifications", {
  t1 <-  knn_space_time_disagg(
    flow_mat,
    ind_flow,
    mon_flow,
    start_month = 1,
    scale_sites = TRUE,
    k_weights = knn_params(1, 1)
  )
  t2 <- knn_space_time_disagg(
    flow_mat,
    ind_flow,
    mon_flow,
    start_month = 1,
    scale_sites = 1:2,
    k_weights = knn_params(1, 1)
  )
  # have to remove meta for them to be identical (date and call will not be ==)
  t1$meta <- NULL
  t2$meta <- NULL
  expect_identical(t1, t2)
  # check that indexed sites are == between disaggs
  expect_is(
    expect_message(d1 <- knn_space_time_disagg(
      flow_mat,
      ind_flow,
      mon_flow,
      start_month = 1,
      scale_sites = FALSE,
      k_weights = knn_params(1, 1)
    )),
    "knnst"
  )
  expect_is(
    expect_message(d2 <- knn_space_time_disagg(
      flow_mat,
      ind_flow,
      mon_flow,
      start_month = 1,
      scale_sites = 1,
      k_weights = knn_params(1, 1)
    )),
    "knnst"
  )
  expect_is(
    expect_message(d3 <- knn_space_time_disagg(
      flow_mat,
      ind_flow,
      mon_flow,
      start_month = 1,
      scale_sites = 2,
      k_weights = knn_params(1, 1)
    )),
    "knnst"
  )
  expect_identical(
    d1$disagg_sims[[1]]$disagg_flow[,1],
    d3$disagg_sims[[1]]$disagg_flow[,1]
  )
  expect_identical(
    d1$disagg_sims[[1]]$disagg_flow[,2],
    d2$disagg_sims[[1]]$disagg_flow[,2]
  )

  # all scaled flow should exist in mon_flow
  expect_true(all(d1$disagg_sims[[1]]$disagg_flow %in% mon_flow))
  expect_true(all(d2$disagg_sims[[1]]$disagg_flow[, 2] %in% mon_flow))
  expect_true(all(d3$disagg_sims[[1]]$disagg_flow[, 1] %in% mon_flow))
  expect_false(all(d2$disagg_sims[[1]]$disagg_flow[, 1] %in% mon_flow))
  expect_false(all(d3$disagg_sims[[1]]$disagg_flow[, 2] %in% mon_flow))
})

# scaled vs. unscaled -------------------------
# simple test with monthly gages that sum to index gage
# if input are volumes that exist in the index gage and k = 1, then monthly
# data should also exist

mon_flow <- round(matrix(rnorm(36*3, mean = 200, sd = 20), ncol = 3), 0)
ann_ind <- cbind(2010:2012, apply(ann_sum(mon_flow), 1, sum))
ann_flow <- cbind(
  2020:2025,
  c(ann_ind[2, 2], ann_ind[1, 2], ann_ind[3, 2], min(ann_ind[,2] * .7),
    max(ann_ind[,2] * 1.23), 200*12)
)

test_that("values are identical to input as appropriate", {
  expect_is(
    tmp <- knn_space_time_disagg(
      ann_flow,
      ann_ind,
      mon_flow,
      start_month = 1,
      k_weights = knn_params(1, 1)
    ),
    "knnst"
  )

  # first three years of data should exist in input monthly data
  expect_true(all(knnst_get_disagg_data(tmp)[1:36,] %in% mon_flow))

  # last three years of data should not exist in monthly data
  expect_false(all(knnst_get_disagg_data(tmp)[37:72,] %in% mon_flow))
})

# random_seed -----------------------
test_that("random_seed makes things reproducible but different", {
  ann_flow <- cbind(c(2000, 2001, 2002), c(1400, 1567, 1325))
  # made up historical data to use as index years
  ann_ind <- cbind(1901:1980, rnorm(80, mean = 1500, sd = 300))
  # make up monthly flow for two sites
  mon_flow <- cbind(
    rnorm(80 * 12, mean = 20, sd = 5),
    rnorm(80 * 12, mean = 120, sd = 45)
  )

  d1 <- knn_space_time_disagg(
    ann_flow,
    ann_ind,
    mon_flow,
    start_month = 1,
    random_seed = 131313
  )

  d2 <- knn_space_time_disagg(
    ann_flow,
    ann_ind,
    mon_flow,
    start_month = 1,
    nsim = 5,
    random_seed = 131313
  )

  d3 <- knn_space_time_disagg(
    ann_flow,
    ann_ind,
    mon_flow,
    start_month = 1
  )

  d4 <- knn_space_time_disagg(
    ann_flow,
    ann_ind,
    mon_flow,
    start_month = 1,
    random_seed = 131313
  )

  d5 <- knn_space_time_disagg(
    ann_flow,
    ann_ind,
    mon_flow,
    start_month = 1,
    nsim = 5,
    random_seed = 131313
  )

  d6 <- knn_space_time_disagg(
    ann_flow,
    ann_ind,
    mon_flow,
    start_month = 1,
    nsim = 5
  )

  # remove meta before testing equivelence as date and call may be different
  d1$meta <- NULL
  d2$meta <- NULL
  d3$meta <- NULL
  d4$meta <- NULL
  d5$meta <- NULL
  d6$meta <- NULL

  expect_identical(d1, d4)
  expect_identical(d1$disagg_sims[[1]], d2$disagg_sims[[1]])
  expect_false(isTRUE(all.equal(d1, d3)))
  expect_false(isTRUE(all.equal(d2$disagg_sims[[1]], d2$disagg_sims[[2]])))
  expect_false(isTRUE(all.equal(d2$disagg_sims[[2]], d2$disagg_sims[[3]])))
  expect_identical(d2, d5)
  expect_false(identical(d2, d6))
  expect_false(isTRUE(all.equal(d5, d6)))
})
rabutler-usbr/knnstdisagg documentation built on Sept. 14, 2023, 2:47 p.m.