tests/testthat/test-density-semiparametric.R

context("test-density-semiparametric.R -- Lrnr_density_semiparametric")

test_that("Lrnr_density_semiparametric works", {
  set.seed(1234)

  # define test dataset
  n <- 1e6
  x <- runif(n, 0, 3)
  epsilon_x <- rnorm(n, 0, 0.5 + sqrt(x))
  # epsilon_x <- rnorm(n)
  y <- 3 * x + epsilon_x

  data <- data.table(x = x, x2 = x^2, y = y)
  covariates <- c("x")
  task <- make_sl3_Task(data, covariates = covariates, outcome = "y")

  # train
  hse_learner <- make_learner(Lrnr_density_semiparametric,
    mean_learner = make_learner(Lrnr_glm)
  )

  mvd_learner <- make_learner(Lrnr_density_semiparametric,
    mean_learner = make_learner(Lrnr_glm),
    var_learner = make_learner(Lrnr_glm)
  )

  hse_fit <- hse_learner$train(task)
  mvd_fit <- mvd_learner$train(task)

  # test sampling
  y_samp <- mvd_fit$sample(task[1:10], 100)

  x_grid <- seq(from = min(data$x), to = max(data$x), length = 100)
  y_grid <- seq(from = min(data$y), to = 1.5 * max(data$y), length = 100)
  pred_data <- as.data.table(expand.grid(x = x_grid, y = y_grid))
  pred_data$x2 <- pred_data$x^2
  pred_task <- make_sl3_Task(pred_data, covariates = covariates, outcome = "y")

  pred_data$hse_preds <- hse_fit$predict(pred_task)
  pred_data$mvd_preds <- mvd_fit$predict(pred_task)
  pred_data[, true_dens := dnorm(x = y, mean = 3 * x, sd = abs(x))]

  nll <- function(observed, pred) {
    res <- -1 * observed * log(pred)
    res[observed < .Machine$double.eps] <- 0

    return(res)
  }

  hse_nll <- sum(nll(pred_data$true_dens, pred_data$hse_preds))
  mvd_nll <- sum(nll(pred_data$true_dens, pred_data$mvd_preds))

  expect_lt(hse_nll, n)
  expect_lt(mvd_nll, hse_nll)
  # long <- melt(pred_data, id = c("x", "y", "true_dens"), measure = c("hse_preds", "mvd_preds", "true_dens"))
  # x_samp <- sample(x_grid, 20)
  # ggplot(long[x%in%x_samp],aes(x=y,y=value,color=variable))+geom_line()+facet_wrap(~round(x,5),scales="free_x")+theme_bw()+coord_flip()
})
jeremyrcoyle/sl3 documentation built on April 30, 2024, 10:16 p.m.