tests/testthat/test-nn-rnn.R

context("nn-rnn")

test_that("rnn nonlinearity", {
  rnn <- nn_rnn(1, 10)
  expect_equal(rnn$nonlinearity, "tanh")

  rnn <- nn_rnn(1, 10, nonlinearity = "relu")
  expect_equal(rnn$nonlinearity, "relu")

  expect_error(
    rnn <- nn_rnn(1, 10, nonlinearity = "garbage"),
    class = "value_error"
  )
})

test_that("rnn dropout", {
  for (p in c(0., .276, .731, 1)) {
    for (train in c(TRUE, FALSE)) {
      rnn <- nn_rnn(10, 1000, 2, bias = FALSE, dropout = p, nonlinearity = "relu")

      with_no_grad({
        rnn$weight_ih_l1$fill_(1)
        rnn$weight_hh_l1$fill_(1)
        rnn$weight_ih_l2$fill_(1)
        rnn$weight_hh_l2$fill_(1)
      })

      if (train) {
        rnn$train()
      } else {
        rnn$eval()
      }

      input <- torch_ones(1, 1, 10)
      hx <- torch_zeros(2, 1, 1000)

      out <- rnn(input, hx)
      output <- out[[1]]
      hy <- out[[2]]


      expect_equal_to_tensor(output$min(), output$max(), tolerance = 1e-2)

      output_val <- output[1, 1, 1]

      if (p == 0 || !train) {
        expect_equal_to_r(output_val, 10000)
      } else if (p == 1) {
        expect_equal_to_r(output_val, 0)
      } else {
        expect_equal_to_r(output_val > 8000, TRUE)
        expect_equal_to_r(output_val < 12000, TRUE)
      }

      expect_equal_to_tensor(hy[1, , ]$min(), hy[1, , ]$max(), tolerance = 1e-2)
      expect_equal_to_tensor(hy[2, , ]$min(), hy[2, , ]$max(), tolerance = 1e-2)
      expect_equal_to_r(hy[1, 1, 1], 10)
      expect_equal_to_tensor(hy[2, 1, 1], output_val, tolerance = 1e-2)
    }
  }
})

test_that("rnn packed sequence", {
  x <- torch_tensor(rbind(
    c(1, 2, 0, 0),
    c(1, 2, 3, 0),
    c(1, 2, 3, 4)
  ), dtype = torch_float())
  x <- x[, , newaxis]
  lens <- torch_tensor(c(2, 3, 4), dtype = torch_long())

  p <- nn_utils_rnn_pack_padded_sequence(x, lens,
    batch_first = TRUE,
    enforce_sorted = FALSE
  )

  rnn <- nn_rnn(1, 4, nonlinearity = "relu")
  out <- rnn(p)

  unpack <- nn_utils_rnn_pad_packed_sequence(out[[1]])
  expect_tensor_shape(unpack[[1]], c(4, 3, 4))
  expect_equal_to_r(unpack[[2]]$to(dtype = torch_int()), c(2, 3, 4))
})

test_that("lstm", {
  lstm <- nn_lstm(10, 5)
  expect_equal(lstm$mode, "LSTM")

  input <- torch_ones(1, 1, 10)
  o <- lstm(input)
  expect_length(o, 2)

  expect_tensor_shape(o[[1]], c(1, 1, 5))
  expect_tensor_shape(o[[2]][[1]], c(1, 1, 5))
  expect_tensor_shape(o[[2]][[2]], c(1, 1, 5))

  expect_tensor_shape(lstm$weight_ih_l1, c(20, 10))
  expect_tensor_shape(lstm$weight_hh_l1, c(20, 5))
  expect_tensor_shape(lstm$bias_ih_l1, c(20))
  expect_tensor_shape(lstm$bias_hh_l1, c(20))

  expect_length(lstm$parameters, 4)

  with_no_grad({
    lstm$weight_ih_l1$fill_(1)
    lstm$weight_hh_l1$fill_(1)
    lstm$bias_ih_l1$fill_(1)
    lstm$bias_hh_l1$fill_(1)
  })

  z <- lstm(input)

  expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.7615868, tolerance = 1e-5)
  expect_equal_to_tensor(z[[2]][[1]], torch_ones(1, 1, 5) * 0.7615868, tolerance = 1e-5)
  expect_equal_to_tensor(z[[2]][[2]], torch_ones(1, 1, 5), tolerance = 1e-5)

  lstm <- nn_lstm(10, 5, bias = FALSE)

  expect_tensor_shape(lstm$weight_ih_l1, c(20, 10))
  expect_tensor_shape(lstm$weight_hh_l1, c(20, 5))
  expect_null(lstm$bias_ih_l1)
  expect_null(lstm$bias_hh_l1, NULL)

  with_no_grad({
    lstm$weight_ih_l1$fill_(1)
    lstm$weight_hh_l1$fill_(1)
  })

  z <- lstm(input)

  expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.7615405, tolerance = 1e-5)
  expect_equal_to_tensor(z[[2]][[1]], torch_ones(1, 1, 5) * 0.7615405, tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][[2]], torch_ones(1, 1, 5), tolerance = 1e-4)

  lstm <- nn_lstm(10, 5, num_layers = 2)
  expect_length(lstm$parameters, 8)
  lstm <- nn_lstm(10, 5, num_layers = 3)
  expect_length(lstm$parameters, 12)

  with_no_grad({
    for (p in lstm$parameters) {
      p$fill_(1)
    }
  })

  z <- lstm(input)
  expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.7580, tolerance = 1e-4)

  expect_equal_to_tensor(z[[2]][[1]][1, , ], torch_ones(1, 5) * 0.7616, tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][[1]][2, , ], torch_ones(1, 5) * 0.7580, tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][[1]][3, , ], torch_ones(1, 5) * 0.7580, tolerance = 1e-4)

  expect_equal_to_tensor(z[[2]][[2]][1, , ], torch_ones(1, 5), tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][[2]][2, , ], torch_ones(1, 5) * 0.9970, tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][[2]][3, , ], torch_ones(1, 5) * 0.9969, tolerance = 1e-4)
})

test_that("gru", {
  gru <- nn_gru(10, 5)
  expect_equal(gru$mode, "GRU")

  input <- torch_ones(1, 1, 10)
  o <- gru(input)
  expect_length(o, 2)

  expect_tensor_shape(o[[1]], c(1, 1, 5))
  expect_tensor_shape(o[[2]], c(1, 1, 5))

  expect_tensor_shape(gru$weight_ih_l1, c(15, 10))
  expect_tensor_shape(gru$weight_hh_l1, c(15, 5))
  expect_tensor_shape(gru$bias_ih_l1, c(15))
  expect_tensor_shape(gru$bias_hh_l1, c(15))

  expect_length(gru$parameters, 4)

  with_no_grad({
    gru$weight_ih_l1$fill_(1)
    gru$weight_hh_l1$fill_(1)
    gru$bias_ih_l1$fill_(1)
    gru$bias_hh_l1$fill_(1)
  })

  z <- gru(input)

  expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 6.1989e-06, tolerance = 1e-5)
  expect_equal_to_tensor(z[[2]], torch_ones(1, 1, 5) * 6.1989e-06, tolerance = 1e-5)

  gru <- nn_gru(10, 5, bias = FALSE)

  expect_tensor_shape(gru$weight_ih_l1, c(15, 10))
  expect_tensor_shape(gru$weight_hh_l1, c(15, 5))
  expect_null(gru$bias_ih_l1)
  expect_null(gru$bias_hh_l1, NULL)

  with_no_grad({
    gru$weight_ih_l1$fill_(1)
    gru$weight_hh_l1$fill_(1)
  })

  z <- gru(input)

  expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 4.5419e-05, tolerance = 1e-5)
  expect_equal_to_tensor(z[[2]], torch_ones(1, 1, 5) * 4.5419e-05, tolerance = 1e-4)

  gru <- nn_gru(10, 5, num_layers = 2)
  expect_length(gru$parameters, 8)
  gru <- nn_gru(10, 5, num_layers = 3)
  expect_length(gru$parameters, 12)

  with_no_grad({
    for (p in gru$parameters) {
      p$fill_(1)
    }
  })

  z <- gru(input)
  expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.0702, tolerance = 1e-4)

  expect_equal_to_tensor(z[[2]][1, , ], torch_ones(1, 5) * 6.1989e-06, tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][2, , ], torch_ones(1, 5) * 1.1378e-01, tolerance = 1e-4)
  expect_equal_to_tensor(z[[2]][3, , ], torch_ones(1, 5) * 7.0209e-02, tolerance = 1e-4)
})

test_that("rnn gpu", {
  skip_if_cuda_not_available()

  rnn <- nn_rnn(10, 1)
  rnn$to(device = "cuda")

  input <- torch_ones(1, 1, 10, device = "cuda")

  expect_message(out <- rnn(input), regexp = NA)

  expect_length(out, 2)
  expect_tensor_shape(out[[1]], c(1, 1, 1))
  expect_tensor_shape(out[[2]], c(1, 1, 1))
})

test_that("GRU on the GPU keeps its parameters", {
  skip_if_cuda_not_available()

  model <- nn_module(
    initialize = function(input_size, hidden_size) {
      self$rnn <- nn_gru(
        input_size = input_size,
        hidden_size = hidden_size,
        batch_first = TRUE
      )
      self$output <- nn_linear(hidden_size, 1)
    },
    forward = function(x) {
      # list of [output, hidden]
      # we are interested in the final timestep only, so we can directly use [[2]]
      # but we want to remove the un-needed singleton dimension on the left
      x <- self$rnn(x)[[2]]$squeeze(1)
      x %>% self$output()
    }
  )
  m <- model(1, 64)
  e_pars <- names(m$parameters)
  m$cuda()
  r_pars <- names(m$parameters)

  expect_equal(r_pars, e_pars)
})

test_that("lstm and gru works with packed sequences", {
  # regression test for https://github.com/mlverse/torch/issues/499

  x <- torch_tensor(rbind(
    c(1, 2, 0, 0),
    c(1, 2, 3, 0),
    c(1, 2, 3, 4)
  ), dtype = torch_float())
  x <- x[, , newaxis]
  lens <- torch_tensor(c(2, 3, 4), dtype = torch_long())

  p <- nn_utils_rnn_pack_padded_sequence(x, lens,
    batch_first = TRUE,
    enforce_sorted = FALSE
  )

  rnn <- nn_lstm(1, 4)
  out <- rnn(p)

  unpack <- nn_utils_rnn_pad_packed_sequence(out[[1]])
  expect_tensor_shape(unpack[[1]], c(4, 3, 4))

  rnn <- nn_gru(1, 4)
  out <- rnn(p)

  unpack <- nn_utils_rnn_pad_packed_sequence(out[[1]])
  expect_tensor_shape(unpack[[1]], c(4, 3, 4))
})

test_that("gru can be traced", {
  x <- nn_gru(10, 10)
  tr <- jit_trace(x, torch_randn(10, 10, 10))

  v <- torch_randn(10, 10, 10)
  expect_equal_to_tensor(
    x(v)[[1]],
    tr(v)[[1]]
  )
})

Try the torch package in your browser

Any scripts or data that you put into this service are public.

torch documentation built on June 7, 2023, 6:19 p.m.