Nothing
context("nn-rnn")
test_that("rnn nonlinearity", {
rnn <- nn_rnn(1, 10)
expect_equal(rnn$nonlinearity, "tanh")
rnn <- nn_rnn(1, 10, nonlinearity = "relu")
expect_equal(rnn$nonlinearity, "relu")
expect_error(
rnn <- nn_rnn(1, 10, nonlinearity = "garbage"),
class = "value_error"
)
})
test_that("rnn dropout", {
for (p in c(0., .276, .731, 1)) {
for (train in c(TRUE, FALSE)) {
rnn <- nn_rnn(10, 1000, 2, bias = FALSE, dropout = p, nonlinearity = "relu")
with_no_grad({
rnn$weight_ih_l1$fill_(1)
rnn$weight_hh_l1$fill_(1)
rnn$weight_ih_l2$fill_(1)
rnn$weight_hh_l2$fill_(1)
})
if (train) {
rnn$train()
} else {
rnn$eval()
}
input <- torch_ones(1, 1, 10)
hx <- torch_zeros(2, 1, 1000)
out <- rnn(input, hx)
output <- out[[1]]
hy <- out[[2]]
expect_equal_to_tensor(output$min(), output$max(), tolerance = 1e-2)
output_val <- output[1, 1, 1]
if (p == 0 || !train) {
expect_equal_to_r(output_val, 10000)
} else if (p == 1) {
expect_equal_to_r(output_val, 0)
} else {
expect_equal_to_r(output_val > 8000, TRUE)
expect_equal_to_r(output_val < 12000, TRUE)
}
expect_equal_to_tensor(hy[1, , ]$min(), hy[1, , ]$max(), tolerance = 1e-2)
expect_equal_to_tensor(hy[2, , ]$min(), hy[2, , ]$max(), tolerance = 1e-2)
expect_equal_to_r(hy[1, 1, 1], 10)
expect_equal_to_tensor(hy[2, 1, 1], output_val, tolerance = 1e-2)
}
}
})
test_that("rnn packed sequence", {
x <- torch_tensor(rbind(
c(1, 2, 0, 0),
c(1, 2, 3, 0),
c(1, 2, 3, 4)
), dtype = torch_float())
x <- x[, , newaxis]
lens <- torch_tensor(c(2, 3, 4), dtype = torch_long())
p <- nn_utils_rnn_pack_padded_sequence(x, lens,
batch_first = TRUE,
enforce_sorted = FALSE
)
rnn <- nn_rnn(1, 4, nonlinearity = "relu")
out <- rnn(p)
unpack <- nn_utils_rnn_pad_packed_sequence(out[[1]])
expect_tensor_shape(unpack[[1]], c(4, 3, 4))
expect_equal_to_r(unpack[[2]]$to(dtype = torch_int()), c(2, 3, 4))
})
test_that("lstm", {
lstm <- nn_lstm(10, 5)
expect_equal(lstm$mode, "LSTM")
input <- torch_ones(1, 1, 10)
o <- lstm(input)
expect_length(o, 2)
expect_tensor_shape(o[[1]], c(1, 1, 5))
expect_tensor_shape(o[[2]][[1]], c(1, 1, 5))
expect_tensor_shape(o[[2]][[2]], c(1, 1, 5))
expect_tensor_shape(lstm$weight_ih_l1, c(20, 10))
expect_tensor_shape(lstm$weight_hh_l1, c(20, 5))
expect_tensor_shape(lstm$bias_ih_l1, c(20))
expect_tensor_shape(lstm$bias_hh_l1, c(20))
expect_length(lstm$parameters, 4)
with_no_grad({
lstm$weight_ih_l1$fill_(1)
lstm$weight_hh_l1$fill_(1)
lstm$bias_ih_l1$fill_(1)
lstm$bias_hh_l1$fill_(1)
})
z <- lstm(input)
expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.7615868, tolerance = 1e-5)
expect_equal_to_tensor(z[[2]][[1]], torch_ones(1, 1, 5) * 0.7615868, tolerance = 1e-5)
expect_equal_to_tensor(z[[2]][[2]], torch_ones(1, 1, 5), tolerance = 1e-5)
lstm <- nn_lstm(10, 5, bias = FALSE)
expect_tensor_shape(lstm$weight_ih_l1, c(20, 10))
expect_tensor_shape(lstm$weight_hh_l1, c(20, 5))
expect_null(lstm$bias_ih_l1)
expect_null(lstm$bias_hh_l1, NULL)
with_no_grad({
lstm$weight_ih_l1$fill_(1)
lstm$weight_hh_l1$fill_(1)
})
z <- lstm(input)
expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.7615405, tolerance = 1e-5)
expect_equal_to_tensor(z[[2]][[1]], torch_ones(1, 1, 5) * 0.7615405, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[2]], torch_ones(1, 1, 5), tolerance = 1e-4)
lstm <- nn_lstm(10, 5, num_layers = 2)
expect_length(lstm$parameters, 8)
lstm <- nn_lstm(10, 5, num_layers = 3)
expect_length(lstm$parameters, 12)
with_no_grad({
for (p in lstm$parameters) {
p$fill_(1)
}
})
z <- lstm(input)
expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.7580, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[1]][1, , ], torch_ones(1, 5) * 0.7616, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[1]][2, , ], torch_ones(1, 5) * 0.7580, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[1]][3, , ], torch_ones(1, 5) * 0.7580, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[2]][1, , ], torch_ones(1, 5), tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[2]][2, , ], torch_ones(1, 5) * 0.9970, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][[2]][3, , ], torch_ones(1, 5) * 0.9969, tolerance = 1e-4)
})
test_that("gru", {
gru <- nn_gru(10, 5)
expect_equal(gru$mode, "GRU")
input <- torch_ones(1, 1, 10)
o <- gru(input)
expect_length(o, 2)
expect_tensor_shape(o[[1]], c(1, 1, 5))
expect_tensor_shape(o[[2]], c(1, 1, 5))
expect_tensor_shape(gru$weight_ih_l1, c(15, 10))
expect_tensor_shape(gru$weight_hh_l1, c(15, 5))
expect_tensor_shape(gru$bias_ih_l1, c(15))
expect_tensor_shape(gru$bias_hh_l1, c(15))
expect_length(gru$parameters, 4)
with_no_grad({
gru$weight_ih_l1$fill_(1)
gru$weight_hh_l1$fill_(1)
gru$bias_ih_l1$fill_(1)
gru$bias_hh_l1$fill_(1)
})
z <- gru(input)
expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 6.1989e-06, tolerance = 1e-5)
expect_equal_to_tensor(z[[2]], torch_ones(1, 1, 5) * 6.1989e-06, tolerance = 1e-5)
gru <- nn_gru(10, 5, bias = FALSE)
expect_tensor_shape(gru$weight_ih_l1, c(15, 10))
expect_tensor_shape(gru$weight_hh_l1, c(15, 5))
expect_null(gru$bias_ih_l1)
expect_null(gru$bias_hh_l1, NULL)
with_no_grad({
gru$weight_ih_l1$fill_(1)
gru$weight_hh_l1$fill_(1)
})
z <- gru(input)
expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 4.5419e-05, tolerance = 1e-5)
expect_equal_to_tensor(z[[2]], torch_ones(1, 1, 5) * 4.5419e-05, tolerance = 1e-4)
gru <- nn_gru(10, 5, num_layers = 2)
expect_length(gru$parameters, 8)
gru <- nn_gru(10, 5, num_layers = 3)
expect_length(gru$parameters, 12)
with_no_grad({
for (p in gru$parameters) {
p$fill_(1)
}
})
z <- gru(input)
expect_equal_to_tensor(z[[1]], torch_ones(1, 1, 5) * 0.0702, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][1, , ], torch_ones(1, 5) * 6.1989e-06, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][2, , ], torch_ones(1, 5) * 1.1378e-01, tolerance = 1e-4)
expect_equal_to_tensor(z[[2]][3, , ], torch_ones(1, 5) * 7.0209e-02, tolerance = 1e-4)
})
test_that("rnn gpu", {
skip_if_cuda_not_available()
rnn <- nn_rnn(10, 1)
rnn$to(device = "cuda")
input <- torch_ones(1, 1, 10, device = "cuda")
expect_message(out <- rnn(input), regexp = NA)
expect_length(out, 2)
expect_tensor_shape(out[[1]], c(1, 1, 1))
expect_tensor_shape(out[[2]], c(1, 1, 1))
})
test_that("GRU on the GPU keeps its parameters", {
skip_if_cuda_not_available()
model <- nn_module(
initialize = function(input_size, hidden_size) {
self$rnn <- nn_gru(
input_size = input_size,
hidden_size = hidden_size,
batch_first = TRUE
)
self$output <- nn_linear(hidden_size, 1)
},
forward = function(x) {
# list of [output, hidden]
# we are interested in the final timestep only, so we can directly use [[2]]
# but we want to remove the un-needed singleton dimension on the left
x <- self$rnn(x)[[2]]$squeeze(1)
x %>% self$output()
}
)
m <- model(1, 64)
e_pars <- names(m$parameters)
m$cuda()
r_pars <- names(m$parameters)
expect_equal(r_pars, e_pars)
})
test_that("lstm and gru works with packed sequences", {
# regression test for https://github.com/mlverse/torch/issues/499
x <- torch_tensor(rbind(
c(1, 2, 0, 0),
c(1, 2, 3, 0),
c(1, 2, 3, 4)
), dtype = torch_float())
x <- x[, , newaxis]
lens <- torch_tensor(c(2, 3, 4), dtype = torch_long())
p <- nn_utils_rnn_pack_padded_sequence(x, lens,
batch_first = TRUE,
enforce_sorted = FALSE
)
rnn <- nn_lstm(1, 4)
out <- rnn(p)
unpack <- nn_utils_rnn_pad_packed_sequence(out[[1]])
expect_tensor_shape(unpack[[1]], c(4, 3, 4))
rnn <- nn_gru(1, 4)
out <- rnn(p)
unpack <- nn_utils_rnn_pad_packed_sequence(out[[1]])
expect_tensor_shape(unpack[[1]], c(4, 3, 4))
})
test_that("gru can be traced", {
x <- nn_gru(10, 10)
tr <- jit_trace(x, torch_randn(10, 10, 10))
v <- torch_randn(10, 10, 10)
expect_equal_to_tensor(
x(v)[[1]],
tr(v)[[1]]
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.