## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----eval=F-------------------------------------------------------------------
#
# library(tensorflow)
# library(keras)
# library(data.table)
# library(tfdatasets)
# library(tfaddons)
#
# # Preprocessing -----------------------------------------------------------
#
# # Downloads one of the bilingual tab-separated datasets offered at
# # http://www.manythings.org/anki/ into a directory "data" (skipped if the
# # files are already present). This example translates English to Dutch.
# download_data = function() {
#   if (!dir.exists('data')) {
#     dir.create('data')
#   }
#   if (!file.exists('data/nld-eng.zip')) {
#     download.file('http://www.manythings.org/anki/nld-eng.zip',
#                   destfile = file.path('data', 'nld-eng.zip'))
#     unzip('data/nld-eng.zip', exdir = 'data')
#   }
# }
#
# download_data()
#
# filepath <- file.path("data", "nld.txt")
#
# df = data.table::fread(filepath, header = FALSE, encoding = 'UTF-8',
#                        select = c(1, 2), nrows = -1)  # nrows = -1 reads all rows
#
# text_cleaner <- function(text) {
#   text %>%
#     # replace non-ASCII characters
#     textclean::replace_non_ascii() %>%
#     # remove all non-relevant symbols (letters, spaces, and apostrophes are retained);
#     # strip() also lower-cases the text
#     textclean::strip(apostrophe.remove = TRUE) %>%
#     # wrap each sentence in the boundary tokens the decoder relies on
#     paste('<start> ', ., ' <end>')
# }
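#
# # Quick sanity check (illustrative): text_cleaner("Run!") yields something like
# # "<start>  run  <end>" -- lower-cased, punctuation stripped, boundary tokens added.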
#
# # clean both columns: V1 (English source) and V2 (Dutch target)
# df = sapply(1:2, function(x) text_cleaner(df[[x]])) %>% as.data.table()
#
# text_tok <- function(text) {
#   tokenizer = text_tokenizer(filters = '')
#   tokenizer %>% fit_text_tokenizer(text)
#   # word_index maps each token to an integer id (starting at 1);
#   # its length is the vocabulary size
#   vocab_size = tokenizer$word_index
#   data = tokenizer %>%
#     texts_to_sequences(text) %>%
#     pad_sequences(padding = 'post')
#   list(vocab_size, data, tokenizer)
# }
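#
# # Illustrative use on a toy corpus (vs, dat, and tok are throwaway names):
# c(vs, dat, tok) %<-% text_tok(c('<start> hi <end>', '<start> hello there <end>'))
# dim(dat)     # 2 x 4: two sequences, zero-padded at the end to the longest one
# length(vs)   # number of distinct tokens in the toy corpus
# rm(vs, dat, tok)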
#
# c(input_vocab_size, data_en, tokenizer_en) %<-% text_tok(df[['V1']])
#
# # note: the "_de" suffix is only a label; the target language here is Dutch
# c(output_vocab_size, data_de, tokenizer_de) %<-% text_tok(df[['V2']])
#
#
# # Split the dataset into 80% train / 20% test
# indices_to_take = sample.int(n = nrow(df), size = floor(0.8 * nrow(df)), replace = FALSE)
#
# split_data <- function(data) {
#   list(data[indices_to_take, ], data[-indices_to_take, ])
# }
#
#
# c(en_train, en_test, de_train, de_test) %<-% c(split_data(data_en), split_data(data_de))
#
# rm(df, filepath, indices_to_take, download_data, split_data, text_cleaner, text_tok)
#
# batch_size = 64L
# buffer_size = nrow(en_train)
# steps_per_epoch = buffer_size %/% batch_size
# embedding_dims = 256L
# rnn_units = 1024L
# dense_units = 1024L
# dtype = tf$float32  # used to initialize the decoder cell's zero state
#
#
# dataset = tensor_slices_dataset(list(en_train, de_train)) %>%
#   dataset_shuffle(buffer_size) %>%
#   dataset_batch(batch_size, drop_remainder = TRUE)
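#
# # Optional check: each batch should be a pair of integer tensors with shapes
# # [batch_size, ncol(data_en)] and [batch_size, ncol(data_de)]:
# # dataset %>% dataset_take(1) %>% iterate()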
#
#
# EncoderNetwork = reticulate::PyClass(
#   'EncoderNetwork',
#   inherit = tf$keras$Model,
#   defs = list(
#
#     `__init__` = function(self, input_vocab_size, embedding_dims, rnn_units) {
#
#       super()$`__init__`()
#
#       # + 1L because token ids start at 1 and 0 is reserved for padding
#       self$encoder_embedding = layer_embedding(input_dim = length(input_vocab_size) + 1L,
#                                                output_dim = embedding_dims)
#       self$encoder_rnnlayer = layer_lstm(units = rnn_units, return_sequences = TRUE,
#                                          return_state = TRUE)
#       NULL
#     }
#   )
# )
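#
# # Shape sketch (illustrative, not run here; 'enc' is a throwaway instance):
# # enc = EncoderNetwork(input_vocab_size, embedding_dims, rnn_units)
# # emb = enc$encoder_embedding(en_train[1:batch_size, ])  # [64, Tx, 256]
# # c(seq_out, h, c_state) %<-% enc$encoder_rnnlayer(emb)  # [64, Tx, 1024], [64, 1024], [64, 1024]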
#
#
#
# DecoderNetwork = reticulate::PyClass(
#   'DecoderNetwork',
#   inherit = tf$keras$Model,
#   defs = list(
#
#     `__init__` = function(self, output_vocab_size, embedding_dims, rnn_units) {
#
#       super()$`__init__`()
#       # + 1L again: token ids start at 1 and 0 is the padding token
#       self$decoder_embedding = layer_embedding(input_dim = length(output_vocab_size) + 1L,
#                                                output_dim = embedding_dims)
#       # projects decoder outputs onto the target vocabulary (raw logits, no softmax)
#       self$dense_layer = layer_dense(units = length(output_vocab_size) + 1L)
#       self$decoder_rnncell = tf$keras$layers$LSTMCell(rnn_units)
#       # Sampler: feeds the ground-truth token at each step (teacher forcing)
#       self$sampler = sampler_training()
#       # Create the attention mechanism with memory = NULL
#       self$attention_mechanism = self$build_attention_mechanism(dense_units, NULL,
#                                                                 rep(ncol(data_en), batch_size))
#       self$rnn_cell = self$build_rnn_cell(batch_size)
#       self$decoder = decoder_basic(cell = self$rnn_cell, sampler = self$sampler,
#                                    output_layer = self$dense_layer)
#       NULL
#     },
#
#     build_attention_mechanism = function(self, units, memory, memory_sequence_length) {
#       attention_luong(units = units, memory = memory,
#                       memory_sequence_length = memory_sequence_length)
#     },
#
#     build_rnn_cell = function(self, batch_size) {
#       attention_wrapper(cell = self$decoder_rnncell,
#                         attention_mechanism = self$attention_mechanism,
#                         attention_layer_size = dense_units)
#     },
#
#     build_decoder_initial_state = function(self, batch_size, encoder_state, dtype) {
#       decoder_initial_state = self$rnn_cell$get_initial_state(batch_size = batch_size,
#                                                               dtype = dtype)
#       # seed the wrapper's cell state with the encoder's final state
#       decoder_initial_state$clone(cell_state = encoder_state)
#     }
#   )
# )
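#
# # Note: the attention mechanism is built with memory = NULL because the encoder
# # activations do not exist yet; train_step() attaches them per batch through
# # attention_mechanism$setup_memory() before decoding.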
#
# encoderNetwork = EncoderNetwork(input_vocab_size, embedding_dims, rnn_units)
# decoderNetwork = DecoderNetwork(output_vocab_size, embedding_dims, rnn_units)
# optimizer = tf$keras$optimizers$Adam()
#
#
#
# loss_function <- function(y_pred, y) {
#   # shape of y:      [batch_size, Ty]
#   # shape of y_pred: [batch_size, Ty, output_vocab_size]
#   # the dense layer emits raw logits, hence from_logits = TRUE
#   loss = keras::loss_sparse_categorical_crossentropy(y, y_pred, from_logits = TRUE)
#   mask = tf$logical_not(tf$math$equal(y, 0L))  # 0 for padded positions, 1 otherwise
#   mask = tf$cast(mask, dtype = loss$dtype)
#   loss = mask * loss
#   tf$reduce_mean(loss)
# }
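#
# # Masking illustration (hypothetical values): for a target row y = [12, 5, 0, 0]
# # the mask is [1, 1, 0, 0], so the two padded timesteps contribute nothing to
# # the mean loss.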
#
# train_step <- function(input_batch, output_batch, encoder_initial_cell_state) {
#
#   with(tf$GradientTape() %as% tape, {
#     encoder_emb_inp = encoderNetwork$encoder_embedding(input_batch)
#     c(a, a_tx, c_tx) %<-% encoderNetwork$encoder_rnnlayer(encoder_emb_inp,
#                                                           initial_state = encoder_initial_cell_state)
#
#     # [last step activations, last memory state] of the encoder are passed on to the decoder network.
#     # Prepare the decoder input & output sequences: the decoder reads the target
#     # sequence without its final token and must predict the same sequence
#     # shifted one timestep ahead.
#     Ty = ncol(data_de)
#     decoder_input  = tf$convert_to_tensor(output_batch %>% as.array() %>% .[, 1:(Ty - 1L)])  # drop <end>
#     # compare logits with the one-step-shifted version of decoder_input
#     decoder_output = tf$convert_to_tensor(output_batch %>% as.array() %>% .[, 2:Ty])         # drop <start>
#
#     # Decoder embeddings
#     decoder_emb_inp = decoderNetwork$decoder_embedding(decoder_input)
#
#     # Set up the decoder memory from the encoder output and a zero state for AttentionWrapperState
#     decoderNetwork$attention_mechanism$setup_memory(a)
#     decoder_initial_state = decoderNetwork$build_decoder_initial_state(batch_size,
#                                                                        encoder_state = list(a_tx, c_tx),
#                                                                        dtype = tf$float32)
#     # BasicDecoderOutput
#     c(outputs, res1, res2) %<-% decoderNetwork$decoder(decoder_emb_inp,
#                                                        initial_state = decoder_initial_state,
#                                                        sequence_length = rep(Ty - 1L, batch_size))
#
#     logits = outputs$rnn_output
#     # Calculate the masked loss
#     loss = loss_function(logits, decoder_output)
#   })
#
#   # Collect all trainable layer variables / weights
#   variables = c(encoderNetwork$trainable_variables, decoderNetwork$trainable_variables)
#   # differentiate loss w.r.t. the variables
#   gradients = tape$gradient(loss, variables)
#   # grads_and_vars: list of (gradient, variable) pairs
#   grads_and_vars = purrr::transpose(list(gradients, variables))
#   optimizer$apply_gradients(grads_and_vars)
#   loss
# }
#
# # zero hidden and cell states for the encoder LSTM
# initialize_initial_state = function() {
#   list(tf$zeros(c(batch_size, rnn_units)), tf$zeros(c(batch_size, rnn_units)))
# }
#
#
# epochs = 1
#
# for (i in seq_len(epochs)) {
#   encoder_initial_cell_state = initialize_initial_state()
#   total_loss = 0.0
#   res = dataset %>% dataset_take(steps_per_epoch) %>% iterate()
#   for (batch in seq_along(res)) {
#     c(input_batch, output_batch) %<-% res[[batch]]
#     batch_loss = train_step(input_batch, output_batch, encoder_initial_cell_state)
#     total_loss = total_loss + batch_loss
#     if (batch %% 5 == 0) {
#       print(paste('batch loss:', batch_loss$numpy(), 'epoch', i, 'batch', batch))
#     }
#   }
#   print(paste('epoch', i, 'mean loss:', (total_loss / steps_per_epoch)$numpy()))
# }
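#
# # After training, the weights can be persisted for later reuse
# # (hypothetical paths):
# # encoderNetwork$save_weights('encoder_weights.h5')
# # decoderNetwork$save_weights('decoder_weights.h5')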
#