#' Train 1-dimensional Convolution Network
#'
#' @name train_conv_1d
#'
#' @description Train 1-dimensional Convolution Network using keras on the given dataset
#'
#' @param data the sentiment140 train dataset with \code{text} for text of the tweet and \code{polarity} for polarity.
#' @param max_words Maximum number of words to consider using word frequency measure.
#' @param maxlen Maximum length of a sequence.
#' @param embedding_dim Output dimension of the embedding layer.
#' @param epochs Number of epochs to run the training for.
#' @param batch_size Batch Size for model fitting.
#' @param validation_split Split ratio for validation
#' @param conv1d_filters Number of filters i.e. output dimension for convolution layers.
#' @param conv1d_kernel_size Window size for convolution layers.
#' @param conv1d_pool_size Pool size for max pooling.
#' @param seed Seed for shuffling training data.
#' @param model_save_path File path location for saving model.
#' @return A plot of the training history showing training vs. validation loss and accuracy. As a side effect, the serialized model is saved to \code{model_save_path}.
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate
#' @importFrom rlang .data
#' @importFrom keras texts_to_sequences compile optimizer_rmsprop fit serialize_model
#' @importFrom graphics plot
#'
#' @keywords modelling keras
#'
#' @examples
#' \dontrun{
#' data(sentiment140_train)
#' train_conv_1d(model_save_path = "./train_no_glove_conv_1d.h5")
#' }
#'
utils::globalVariables(c("sentiment140_train"))
train_conv_1d <- function(data = sentiment140_train,
                          max_words = keras_config_params$max_words,
                          maxlen = keras_config_params$maxlen,
                          embedding_dim = keras_config_params$embedding_dim,
                          epochs = 20L,
                          batch_size = 32L,
                          validation_split = 0.2,
                          conv1d_filters = 32L,
                          conv1d_kernel_size = 3L,
                          conv1d_pool_size = 5L,
                          seed = config_params$default_seed,
                          model_save_path) {
  # Fail fast: training can take a long time, so a missing save path must
  # error before any work is done, not after model fitting.
  if (missing(model_save_path)) {
    stop("`model_save_path` must be supplied.", call. = FALSE)
  }

  # Encode polarity as a 0/1 numeric target for binary crossentropy.
  data <- data %>%
    mutate(polarity = ifelse(.data$polarity == "Positive", 1, 0))

  # Fit the tokenizer on the training text (vocabulary capped at max_words).
  tokenizer <- get_tokenizer(data = data,
                             max_words = max_words)

  # Convert raw tweet text into integer index sequences.
  sequences <- keras::texts_to_sequences(tokenizer, data$text)
  word_index <- tokenizer$word_index
  # message() (not cat()) so callers can suppress this status line.
  message("Found ", length(word_index), " unique tokens.")

  # Pad/truncate sequences and shuffle into x_train / y_train.
  training_data <- generate_training_data(data = data,
                                          sequences = sequences,
                                          maxlen = maxlen,
                                          seed = seed)

  # Build the 1-D convolution architecture.
  model <- create_conv1d_model(max_words = max_words,
                               embedding_dim = embedding_dim,
                               maxlen = maxlen,
                               conv1d_filters = conv1d_filters,
                               conv1d_kernel_size = conv1d_kernel_size,
                               conv1d_pool_size = conv1d_pool_size)

  # Compile for binary classification. `learning_rate` replaces the
  # deprecated `lr` argument in current keras releases.
  model %>% keras::compile(
    optimizer = keras::optimizer_rmsprop(learning_rate = 1e-4),
    loss = "binary_crossentropy",
    metrics = c("acc")
  )

  # Train, holding out `validation_split` of the data for validation.
  history <- model %>% keras::fit(
    training_data$x_train, training_data$y_train,
    epochs = epochs,
    batch_size = batch_size,
    validation_split = validation_split
  )

  # Persist the trained model, then return the training-history plot.
  saveRDS(keras::serialize_model(model = model), file = model_save_path)
  graphics::plot(history)
}
# NOTE(review): removed stray web-scrape text ("Add the following code to your
# website...") that was not valid R and would have broken parsing of this file.