## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)
## -----------------------------------------------------------------------------
# library(fastai)
# library(magrittr)
#
# # Download WikiText-2; the files land in the 'wikitext-2' folder
# URLs_WIKITEXT()
#
# path = 'wikitext-2'
#
# # Read the train and test splits and stack them into a single data frame
# train = data.table::fread(paste(path, 'train.csv', sep = '/'), header = FALSE, fill = TRUE)
#
# test = data.table::fread(paste(path, 'test.csv', sep = '/'), header = FALSE, fill = TRUE)
#
# df = rbind(train, test)
#
# rm(train, test)
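#
# # Optional sanity check (illustrative): each row of df should hold one text
# # fragment in column V1
# str(df)
# head(df$V1, 3)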
## -----------------------------------------------------------------------------
# # Import HuggingFace transformers via reticulate and load the pretrained
# # GPT-2 tokenizer and language-model head
# tr = reticulate::import('transformers')
# pretrained_weights = 'gpt2'
# tokenizer = tr$GPT2TokenizerFast$from_pretrained(pretrained_weights)
# model = tr$GPT2LMHeadModel$from_pretrained(pretrained_weights)
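#
# # A minimal sketch of what the tokenizer does, handy for checking that the
# # pretrained weights loaded correctly (the example string is arbitrary):
# toks = tokenizer$tokenize('A unicorn is a magical creature.')
# tokenizer$convert_tokens_to_ids(toks)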
## -----------------------------------------------------------------------------
# # Tokenize a string and return a tensor of GPT-2 token ids
# tokenize = function(text) {
#   toks = tokenizer$tokenize(text)
#   tensor(tokenizer$convert_tokens_to_ids(toks))
# }
#
# tokenized = vector('list', length(df$V1))
#
# # Tokenizing the whole corpus takes a while, so report progress every 100 rows
# for (i in seq_along(df$V1)) {
#   tokenized[[i]] = tokenize(df$V1[i])
#   if (i %% 100 == 0) {
#     print(i)
#   }
# }
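#
# # Note: the tensors above are Python objects, so they cannot be cached with
# # saveRDS() directly. If you want to cache the slow tokenization step, one
# # sketch is to store the raw integer ids and rebuild the tensors after
# # loading (the file name is illustrative):
# ids = lapply(df$V1, function(x) tokenizer$encode(x))
# saveRDS(ids, 'wikitext2_ids.rds')
# # tokenized = lapply(readRDS('wikitext2_ids.rds'), tensor)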
## -----------------------------------------------------------------------------
# # 80/20 train/validation split over the rows of df
# tot = seq_len(nrow(df))
# tr_idx = sample(nrow(df), 0.8 * nrow(df))
# ts_idx = tot[!tot %in% tr_idx]
# splits = list(tr_idx, ts_idx)
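#
# # Quick check (optional): the two index sets should partition the rows
# length(tr_idx) + length(ts_idx) == nrow(df)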
## -----------------------------------------------------------------------------
# tls = TfmdLists(tokenized, TransformersTokenizer(tokenizer),
#                 splits = splits,
#                 dl_type = LMDataLoader())
#
# bs = 8    # batch size
# sl = 100  # sequence length
# dls = tls %>% dataloaders(bs = bs, seq_len = sl)
#
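# # Optional: peek at one batch to confirm shapes (assuming the R wrapper
# # exposes one_batch(); each element should be a bs x sl tensor of token ids)
# b = dls %>% one_batch()
#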
# # Now we are ready to create our Learner, the fastai object that groups data,
# # model and loss function and handles model training and inference. Since we
# # are in a language-model setting, we pass perplexity as a metric, and we need
# # the TransformersDropOutput() callback, which drops the extra outputs
# # returned by the HuggingFace model so only the logits reach the loss.
# # Lastly, we use mixed precision to save every bit of memory we can (and if
# # you have a modern GPU, it will also make training faster):
# learn = Learner(dls, model, loss_func = CrossEntropyLossFlat(),
#                 cbs = list(TransformersDropOutput()),
#                 metrics = Perplexity())$to_fp16()
#
# learn %>% fit_one_cycle(1, 1e-4)
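#
# # After training, the weights can be kept for later reuse; Learner$save()
# # writes into the learner's model directory (the name here is illustrative)
# learn$save('gpt2_wikitext2')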
## -----------------------------------------------------------------------------
# # Generate text from a prompt with beam search
# prompt = "\n = Unicorn = \n \n A unicorn is a magical creature with a rainbow tail and a horn"
# prompt_ids = tokenizer$encode(prompt)
# inp = tensor(prompt_ids)[NULL]$cuda()  # add a batch dimension and move to the GPU
# preds = learn$model$generate(inp, max_length = 80L, num_beams = 5L, temperature = 1.5)
# tokenizer$decode(as.integer(preds[0]$cpu()$numpy()))
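#
# # Beam search can be repetitive; HuggingFace generate() also supports
# # sampling. A sketch with illustrative parameter values:
# preds_s = learn$model$generate(inp, max_length = 80L, do_sample = TRUE,
#                                top_k = 50L, top_p = 0.95)
# tokenizer$decode(as.integer(preds_s[0]$cpu()$numpy()))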