# tune_fasttext.R function Tests ------------------------
# dummy data set up -------------------------
# Hyper-parameter grid handed to tune_fasttext() further down.
# Every seq() step except the one for `ws` is larger than its range, so only
# `ws` contributes more than one value and the grid expands to two rows.
fast.text.parameters <- expand.grid(
  lr = seq(from = 4, to = 4.3, by = 0.5),      # -> 4
  epoch = seq(from = 30, to = 33, by = 10),    # -> 30
  dim = seq(from = 100, to = 120, by = 25),    # -> 100
  ws = seq(from = 4, to = 6, by = 2),          # -> 4, 6
  wordNgrams = 2,
  # loss = "softmax",
  minn = 2,
  maxn = 6
  # n_sample_negatives = 5,
)
# Toy corpus: 23 short sentences, each labelled "nfl" or "soccer".
# The sentences are repeats of two three-sentence sets; local() keeps the
# helper vectors out of the global environment so only `df` is created.
df <- local({
  nfl_text <- c(
    "the seahawks are my favorite team.",
    "russel wilson should be an mvp",
    "seattle are superbowl champions"
  )
  soccer_text <- c(
    "arsenal is the champion of the fa cup.",
    "thierry henry was the best arsenal player",
    "arsen wegner was the best manager."
  )

  data.frame(
    text_id = 1:23,
    text = c(
      nfl_text, nfl_text,            # rows 1-6
      soccer_text, soccer_text,      # rows 7-12
      nfl_text[c(2, 3, 1, 2, 3)],    # rows 13-17 (only five nfl rows here)
      soccer_text, soccer_text       # rows 18-23
    ),
    labels = rep(
      c("nfl", "soccer", "nfl", "soccer"),
      times = c(6, 6, 5, 6)
    )
  )
})
# tune_fasttext testing -----------------
# `missing_dim` is used below but is never assigned anywhere in this script,
# so the call would stop with "object 'missing_dim' not found".
# NOTE(review): going by its name it is presumably the tuning grid without the
# `dim` column -- confirm against the intended test case. It is only built
# here when the session does not already provide one.
if (!exists("missing_dim")) {
  missing_dim <- fast.text.parameters[
    , setdiff(names(fast.text.parameters), "dim"),
    drop = FALSE
  ]
}

# Sequential 5-fold tuning run on the toy corpus with the `missing_dim` grid.
foo <- textwhiz::tune_fasttext(
  k = 5,
  text = df$text,
  label = df$labels,
  parameters = missing_dim,
  seed = 123,
  text_ids = df$text_id,
  parallel = FALSE  # spelled out: `F` is an ordinary, reassignable variable
)
# Time a 3-fold tuning run over the full grid with parallel = TRUE.
system.time({
  textwhiz::tune_fasttext(
    k = 3,
    text = df$text,
    label = df$labels,
    parameters = fast.text.parameters,
    seed = 123,
    text_ids = df$text_id,
    parallel = TRUE  # spelled out: `T` is an ordinary, reassignable variable
  )
})
# Time the same 3-fold tuning run with parallel = FALSE, for comparison with
# the parallel timing.
system.time({
  textwhiz::tune_fasttext(
    k = 3,
    text = df$text,
    label = df$labels,
    parameters = fast.text.parameters,
    seed = 123,
    text_ids = df$text_id,
    parallel = FALSE  # spelled out: `F` is an ordinary, reassignable variable
  )
})
# test forensic_fasttext ----------
# Single-combination grid for forensic_fasttext(): every entry is a scalar,
# so expand.grid() returns exactly one row.
fst.txt <- expand.grid(list(
  lr = 4.3,
  epoch = 30,
  dim = 120,
  ws = 6,
  wordNgrams = 2,
  # loss = "softmax",
  minn = 2,
  maxn = 6
  # n_sample_negatives <- 5,
))
# `missing_dim` is used below but is never assigned anywhere in this script,
# so the call would stop with "object 'missing_dim' not found".
# NOTE(review): going by its name it is presumably a parameter grid without
# the `dim` column -- confirm against the intended test case. It is only built
# here when the session does not already provide one.
if (!exists("missing_dim")) {
  missing_dim <- fst.txt[, setdiff(names(fst.txt), "dim"), drop = FALSE]
}

# 3-fold forensic run on the toy corpus with the `missing_dim` grid.
foo <- textwhiz::forensic_fasttext(
  k = 3,
  text = df$text,
  label = df$labels,
  parameters = missing_dim,
  seed = 123,
  text_ids = df$text_id
)

# Pull two components out of the result for inspection.
incorrect <- foo$incorrect
topic_m <- foo$topic.metrics
# Time a 3-fold forensic run using the single-row grid `fst.txt`.
system.time(
  textwhiz::forensic_fasttext(
    k = 3,
    text = df$text,
    label = df$labels,
    parameters = fst.txt,
    seed = 123,
    text_ids = df$text_id
  )
)
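# NOTE(review): the two sentences below are not R code; they appear to be
# boilerplate carried over from a web page. Kept as comments so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.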