# Fixture: two ordinary tweets, each containing at least one hashtag.
tweets <- data.frame(
  status_id = c(1234, 5678),
  text = c(
    "I tweet about one thing #onething #things",
    "I tweet about another #anotherthing"
  ),
  stringsAsFactors = FALSE
)

# Fixture: edge-case tweets -- a single word, a word followed by a URL,
# plain multi-word text, and a URL-like token followed by a mention.
unusual_tweets <- data.frame(
  status_id = c(1234, 345, 5435, 6543),
  text = c(
    "Onewordonly",
    "Oneword http://t.co.asjkre/34hjl",
    "Many words thank you",
    "t.co/we23f.jx9km @username"
  ),
  stringsAsFactors = FALSE
)
# Feature tables computed once at file level and shared by the tests below.

# n_ngram = 1, top_num = 3, default type.
ngram_df_n1 <- feature_ngrams(
  tweets, status_id, text,
  n_ngram = 1,
  top_num = 3
)

# Same settings as above, but with type = "character_shingles".
ngram_df_n1_cs <- feature_ngrams(
  tweets, status_id, text,
  n_ngram = 1,
  top_num = 3,
  type = "character_shingles"
)

# n_ngram = 3, top_num = 1.
ngram_df_n3 <- feature_ngrams(
  tweets, status_id, text,
  n_ngram = 3,
  top_num = 1
)

# Every optional argument left at its default.
ngram_df_default <- feature_ngrams(tweets, status_id, text)

# Edge-case tweets with n_ngram = 2.
unusual_tweet_df <- feature_ngrams(
  unusual_tweets, status_id, text,
  n_ngram = 2
)

# Same call without status_id 5435 ("Many words thank you").
# NOTE(review): presumably no n-gram features remain once that row is gone --
# the dimension test expects a single column; confirm against feature_ngrams.
unusual_tweet_df_nofeatures <- feature_ngrams(
  unusual_tweets[unusual_tweets$status_id != 5435, ],
  status_id, text,
  n_ngram = 2
)
test_that("ngram dimensions are expected", {
  # Each case pairs a precomputed feature table with its expected
  # c(rows, columns).
  # NOTE(review): the column counts look like one id column plus the
  # retained n-gram columns -- confirm against feature_ngrams.
  cases <- list(
    ngram_df_n1 = list(features = ngram_df_n1, dims = c(2L, 4L)),
    ngram_df_n1_cs = list(features = ngram_df_n1_cs, dims = c(2L, 4L)),
    ngram_df_n3 = list(features = ngram_df_n3, dims = c(2L, 2L)),
    ngram_df_default = list(features = ngram_df_default, dims = c(2L, 10L)),
    unusual_tweet_df = list(features = unusual_tweet_df, dims = c(4L, 4L)),
    unusual_tweet_df_nofeatures = list(
      features = unusual_tweet_df_nofeatures,
      dims = c(3L, 1L)
    )
  )

  for (case_name in names(cases)) {
    case <- cases[[case_name]]
    # `info` names the fixture so a failure still identifies which table broke.
    expect_equal(dim(case[["features"]]), case[["dims"]], info = case_name)
  }
})
test_that("ngram function doesn't return NAs", {
  # All precomputed feature tables, keyed by fixture name for diagnostics.
  feature_tables <- list(
    ngram_df_n1 = ngram_df_n1,
    ngram_df_n1_cs = ngram_df_n1_cs,
    ngram_df_n3 = ngram_df_n3,
    ngram_df_default = ngram_df_default,
    unusual_tweet_df = unusual_tweet_df,
    unusual_tweet_df_nofeatures = unusual_tweet_df_nofeatures
  )

  for (table_name in names(feature_tables)) {
    # Count missing cells across the whole table; none are allowed.
    na_count <- sum(is.na(feature_tables[[table_name]]))
    expect_equal(na_count, 0, info = table_name)
  }
})
test_that("ngram errors work as expected", {
  # "caracter_shingles" is misspelled on purpose: an unsupported `type`
  # value must be rejected with the message below.
  expect_error(
    feature_ngrams(tweets, status_id, text, type = "caracter_shingles"),
    regexp = "type must be either ngram or character_shingles"
  )
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.