# fastNaiveBayes vignette code: package timing benchmarks and usage examples.
## ----setup, include = FALSE---------------------------------------------------
# Default chunk options for the rendered vignette: collapse source and
# output into one block, and prefix printed output with "#>".
chunk_opts <- list(
  collapse = TRUE,
  comment = "#>"
)
do.call(knitr::opts_chunk$set, chunk_opts)
## ----eval=FALSE---------------------------------------------------------------
# # Benchmark fastNaiveBayes against other naive Bayes implementations
# # (klaR, e1071, bnlearn, quanteda, naivebayes, Rfast) for the Bernoulli,
# # multinomial, and Gaussian event models; timings are written to CSV.
# ###################### LIBRARIES ###########################
# library(tm) #used for text mining
# library(e1071) #this package includes the naive Bayes algorithm
# library(Matrix)
# library(microbenchmark)
# library(fastNaiveBayes)
# library(quanteda)
# library(naivebayes)
# library(bnlearn)
# library(klaR)
# library(data.table)
#
# ############################ Timing Script ################
# results <- NULL
#
# # Bernoulli Event Model
# tweets <- fastNaiveBayes::tweetsDTM
#
# # Binarize the outcome: 'negative' vs. everything else.
# y_var <- tweets$airline_sentiment
# y_var <- as.factor(ifelse(y_var=='negative','negative','non-negative'))
# tweets <- tweets[,2:ncol(tweets)]
# # Clip term counts to presence/absence for the Bernoulli model.
# tweets[tweets>1] <- 1
#
# # Drop constant columns (all-zero or all-one); they carry no information
# # and break some of the fitters.
# tweets <- tweets[,which(colSums(tweets)!=0)]
# tweets <- tweets[,which(colSums(tweets)!=nrow(tweets))]
#
# tweet_mat <- as.matrix(tweets)
# sparse_tweets <- Matrix(as.matrix(tweet_mat), sparse = TRUE)
#
# # klaR / e1071 / bnlearn expect factor columns for Bernoulli features.
# for (i in seq_len(ncol(tweets))) {
#   tweets[[i]] <- as.factor(tweets[[i]])
# }
#
# # BNLearn needs the class variable bound into one data frame.
# bn_tweets <- cbind(y_var, tweets)
# colnames(bn_tweets)[1] <- 'y_var'
#
# # Quanteda works on a document-feature matrix.
# dfm <- as.dfm(tweet_mat)
#
# res <- microbenchmark(
# klar = predict(klaR::NaiveBayes(x=tweets, grouping = y_var, fL=1), tweets),
# e1071 = predict(e1071::naiveBayes(tweets, y_var, laplace = 1), tweets),
# fastNaiveBayes = predict(fastNaiveBayes.bernoulli(tweet_mat, y_var, laplace = 1), tweet_mat),
# fastNaiveBayes_sparse = predict(fastNaiveBayes.bernoulli(sparse_tweets, y_var, laplace = 1), sparse_tweets),
# bnlearn = predict(bnlearn::naive.bayes(bn_tweets, 'y_var'), bn_tweets),
# quanteda = predict(quanteda::textmodel_nb(dfm, y_var, prior = "docfreq", distribution = "Bernoulli"),
# newdata = dfm),
# naivebayes = predict(naivebayes::naive_bayes(tweets, y_var, laplace = 1), newdata = tweets),
# times = 3,
# unit = "ms"
# )
#
# res <- as.data.table(res)
# res[,nrows:=nrow(tweet_mat)]
# res[,ncols:=ncol(tweet_mat)]
# res[,model:='Bernoulli']
#
# results <- res
#
# # Multinomial Event Model: reload the raw counts (no binarization).
# tweets <- fastNaiveBayes::tweetsDTM
#
# y_var <- tweets$airline_sentiment
# y_var <- as.factor(ifelse(y_var=='negative','negative','non-negative'))
# tweets <- tweets[,2:ncol(tweets)]
#
# tweets <- tweets[,which(colSums(tweets)!=0)]
#
# tweet_mat <- as.matrix(tweets)
# sparse_tweets <- Matrix(as.matrix(tweet_mat), sparse = TRUE)
#
# # Quanteda
# dfm <- as.dfm(tweet_mat)
#
# res <- microbenchmark(
# fastNaiveBayes = predict(fastNaiveBayes.multinomial(tweet_mat, y_var, laplace = 1), tweet_mat),
# fastNaiveBayes_sparse = predict(fastNaiveBayes.multinomial(sparse_tweets, y_var, laplace = 1), sparse_tweets),
# quanteda = predict(quanteda::textmodel_nb(dfm, y_var, prior = "docfreq", distribution = "multinomial"),
# newdata = dfm),
# Rfast = Rfast::multinom.nb(tweet_mat, tweet_mat, y_var),
# times = 3,
# unit = "ms"
# )
#
# res <- as.data.table(res)
# res[,nrows:=nrow(tweet_mat)]
# res[,ncols:=ncol(tweet_mat)]
# res[,model:='Multinomial']
#
# results <- rbind(results, res)
#
# # Gaussian Event Model: inflate mtcars (2^6 x rows, 2^6 x cols) so the
# # timing differences are measurable.
# cars <- mtcars
# for(i in 1:6){
# cars <- rbind(cars, cars)
# }
#
# y_var <- cars$mpg
# y_var <- as.factor(ifelse(y_var>20,'negative','non-negative'))
#
# cars <- cars[,3:7]
# for(i in 1:6){
# cars <- cbind(cars, cars)
# }
#
# cars_mat <- as.matrix(cars)
# sparse_cars <- Matrix(as.matrix(cars_mat), sparse = TRUE)
#
# res <- microbenchmark(
# klar = predict(klaR::NaiveBayes(x=cars_mat, grouping = y_var, fL=1), cars_mat),
# e1071 = predict(e1071::naiveBayes(cars_mat, y_var, laplace = 1), cars_mat),
# naivebayes = predict(naivebayes::naive_bayes(cars_mat, y_var, laplace = 1), newdata = cars_mat),
# fastNaiveBayes = predict(fastNaiveBayes.gaussian(cars_mat, y_var), cars_mat),
# fastNaiveBayes_sparse = predict(fastNaiveBayes.gaussian(sparse_cars, y_var), sparse_cars),
# Rfast = Rfast::gaussian.nb(cars_mat, cars_mat, y_var),
# times = 3,
# unit = "ms"
# )
#
# res <- as.data.table(res)
# res[,nrows:=nrow(cars_mat)]
# res[,ncols:=ncol(cars_mat)]
# res[,model:='Gaussian']
#
# results <- rbind(results, res)
#
# print(results)
# fwrite(results, file = "./package_timings.csv", row.names = FALSE)
## ----eval=FALSE---------------------------------------------------------------
# # Usage examples: fit each fastNaiveBayes event model on mtcars and
# # report the in-sample misclassification rate.
# library(fastNaiveBayes)
#
# cars <- mtcars
# y <- as.factor(ifelse(cars$mpg>25,'High','Low'))
# x <- cars[,2:ncol(cars)]
#
# # Mixed event models: let the package pick a distribution per column.
# dist <- fastNaiveBayes::fastNaiveBayes.detect_distribution(x, nrows = nrow(x))
# print(dist)
# mod <- fastNaiveBayes.mixed(x,y,laplace = 1)
# pred <- predict(mod, newdata = x)
# mean(pred!=y)
#
# # Bernoulli only: treat the discrete columns as factors and
# # one-hot encode them (no intercept) into a 0/1 design matrix.
# vars <- c(dist$bernoulli, dist$multinomial)
# newx <- x[,vars]
# for (i in seq_len(ncol(newx))) {
#   newx[[i]] <- as.factor(newx[[i]])
# }
# new_mat <- model.matrix(y ~ . -1, cbind(y,newx))
# mod <- fastNaiveBayes.bernoulli(new_mat, y, laplace = 1)
# pred <- predict(mod, newdata = new_mat)
# mean(pred!=y)
#
# # Construction sparse Matrix: let the fitter sparsify internally...
# mod <- fastNaiveBayes.bernoulli(new_mat, y, laplace = 1, sparse = TRUE)
# pred <- predict(mod, newdata = new_mat)
# mean(pred!=y)
#
# # OR: pass an explicitly constructed sparse Matrix.
# new_mat <- Matrix::Matrix(as.matrix(new_mat), sparse = TRUE)
# mod <- fastNaiveBayes.bernoulli(new_mat, y, laplace = 1)
# pred <- predict(mod, newdata = new_mat)
# mean(pred!=y)
#
# # Multinomial only
# vars <- c(dist$bernoulli, dist$multinomial)
# newx <- x[,vars]
# mod <- fastNaiveBayes.multinomial(newx, y, laplace = 1)
# pred <- predict(mod, newdata = newx)
# mean(pred!=y)
#
# # Gaussian only
# vars <- c('hp', dist$gaussian)
# newx <- x[,vars]
# mod <- fastNaiveBayes.gaussian(newx, y)
# pred <- predict(mod, newdata = newx)
# mean(pred!=y)
# (rdrr.io page footer, kept as comments so this file remains valid R:)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.