knitr::opts_chunk$set(echo = TRUE)
pacman::p_load(tidyverse, purrr, hardhat, AmesHousing, rsample, parsnip, recipes, deeplyr) devtools::document() devtools::load_all() #devtools::install()
d <- AmesHousing::ames_raw %>% janitor::clean_names() sp <- rsample::initial_split(d) train <- training(sp) test <- testing(sp) rec <- recipes::recipe( ~ yr_sold + lot_area, data = head(train, 2000)) %>% prep f <- fit_learner(train, train$sale_price, params = list(recipe = rec), "linear", "xgboost") f$meta$get() f$meta$recipe f$meta$get() f$predict(test, y_test = test$sale_price) f$predict(test, dev = T) f$metrics f$meta$get() f$save("test") meta$new("test") g <- learner$new("test") g$predict(test, dev = T)
d <- get(load("label_political_final.Rdata")) tok <- deeplyr::fit_tokenizer(d$tweet, max_words = 1000) p <- list(tok = tok, model = list(deeplyr::keras_cnn_lstm), input_dim = 1000, seq_len = 30, epochs = 1) f <- fit_learner(d$tweet, d$pol, params = p, "binary", "keras") f$meta$get() f$meta$tok f$predict(d$tweet, y_test = d$pol) f$preds f$metrics f$predict(test, dev = T)
# load("data/label_political_final.Rdata") pol_data <- get(load("label_political_final.Rdata")) %>% #mutate(split = sample(1:5, size = n(), replace = T)) %>% dplyr::mutate(split = sample(1:5, size = n(), replace = T)) %>% dplyr::glimpse()
p1 <- list( input_dim = 10000, seq_len = 50, embed_dim = 128, n_filters = 50, lstm_dim = 50, lstm_drop = .3, batch_size = 200, min_word = 3, max_docs = .5, epochs = 1, model = list(deeplyr::keras_cnn_lstm), model_name = "cnn_lstm", verbose = 1, metric = "accuracy" ) train <- pol_data[pol_data$split == 1, ] test <- pol_data[pol_data$split == 2, ] p1$tok <- deeplyr::fit_tokenizer( text = train$tweet, max_words = p1$input_dim, max_docs = p1$max_docs, min_word = p1$min_word, char_level= F # path = path ) f <- deeplyr::fit_learner(train$tweet, train$pol, p1, "binary", "keras") f$predict(test$tweet, y_test = test$pol) f$save("test2") gg <- learner$new("test2/69dc2657") gg$meta$tok gg$predict(pol_data$tweet, dev = T) gg
fold_cv_oob <- function(data, split){ ### extract id and split vars idx <- tibble(rid = 1:nrow(data), split = split) ### find test instances indices <- 1:5 %>% purrr::map(~{idx$rid[idx$split == .x]}) ### analysis/assessment indices <- indices %>% purrr::map(rsample:::vfold_complement, n = nrow(idx)) ### make split obj. split_objs <- purrr::map(indices, rsample:::make_splits, data = data, class = "vfold_split") tibble::tibble(splits = split_objs, id = paste0("Fold", 1:length(split_objs))) }
#' fit_cv #' @export fit_cv <- function(rsample, params, task, backend, path = NULL){ out <- rsample %>% dplyr::mutate( models = map(splits, ~{ train <- dplyr::bind_rows(rsample::analysis(.x)) test <- dplyr::bind_rows(rsample::assessment(.x)) f <- deeplyr::fit_learner(train$tweet, train$pol, params, "binary", "keras") f$predict(test$tweet, y_test = test$pol) return(f) }) ) %>% dplyr::transmute(id, metrics = purrr::map(models, ~ bind_cols(.x$metrics))) %>% tidyr::unnest("metrics") return(out) } nn <- fold_cv_oob(pol_data, pol_data$split) %>% fit_cv(p1, "binary", "keras") final_metrics <- rbind(nn %>% summarise_at(-1, mean) %>% cbind(tibble(id = "overall"), .), nn) # save_json(final, "nn1.json", ".") f <- deeplyr::fit_learner(pol_data$tweet, pol_data$pol, p1, "binary", "keras") f$set_metrics <- final_metrics f$save("test2") f$metrics
devtools::document() devtools::load_all()
gg <- learner$new("test2/2a01afb0") gg$predict(pol_data$tweet, dev = T)
library(furrr) options(future.globals.maxSize = 850 * 2024^2) plan(multicore) t1 <- Sys.time() cv_models <- rsample::vfold_cv(train, v = 10) %>% future_fit_cv(rec, NULL, "linear", "xgboost") diff <- Sys.time() - t1 diff
library(furrr) options(future.globals.maxSize = 850 * 2024^2) plan(multicore) t1 <- Sys.time() pred <- 1:10 %>% future_map(~{ rsample::vfold_cv(train, v = 10) %>% fit_cv(rec, NULL, "linear", "xgboost") }, .progress = T) diff <- Sys.time() - t1 diff
devtools::document() library(magrittr) g <- fit_learner(rec, train, params = NULL, task = "linear", backend = "rpart") g$predict(new_data = test) # %>% select(-sale_price) g$preds g$metrics
devtools::document() examples <- tidytext::sentiments$word %>% split(1:length(.) %/% 100) %>% map_chr(paste0, collapse = " ") %>% tibble(text = .) %>% mutate(party = as.factor(sample(0:1, size = n(), replace = T, prob = rep(.5, 2)))) glimpse(examples) tok <- keras::text_tokenizer( num_words = 1000, lower = T, char_level = F ) keras::fit_text_tokenizer(tok, examples$text) x <- deeplyr::tokenize_text(examples$text, tok = tok, 100) # library(magrittr) # p <- list(model = list(keras_cnn_lstm), seq_len = 100, input_dim = 1000) # f1 <- fit_learner(x = x, y = examples %>% select(party), params = p, task = "binary", backend = "keras") # # f1$predict(new_data = cbind(as_tibble(x), examples %>% select(party))) # # f1$preds # # f1$metrics # # f1$params # f1$save("test") # # save_rds(f1$process$data, "test", "process") # # f1 %>% model_eval(., "party") library(magrittr) p <- list(model = list(keras_cnn_lstm), seq_len = 100, input_dim = 1000, epochs = 2) d <- cbind(examples %>% select(party), as_tibble(x)) rec_text <- recipe(party ~ ., data = d) cv_models <- rsample::vfold_cv(d, v = 5) %>% fit_cv(rec_text, p, "binary", "keras")
devtools::document() # devtools::install() n1 <- learner$new("test") n1$predict(x, dev = T)
# cvprepper <- function(split_obj, recipe, ...){ # prep(recipe, training = dplyr::bind_rows(rsample::analysis(split_obj)), fresh = TRUE, ...) # } # # tcvprepper <- function(split_obj, recipe, ...){ # prep(recipe, training = dplyr::bind_rows(rsample::analysis(split_obj)$data), fresh = TRUE, ...) # } h2o::h2o.init() library(furrr) plan(multiprocess) fit_fun <- function(split){ train <- dplyr::bind_rows(rsample::analysis(split)) assess <- dplyr::bind_rows(rsample::assessment(split)) g <- deeplyr::fit_learner(rec, train, params = NULL, task = "linear", backend = "h2o_rf") g$predict(new_data = assess) g } cv_models <- rsample::vfold_cv(train, v = 5) %>% mutate(model = map(splits, fit_fun)) %>% #furrr::future_ #, .progress = F mutate( preds = map(model, ~.x$preds), metrics = map(model, ~.x$metrics) )
fit_cv <- function(rsample, rec, params, task, backend){ rsample %>% dplyr::mutate(model = map(splits, ~{ g <- deeplyr::fit_learner( rec, dplyr::bind_rows(rsample::analysis(.x)), params, task, backend ) g$predict(new_data = dplyr::bind_rows(rsample::assessment(.x))) return(g) }) ) %>% #furrr::future_ #, .progress = F dplyr::mutate( preds = purrr::map(model, ~.x$preds), metrics = purrr::map(model, ~.x$metrics) ) } cv_models <- rsample::vfold_cv(train, v = 5) %>% fit_cv(rec, NULL, "linear", "rpart")
cv_all_models <- c("rpart", "xgboost", "h2o_glm", "h2o_rf", "h2o_dnn", "h2o_gbm", "h2o_xgb") %>% map_dfr(~{ rsample::vfold_cv(train, v = 5) %>% fit_cv(rec, NULL, "linear", .x) %>% select(id, metrics) %>% mutate(backend = .x) }) options(scipen = 999) cv_all_models %>% unnest %>% mutate(index = str_extract(id, "\\d$") %>% as.numeric) %>% ggplot(aes(index, mae, colour = backend)) + geom_line() glimpse
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.