# Chunk defaults for this vignette: collapse source and output into one block
# and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(gpmodels)
The mtsamples dataset used in this example is available in the clinspacy
package (on GitHub).
# Attach the packages used for data wrangling and character tokenization.
library(dplyr)
library(tidytext)

# Load the example data via clinspacy::dataset_mtsamples().
mtsamples <- clinspacy::dataset_mtsamples()

# Show the number of rows, then the first transcription.
nrow(mtsamples)
mtsamples$transcription[[1]]
# Build the long-format input table: one id per transcription, a constant
# `variable` label, and the transcription text.
# seq_len() and rep() keep every column the same length even when `mtsamples`
# has zero rows (`1:nrow(x)` would produce c(1, 0) in that case).
dataset <- data.frame(
  id = seq_len(nrow(mtsamples)), # 1:500,
  variable = rep("variable", nrow(mtsamples)),
  text = mtsamples$transcription, # [1:500],
  stringsAsFactors = FALSE
)

# Split each text into character shingles of length 1 (column `char`), then
# number the rows within each id so `sequence_num` can act as the time index.
dataset <- dataset %>%
  unnest_character_shingles(char, text, n = 1, strip_non_alphanum = FALSE) %>%
  group_by(id) %>%
  mutate(sequence_num = row_number()) %>%
  ungroup()

# Preview up to the first 100 characters; head() avoids the NA padding that
# `[1:100]` produces when fewer than 100 rows exist.
cat(head(dataset$char, 100))
# Small demonstration time frame: fixed data holds every distinct id, while
# the temporal data is restricted to the rows with id == 1.
# The frame is not saved (save_time_frame = FALSE).
char_frame <- time_frame(
  fixed_data = distinct(dataset, id),
  temporal_data = filter(dataset, id == 1),
  fixed_id = "id",
  temporal_id = "id",
  temporal_time = "sequence_num",
  temporal_variable = "variable",
  temporal_value = "char",
  step = 2,
  max_length = 20,
  output_folder = "Z:/kdpsingh/gpm_char_lang",
  save_time_frame = FALSE
)
# Time frame over the complete character table, processed in chunks
# (chunk_size = 16) with the output folder created on demand.
# This assignment replaces any previously defined `char_frame`.
char_frame <- time_frame(
  fixed_data = distinct(dataset, id),
  temporal_data = dataset,
  fixed_id = "id",
  temporal_id = "id",
  temporal_time = "sequence_num",
  temporal_variable = "variable",
  temporal_value = "char",
  step = 1,
  output_folder = "Z:/kdpsingh/gpm_char_lang/all_data_in_chunks",
  create_folder = TRUE,
  chunk_size = 16
)
# Run the following steps in parallel across 6 background R sessions.
future::plan("multisession", workers = 6)

# Disabled alternatives, kept for reference:
# model_predictors = char_frame %>%
#   gpm_add_predictors(variables = 'variable',
#                      lookback = 20,
#                      window = 1,
#                      stats = c(first = . %>% .[1]),
#                      output_file = FALSE)
#
#
# model_predictors = char_frame %>%
#   gpm_add_growing_predictors(variables = 'variable',
#                              stats = c(ngram = . %>% paste(collapse = '')),
#                              output_file = FALSE)

# Predictors: first value in each window of 1 over a lookback of 50.
# The result is not assigned to a variable here.
# NOTE(review): last_chunk_completed = 64 presumably resumes an interrupted
# run after chunk 64 — confirm against the gpmodels documentation.
char_frame %>%
  gpm_add_predictors(
    variables = "variable",
    lookback = 50,
    window = 1,
    stats = c(first = . %>% .[1]),
    last_chunk_completed = 64
  )

# Outcomes: first value within a lookahead of 1. The result is not assigned.
char_frame %>%
  gpm_add_outcomes(
    variables = "variable",
    lookahead = 1,
    stats = c(first = . %>% .[1])
  )

# model_predictors_ngram = char_frame %>%
#   gpm_add_predictors(variables = 'variable',
#                      lookback = 3,
#                      stats = c(ngram = . %>% paste(collapse = '')),
#                      output_file = FALSE)

# Same outcome definition, this time kept in `model_outcome`
# (called with output_file = FALSE).
model_outcome <- char_frame %>%
  gpm_add_outcomes(
    variables = "variable",
    lookahead = 1,
    stats = c(first = . %>% .[1]),
    output_file = FALSE
  )

# model_outcome_ngram = char_frame %>%
#   gpm_add_outcomes(variables = 'variable',
#                    lookahead = 1,
#                    stats = c(ngram = . %>% paste(collapse = '')),
#                    output_file = FALSE)
# Combine the time frame with the outcome and predictor objects via
# gpm_combine(), then render the result as a centered table.
# NOTE(review): `model_predictors` is only assigned in commented-out code in
# this file — confirm it is defined before running this chunk.
model_data <- gpm_combine(char_frame, model_outcome, model_predictors)

knitr::kable(model_data, align = "c")
# head(model_data)
# n-gram variant: combine the time frame with the n-gram outcome and predictor
# objects via gpm_combine(), then render the result as a centered table.
# NOTE(review): `model_outcome_ngram` and `model_predictors_ngram` are only
# assigned in commented-out code in this file — confirm they are defined
# before running this chunk.
model_data_ngram <- gpm_combine(
  char_frame,
  model_outcome_ngram,
  model_predictors_ngram
)

knitr::kable(model_data_ngram, align = "c")
# head(model_data_ngram)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.