knitr::opts_chunk$set( collapse = TRUE, comment = "" )
basic string processing functions
library(keysandstrings) chunk_string('aaabbbcccddd', chunk_len=3) split_key(c('aaabbb', 'aaaccc','bbbaaa'), chunk_len = 3)
replaces a factor levels in a data frame with a code that with a consistant number of characters.
This feature helps for grouped time series in the hts package that requires keys with consistant length
data = data.frame(w = rep('wkey1',8), x = rep(c('xkey1', 'xkey2','xkey3', NA), 2), y = c(rep(c('ykey1', 'ykey2', 'ykey3'), 2), 'ykey1', ''), z = c(rep(c('zkey1',NA, ''), 2), 'zkey1', '') ) lc = level_coder(data) x = predict(lc, data, verbose = TRUE) print(x) predict(lc, x, rev = TRUE) key = predict(lc,data, return_primkey = TRUE) print(key) predict(lc,key, is_primkey = TRUE) z = "aaazzzzzzzzz" attr(z, 'primkey') = TRUE predict(lc, key[8] ,is_primkey = TRUE)
get lagged features from time series data. Works on a data frame or a time seires object. Uses keys and a data col. Handles missing values by carrying the next forward. This might be problematic for seasonal times series, so it's better to deal with NAs before running this.
head(ts_features(AirPassengers),2) tail(ts_features(AirPassengers),2) params = list(time_col = 'date', key_col = 'key', val_col = 'val', n_future = 2, n_past = 2, na_fill = NA) data = data.frame(date = rep(1:10,2), key = c(rep('a',10), rep('b',10) ), val =c(1:10, 2:11)) ts_features(data, params) # this demonstrates the back fill capability for missing data = data.frame(date = c(2:10, 1:10), key = c(rep('a',9), rep('b',10) ), val =c(2:10, 1:10)) new_data = ts_features(data, params) new_data[ order(new_data$key, new_data$date),]
Center and Scale new data, and uncenter and scale, all optional primary key ( creates a scale for each column and each set of keyed rows )
data = as.matrix(data.frame(key = c(rep(1,40), rep(2,39), 3), val1 = c(1:79, NA), val2 = c(1:80), val3 = rep(1, 80))) head(data) ks = keyscale(data, key_col = 'key') # scaling x = predict(ks, data) head(x) #unscaling head(predict(ks, x)) # case when there is a new key data = as.matrix(data.frame(key = c(rep(1,2), rep(2,2), 4), val1 = c(rep(1:2, 2),55), val2 = c(rep(1:2, 2), 55), val3 = c(rep(1:2,2),55))) data = as.matrix(data.frame(key = c(rep(1,40), rep(2,39), 3), val1 = c(1:79, NA), val2 = c(1:80), val3 = rep(1, 80))) head(data) ks = keyscale(data[,1:2], key_col = 'key') # scaling head(predict(ks, data, verbose = FALSE))
data = data.frame(a = c(rep('aa', 9), 'b'), b = c(rep('aa', 10))) bl = black_list(data, cols = c('a', 'b'), freq = 9 ) predict(bl, data, keep_rows = TRUE) wl = black_list(data, cols = c('a', 'b'), freq = 9, white_list = TRUE ) predict(bl, data, keep_rows = TRUE)
Catagory Sparse Encoder
data = data.frame(x = rep(x = c(NA,letters[1:4]), 5), y = rep(c(NA,letters[5:8]), 5), z = c(1:24, NA), zz = 1:25) lc = sparseEncoder(data) predict(lc, data)
Simple functions for manipulating calendar periods
x = c(201209,201212, 201301) cp_add(x,2) cp_to_fp(x) cp_to_fp(cp_to_fp(x), rev = TRUE) cp_seq(201201,6) x = rep(cp_seq(200011, 250),2) system.time(cp_to_calendar(cp_seq(201201,100))) system.time(cp_to_calendar(cp_seq(201201,100), par = FALSE))
data = c(201201+0:5, 201301+6:11) missing_cp(data)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.