# Round identifier read from `params`; it selects the
# "numerai_datasets/<round>/" directory used by the chunks below.
# NOTE(review): `params` is presumably the R Markdown document parameters --
# confirm it is defined by the rendering environment.
nmr_round <- params$nmr_round

# Show the source code of every chunk in the rendered output.
knitr::opts_chunk$set(echo = TRUE)

NMR data prep

Some minor data preparation is needed so that the data has the same format as the churn data.

# Read the training set of the selected round.
nmr_training_data <- vroom::vroom(
  paste0("numerai_datasets/", nmr_round, "/numerai_training_data.csv")
)

# Number of rows per data_type in the training set.
count(nmr_training_data, data_type)

# Read the tournament set of the selected round.
nmr_tournament_data <- vroom::vroom(
  paste0("numerai_datasets/", nmr_round, "/numerai_tournament_data.csv")
)

# Number of rows per data_type in the tournament set.
count(nmr_tournament_data, data_type)

# Preview a handful of columns for each data_type, one subset at a time.
select(
  filter(nmr_tournament_data, data_type == "validation"),
  id, era, data_type, feature_intelligence1, feature_intelligence2, target
)
select(
  filter(nmr_tournament_data, data_type == "test"),
  id, era, data_type, feature_intelligence1, feature_intelligence2, target
)
select(
  filter(nmr_tournament_data, data_type == "live"),
  id, era, data_type, feature_intelligence1, feature_intelligence2, target
)

We should prepare the data so that it looks more like the data used in the tutorial.

# Stack the training rows with the validation rows of the tournament set.
nmr_data <- bind_rows(
  nmr_training_data,
  filter(nmr_tournament_data, data_type == "validation")
)

# Row count and share of each data_type in the combined data.
mutate(
  count(nmr_data, data_type),
  prop = n / sum(n)
)

# Save the per-data_type row counts and proportions in the round's directory.
vroom::vroom_write(
  nmr_data %>%
    count(data_type) %>%
    mutate(prop = n / sum(n)),
  paste0("numerai_datasets/", nmr_round, "/nmr_training_metadata")
)

# Save the combined data, without the data_type column, in the same round
# directory. The path is built from nmr_round (it used to be hard-coded to
# round 254), so the output always lands next to the inputs it was built from.
vroom::vroom_write(
  nmr_data %>% select(-data_type),
  paste0("numerai_datasets/", nmr_round, "/nmr_data")
)


kenshuri/targets-tuto documentation built on April 19, 2021, 9:58 a.m.