inst/doc/DatetimePartitionedProjects.R

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
#  library(datarobot)

## ----datetime Partition Base, echo = TRUE, eval = FALSE-----------------------
#  lending <- read.csv("https://s3.amazonaws.com/datarobot_public_datasets/10K_Lending_Club_Loans.csv")
#  partition <- CreateDatetimePartitionSpecification(datetimePartitionColumn = "earliest_cr_line",
#                                                    numberOfBacktests = 5)
#  proj <- StartProject(dataSource = lending,
#                       projectName = "Lending_Club_Time_Series",
#                       target = "is_bad",
#                       mode = "quick",
#                       partition = partition)

## ----backtest_specification_example, echo = TRUE, eval = FALSE----------------
#  backtest <- list()
#  # The dates below are illustrative examples, not specific to this project
#  backtest[[1]] <- CreateBacktestSpecification(0, ConstructDurationString(),
#                                               "1989-12-01", ConstructDurationString(days = 100))
#  backtest[[2]] <- CreateBacktestSpecification(1, ConstructDurationString(), "1999-10-01",
#                                               ConstructDurationString(days = 100))
#  # create desired partition specification
#  partition <- CreateDatetimePartitionSpecification("earliest_cr_line",
#                                                    numberOfBacktests = 2,
#                                                    backtests = backtest)

## ----model_iteration, echo = TRUE, eval = FALSE-------------------------------
#  # Request more granular information on the datetime partition specification
#  GetDatetimePartition(proj)
#  
#  # View blueprints associated with a project
#  bps <- ListBlueprints(proj)
#  
#  # View the models within the model leaderboard
#  models <- ListModels(proj)
#  
#  # Retrieve a datetime model. There is now a new retrieval function specific to datetime partitioning
#  dt_model <- GetDatetimeModel(proj, models[[1]]$modelId)
#  
#  # Score all Backtests
#  scoreJobId <- ScoreBacktests(dt_model)
#  WaitForJobToComplete(proj, scoreJobId) # To make synchronous
#  
#  # now model information will also contain information about backtest scores
#  dtModelWithBt <- GetDatetimeModel(proj, dt_model$modelId)
#  
#  # Retrain a model using a different start & end date.
#  # One has to request a `Frozen` model to keep the hyper-parameters static and avoid lookahead bias.
#  # Within the context of deployment, this can be used to retrain a resulting model on more recent data.
#  UpdateProject(proj, holdoutUnlocked = TRUE) # If retraining on 100% of the data, we need to unlock the holdout set.
#  modelJobId_frozen   <- RequestFrozenDatetimeModel(dt_model,
#                                                    trainingStartDate = as.Date("1950/12/1"),
#                                                    trainingEndDate = as.Date("1998/3/1"))
#  new_dt_model_frozen <- GetDatetimeModelFromJobId(proj, modelJobId_frozen)
#  
#  # Train & retrieve a new datetime model based on row count
#  modelJobId  <- RequestNewDatetimeModel(proj, bps[[1]], trainingRowCount = 100)
#  new_dt_model <- GetDatetimeModelFromJobId(proj, modelJobId)
#  
#  # Train & retrieve a new datetime model based on duration
#  modelJobId   <- RequestNewDatetimeModel(proj, bps[[1]],
#                                          trainingDuration = ConstructDurationString(months=10))
#  new_dt_model <- GetDatetimeModelFromJobId(proj, modelJobId)

Try the datarobot package in your browser

Any scripts or data that you put into this service are public.

datarobot documentation built on Nov. 3, 2023, 1:07 a.m.