# Title :
# Objective : Utilities for bundling of models in a way, suitable for model
# Created by: sidi
# Created on: 23.06.21
library(stringr)
#' @title Read model from RDS file
#' This is a simple wrapper around readRDS, checking if some of the list names exist.
#' @param file - the file name of an RDS. The object inside should be a list, containing 'model_file' and 'required_packages'
#'
#' @return the list in the file if all needed attributes are inside
#' @export
#'
load_model_from_metadata <- function(file){
dat <- readRDS(file = file)
# print(names(dat))
for (nm in c("model_file","required_packages")) {
assertthat::assert_that(nm%in% names(dat))
}
return(dat)
}
#' Predict Data From Saved Model
#'
#' @param file path to the file, holding the data
#' @param data newdata to predict on
#'
#' @return
#' @export
#'
predict_new_model <- function(file,data){
modeldat <- load_model_from_metadata(file)
modelObj <- modeldat[["model_fit"]]
formula_obj <- modeldat[["formula_obj"]]
original_df <- modeldat[["original_df"]]
required_packages <- modeldat[['required_packages']]
#region install packages
lib <- .libPaths()[1]
i1 <- !(required_packages %in% row.names(installed.packages()))
if(any(i1)) {
install.packages(required_packages[i1], dependencies = TRUE, lib = lib)
}
#endregion install packages
predicted_values <- predict(modelObj,newdata = data)
predicted_values
}
#' Create Dockerfile From Saved Model Object
#'
#' @param required_packages required packages to hold
#' @param model_file_location location of the model file
#' @param plumber_template_location location to the plumber api script, by default the
#' one from the package is used. Leave it as-is.
#' @param runner_template_location location of the runner template script, by default
#' the one from the package distribution is used. Leave as is, normally.
#'
#' @return
#' @export
#'
createDockerfile <- function(
model_file_location,
required_packages=c("forecast"),
plumber_template_location = file.path(path.package("vertaReticulateClient"),
"plumber_sample.R",
additional_objects_file_location = "additional_objects.RData"
),
runner_template_location = file.path(path.package("vertaReticulateClient"),
"to_run.R" )
){
MAGIC_FILE_NAME <- str_glue("./tmp_model_file_{rpois(1,10000)}.RData")
if(!is.character(model_file_location)){
if(is.list(model_file_location)){
if("model_file"%in%names(model_file_location)){
saveRDS(model_file_location,file=MAGIC_FILE_NAME)
}
}
}else{
MAGIC_FILE_NAME <- model_file_location
}
if(!file.exists(additional_objects_file_location)){
print(str_glue("Additional object location not found!"))
}else{
print(str_glue("Additional object location found!"))
}
mydocker <- dockerfiler::Dockerfile$new("rstudio/plumber")
if(length(required_packages) > 0) {
for(req in required_packages){
if(length(req)>0){
# mydocker$RUN(r(install.packages(eval(req))))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote({req})))" '))
}
}
}
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(assertthat)))" '))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(renv)))" '))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(jsonlite)))" '))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(stringr)))" '))
mydocker$RUN("mkdir /usr/scripts")
mydocker$RUN("cd /usr/scripts")
mydocker$COPY(MAGIC_FILE_NAME, "/usr/scripts/model_file.RData")
mydocker$COPY(plumber_template_location, "/usr/scripts/plumber.R")
mydocker$COPY(runner_template_location, "/usr/scripts/run.R")
# if(!file.exists(additional_objects_file_location)){
# print(str_glue("Additional object location not found!"))
# }else{
# print(str_glue("Additional object location found!"))
# mydocker$COPY(additional_objects_file_location, "/usr/scripts/additional_objects.RData")
# print(str_glue("copied additional file to Docker!"))
# }
# mydocker$COPY(runner_template_location, "/usr/scripts/plumber.R")
# mydocker$RUN
mydocker$EXPOSE(8000)
mydocker$WORKDIR("usr/scripts")
# mydocker$RUN(r(plumber::plumb(file="plumber.R")))
mydocker$ENTRYPOINT("R -f run.R --slave")
mydocker$write("Dockerfile")
}
#' Title
#'
#' @param location path to the Docker context folder - normally that's the
#' current working directory
#' @param tag The tag, with which
#'
#' @return
#' @export
buildDocker <- function(location = ".",tag="verta-plumber"){
system(str_glue("docker build -t {tag} {location}"))
}
#' Title
#'
#' @param location location of docker context
#' @param tag tag of the model
#'
#' @return
#' @export
runDocker <- function(location=".",tag = "verta-plumber"){
buildDocker(location,tag)
system(str_glue("docker run -p 8000:8000 {tag}"))
}
#' Utility function to create a random string of a specific format
#' the format is
#' (if add_docker_substring then "docker_folder_" else "")[:alpha:]{letter_len1}[:digit:]{digit_len}_[:alpha:]{letter_len2}.(if add_rdata then .RData else "")
#' @param letter_len1 length first set of letters
#' @param digit_len length of first set of digits
#' @param letter_len2 length of second set of letters
#' @param add_docker_substring weather to add 'docker_folcer_' in the beginning
#' @param add_rdata weather to add ".RData" in the end
#'
#' @return
#'
createRandString<- function(letter_len1=5,digit_len=4,letter_len2=1,add_docker_substring=T,add_rdata=F) {
digits = 0:9 %>% as.character()
v = c(sample(LETTERS, letter_len1, replace = TRUE),
sample(digits, digit_len, replace = TRUE),
sample(LETTERS, letter_len2, replace = TRUE))
res = paste0(v,collapse = "")
if(add_docker_substring){
res <- paste0("docker_folder_",res)
}
if(add_rdata){
res <- paste0(res,".Rdata",collapse = "")
}
# paste0("docker_folder_",paste0(v,collapse = ""),collapse = "")
res
}
#' @title Create Dockerfile From Saved Model Object
#' @description This function creates the docker context from either an R object in the workspace, or a file, containing such an object.
#' The object contains the model, package dependencies, a dictionary of additional R object, needed for it to work, and the source code of
#' files, needed to create the API. Those are stored with the log_model object, and finally
#' @param model_file_location a model file, created in the first place by by using createDockerContextZip. It should contain a
#' single R object, which is a dictionary, and which has the following fields:
#' A key note is that the predict function should have a method for the type of model_file
#' model_fit - a fit model object
#' formula_obj-Optional
#' original_df - Optional
#' required_packages - a vector of required packages to install
#' additional_objects- a named list of all adidtional objects to load, together with their names.
#' plumberTemplateFileLines -a vector of strings, representing the lines of one of the R source files, needed to create the API.
#' runnerTemplateFileLines -a vector of strings, representing the lines of the other R source file, needed to create the API.
#'
#' @return
#' @export
#'
createDockerContextZip <- function(
model_file_location
){
# Steps:
# 1. Read the contents of the serialized file in memory (if the data is already in memory, instead write it to a )
# 2. Out of the plumberTemplateFileLines and runnerTemplateFileLines fields of that object,
# this part is needed only if we pass
# create a temporary folder to work in.
MAGIC_FOLDER_NAME <- createRandString()
dir.create(MAGIC_FOLDER_NAME)
# if we are passing a file name, then read that in memory
if(is.character(model_file_location)){
modelData <- readRDS(model_file_location)
MAGIC_FILE_NAME <- model_file_location
if(is.list(modelData)){
if("model_fit"%in%names(modelData)){
saveRDS(modelData,file=file.path(MAGIC_FOLDER_NAME,MAGIC_FILE_NAME))
}
}
}else{
# if the model is already in memory, write it to a temporary folder
MAGIC_FILE_NAME <- "tmp_model_file.RData"
while(dir.exists(MAGIC_FOLDER_NAME)){
MAGIC_FOLDER_NAME <- createRandString()
}
dir.create(MAGIC_FOLDER_NAME)
if(is.list(model_file_location)){
if("model_fit"%in%names(model_file_location)){
saveRDS(model_file_location,file=file.path(MAGIC_FOLDER_NAME,MAGIC_FILE_NAME))
}
}
modelData <- readRDS(file.path(MAGIC_FOLDER_NAME,MAGIC_FILE_NAME))
}
# 2. Create source files from two of the fields in the passed dictionary
plumberTemplateFileLines <- modelData[["plumberTemplateFileLines"]]
runnerTemplateFileLines <- modelData[["runnerTemplateFileLines"]]
runner_template_location <- "runner_template_location.R"
plumber_template_location <- "plumber_template_location.R"
runnerTemplate <-writeLines(runnerTemplateFileLines,runner_template_location)
plumberTemplate <-writeLines(plumberTemplateFileLines,plumber_template_location)
# 3. Copy the files to the temporary directory, containing the docker context
#
file.copy(runner_template_location,
file.path(MAGIC_FOLDER_NAME,"runner_template.R"))
file.copy(plumber_template_location,
file.path(MAGIC_FOLDER_NAME,"plumber_template.R"))
# 4. Start writing the Dockerfile
# start from the rstudio/plumber dockerfile here:
# https://hub.docker.com/r/rstudio/plumber/
mydocker <- dockerfiler::Dockerfile$new("rstudio/plumber")
#4.1 find all package dependencies, and add lines like
#RUN R -e "install.packages(as.character(quote(__packagename__)))"
# to the dockerfile. They will install said packages
if("required_packages"%in%modelData){
required_packages <- modelData[["required_packages"]]
}else{
required_packages <- c()
}
# 4.2 Add dockerfile instructions to install model-specific packages
if(length(required_packages) > 0) {
for(req in required_packages){
if(length(req)>0){
# NB: R's string interpolation is quite bad out of the box, and thus
# we use the str_glue function from stringr
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote({req})))" '))
}
}
}
# 4.3 Write instructions in the dockerfile to add some some basic additional libraries, to raise up an API
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(assertthat)))" '))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(renv)))" '))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(jsonlite)))" '))
mydocker$RUN(stringr::str_glue('R -e "install.packages(as.character(quote(stringr)))" '))
# 4.4. Instructions to create a directory for the entry point.
mydocker$RUN("mkdir /usr/scripts")
mydocker$RUN("cd /usr/scripts")
# 4.5. Instructions to copy the needed files to the entry point directory
# * the model file, downloaded from the artifact repository
# * the two source code files
mydocker$COPY(MAGIC_FILE_NAME, "/usr/scripts/model_file.RData")
mydocker$COPY("plumber_template.R", "/usr/scripts/plumber.R")
mydocker$COPY("runner_template.R", "/usr/scripts/run.R")
# 4.6. Instructions to open port 8000
mydocker$EXPOSE(8000)
# 4.7 Instructions to move to the entry point and execute
mydocker$WORKDIR("usr/scripts")
# mydocker$RUN(r(plumber::plumb(file="plumber.R")))
mydocker$ENTRYPOINT("R -f run.R --slave")
# 4.8 write the whole Dockerfile
mydocker$write(file.path(MAGIC_FOLDER_NAME,"Dockerfile"))
# print(str_glue("DONE! Context Written in {MAGIC_FOLDER_NAME}"))
# 5. zip the whole context dir to a zip file, and clean up the temporary directory
zip(str_glue("{MAGIC_FOLDER_NAME}.zip"),files = MAGIC_FOLDER_NAME)
print(str_glue("DONE! Context Written in {MAGIC_FOLDER_NAME}.zip"))
# delete the dir
unlink(MAGIC_FOLDER_NAME,recursive = F)
print(str_glue("DONE! Context Written in {MAGIC_FOLDER_NAME}.zip"))
}
#' @title Create an Object, Deployable to the Verta System
#' @description This function gathers R objects from the workspace, needed to deploy an API endpoint and is intended to be used with log_model.
#' It simply gathers all needed objects, puts them in a list, and saves the list to disk. Then that file is logged with run$log_model.
#' This object can then be turned into a docker context with createDockerContextZip
#' @param file_to_save_at target file path
#' @param modFile the fit model object
#' @param required_packages character vector of required packages to call the predict function
#' @param runner_template_location - a location of the runner template file- by default taken from the package
#' @param plumber_template_location - a location of the plumber template file- by default taken from the package directory
#' @return nothing
#' @export
#'
save_model_data <- function(file_to_save_at,
modelObject,
required_packages=NULL,
additional_objects=list(),
plumber_template_location = file.path(path.package("vertaReticulateClient"),
# "inst",
"plumber_sample.R"
),
runner_template_location = file.path(
path.package("vertaReticulateClient"), "to_run.R" )
) {
if(is.null(required_packages)) {
required_packages = c()
}
plumberTemplateFileLines <- readLines(plumber_template_location)
runnerTemplateFileLines <- readLines(runner_template_location)
dat <- list(
model_fit = modelObject,
required_packages = required_packages,
additional_objects = additional_objects,
plumberTemplateFileLines = plumberTemplateFileLines,
runnerTemplateFileLines = runnerTemplateFileLines
)
saveRDS(object = dat,file=file_to_save_at)
}
#' @title Create a model and deploy it to the verta system
#' @description This function gathers R objects from the workspace, needed to deploy an API endpoint and is intended to be used with log_model.
#' It simply gathers all needed objects, puts them in a list, and saves the list to disk. Then that file is logged with run$log_model.
#' This object can then be turned into a docker context with createDockerContextZip
#' @param run a
#' @param modFile the fit model object
#' @param required_packages character vector of required packages to call the predict function
#' @param runner_template_location - a location of the runner template file- by default taken from the package
#' @param plumber_template_location - a location of the plumber template file- by default taken from the package directory
#' @return nothing
#' @export
#'
log_model_data <- function(run,
modelObject,
required_packages=NULL,
additional_objects=list(),
plumber_template_location = file.path(path.package("vertaReticulateClient"),
# "inst",
"plumber_sample.R"
),
runner_template_location = file.path(
path.package("vertaReticulateClient"),
# "inst" ,
"to_run.R" )
) {
# save everything needed to a temp file
tmp_file <- createRandString(add_docker_substring=F,add_rdata=T) # create temporary file name
save_model_data(
tmp_file,modelObject = modelObject,
required_packages = required_packages,
additional_objects = additional_objects,
plumber_template_location = plumber_template_location,
runner_template_location = runner_template_location
)
# log the model to verta
log_model(run, tmp_file, overwrite = T)
# clean up by deleting the temp file
if (file.exists(tmp_file)) {
file.remove(tmp_file)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.