# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
Books about R tend to focus on data analytics, graphics, statistics, machine learning, programming skills, and data science (whatever that means). The code and examples in these books are divorced from a design of real-world software systems. Understandably, the examples in these books are oversimplified to keep the focal point on the technique.
Very few books talk about the holistic approach for undertaking endeavours in R. In particular, two books discuss standard ways to build and deploy applications in R:
There is, however, a topic that neither of these books covers - building analytic applications. This book aims to fill this gap.
Every data science project has at least a project lead and a data scientist. Sometimes they are the same person. In any case, the first task facing the project lead is creating a template repository [@Microsoft2017]. Most practitioners use a former project, which is itself a reincarnation of a former project, as the template repository. Regardless of what template repository is employed, its mechanics have to be communicated and taught to collaborators and your future self. Having a framework that is well documented and generalises well to a wide range of analytic applications means reducing everyone's cognitive effort and time spent teaching and learning different templates.
The core audience of this book is the data science project lead who is seeking to adopt a framework for building analytic applications in R.
The book also aims to lighten the education of the project lead and team members by concentrating attention on a few essential analytic application components, the R startup process, and procedures of reproducibility. The numbers of the sections may be used as references in code review and induction of team members.
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
Data scientists use R to develop analytic applications (or analytic apps for short), such as machine learning systems, dashboards and reports, to measure and improve the performance of a subject matter. If you are an experienced data scientist, then chances are you have already carried out several analytic projects that ended up with working analytic apps. Ask yourself, when the next analytic project commences, what would guide my development process? Am I able to outline my approach to others? Am I able to share my design principles (if any) with others?
Many data scientists have either faint or no answers to these questions. Nevertheless, they jump straight into coding the analytic app while skipping over its design. Perhaps in the absence of a repeatable approach, data scientists are looking to keep themselves busy by doing something they know, i.e. programming. However, busyness does not imply productivity. In fact, there is a hidden kind of danger in ignoring up-front design. The evolving nature of analytic projects needs a design that accommodates future changes driven by circumstances, clients' needs and data.
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
We demonstrate the knowledge in this book by emulating an analytic project that includes the development and deployment of a machine learning system. This chapter presents the background, requirements, and deliverables of the analytic project.
The example is based on an entry-level Kaggle competition for predicting house prices[^kaggle-competition]. Using the same dataset, we build an end-to-end machine learning system that solves a pseudo-real-life problem. The project includes two parts: training a prediction model on historical data, and making predictions on unseen data.
# Figure 1 - Predict the house sale price
knitr::include_graphics("./images/for-sale-ad.jpg", dpi = NA)
Originally, the house prices competition serves as a playground for data scientists to hone their skills. The goal of the competition is to predict sales prices for 1,459 houses. The competition features a dataset with 81 columns in which 73 are identifiable house attributes, termed amenities. Each participant submits, i.e., uploads to Kaggle, a table with 1,459 rows and two columns: Id and SalePrice. Kaggle evaluates the accuracy of each solution, based on RMSE, and ranks each participant on the leader-board in comparison to other competitors' submissions scores.
See the full dataset description in the appendix.
# Call the salient-amenities report on the `tables` object, which is defined
# outside this section (presumably it prints the table shown here — confirm).
tables$report_salient_amenities()
To emulate a real-world analytic project, we transmute the competition setup into a business case setup. The deliverable of the business case is an automated valuation model (AVM). The AVM provides house price predictions for other real estate agency tools, such as a website or a real estate management system. Some major differences in needs between the original setting of the data science competition and the business case include:
Consider the following factors:
Both of these factors require the analytic application to be re-runnable when the need arises. The first factor involves a trigger that periodically, say once a month, calls for a price update of all active listings. The second factor triggers a call when a new listing is to be added to the agency's database.
Similarly to the competition, the solution is iterative.
[^kaggle-competition]: You can read more about it on Kaggle.
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
Take a look at the following two main.R versions:
# main.R

## Load house prices data
temp_env <- new.env()
load(
  file = usethis::proj_path("data", "train_set", ext = "rda"),
  envir = temp_env
)
data <- temp_env$train_set
rm(temp_env)

## Plot important amenities
# NOTE(review): `mpg` and `cyl` are mtcars columns, not house-price amenities;
# confirm which dataset this snippet is meant to illustrate.
par(mfrow = c(1, 2))
plot(data$mpg, data$cyl, type = "p")
boxplot(mpg ~ cyl, data = data)
# main.R
data <- load_house_prices_data()
plot_important_amenities(data)
Both code snippets have the same intent: they load the house prices dataset and provide plots for data exploration. Notice how much cognitive load the first snippet requires as the human brain compiles the code. The situation aggravates further if the reader is not familiar with the R syntax. In contrast, the second snippet hides the implementation details by wrapping the details in functions. The high-level abstractions communicate that there are two events happening in main.R: loading and plotting of data. As a result, the code is simpler to read and understand.
#' Load the mtcars dataset
#'
#' @return The `mtcars` data frame from the `datasets` package.
load_mtcars_data <- function() {
  datasets::mtcars
}
Furthermore, the second snippet is easier to maintain and develop. These
qualities are desirable in any software application. This is because software
systems evolve as programmers acquire new knowledge and understanding of the
problem the software is set to solve. Importantly, analytic applications are the
result of scattershot and serendipitous explorations. As data scientists discover
new findings and signals, they incorporate them in the analytic application. For
example, plot_important_attributes
original implementation is:
#' Plot the important attributes
#'
#' Draws two panels side by side: a scatter plot of `mpg` against `cyl` and a
#' box plot of `mpg` grouped by `cyl`.
#'
#' @param data Data containing `mpg` and `cyl` columns (e.g. `mtcars`).
#' @return The value of the final `boxplot()` call.
plot_important_attributes <- function(data) {
  # Put the caller's panel layout back once the plots are drawn
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)

  plot(data$mpg, data$cyl, type = "p")
  boxplot(mpg ~ cyl, data = data)
}
Imagine a data scientist discovers, whether by client feedback or other means,
that there is another important attribute to include in the data analysis.
Moreover, to reduce confusion, the data scientist decides to modify the plots
aesthetics such that they contain titles. Then, plot_important_attributes
mutates to:
#' Plot the important attributes
#'
#' Draws three titled panels side by side: scatter plots of `mpg` against `hp`
#' and of `mpg` against `cyl`, and a box plot of `mpg` grouped by `cyl`.
#'
#' @param data Data containing `mpg`, `hp` and `cyl` columns (e.g. `mtcars`).
#' @return The value of the final `boxplot()` call.
plot_important_attributes <- function(data) {
  # Put the caller's panel layout back once the plots are drawn
  old_par <- par(mfrow = c(1, 3))
  on.exit(par(old_par), add = TRUE)

  plot(data$mpg, data$hp, type = "p", main = "MPG ~ Horsepower")
  plot(data$mpg, data$cyl, type = "p", main = "MPG ~ Cylinders")
  boxplot(mpg ~ cyl, data = data, main = "MPG ~ Cylinders")
}
With encapsulation, the data scientist was able to modify and extend the rendered plots without making any changes in main.R.
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
Any analytic project needs access to data. The location (electronic address) or mechanism from which data originates or can be obtained is called a data source. This definition suggests data sources vary in their origins and their storage mechanisms. First, data may originate organically, from measuring instruments that record real data, or be generated synthetically to fit a specific purpose. Second, data may be stored inside a database, a flat-file, a website, or an API.
The choice of what data source to use in our analytic application is taken with regards to two major qualities: accessibility and usefulness. Given an existing data source, accessibility is the degree of how easy it is to obtain (access to) it. Accessibility is mainly impacted by company policy (regulation, security protocols, etc.) and the availability and willingness of people we are dependent on, mainly database administrators and data engineers, to respond to our data access requests. Usefulness is the degree of having practical worth or applicability. Usefulness is determined by circumstances combined with the goal we are trying to achieve.
Among the two data source qualities: accessibility and usefulness, the latter should lead our choice of a data source. The following sections describe common goals and circumstances of analytic projects and propose appropriate data sources.
In chapter 1, we decomposed the process of building analytic applications into two realms: software development and data analytics. Following that decomposition, we argue that having two data sources, one for each realm is appropriate for many analytic projects. We begin by identifying the role of data in each realm. Then, we infer what are the desirable or necessary attributes of the data source in each realm. Finally, we demonstrate the proposed data sources in our running example.
We start with data analytics as it requires the least explanations. Data analytics is the science of analyzing data in order to make conclusions about that information, i.e. actionable insights. Examples of data analytics activities include data exploration and (predictive) model selection. Obviously, when modelling the real-world, there is no substitute for real data. Nevertheless, a representative sample of the data is sufficient and even preferable in many analytic projects.
In our context, a representative sample is a subset of a data source that seeks to accurately reflect the data source. Representative samples tend to contain a subspace of the domain or a snapshot of the domain at a given time. That means that if we turn the representative sample into actionable insights, then adding more spatial/temporal data to the representative sample would give similar insights.
Representative samples can be stored in flat files of modest sizes. The file size attribute is desirable. Loading gigabytes of information every time an R session begins is both slow and memory-intensive. Moreover, applying transformations on the loaded data may result in copies that would consume more memory.
The following code demonstrates a synthetic data source implementation for the running example. Notice that the synthetic dataset captures our current knowledge of the house prices domain. It has an identifier column (Id), salient features (Bedrooms and Bathrooms) that must exist in the real application, and a target variable (SalePrice). The content of the dataset holds true information to the extent necessary for software development activities. For example, as you would expect, SalePrice is a numeric variable with non-negative values. However, there is no attempt (at this stage) to fake values resembling prices in the real housing market.
#' Generate a synthetic house-prices dataset
#'
#' Builds a tibble with an identifier column, two count-valued features and a
#' numeric target. Values are random draws, so call `set.seed()` first for
#' reproducible output.
#'
#' @param n Number of rows to generate (0 yields an empty tibble).
#' @return A tibble with columns `Id` (zero-padded to width 6), `Bedrooms` and
#'   `Bathrooms` (Poisson draws, lambda = 3) and `SalePrice` (uniform on
#'   [0, 1]).
generate_synthetic_data <- function(n) {
  # seq_len() keeps n = 0 well-defined, unlike 1:n which yields c(1, 0)
  dummy_id <- function() stringr::str_pad(seq_len(n), width = 6, pad = "0")
  dummy_numeric <- function() runif(n = n, min = 0, max = 1)
  dummy_integer <- function() rpois(n = n, lambda = 3)

  # Returned directly (not assigned) so the result is visible to the caller
  tibble::tibble(
    Id = dummy_id(),
    Bedrooms = dummy_integer(),
    Bathrooms = dummy_integer(),
    SalePrice = dummy_numeric()
  )
}

set.seed(1356)
generate_synthetic_data(n = 100) %>% head()
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
Import
The section uses the following packages:
# Packages used in this section, in alphabetical order
pkgs_names <- sort(c("DBI", "dbplyr", "RSQLite"))
# `bookdown` here is a project helper object defined outside this section
install_string <- bookdown$print_install.packages_command(pkgs_names)
bookdown$print_package_info_table(pkgs_names)
You can install them all at once by running:
There are three important functions:
# Local aliases for the three DBI functions used in this section
dbConnect <- DBI::dbConnect
dbWriteTable <- DBI::dbWriteTable
dbDisconnect <- DBI::dbDisconnect
Using generate_synthetic_data
from chapter 4
# Open an in-memory SQLite database. RSQLite takes the location through
# `dbname`; `path` is not one of its arguments.
conn <- dbConnect(drv = RSQLite::SQLite(), dbname = ":memory:")

# Populate it with synthetic training and test tables
dbWriteTable(conn, name = "train_set", value = generate_synthetic_data(n = 100))
dbWriteTable(conn, name = "test_set", value = generate_synthetic_data(n = 1000))
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
# Reference the `code_book` table through the connection exposed by the
# project's HousePricesData object, then print its first column one entry
# per line.
code_book <- dplyr::tbl(HousePricesData$new()$con, "code_book")
code_book_lines <- code_book %>% dplyr::pull()
cat(stringr::str_c(code_book_lines), sep = "\n")
# Load the package under development (helpers and internals included),
# evaluated from the project root.
withr::with_dir(
  usethis::proj_get(),
  pkgload::load_all(helpers = TRUE, export_all = TRUE)
)

# global options ----------------------------------------------------------
options(tidyverse.quiet = TRUE)

# bookdown ----------------------------------------------------------------
options(tinytex.verbose = TRUE)
# Post-process the LaTeX output: swap the code-chunk background colour
# definition from RGB 248,248,248 to RGB 230,230,230.
options(bookdown.post.latex = function(lines) {
  rep_pos <- grep(
    "\\definecolor{shadecolor}{RGB}{248,248,248}", lines,
    fixed = TRUE
  )
  lines[rep_pos] <- "\\definecolor{shadecolor}{RGB}{230,230,230}"
  lines
})

# knitr -------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = FALSE,
  results = "markup",
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  comment = "#>",
  fig.retina = 0.8, # figures are either vectors or 300 dpi diagrams
  dpi = 300,
  # Single out.width entry (it was listed twice, as '100%' and "70%").
  # NOTE(review): confirm 70% is the intended default width.
  out.width = "70%",
  fig.align = "center",
  fig.width = 6,
  fig.asp = 0.618, # 1 / phi
  fig.show = "hold",
  eval.after = "fig.cap" # so captions can use link to demos
)

# index.R metadata --------------------------------------------------------
# Accessors that read book metadata from the project's DESCRIPTION file.
index <- new.env()
DESCRIPTION <- desc::description$new(usethis::proj_get())
index$title <- function() DESCRIPTION$get_field("Title")
index$subtitle <- function() DESCRIPTION$get_field("Subtitle")
index$description <- function() DESCRIPTION$get_field("Description")
index$author <- function() {
  paste(unlist(DESCRIPTION$get_author())[c("given", "family")], collapse = " ")
}
index$date <- base::Sys.Date
index$url <- function() DESCRIPTION$get_urls()
index$cover_image <- function() NULL # "images/cover.png"
index$favicon <- function() "favicon.ico"
index$github_repo <- function() "Kiwi-Random-House/R-Projects"
`r if (knitr::is_html_output()) '
'`
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.