# Set the default knitr chunk options used when rendering this README.
knitr::opts_chunk$set(
  # Show each chunk's source and its printed output in a single block.
  collapse = TRUE,
  # Prefix put in front of every line of printed output.
  comment = "#",
  # Path prefix for figure files generated by the chunks.
  fig.path = "man/figures/README-",
  # Width at which figures are displayed in the rendered document.
  out.width = "100%"
)

sparklio

{sparklio} extends the {sparklyr} IO interface by providing additional utility functions for querying data, dropping tables and collecting data into R.

Installation

You can install the development version of {sparklio} from GitHub with:

# If {remotes} is not installed yet, install it first:
# install.packages("remotes")
# Then install {sparklio} from its GitHub repository.
remotes::install_github("nathaneastwood/sparklio")

Usage

# Attach {sparklio} so its functions can be called without a namespace prefix.
library(sparklio)
# Open a connection to a local Spark instance; `sc` is reused in the examples
# below.
sc <- sparklyr::spark_connect(master = "local")
# Copy the `mtcars` data set into Spark and keep the returned R reference.
# The examples below also refer to this table by the name "mtcars".
mtcars_spark <- sparklyr::copy_to(sc, mtcars)

Collect Data From Spark Into R

# Collect by Spark table name; the connection `sc` must be supplied
spark_collect_data(x = "mtcars", sc = sc)


# Or collect using the R reference object returned by `copy_to()`
spark_collect_data(x = mtcars_spark)

Querying Data In Spark

# Lazily query data in Spark and always return a `tibble`
# (no `type` is given here, so the function's default is used)
spark_query_data(
  sc = sc,
  qry = "SELECT mpg FROM mtcars"
)


# We can specify whether we want to cache the data: with `type = "compute"`
# the result is given the name passed as `name`, so it can afterwards be
# collected by that name
qry <- spark_query_data(
  sc = sc,
  qry = "SELECT mpg FROM mtcars",
  name = "mpg_mtcars",
  type = "compute"
)
spark_collect_data(x = "mpg_mtcars", sc = sc)


# Or if we want to collect the data into R itself, use `type = "collect"`
spark_query_data(
  sc = sc,
  qry = "SELECT mpg FROM mtcars",
  type = "collect"
)

Dropping Data From Spark

# We can drop single tables by name
spark_drop_table(sc = sc, "mtcars")
# Or drop all tables at once
# (first copy two data sets into Spark so that there are tables to drop)
mtcars_spark <- sparklyr::copy_to(sc, mtcars)
airquality_spark <- sparklyr::copy_to(sc, airquality)
spark_drop_all_tables(sc = sc)


nathaneastwood/sparklio documentation built on March 16, 2021, 7:51 p.m.