knitr::opts_chunk$set(collapse = TRUE)
Perform and Manage 'Amazon' 'Athena' Queries
This is a 'reticulated' wrapper for the 'Python' 'boto3' 'AWS' 'Athena' client library https://boto3.readthedocs.io/en/latest/reference/services/athena.html. It requires 'Python' version 3.5+ and an 'AWS' account. Tools are also provided to execute 'dplyr' chains asynchronously.
This package requires Python >= 3.5 to be available and the boto3
Python module. The package author highly recommends setting RETICULATE_PYTHON=/usr/local/bin/python3
in your ~/.Renviron
file to ensure R + reticulate
will use the proper python version.
The following functions are implemented:
create_named_query
: Create a named query.delete_named_query
: Delete a named query.get_named_query
: Get Query Executionget_named_queries
: Get Query Execution (batch/multiple)get_query_execution
: Get Query Executionget_query_executions
: Get Query Executions (batch/multiple)get_query_results
: Get Query Resultslist_named_queries
: List Named Querieslist_query_executions
: List Query Executionsstart_query_execution
: Start Query Executionstop_query_execution
: Stop Query Executioncollect_async
: Collect Amazon Athena dplyr
query results asynchronouslydevtools::install_github("hrbrmstr/roto.athena") # OR devtools::install_git("git://gitlab.com/hrbrmstr/roto.athena")
options(width=120)
library(roto.athena) library(tidyverse) # current verison packageVersion("aws.athena")
# see recent queries x <- list_query_executions(profile = "personal") head(x$QueryExecutionIds) # get last 5 query executions y <- get_query_executions(x$QueryExecutionIds[1:5], profile = "personal") # only look at the ones that succeeded filter(y$QueryExecutions, state == "SUCCEEDED") # fire off another one! start_query_execution( query = "SELECT * FROM elb_logs LIMIT 100", database = "sampledb", output_location = "s3://aws-athena-query-results-redacted", profile = "personal" ) -> sqe
Sys.sleep(2)
# see the status get_query_execution(sqe, profile = "personal") %>% glimpse() # get the results res <- get_query_results(sqe, profile = "personal") res
dplyr
callslibrary(odbc) library(DBI) DBI::dbConnect( odbc::odbc(), driver = "/Library/simba/athenaodbc/lib/libathenaodbc_sbu.dylib", Schema = "sampledb", AwsRegion = "us-east-1", AwsProfile = "personal", AuthenticationType = "IAM Profile", S3OutputLocation = "s3://aws-athena-query-results-redacted" ) -> con elb_logs <- tbl(con, "elb_logs") mutate(elb_logs, tsday = substr(timestamp, 1, 10)) %>% filter(tsday == "2014-09-29") %>% select(requestip, requestprocessingtime) %>% collect_async( database = "sampledb", output_location = "s3://aws-athena-query-results-redacted", profile_name = "personal" ) -> id
Sys.sleep(2)
get_query_execution(id, profile = "personal") %>% glimpse() get_query_results(id, profile = "personal")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.