Description Usage Arguments Examples
Use src_JDBC
to connect to an existing database with a JDBC driver,
and tbl
to connect to tables within that database.
If you are running a local database, leave all parameters set as
their defaults to connect. If you're connecting to a remote database,
ask your database administrator for the values of these variables.
1 2 3 4 |
driver |
location of the JDBC driver. |
url |
JDBC connection url |
... |
Included for compatibility with the generic, but otherwise ignored. |
src |
a sqlite src created with |
from |
Either a string giving the name of table in database, or
|
create |
if |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | ## Not run:
# Connection basics ---------------------------------------------------------
# To connect to a database first create a src:
my_db <- src_sqlite(path = tempfile(), create = TRUE)
# Then reference a tbl within that src
my_tbl <- tbl(my_db, "my_table")
## End(Not run)
# Here we'll use the Lahman database: to create your own local copy,
# run lahman_sqlite()
if (require("RSQLite") && has_lahman("sqlite")) {
# Methods -------------------------------------------------------------------
batting <- tbl(lahman_sqlite(), "Batting")
dim(batting)
colnames(batting)
head(batting)
# Data manipulation verbs ---------------------------------------------------
filter(batting, yearID > 2005, G > 130)
select(batting, playerID:lgID)
arrange(batting, playerID, desc(yearID))
summarise(batting, G = mean(G), n = n())
mutate(batting, rbi2 = 1.0 * R / AB)
# note that all operations are lazy: they don't do anything until you
# request the data, either by `print()`ing it (which shows the first ten
# rows), by looking at the `head()`, or `collect()` the results locally.
system.time(recent <- filter(batting, yearID > 2010))
system.time(collect(recent))
# Group by operations -------------------------------------------------------
# To perform operations by group, create a grouped object with group_by
players <- group_by(batting, playerID)
group_size(players)
# sqlite doesn't support windowed functions, which means that only
# grouped summaries are really useful:
summarise(players, mean_g = mean(G), best_ab = max(AB))
# When you group by multiple level, each summarise peels off one level
per_year <- group_by(batting, playerID, yearID)
stints <- summarise(per_year, stints = max(stint))
filter(ungroup(stints), stints > 3)
summarise(stints, max(stints))
# Joins ---------------------------------------------------------------------
player_info <- select(tbl(lahman_sqlite(), "Master"), playerID, hofID,
birthYear)
hof <- select(filter(tbl(lahman_sqlite(), "HallOfFame"), inducted == "Y"),
hofID, votedBy, category)
# Match players and their hall of fame data
inner_join(player_info, hof)
# Keep all players, match hof data where available
left_join(player_info, hof)
# Find only players in hof
semi_join(player_info, hof)
# Find players not in hof
anti_join(player_info, hof)
# Arbitrary SQL -------------------------------------------------------------
# You can also provide sql as is, using the sql function:
batting2008 <- tbl(lahman_sqlite(),
sql("SELECT * FROM Batting WHERE YearID = 2008"))
batting2008
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.