inst/doc/SQLDataFrame.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----getPackage, eval=FALSE---------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("SQLDataFrame")

## ----getDevel, eval=FALSE-----------------------------------------------------
#  BiocManager::install("Liubuntu/SQLDataFrame")

## ----Load, message=FALSE, eval = TRUE-----------------------------------------
library(SQLDataFrame)
library(DBI)

## ----constructor_conn---------------------------------------------------------
dbfile <- system.file("extdata/test.db", package = "SQLDataFrame")
conn <- DBI::dbConnect(DBI::dbDriver("SQLite"), dbname = dbfile)
obj <- SQLDataFrame(conn = conn, dbtable = "state",
                    dbkey = "state")

## ----constructor_credentials--------------------------------------------------
obj1 <- SQLDataFrame(dbname = dbfile, type = "SQLite",
                     dbtable = "state", dbkey = "state")
all.equal(obj, obj1)

## -----------------------------------------------------------------------------
obj
dim(obj)
colnames(obj)

## -----------------------------------------------------------------------------
slotNames(obj)
dbtable(obj)
dbkey(obj)
connSQLDataFrame(obj)

## ----methods------------------------------------------------------------------
dim(obj)
dimnames(obj)
length(obj)
ROWNAMES(obj)

## -----------------------------------------------------------------------------
dbtable(obj)
aa <- rbind(obj[1:5, ], obj[6:10, ])
aa
dbtable(aa)  ## message
bb <- saveSQLDataFrame(aa, dbname = tempfile(fileext=".db"),
                       dbtable = "aa")
connSQLDataFrame(bb)
dbtable(bb)

## -----------------------------------------------------------------------------
mtc <- tibble::rownames_to_column(mtcars)[,1:6]
filename <- file.path(tempdir(), "mtc.csv")
write.csv(mtc, file= filename, row.names = FALSE)
aa <- makeSQLDataFrame(filename, dbkey = "rowname", sep = ",",
                       overwrite = TRUE)
aa
connSQLDataFrame(aa)
dbtable(aa)

## -----------------------------------------------------------------------------
connSQLDataFrame(obj)
dbtable(obj)
obj1 <- saveSQLDataFrame(obj, dbname = tempfile(fileext = ".db"),
                        dbtable = "obj_copy")
connSQLDataFrame(obj1)
dbtable(obj1)

## -----------------------------------------------------------------------------
head(obj[[1]])
head(obj[["region"]])
head(obj$size)

## -----------------------------------------------------------------------------
head(obj[["state"]])

## ---- subsetting--------------------------------------------------------------
obj[1:3, 1:2]
obj[c(TRUE, FALSE), c(TRUE, FALSE), drop=FALSE]
obj[1:3, "population", drop=FALSE]
obj[, "population"]  ## realized column value

## -----------------------------------------------------------------------------
rnms <- ROWNAMES(obj)
obj[c("Alabama", "Colorado"), ]

## -----------------------------------------------------------------------------
obj1 <- SQLDataFrame(conn = conn, dbtable = "state",
                     dbkey = c("region", "population"))
rnms <- ROWNAMES(obj1)
obj1[c("South:3615.0", "West:365.0"), ]

## -----------------------------------------------------------------------------
obj[1]
obj["region"]

## -----------------------------------------------------------------------------
obj1 %>% filter(division == "South Atlantic" & size == "medium")

## -----------------------------------------------------------------------------
obj1 %>% mutate(p1 = population/10, s1 = size)

## -----------------------------------------------------------------------------
dbfile1 <- system.file("extdata/test.db", package = "SQLDataFrame")
con1 <- DBI::dbConnect(dbDriver("SQLite"), dbname = dbfile1)
dbfile2 <- system.file("extdata/test1.db", package = "SQLDataFrame")
con2 <- DBI::dbConnect(dbDriver("SQLite"), dbname = dbfile2)
ss1 <- SQLDataFrame(conn = con1, dbtable = "state",
                    dbkey = c("state"))
ss2 <- SQLDataFrame(conn = con2, dbtable = "state1",
                    dbkey = c("state"))
ss11 <- ss1[sample(5), ]
ss21 <- ss2[sample(10, 5), ]

## ---- eval=FALSE--------------------------------------------------------------
#  obj1 <- union(ss11, ss21)
#  obj1  ## reordered by the "dbkey()"

## -----------------------------------------------------------------------------
obj2 <- rbind(ss11, ss21) 
obj2  ## keeping the original order by updating the row index

## -----------------------------------------------------------------------------
ss12 <- ss1[1:10, 1:2]
ss22 <- ss2[6:15, 3:4]
left_join(ss12, ss22)
inner_join(ss12, ss22)
semi_join(ss12, ss22)
anti_join(ss12, ss22)

## ---- mysql-------------------------------------------------------------------
library(RMySQL)
ensbConn <- dbConnect(dbDriver("MySQL"),
                        host="genome-mysql.soe.ucsc.edu",
                        user = "genome",
                        dbname = "xenTro9")
enssdf <- SQLDataFrame(conn = ensbConn,
                       dbtable = "xenoRefGene",
                       dbkey = c("name", "txStart"))
enssdf1 <- enssdf[1:20, 1:2]
enssdf2 <- enssdf[11:30,3:4]
res <- left_join(enssdf1, enssdf2)

## ----bigquery, eval=FALSE-----------------------------------------------------
#  library(bigrquery)
#  bigrquery::bq_auth()  ## use this to authorize bigrquery in the
#                        ## browser.
#  bqConn <- DBI::dbConnect(dbDriver("bigquery"),
#                        project = "bigquery-public-data",
#                        dataset = "human_variant_annotation",
#                        billing = "") ## if not previous provided
#                                      ## authorization, must specify a
#                                      ## project name that was already
#                                      ## linked with Google Cloud with
#                                      ## billing info.
#  sdf <- SQLDataFrame(conn = bqConn, dbtable = "ncbi_clinvar_hg38_20180701")
#  sdf[1:5, 1:5]
#  sdf %>% select(GENEINFO)
#  sdf %>% filter(GENEINFO == "PYGL:5836")
#  sdf %>% filter(reference_name == "21")

## -----------------------------------------------------------------------------
sessionInfo()

Try the SQLDataFrame package in your browser

Any scripts or data that you put into this service are public.

SQLDataFrame documentation built on Nov. 29, 2020, 2:01 a.m.