## inst/doc/Introduction.R

## ---- include = FALSE----------------------------------------------------
## Chunk defaults: `eval` takes the value returned by
## AnVIL::gcloud_exists(), so chunks that do not set `eval` themselves are
## evaluated only when that call returns TRUE. `collapse` and `cache` are
## switched on for all chunks unless a chunk overrides them.
knitr::opts_chunk$set(
    eval = AnVIL::gcloud_exists(),
    collapse = TRUE,
    cache = TRUE
)

## Set the `width` option (output line width) to 75.
options(width = 75)

## ---- eval = FALSE-------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager", repos = "https://cran.r-project.org")
#  BiocManager::install("AnVIL")

## ---- message =FALSE, eval = TRUE, cache = FALSE-------------------------
library(AnVIL)

## ---- eval = FALSE-------------------------------------------------------
#  dir(file.path(Sys.getenv("GCLOUD_SDK_PATH"), "bin"), "^(gcloud|gsutil)$")
#  ## [1] "gcloud" "gsutil"

## ---- eval = TRUE--------------------------------------------------------
## Chunk evaluation in this vignette defaults to the value returned by
## gcloud_exists(); chunks are fully evaluated when it is TRUE. Show that
## value here.
AnVIL::gcloud_exists()

## ---- eval = FALSE-------------------------------------------------------
#  AnVIL::install("GenomicFeatures")

## ---- eval = FALSE-------------------------------------------------------
#  add_libpaths("~/my/project")

## ------------------------------------------------------------------------
#  gcloud_account() # authentication account
#  gcloud_project() # billing project information

## ------------------------------------------------------------------------
#  gcloud_cmd("projects", "list") %>%
#      readr::read_table() %>%
#      filter(startsWith(PROJECT_ID, "anvil"))

## ------------------------------------------------------------------------
#  gcloud_help("projects")

## ------------------------------------------------------------------------
#  src <- "gs://genomics-public-data/1000-genomes/"

## ------------------------------------------------------------------------
#  gsutil_ls(src)
#  
#  other <- paste0(src, "other")
#  gsutil_ls(other, recursive = TRUE)
#  
#  sample_info <- paste0(src, "other/sample_info/sample_info.csv")
#  gsutil_stat(sample_info)

## ------------------------------------------------------------------------
#  fl <- tempfile()
#  gsutil_cp(sample_info, fl)
#  
#  csv <- readr::read_csv(fl, guess_max = 5000L)
#  csv

## ------------------------------------------------------------------------
#  pipe <- gsutil_pipe(sample_info, "rb")
#  readr::read_csv(pipe, guess_max = 5000L) %>%
#      dplyr::select("Sample", "Family_ID", "Population", "Gender")

## ------------------------------------------------------------------------
#  destination <- tempfile()
#  stopifnot(dir.create(destination))
#  source <- paste0(src, "other/sample_info")
#  
#  ## dry run
#  gsutil_rsync(source, destination)
#  
#  gsutil_rsync(source, destination, dry = FALSE)
#  dir(destination, recursive = TRUE)
#  
#  ## nothing to synchronize
#  gsutil_rsync(source, destination, dry = FALSE)
#  
#  ## one file requires synchronization
#  unlink(file.path(destination, "README"))
#  gsutil_rsync(source, destination, dry = FALSE)

## ---- echo = FALSE, cache = FALSE----------------------------------------
#  knitr::include_graphics('images/AnVIL-Workspace-Data.png')

## ---- include = FALSE, cache = FALSE-------------------------------------
#  avworkspace_namespace("pathogen-genomic-surveillance")
#  avworkspace_name("COVID-19")

## ------------------------------------------------------------------------
#  avworkspace_namespace()
#  avworkspace_name()

## ------------------------------------------------------------------------
#  ## N.B.: IT MAY NOT BE NECESSARY TO SET THESE WHEN ON ANVIL
#  avworkspace_namespace("pathogen-genomic-surveillance")
#  avworkspace_name("COVID-19")

## ------------------------------------------------------------------------
#  avtables()
#  sample <- avtable("sample")
#  sample

## ------------------------------------------------------------------------
#  sample %>%
#      select(name, contains("fasta")) %>%
#      filter(!is.na(final_assembly_fasta))

## ---- eval = FALSE-------------------------------------------------------
#  mtcars %>%
#      mutate(cyl = factor(cyl)) %>%
#      avtable_import()

## ---- eval = FALSE-------------------------------------------------------
#  ## editable copy of '1000G-high-coverage-2019' workspace
#  avworkspace("bioconductor-rpci-anvil/1000G-high-coverage-2019")
#  sample <-
#      avtable("sample") %>%                               # existing table
#      mutate(set = sample(head(LETTERS), nrow(.), TRUE))  # arbitrary groups
#  sample %>%                                   # new 'participant_set' table
#      avtable_import_set("participant", "set", "participant")
#  sample %>%                                   # new 'sample_set' table
#      avtable_import_set("sample", "set", "name")

## ------------------------------------------------------------------------
#  avdata()

## ------------------------------------------------------------------------
#  bucket <- avbucket()
#  bucket

## ------------------------------------------------------------------------
#  avfiles_ls()

## ---- eval = FALSE-------------------------------------------------------
#  ## requires workspace ownership
#  uri <- avbucket()                             # discover bucket
#  bucket <- file.path(uri, "mtcars.tab")
#  write.table(mtcars, gsutil_pipe(bucket, "w")) # write to bucket

## ---- eval = FALSE-------------------------------------------------------
#  ## backup all files and folders in the current working directory
#  avfiles_backup(getwd(), recursive = TRUE)
#  
#  ## backup all files in the current directory
#  avfiles_backup(dir())
#  
#  ## backup all files to gs://<avbucket()>/scratch/
#  avfiles_backup(dir(), paste0(avbucket(), "/scratch"))

## ------------------------------------------------------------------------
#  terra <- Terra()

## ------------------------------------------------------------------------
#  terra

## ------------------------------------------------------------------------
#  terra %>% tags("Status")

## ------------------------------------------------------------------------
#  terra$status
#  terra$status()

## ------------------------------------------------------------------------
#  status <- terra$status()
#  class(status)

## ------------------------------------------------------------------------
#  str(status)

## ------------------------------------------------------------------------
#  lst <- status %>% as.list()
#  lengths(lst)
#  lengths(lst$systems)
#  str(lst$systems)

## ------------------------------------------------------------------------
#  .MyService <- setClass("MyService", contains = "Service")
#  
#  MyService <-
#      function()
#  {
#      .MyService(Service(
#          "myservice",
#          host = "api.firecloud.org",
#          api_url = "https://api.firecloud.org/api-docs.yaml",
#          authenticate = FALSE
#      ))
#  }

## ----sessionInfo, echo=FALSE---------------------------------------------
#  sessionInfo()

## Try the AnVIL package in your browser
##
## Any scripts or data that you put into this service are public.
##
## AnVIL documentation built on Nov. 8, 2020, 4:57 p.m.