# Default chunk options for this vignette: echo the code of each chunk and
# pass message = FALSE so package messages are not included in the output.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE
)
library(AnvilDataModels)
This vignette illustrates how to validate a set of TSV files against a data model and then import them into AnVIL data tables.
First, we read in the data model from a JSON file.
# Locate the example data model JSON file shipped with the package.
json <- system.file(
  "extdata", "data_model.json",
  package = "AnvilDataModels"
)

# Parse the JSON into a data model object. The surrounding parentheses make
# the assignment print its value, so the model is displayed in the vignette.
(model <- json_to_dm(json))
The data model can be exported to DBML for display.
# Write the DBML representation of the data model to a temporary file,
# show its first 18 lines, then remove the temporary file.
tmp <- tempfile()
json_to_dbml(json, tmp)
readLines(tmp, n = 18)
unlink(tmp)
An AnVIL user would have uploaded data files to the AnVIL workspace bucket, so we copy some example files there to illustrate.
# Names of the example tables; each has a matching "<name>.tsv" file in the
# package's extdata directory.
table_names <- c("subject", "phenotype", "sample", "sample_set", "file")
files <- system.file(
  "extdata", paste0(table_names, ".tsv"),
  package = "AnvilDataModels"
)

# Copy each example file to the bucket returned by AnVIL::avbucket().
bucket <- AnVIL::avbucket()
lapply(files, AnVIL::gsutil_cp, bucket)
Next, we stream the contents of those files directly into R using pipes.
# Build the bucket path of each TSV file and open a pipe to it in "rb" mode.
bucket_files <- paste0(bucket, "/", table_names, ".tsv")
bucket_pipes <- lapply(bucket_files, AnVIL::gsutil_pipe, "rb")

# Name the pipes after their tables before handing them to
# read_data_tables().
names(bucket_pipes) <- table_names
tables <- read_data_tables(bucket_pipes)
# Not connected to AnVIL when creating the vignette, so read the example
# files directly from the installed package instead of from the bucket.
table_names <- c("subject", "phenotype", "sample", "sample_set", "file")
files <- system.file(
  "extdata", paste0(table_names, ".tsv"),
  package = "AnvilDataModels"
)
tables <- read_data_tables(files, table_names = table_names, quiet = TRUE)
We can check that the files we imported match the data model.
# Run each check of the imported tables against the data model; every call
# is a separate top-level statement so its result is shown in the vignette.
check_table_names(tables, model)
check_column_names(tables, model)
check_column_types(tables, model)
check_primary_keys(tables, model)
check_foreign_keys(tables, model)
Since all the checks passed, we can import the data to workspace data tables.
# Import each non-set table into the workspace, one call per table, passing
# the data model and overwrite = TRUE.
anvil_import_table(tables$subject, "subject", model, overwrite = TRUE)
anvil_import_table(tables$phenotype, "phenotype", model, overwrite = TRUE)
anvil_import_table(tables$sample, "sample", model, overwrite = TRUE)
anvil_import_table(tables$file, "file", model, overwrite = TRUE)
Importing sets requires a different function.
# Set tables are imported with anvil_import_set() rather than
# anvil_import_table().
anvil_import_set(
  tables$sample_set,
  "sample_set",
  overwrite = TRUE
)
It can be convenient to have a set containing all samples, so we create one and add it to the `sample_set` table.
# Create a set from the sample table with create_set_all() and import it
# into the sample_set table.
all <- create_set_all(tables$sample, "sample")
anvil_import_set(all, "sample_set", overwrite = TRUE)

# Display the resulting table. avtable() is namespace-qualified, like the
# other AnVIL calls in this vignette, because only AnvilDataModels is
# attached with library().
AnVIL::avtable("sample_set")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.