#' @title
#' Run Setup
#'
#' @param conn_fun PARAM_DESCRIPTION, Default: 'pg13::local_connect()'
#' @param civic_schema PARAM_DESCRIPTION, Default: 'civic'
#' @param log_table_name PARAM_DESCRIPTION, Default: 'setup_civic_log'
#' @param civic_version PARAM_DESCRIPTION, Default: as.character(Sys.time())
#' @param verbose PARAM_DESCRIPTION, Default: TRUE
#' @param render_sql PARAM_DESCRIPTION, Default: TRUE
#' @rdname run_setup
#' @export
#' @importFrom glue glue glue_collapse single_quote
#' @importFrom pg13 send ls_tables query renderRowCount table_exists write_table
#' @importFrom readr read_tsv cols write_csv
#' @importFrom purrr map set_names
#' @importFrom tibble enframe tibble
#' @importFrom dplyr mutate
#' @importFrom tidyr pivot_wider
run_setup <-
function(conn_fun = "pg13::local_connect()",
civic_schema = "civic",
log_table_name = "setup_civic_log",
civic_version = as.character(Sys.time()),
verbose = TRUE,
render_sql = TRUE) {
# Download files
dl_links <-
list(
GENE = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv",
VARIANT = "https://civicdb.org/downloads/nightly/nightly-VariantSummaries.tsv",
EVIDENCE = "https://civicdb.org/downloads/nightly/nightly-ClinicalEvidenceSummaries.tsv",
VARIANT_GROUP = "https://civicdb.org/downloads/nightly/nightly-VariantGroupSummaries.tsv",
ASSERTION = "https://civicdb.org/downloads/nightly/nightly-AssertionSummaries.tsv")
ddls <-
list(
GENE =
"
DROP TABLE IF EXISTS {civic_schema}.gene;
CREATE TABLE {civic_schema}.gene (
gene_id integer,
gene_civic_url character varying(255),
name character varying(255),
entrez_id integer,
description text,
last_review_date character varying(255),
is_flagged character varying(255)
);",
VARIANT =
"
DROP TABLE IF EXISTS {civic_schema}.variant;
CREATE TABLE {civic_schema}.variant (
variant_id integer,
variant_civic_url character varying(255),
gene character varying(255),
entrez_id integer,
variant character varying(255),
summary text,
variant_groups character varying(255),
chromosome character varying(255),
start integer,
stop integer,
reference_bases character varying(255),
variant_bases character varying(255),
representative_transcript character varying(255),
ensembl_version integer,
reference_build character varying(255),
chromosome2 character varying(255),
start2 integer,
stop2 integer,
representative_transcript2 character varying(255),
variant_types character varying(255),
hgvs_expressions character varying(255),
last_review_date character varying(255),
civic_variant_evidence_score numeric,
allele_registry_id character varying(255),
clinvar_ids character varying(255),
variant_aliases character varying(255),
assertion_ids text,
assertion_civic_urls character varying(255),
is_flagged character varying(255)
);
",
EVIDENCE =
"
DROP TABLE IF EXISTS {civic_schema}.evidence;
CREATE TABLE {civic_schema}.evidence (
gene character varying(255),
entrez_id integer,
variant character varying(255),
disease character varying(255),
doid character varying(255),
phenotypes character varying(255),
drugs character varying(255),
drug_interaction_type character varying(255),
evidence_type character varying(255),
evidence_direction character varying(255),
evidence_level character varying(255),
clinical_significance character varying(255),
evidence_statement text,
citation_id integer,
source_type character varying(255),
asco_abstract_id character varying(255),
citation character varying(255),
nct_ids character varying(255),
rating integer,
evidence_status character varying(255),
evidence_id integer,
variant_id integer,
gene_id integer,
chromosome character varying(255),
start integer,
stop integer,
reference_bases character varying(255),
variant_bases character varying(255),
representative_transcript character varying(255),
chromosome2 varchar(10),
start2 integer,
stop2 integer,
representative_transcript2 character varying(255),
ensembl_version integer,
reference_build character varying(255),
variant_summary text,
variant_origin character varying(255),
last_review_date character varying(255),
evidence_civic_url character varying(255),
variant_civic_url character varying(255),
gene_civic_url character varying(255),
is_flagged character varying(255)
);
",
VARIANT_GROUP =
"
DROP TABLE IF EXISTS {civic_schema}.variant_group;
CREATE TABLE {civic_schema}.variant_group (
variant_group_id integer,
variant_group_civic_url character varying(255),
variant_group character varying(255),
description text,
last_review_date character varying(255),
is_flagged character varying(255)
);
",
ASSERTION =
"
DROP TABLE IF EXISTS {civic_schema}.assertion;
CREATE TABLE {civic_schema}.assertion (
gene character varying(255),
entrez_id integer,
variant character varying(255),
disease character varying(255),
doid character varying(255),
phenotypes character varying(312),
drugs character varying(255),
assertion_type character varying(255),
assertion_direction character varying(255),
clinical_significance character varying(255),
acmg_codes character varying(255),
amp_category character varying(255),
nccn_guideline character varying(255),
nccn_guideline_version varchar(10),
regulatory_approval character varying(255),
fda_companion_test character varying(255),
assertion_summary character varying(255),
assertion_description character varying(1376),
assertion_id integer,
evidence_item_ids text,
variant_id integer,
gene_id integer,
last_review_date character varying(255),
assertion_civic_url character varying(255),
evidence_items_civic_url character varying(1024),
variant_civic_url character varying(255),
gene_civic_url character varying(255),
is_flagged character varying(255)
);
"
)
for (i in seq_along(dl_links)) {
dl_link <- dl_links[[i]]
dest_table <- names(dl_links)[i]
ddl <- glue::glue(ddls[[dest_table]])
pg13::send(conn_fun = conn_fun,
sql_statement = ddl,
verbose = verbose,
render_sql = render_sql)
tmp_tsv <- tempfile(fileext = ".tsv")
download.file(dl_link,
destfile = tmp_tsv)
Sys.sleep(3)
x <- readr::read_tsv(file = tmp_tsv,
col_types = readr::cols(.default = "c"))
tmp_csv <- tempfile(fileext = ".csv")
readr::write_csv(x = x,
file = tmp_csv,
quote = "all")
pg13::send(
conn_fun = conn_fun,
sql_statement =
glue::glue("COPY {civic_schema}.{dest_table} FROM '{tmp_csv}' NULL AS 'NA' CSV HEADER QUOTE E'\"';")
)
unlink(tmp_tsv)
unlink(tmp_csv)
}
civic_tables <-
pg13::ls_tables(conn_fun = conn_fun,
schema = "civic")
row_counts <-
civic_tables %>%
purrr::map(function(x) pg13::query(conn_fun = conn_fun,
sql_statement = pg13::renderRowCount(schema = "civic",
tableName = x))) %>%
purrr::map(unlist) %>%
purrr::map(unname) %>%
purrr::set_names(civic_tables) %>%
tibble::enframe(name = "civic_table",
value = "row_count") %>%
dplyr::mutate(row_count = unlist(row_count)) %>%
tidyr::pivot_wider(names_from = civic_table,
values_from = row_count)
colnames(row_counts) <-
sprintf("%s_ROWS", colnames(row_counts))
log_df <-
cbind(
tibble::tibble(
sc_datetime = as.character(Sys.time()),
civic_version = civic_version,
civic_schema = civic_schema,
),
row_counts
)
if (pg13::table_exists(conn_fun = conn_fun,
schema = "public",
table_name = log_table_name)) {
row_values <- glue::glue_collapse(glue::single_quote(unname(unlist(log_df))), sep = ",")
sql_statement <-
glue::glue("INSERT INTO public.{log_table_name} VALUES ({row_values});")
pg13::send(conn_fun = conn_fun,
sql_statement = sql_statement,
verbose = verbose,
render_sql = render_sql)
} else {
pg13::write_table(conn_fun = conn_fun,
schema = "public",
table_name = log_table_name,
data = log_df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.