# Global knitr chunk options: echo the code, hide warnings and messages.
# Spell out FALSE: the `F` shorthand is an ordinary variable that can be
# reassigned.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

ROMOPOmics

Installation

# Shown for installation reference only; this chunk is not run.
devtools::install_github("AndrewC160/ROMOPomics", force = TRUE)

Description

ROMOPOmics standardizes the metadata of high-throughput assays together with the associated patient clinical data. The package provides a framework for standardizing these datasets and a pipeline that converts them into a SQL-friendly database that users can easily access. After installing the R package from its GitHub repository, users specify a data directory and a mask file describing how to map their data's fields into a common data model. The resulting standardized data tables are then written to a SQLite database, making the dataset easy to share and to interoperate with.

# Embed the code-flow figure stored under man/figures.
knitr::include_graphics("man/figures/romopomics_code_flow.png")

Package overview

See our vignette ROMOPOmics

Use Cases

library(ROMOPOmics)

TCGA data

# Locate and load the data model definition bundled with the package.
dm_file <- system.file(
  "extdata", "OMOP_CDM_v6_0_custom.csv",
  package = "ROMOPOmics", mustWork = TRUE
)
dm <- loadDataModel(master_table_file = dm_file)

# Example TCGA BRCA input tables bundled with the package.
tcga_files <- list(
  brca_clinical = system.file(
    "extdata", "brca_clinical.csv",
    package = "ROMOPOmics", mustWork = TRUE
  ),
  brca_mutation = system.file(
    "extdata", "brca_mutation.csv",
    package = "ROMOPOmics", mustWork = TRUE
  )
)

# One mask per input file; a mask describes how that file's fields map into
# the data model.
msks <- list(
  brca_clinical = loadModelMasks(
    system.file(
      "extdata", "brca_clinical_mask.csv",
      package = "ROMOPOmics", mustWork = TRUE
    )
  ),
  brca_mutation = loadModelMasks(
    system.file(
      "extdata", "brca_mutation_mask.csv",
      package = "ROMOPOmics", mustWork = TRUE
    )
  )
)

# Read each input file through its matching mask.
omop_inputs <- list(
  brca_clinical = readInputFile(
    input_file = tcga_files$brca_clinical,
    data_model = dm,
    mask_table = msks$brca_clinical
  ),
  brca_mutation = readInputFile(
    input_file = tcga_files$brca_mutation,
    data_model = dm,
    mask_table = msks$brca_mutation
  )
)

# Combine the per-file tables and build a SQLite database in the session's
# temporary directory. The path is passed by name (`sql_db_file`), matching
# the ATAC-seq example further down.
db_inputs <- combineInputTables(input_table_list = omop_inputs)
omop_db <- buildSQLDBR(
  omop_tables = db_inputs,
  sql_db_file = file.path(tempdir(), "TCGA.sqlite")
)
DBI::dbListTables(omop_db)

# Close the connection once finished; the `omop_db` name is reused by the
# next example, which would otherwise leave this connection open.
DBI::dbDisconnect(omop_db)

ATAC-seq data

# Locate and load the data model definition bundled with the package.
dm_file <- system.file(
  "extdata", "OMOP_CDM_v6_0_custom.csv",
  package = "ROMOPOmics", mustWork = TRUE
)
dm <- loadDataModel(master_table_file = dm_file)

# Mask describing how the GSE60682 fields map into the data model.
msk_file <- system.file(
  "extdata", "GSE60682_standard_mask.csv",
  package = "ROMOPOmics", mustWork = TRUE
)
msks <- loadModelMasks(msk_file)

# Read the input table; it is read with `transpose_input_table = TRUE`.
in_file <- system.file(
  "extdata", "GSE60682_standard.csv",
  package = "ROMOPOmics", mustWork = TRUE
)
omop_inputs <- readInputFile(
  input_file = in_file,
  data_model = dm,
  mask_table = msks,
  transpose_input_table = TRUE
)

# Combine the tables and build a SQLite database in the session's temporary
# directory.
db_inputs <- combineInputTables(input_table_list = omop_inputs)
omop_db <- buildSQLDBR(
  omop_tables = db_inputs,
  sql_db_file = file.path(tempdir(), "GSE60682_sqlDB.sqlite")
)
DBI::dbListTables(omop_db)

# Close the connection once finished querying the database.
DBI::dbDisconnect(omop_db)

GEO accessions from Stevens et al. 2013

library(Biobase)

# Accession identifiers used for the Stevens et al. 2013 example.
gse_ids <- c(
  "GSE9006", "GSE26440", "GSE11504", "TABM666",
  "GSE6011", "GSE37721", "GSE20307", "GSE20436"
)

# Fetch every series, passing the session's temporary directory as `data_dir`.
stevens_gse_lst <- fetch_geo_series(
  gse_ids,
  data_dir = tempdir()
)

# Show the `merged_metadata` element of the result.
stevens_gse_lst$merged_metadata


AndrewC160/ROMOPOmics documentation built on Jan. 27, 2021, 6:57 p.m.