#
# BEGIN_COPYRIGHT
#
# PARADIGM4 INC.
# This file is part of the Paradigm4 Enterprise SciDB distribution kit
# and may only be used with a valid Paradigm4 contract and in accord
# with the terms and conditions specified by that contract.
#
# Copyright (C) 2011 - 2017 Paradigm4 Inc.
# All Rights Reserved.
#
# END_COPYRIGHT
#
############################################################
# Helper functions for using YAML schema object
yaml_to_dim_str = function(dims, for_auto_chunking=FALSE){
if (!for_auto_chunking) {
paste(
names(dims), "=",
sapply(dims, function(x) {paste(x$start, ":",
ifelse(x$end == Inf, "*", x$end), ",", x$chunk_interval, ",",
x$overlap, sep = "")}),
sep = "", collapse = ", ")
} else {
paste0(names(dims), collapse = ",")
}
}
yaml_to_attr_string = function(attributes, compression_on = FALSE){
if (!compression_on) {
paste(names(attributes), ":", attributes, collapse=" , ")
} else {
paste(names(attributes), ":", attributes, "COMPRESSION 'zlib'", collapse=" , ")
}
}
#' mandatory fields (internal function)
#'
#' return mandatory fields while registering an entity
#'
#' 'DATASET': attributes
#' other secure metadata entities (e.g. 'INDIVIDUAL', 'BIOSAMPLE', 'BIOSAMPLE'): 'dataset_id' and attributes
#' public metadata entities (e.g. 'PROJECT'): attributes
#' feature entities: attributes
get_mandatory_fields_for_register_entity = function(arrayname){
entitynm = strip_namespace(arrayname)
attrs = names(.ghEnv$meta$L$array[[entitynm]]$attributes)
attrs = attrs[!(attrs %in% c('created', 'updated'))]
attrs
zz = get_entity_info()
entity_class = zz[zz$entity == entitynm, ]$class
if (entity_class == 'metadata') {
# Metadata entities that do not have to supply `dataset_id` at registration time
# PROJECT, DATASET, VARIANT_KEY, ...
if (!(entitynm %in% c(.ghEnv$meta$arrProject,
.ghEnv$meta$arrDataset,
.ghEnv$meta$arrOntologyCategory,
.ghEnv$meta$arrMetadataAttrKey,
.ghEnv$meta$arrMetadataValue,
.ghEnv$meta$arrVariantKey,
.ghEnv$meta$arrChromosomeKey))) {
mandatory_fields = c('dataset_id', attrs)
} else {
mandatory_fields = attrs
}
} else if (entity_class == 'featuredata') {
# pull in dimensions that are mandatory fields
if (entitynm %in% c(.ghEnv$meta$arrFeature, .ghEnv$meta$arrFeatureSynonym)) {
# arrays in which featureset_id is a dimension but also a mandatory field
mandatory_fields = c('featureset_id', attrs)
} else if (entitynm %in% c(.ghEnv$meta$arrExomicVariant)) {
mandatory_fields = c('chromosome_key_id', 'referenceset_id', attrs)
} else {
mandatory_fields = attrs
}
} else if (entity_class %in% c('measurementdata', 'measurementdata_cache',
'metadata_index', 'featuredata_index')) {
dims = get_idname(entitynm)
dims = dims[!(dims %in% c('dataset_version'))]
mandatory_fields = c(dims, attrs)
} else if (entity_class == 'permissionsdata') {
mandatory_fields = attrs
} else {
stop("Need to cover case for class: ", entity_class)
}
mandatory_fields
}
#' mandatory fields
#'
#' return mandatory fields while registering entities
#'
#' 'DATASET': attributes
#' other secure metadata entities (e.g. 'INDIVIDUAL', 'BIOSAMPLE', 'BIOSAMPLE'): 'dataset_id' and attributes
#' public metadata entities (e.g. 'PROJECT'): attributes
#' feature entities: attributes
#'
#' @export
mandatory_fields = function(){
entitynames = get_entity_names()
l1 = sapply(entitynames, function(entitynm){get_mandatory_fields_for_register_entity(entitynm)})
names(l1) = entitynames
l1
}
#' @export
unique_fields = function(){
entitynames = get_entity_names()
l1 = sapply(entitynames, function(entitynm){
.ghEnv$meta$L$array[[entitynm]]$unique_fields
})
names(l1) = entitynames
# Check that all but measurement data classes have unique fields
entity_df = get_entity_info()
lapply(names(l1), function(entity) {
if (is.null(l1[[entity]])) {
entity_class = entity_df[entity_df$entity == entity, ]$class
if ( !( entity_class %in% c('measurementdata', 'measurementdata_cache') ) ) {
stop("unique fields were not provided for entity: ", entity)
}
}
})
lapply(l1, function(elem) {
if (is.null(elem)) {
return("MESSAGE: unique fields not relevant as metadata array")
} else {
return(elem)
}
})
}
is_entity_secured = function(entitynm){
entitynm = strip_namespace(entitynm) # extra QC
nmsp = find_namespace(entitynm)
if (is.null(nmsp)) stop("unexpected namespace output")
length(grep("public", nmsp)) == 0
}
is_entity_versioned = function(entitynm){
"dataset_version" %in% get_idname(entitynm)
}
#' is entity cached
#'
#' check if entity is cached
#'
#' if no value is provided in schema file, then the entity is potentially not cached
is_entity_cached = function(entitynm) {
val = .ghEnv$meta$L$array[[entitynm]]$cached # read from SCHEMA file
# if no value for cached, then entity is potentially not cached
ifelse(is.null(val), FALSE, val)
}
#' does infoArray exist for given entity
#'
#' flexible fields
get_entity_infoArrayExists = function(entitynm) {
status = .ghEnv$meta$L$array[[entitynm]]$infoArray
if (is.null(status)) stop("infoArray status must be present for all entities")
status
}
get_idname = function(arrayname){
local_arrnm = strip_namespace(arrayname)
idname = .ghEnv$meta$L$array[[local_arrnm]]$dims
if (class(idname) == "character") return(idname) else return(names(idname))
}
get_base_idname = function(arrayname){
entitynm = strip_namespace(arrayname)
dims = get_idname(entitynm)
if (entitynm %in% c(.ghEnv$meta$arrFeature, .ghEnv$meta$arrFeatureSynonym)) {
# featuredata arrays that have featureset_id and/or gene_symbol_id
# as dimensions for faster slicing
dims[!(dims %in% c("featureset_id", "gene_symbol_id"))]
} else if (entitynm == .ghEnv$meta$arrExomicVariant) {
dims[!(dims %in% c("referenceset_id", "chromosome_key_id"))]
} else if (entitynm != .ghEnv$meta$arrDataset) {
dims[!(dims %in% c("dataset_id", "dataset_version"))]
} else {
dims[!(dims %in% "dataset_version")]
}
}
#' @export
get_entity_names = function(data_class = NULL){
varnames = names(.ghEnv$meta)
varnames = varnames[varnames != "L"]
entities = sapply(varnames, function(nm){as.character(.ghEnv$meta[nm])})
if (!is.null(data_class)) {
matches = sapply(entities, function(entity) {
ifelse(get_entity_data_class(entity) == data_class, TRUE, FALSE)
})
entities = entities[matches]
}
entities
}
get_entity_class = function(entity) {
stopifnot(entity %in% get_entity_names())
.ghEnv$meta$L$array[[entity]]$data_class
}
get_search_by_entity = function(entity) {
entity = strip_namespace(entity)
stopifnot(entity %in% get_entity_names())
.ghEnv$meta$L$array[[entity]]$search_by_entity
}
get_int64fields = function(arrayname){
local_arrnm = strip_namespace(arrayname)
attr_types = unlist(.ghEnv$meta$L$array[[local_arrnm]]$attributes)
int64_fields = names(attr_types[which(!(attr_types %in%
c('string', 'datetime', 'int32', 'double', 'bool')))])
stopifnot(all(unique(attr_types[int64_fields]) %in% c("int64", "numeric")))
int64_fields
}
get_entity_data_class = function(entity){
.ghEnv$meta$L$array[[entity]]$data_class
}
#' Get entity id
#'
#' Get entity id from entity name
#'
#' @examples
#' get_entity_id('PROJECT') # 1002
#' get_entity_id('VARIANT') # 8002
#' get_entity_id(c('PROJECT', 'VARIANT')) # c(1002, 8002)
#'\dontrun{
#' get_entity_id(c('PROJECT', 'asdf')) # Error
#' }
get_entity_id = function(entity){
if (!all(entity %in% get_entity_names())) {
stop("The following are not valid entities: ",
pretty_print(entity[!(entity %in% get_entity_names())]))
}
if (length(entity) >= 1) {
sapply(.ghEnv$meta$L$array[entity], function(elem) elem$entity_id)
} else {
stop("Expect entity to be vector or length 1 or more")
}
}
#' Get entity name
#'
#' Get entity name from entity id
#'
#' @examples
#' get_entity_from_entity_id(1002) # 'PROJECT'
#' get_entity_from_entity_id(8002) # 'VARIANT'
#' get_entity_from_entity_id(c(1002, 8002)) # c('PROJECT', 'VARIANT')
#'\dontrun{
#' get_entity_from_entity_id(c(1002, -1)) # Error
#' }
get_entity_from_entity_id = function(entity_id) {
entity_id_lookup = get_entity_id(entity = get_entity_names())
m1 = find_matches_and_return_indices(
source = entity_id,
target = entity_id_lookup
)
if (length(m1$source_unmatched_idx) > 0) {
stop("No entity for entity id: ",
pretty_print(entity_id[m1$source_unmatched_idx]))
}
entities = names(entity_id_lookup[m1$target_matched_idx])
stopifnot(all(entities %in% get_entity_names()))
entities
}
get_delete_by_entity = function(entity) {
entity = strip_namespace(entity)
stopifnot(entity %in% get_entity_names())
.ghEnv$meta$L$array[[entity]]$delete_by_entity
}
#' @export
get_entity_info = function(){
df1 = data.frame(entity = get_entity_names(), stringsAsFactors = FALSE)
df1$entity_id = sapply(get_entity_names(), function(entity) get_entity_id(entity))
df1$class = sapply(get_entity_names(), function(entity) get_entity_class(entity))
df1$subclass = sapply(get_entity_names(), function(entity) .ghEnv$meta$L$array[[entity]]$data_subclass)
df1$search_by_entity = sapply(get_entity_names(), function(entity) get_search_by_entity(entity))
df1$delete_by_entity = sapply(get_entity_names(), function(entity) get_delete_by_entity(entity))
df1 = data.frame(apply(df1, 2, function(col) {sapply(col, function(elem) {ifelse (is.null(elem), NA, elem)})}),
stringsAsFactors = FALSE)
df1 = df1[order(df1$entity_id), ]
rownames(df1) = NULL
df1
}
#' Get list of arrays maintained by revealgenomics API
get_list_of_arrays = function() {
l1 = unlist(
lapply(
get_entity_info()$entity,
function(entity) {
infoArrayPresent = .ghEnv$meta$L$array[[entity]]$infoArray
# nmsp = .ghEnv$meta$L$array[[entity]]$namespace
if (!is.null(infoArrayPresent)) {
if (infoArrayPresent) {
return(c(
revealgenomics:::full_arrayname(entity),
paste0(revealgenomics:::full_arrayname(entity), "_INFO")
))
} else { # no info array
return(revealgenomics:::full_arrayname(entity))
}
}
}
)
)
# Append a few arrays that are maintained separately
c(l1, "gh_r_object_store.R_OBJECT_CACHE", "permissions.dataset_id")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.