#' Query an entity and return the resulting dataframe, optionally including info fields
#'
#' Builds a query on the array backing `entitynm`, applies every element of
#' `semi_join_args` (or of `...`) as a successive semi-join, and either returns
#' the resulting arrayop object or downloads it as a data frame. If requested,
#' and if the schema marks the entity as having an info array, the
#' `<entitynm>_INFO` array is queried with matching filters as well.
#'
#' @param con connection object; `con$aop_connection` is used to build the arrayop expressions
#' @param pkg_schema package schema, used to look up the entity's dimensions, its array name and whether it has an info array
#' @param entitynm name of the entity to query
#' @param aop if TRUE, return the arrayop object instead of the data frame. Only possible if not returning an info array
#' @param include_info_array if TRUE, also query the info array for the entity if one exists
#' @param join_info_array if TRUE and using `include_info_array`, return a single data frame with the info array joined to the main array. Otherwise, return a list of the two dataframes
#' @param query_attributes if TRUE, filtering/joining on attributes of main array. Inferred automatically if using the `...` parameter, but needs to be set manually if using `semi_join_args`, particularly if `semi_join_args` contains any arrayop objects
#' @param semi_join_args an ordered list of things (data frames or arrayop objects) to semi_join the queried array by.
#' @param ... If `semi_join_args` is not provided, can provide items to join on here; every item must be named. `get_entity(*, dataset_id=1:10, dataset_version=1)` is equivalent to `get_entity(*, semi_join_args = list(data.frame(dataset_id=1:10), data.frame(dataset_version=1))`
#'
#' @return If `aop` is TRUE, the arrayop object for the filtered main array.
#'   Otherwise a data frame of the filtered main array; or, when an info array
#'   was queried, either the main data frame joined with the info fields
#'   (`join_info_array = TRUE`) or a list with elements named `entitynm` and
#'   `paste0(entitynm, "_INFO")`.
#'
#' @examples
#' \dontrun{
#' gs_query = revealcore:::get_entity(con, revealgenomics:::.ghEnv, "GENE_SYMBOL", gene_symbol=c("BRCA1","BRCA2"), aop = TRUE)
#' revealcore:::get_entity(con, revealgenomics:::.ghEnv, "FEATURE", semi_join_args=list(data.frame("featureset_id"=1:10), gs_query), include_info_array=TRUE, query_attributes=FALSE)
#' revealcore:::get_entity(con, revealgenomics:::.ghEnv, "FEATURE", semi_join_args=list(data.frame("featureset_id"=1:10), gs_query, data.frame("feature_type"="gene")), include_info_array=TRUE, query_attributes=TRUE, join_info_array=FALSE)
#' revealcore:::get_entity(con, revealgenomics:::.ghEnv, "FEATURE", semi_join_args=list(data.frame("featureset_id"=1:10), gs_query, data.frame("feature_type"="gene")), include_info_array=TRUE, query_attributes=TRUE, join_info_array=TRUE)
#' }
#'
#' @export
get_entity <- function(con, pkg_schema, entitynm, aop = FALSE, include_info_array = FALSE, join_info_array = TRUE, query_attributes = NULL, semi_join_args = NULL, ...) {
  # An arrayop object cannot be returned together with an info array
  stopifnot(!(aop && include_info_array))
  entity_dims <- get_idname(pkg_schema, entitynm)

  # Filters passed through `...` are only used when semi_join_args is absent
  list_args <- list(...)
  list_args <- list_args[!vapply(list_args, is.null, logical(1))]
  if (is.null(semi_join_args) && length(list_args) > 0) {
    arg_names <- names(list_args)
    if (is.null(arg_names) || any(is.na(arg_names) | arg_names == "")) {
      stop("all filters passed via `...` must be named")
    }
    # One single-column data frame per filter, named after the field it filters.
    # Indexing by position keeps duplicated names from collapsing to the first value.
    semi_join_args <- lapply(seq_along(list_args), function(i) {
      filter_df <- data.frame("a" = list_args[[i]])
      names(filter_df) <- arg_names[[i]]
      filter_df
    })
    # Attributes are being queried as soon as one filter is not a dimension
    query_attributes <- !all(arg_names %in% entity_dims)
  }
  if (!is.null(semi_join_args) && is.null(query_attributes)) {
    stop("query_attributes must be specified with semi_join_args")
  }

  arraynm <- full_arrayname(pkg_schema = pkg_schema, entitynm = entitynm, con = con)
  array_query <- scan_entity(con = con, pkg_schema = pkg_schema, entitynm = entitynm)

  # When joining on dimensions only, reduce ArrayOp filters to the fields that
  # are dimensions of the main array so no other shared field takes part in the
  # join. Data frames, and everything when querying attributes, pass through.
  join_args <- lapply(semi_join_args, function(arg) {
    if (!query_attributes && grepl("ArrayOp", class(arg)[[1]])) {
      non_dim_fields <- setdiff(arg$attrs_n_dims, entity_dims)
      # A named list of NULLs tells mutate() which fields to drop
      arg$drop_dims()$mutate(.dots = sapply(non_dim_fields, function(x) NULL))
    } else {
      arg
    }
  })

  # Apply the filters in order; a NULL or empty filter list leaves the scan as is
  main_aop <- con$aop_connection$afl_expr(array_query)
  for (arg in join_args) {
    main_aop <- main_aop$semi_join(arg)
  }
  if (aop) {
    return(main_aop)
  }

  df_main <- main_aop$to_df_all()
  has_info_array <- !(is.null(pkg_schema$array[[entitynm]]$info_array)) && pkg_schema$array[[entitynm]]$info_array
  if (!(include_info_array && has_info_array)) {
    return(df_main)
  }

  entitynm_info <- paste0(entitynm, "_INFO")
  # Point the same query at the info array. The array name is matched literally
  # so characters such as "." in the name are not read as regex syntax.
  array_query_info <- gsub(arraynm, paste0(arraynm, "_INFO"), array_query, fixed = TRUE)
  info_aop <- con$aop_connection$afl_expr(array_query_info)
  if (!is.null(semi_join_args)) {
    if (query_attributes) {
      # If we're querying by main array attributes, we can't repeat the same
      # semi_joins on the info array; restrict it to the rows found instead
      info_aop <- info_aop$semi_join(df_main[, entity_dims, drop = FALSE])
    } else {
      # Reuse the dimension-only filters that were applied to the main array
      for (arg in join_args) {
        info_aop <- info_aop$semi_join(arg)
      }
    }
  }
  df_info <- info_aop$to_df_all()

  if (!join_info_array) {
    ret <- list(df_main, df_info)
    names(ret) <- c(entitynm, entitynm_info)
    return(ret)
  }
  unpivot_join_arr_to_info(df_main, df_info, by = entity_dims)
}
# Widen a long-format info data frame (one row per key/value pair) and
# left-join it onto the main data frame using the columns named in `by`.
# The key column is `key` or `metadata_attrkey` and the value column is `val`
# or `metadata_value`, whichever are present in `info`.
# A main data frame with no rows is returned untouched.
unpivot_join_arr_to_info <- function(main, info, by) {
  if (nrow(main) == 0) {
    return(main)
  }

  info_columns <- colnames(info)
  key_columns <- intersect(info_columns, c("key", "metadata_attrkey"))
  value_columns <- intersect(info_columns, c("val", "metadata_value"))

  # Keep only the join columns plus key/value, then spread keys into columns
  long_info <- info[, c(by, key_columns, value_columns)]
  wide_info <- tidyr::pivot_wider(
    long_info,
    names_from = tidyr::all_of(key_columns),
    values_from = tidyr::all_of(value_columns)
  )
  wide_info <- as.data.frame(wide_info)

  # Columns already present in the main data frame win over info keys of the same name
  clashing_columns <- setdiff(intersect(colnames(wide_info), colnames(main)), by)
  if (length(clashing_columns) > 0) {
    wide_info[, clashing_columns] <- NULL
  }

  dplyr::left_join(main, wide_info, by = by)
}
#' Search an entity by values in info fields and return results
#'
#' Narrows the `<entitynm>_INFO` array one search criterion at a time, using
#' the `<entitynm>_METADATA_ATTRKEY` and `<entitynm>_METADATA_VALUE` entities
#' to match keys and values, then fetches the matching rows of the main array.
#'
#' @param con connection object; `con$aop_connection` and `con$db` are used
#' @param pkg_schema package schema, passed on to the entity lookup helpers
#' @param entitynm name of the main entity to search
#' @param attribute_values named list (or vector) of search criteria, applied in order. Each name is a pattern matched against `metadata_attrkey` with `%like%` (the name `".*"` skips key filtering). Each element holds one or more patterns matched against `metadata_value`; multiple patterns are combined with `|`. An empty `attribute_values` applies no info-field filter.
#' @param semi_join_args optional list of filters applied to the info array first, passed to `get_entity` with `query_attributes = FALSE`
#' @param case_sensitive if FALSE, key and value matching ignores case
#' @param join_info_array if TRUE, return a single data frame with the info fields joined to the main array. Otherwise, return a list of the two dataframes
#' @param ... further filters forwarded to `get_entity` for the info array
#'
#' @return A data frame (`join_info_array = TRUE`) or a list with elements
#'   named `entitynm` and `paste0(entitynm, "_INFO")`.
#'
#' @export
search_entity_by_info_fields <- function(con,
                                         pkg_schema,
                                         entitynm,
                                         attribute_values,
                                         semi_join_args = NULL,
                                         case_sensitive = TRUE,
                                         join_info_array = TRUE,
                                         ...) {
  # The names carry the key patterns, so they must all be present
  attrkey_patterns <- names(attribute_values)
  if (length(attribute_values) > 0 && (is.null(attrkey_patterns) || anyNA(attrkey_patterns))) {
    stop("every element of attribute_values must be named with the info key pattern to search")
  }

  entitynm_info <- paste0(entitynm, "_INFO")
  entity_dims <- get_idname(pkg_schema, entitynm)
  # TRUE when the info array is already restricted before the first criterion
  subset_step1 <- !is.null(semi_join_args) || length(list(...)) > 0

  info_aop <- get_entity(con, pkg_schema, entitynm_info, aop = TRUE, query_attributes = FALSE, semi_join_args = semi_join_args, ...)
  value_aop <- get_entity(con, pkg_schema, paste0(entitynm, "_METADATA_VALUE"), aop = TRUE)
  attrkey_aop <- get_entity(con, pkg_schema, paste0(entitynm, "_METADATA_ATTRKEY"), aop = TRUE)

  for (i in seq_along(attribute_values)) {
    if (subset_step1 || i != 1) {
      # Limit the key/value lookups to the ids still present in the info array
      value_aop <- value_aop$semi_join(info_aop$group_by("metadata_value_id")$summarize(count(metadata_value)))
      attrkey_aop <- attrkey_aop$semi_join(info_aop$group_by("metadata_attrkey_id")$summarize(count(metadata_attrkey)))
    }
    attrkey_q <- attrkey_patterns[[i]]
    value_q <- paste0(attribute_values[[i]], collapse = "|")

    # ".*" means any key, so the key filter is skipped
    if (attrkey_q == ".*") {
      attrkey_subset <- attrkey_aop
    } else {
      attrkey_subset <- attrkey_aop$filter(metadata_attrkey %like% !!attrkey_q, .ignore_case = !case_sensitive)
    }
    value_subset <- value_aop$filter(metadata_value %like% !!value_q, .ignore_case = !case_sensitive)

    # Info rows whose key and value both match this criterion
    matching_info <- info_aop$semi_join(attrkey_subset)$semi_join(value_subset)

    # Keep every info row of the entities that matched: reduce the matches to
    # the main entity's dimensions (a named list of NULLs drops the other fields)
    non_dim_fields <- setdiff(info_aop$attrs_n_dims, entity_dims)
    info_aop <- info_aop$semi_join(matching_info$drop_dims()$mutate(.dots = sapply(non_dim_fields, function(x) NULL)))
  }

  df_info <- info_aop$to_df_all()
  main_scan <- scan_entity(pkg_schema, entitynm, con = con)
  if (nrow(df_info) > 0) {
    df_main <- con$aop_connection$afl_expr(main_scan)$semi_join(unique(df_info[, entity_dims, drop = FALSE]))$to_df_all()
  } else {
    # No matches: query zero rows of the main array instead of semi-joining
    df_main <- iquery(con$db, paste0("limit(", main_scan, ",0)"), TRUE)
  }

  if (join_info_array) {
    return(unpivot_join_arr_to_info(df_main, df_info, by = entity_dims))
  }
  ret <- list(df_main, df_info)
  names(ret) <- c(entitynm, entitynm_info)
  ret
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.