######################################## MAIN_combine_clean_biopax_unifyids ########################################
MAIN_combine_clean_biopax_unifyids <-
  function(new_biopax_list
           ,pw_df_path = "default"
           ,exclude_id_pattern = "inxight_pathways"
  ){
    #' @title
    #' MAIN -- Combine and Clean List of BioPAX Objects
    #' @description
    #' Merges BioPAX objects, unifies component IDs, replaces certain pathway ids with proper \code{inxight_pathway} ones,
    #' and cleans BioPAX objects from html tags and garbled symbols.
    #' @details
    #' Wrapper for \code{unify_biopax_ids} and \code{biopax_from_dt}.
    #' ID unification is conducted by appending a class of a component to its throughout number.
    #' E.g. for a component of class Protein, #11 from the top of the table, ID will be "Protein11".
    #'
    #' Side effect: a report of pathway ids that occur more than once per source is written to the
    #' current working directory as \code{<date>_duplicated_ids.xlsx} (best effort) and
    #' \code{<date>_duplicated_ids.txt}.
    #' @param new_biopax_list List of BioPAX objects. Element names are matched against the
    #' \code{Source} column of the pathway table; each element must have a \code{dt} component.
    #' @param pw_df_path Path to the source table with pathway, id, name, and source values.
    #' \code{"default"} uses the spreadsheet bundled with the RIGbiopax package.
    #' @param exclude_id_pattern Exclude pathways with such pattern in their IDs from ID unification.
    #' @return The value returned by the final \code{unify_biopax_ids} call on the combined BioPAX object.
    #' @author
    #' Ivan Grishagin

    #fail early with a clear message instead of an obscure subscript error
    if(length(new_biopax_list) == 0){
      stop("MAIN_combine_clean_biopax_unifyids: new_biopax_list is empty.")
    }

    #establish initial key parameters, if not supplied by user
    if(identical(pw_df_path, "default")){
      pw_df_path <-
        system.file("extdata"
                    ,"pathways_matched_to_sources_current_version.xlsx"
                    ,package = "RIGbiopax")
      #system.file returns "" when the file (or the package) cannot be found
      if(!nzchar(pw_df_path)){
        stop("MAIN_combine_clean_biopax_unifyids: default pathway table was not found in package RIGbiopax.")
      }
    }

    #load pathway spreadsheet
    pw_df <-
      read_excel_astext(path = pw_df_path
                        #,col_types = rep("text",11)
                        ,sheet = 1) %>%
      filter(!is.na(toxdb.Pathway.ID)
             ,!is.na(biopax.Pathway.ID)) %>%
      #NOTE(review): the original listed toxdb.Pathway.Name twice here;
      #the redundant entry is dropped (same resulting columns).
      #Confirm that a different column was not intended.
      dplyr::select(toxdb.Pathway.ID
                    ,toxdb.Pathway.Name
                    ,biopax.Pathway.ID
                    ,Source) %>%
      #make up id-source combinations
      #to avoid excluding the same ids that come from different sources
      mutate(bpid_src = paste0(biopax.Pathway.ID
                               ,Source)
             ,status = "included") %>%
      arrange(Source
              ,biopax.Pathway.ID
              ,toxdb.Pathway.ID) %>%
      unique

    #find duplicated ids and report them in a file
    #(second and later occurrences of an id-source combination are excluded)
    is_duplicated <-
      duplicated(pw_df$bpid_src)
    pw_df$status[is_duplicated] <-
      "excluded"
    bp_dupl_ids <-
      pw_df$biopax.Pathway.ID[is_duplicated]
    dupl_report <-
      filter(pw_df
             ,biopax.Pathway.ID %in% bp_dupl_ids)

    #xlsx report is best effort; the tab-delimited one is always attempted
    try(openxlsx::write.xlsx(dupl_report
                             ,paste0(Sys.Date()
                                     ,"_duplicated_ids.xlsx")
                             ,col.names = TRUE
                             ,row.names = FALSE))
    write.table(dupl_report
                ,paste0(Sys.Date()
                        ,"_duplicated_ids.txt")
                ,quote = FALSE
                ,sep = "\t"
                ,row.names = FALSE
                ,col.names = TRUE)

    #consider only unique ids
    pw_df <-
      pw_df %>%
      filter(status == "included")

    combined_biopax <-
      seq_along(new_biopax_list) %>%
      #first, unify ids for each biopax dt
      #and label them with first two letters of biopax source
      lapply(FUN = function(lindex){
        source_name <-
          names(new_biopax_list)[lindex]
        source_dt <-
          new_biopax_list[[lindex]]$dt

        #get pathway ids for a given source
        temp_pw_df <-
          pw_df %>%
          filter(Source %in% source_name)

        #any ids missing? Should not be, so it's a precaution.
        missing_ids <-
          temp_pw_df$biopax.Pathway.ID[!temp_pw_df$biopax.Pathway.ID %in% source_dt$id]
        if(length(missing_ids) > 0){
          print(head(source_dt))
          stop("MAIN_combine_clean_biopax_unifyids: in the biopax list element "
               ,source_name
               ," not all desired pathways are present.\n"
               ,"The following ids are not present:\n"
               #collapse explicitly: stop() pastes its arguments with no separator
               ,paste(missing_ids
                      ,collapse = ", "))
        }

        #for those ids, replace biopax pathway ids with inxight pathway ids
        old_pw_ids <-
          temp_pw_df$biopax.Pathway.ID
        new_pw_ids <-
          temp_pw_df$toxdb.Pathway.ID

        temp_unif_df <-
          source_dt
        #ids are replaced both where they identify a component
        #and where they are referenced from another component
        temp_unif_df$id <-
          temp_unif_df$id %>%
          mapvalues(from = old_pw_ids
                    ,to = new_pw_ids)
        temp_unif_df$property_attr_value <-
          temp_unif_df$property_attr_value %>%
          mapvalues(from = old_pw_ids
                    ,to = new_pw_ids)

        temp_unif_df <-
          temp_unif_df %>%
          unify_biopax_ids(idtag = substr(tolower(source_name)
                                          ,start = 1
                                          ,stop = 2)
                           ,exclude_id_pattern = exclude_id_pattern
                           ,exclude_class = "Pathway")
        return(temp_unif_df)
      }) %>%
      do.call(rbind
              ,.) %>%
      unique %>%
      #some components have notfound in their class -> fix those
      internal_fix_NOTFOUND %>%
      #make biopax
      biopax_from_dt %>%
      #remove deadend references
      remove_deadend_refs %>%
      #remove duplicate biopax components
      remove_duplicate_biopax_components %>%
      #annotate with gene ids
      add_symbols_entrezids2biopax %>%
      #unify (re-label) biopax ids
      unify_biopax_ids(exclude_id_pattern = exclude_id_pattern
                       ,exclude_class = "Pathway")

    return(combined_biopax)
  }
######################################## MAIN_combine_clean_biopax_unifyids ########################################
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.