#'Calculates pathway enrichment score of the pathways linked to significant metabolites
#'@param met_path dataframe of pathways mapped to significant metabolites as obtained from met_pathways
#'@param ls_path list of all the KEGG compounds linked to pathways. This can be obtained by running comp_linkedto_pathways().R or is also available
#'please use ls_path when you want to calculate HG score depending on all the compounds in KEGG linked to a pathway
#'sysdata.rda
#'@param sps species name
#'@param padj Method for p value adjustment, i.e. "fdr"
#'@param sig_metabolite_kegg_id dataframe of significantly altered metabolites for HG calculation
#'@param refmet_class dataframe of metabolite data with refmet class for a study, alternatively,
#'data obtained from rest can also be given here which is yet not associated to refmet class
#'We need this to get the number of all the metabolites detected in the study for calculating HG score.
#'@param kegg_comp_path TRUE or FALSE depending you want to use the ls_path as your background. Default is FALSE, where background set
#'is the total metabolite in a study
#'@importFrom stats p.adjust
#'@importFrom stats phyper
#'@export
#'@examples
#'kegg_es = path_enrichmentscore(met_path,sig_metabolite_kegg_id, ls_path,refmet_class,sps='hsa',padj='fdr')
path_enrichmentscore <- function(met_path,sig_metabolite_kegg_id,ls_path,refmet_class,sps, padj,kegg_comp_path=FALSE)
{
compound_count = compoundinfo(met_path, sps)
met_path_selected = met_path[,c('Metabolite', 'PATHWAY')] %>% distinct
#met_compound_count=merge(met_path_selected, compound_count, by.x = "PATHWAY", by.y = "NAME")
comp_path_count = unique(compound_count[,c("NAME","Total_no._of_comps_in_pathway")])
met_compound_count=partial_join(met_path_selected, comp_path_count, by_x = "PATHWAY", pattern_y = "NAME")
comp_path_count=comp_path_count[order(comp_path_count$NAME),]
df_met_comp_count=data.frame(table(met_compound_count[['PATHWAY']]))
df_met_comp_count2=df_met_comp_count[!df_met_comp_count$Freq==0,]
id=setdiff(df_met_comp_count2$Var1, comp_path_count$NAME)
df_met_comp_count3=df_met_comp_count2[which(!df_met_comp_count2$Var1 %in% id ),]
id2=setdiff( comp_path_count$NAME, df_met_comp_count2$Var1)
comp_path_count2= comp_path_count[which(!comp_path_count$NAME %in% id2 ),]
#levels(met_compound_count[['PATHWAY']])=(factor(met_compound_count[['PATHWAY']]))
freqtable = cbind(df_met_comp_count3, comp_path_count2)
colnames(freqtable)[2] = "No.of mets in study"
colnames(freqtable)[1] = "Pathway name"
freqtable[3] = NULL
if (kegg_comp_path==TRUE){
N=length(ls_path)### met with pathway annotations, noofcompounds_linked_pathway.R
}else{
N=nrow(refmet_class)}
### met with pathway annotations, noofcompounds_linked_pathway.R
for(i in 1:nrow(freqtable)){
k = freqtable[i,3]## count of met inthe pathway
M = freqtable[i,2]### altered met in the pathway
L = nrow(sig_metabolites_kegg_id) ## total altered met
pp<-phyper(M-1,L, N-L,k, lower.tail=FALSE)
freqtable[i,"pathway_HG p-value"] <- pp
}
freqtable$Padjust = p.adjust(freqtable$pathway_HG, padj)
return(freqtable)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.