R/pathway_vectorization.R

Defines functions pathway_vectorization

Documented in pathway_vectorization

#' @name pathway_vectorization
#' @title Vectorization of pathways
#'
#' @description Vectorizes pathways by building a pathway-gene profile: one
#'   row per gene, one column per pathway, each cell holding the fold change
#'   of that gene between two groups of samples.
#'
#' @details The fold change of a gene is the mean expression over the samples
#'   of \code{group1} divided by the mean expression over the samples of
#'   \code{group2}. A cell is set to that fold change only when the gene
#'   belongs to the pathway and is present in the row names of
#'   \code{expression_profile}; every other cell is set to 1 (no change).
#'   Rows whose sum is not a finite number greater than zero (for example
#'   because a fold change is \code{NA}, \code{NaN} or \code{Inf}) are removed
#'   from the result. If a row name occurs more than once in
#'   \code{expression_profile}, the first occurrence is used.
#'
#' @param expression_profile Gene expression profile (numeric matrix or data
#'   frame). Rownames are entrez IDs. Colnames are sample names.
#'
#' @param condition A vector with the group label of every sample, in the same
#'   order as the columns of \code{expression_profile}. For example,
#'   \code{condition <- c("type1", "type1", "type2", "type2")} for four
#'   samples.
#'
#' @param group1 First group to be compared (numerator of the fold change).
#'
#' @param group2 Second group to be compared (denominator of the fold change).
#'
#' @param pathway_info A list containing pathway information to be compared,
#'   which can be generated by the merge_pathway function. The first element
#'   holds the pathway names, the second the gene IDs, and the third a list of
#'   gene sets indexed by pathway name.
#'
#' @usage pathway_vectorization(expression_profile,condition,group1,group2,pathway_info)
#'
#' @return A data frame which contains pathway-gene information. Colnames are
#'   pathways' names, rownames are genes' names and the value of each element
#'   is the fold change between the two groups (1 when the gene is not part of
#'   the pathway or is not measured).
#'
#' @examples
#' data(example)
#' group1 <- "Treatment"
#' group2 <- "Model"
#' group3 <- "Control"
#' Treatment_profile <- pathway_vectorization(
#'   expression_profile, condition, group1, group2, pathway_info
#' )
#' Model_profile <- pathway_vectorization(
#'   expression_profile, condition, group2, group3, pathway_info
#' )
#'
#' @export
pathway_vectorization <- function(expression_profile, condition, group1,
                                  group2, pathway_info) {
  gene_names <- rownames(expression_profile)
  if (is.null(gene_names)) {
    stop("`expression_profile` must have gene IDs as row names.",
         call. = FALSE)
  }
  if (length(condition) != ncol(expression_profile)) {
    # A shorter logical mask would be silently recycled over the columns.
    stop("`condition` must have one entry per column of ",
         "`expression_profile`.", call. = FALSE)
  }

  in_group1 <- condition %in% group1
  in_group2 <- condition %in% group2
  if (!any(in_group1) || !any(in_group2)) {
    stop("Both `group1` and `group2` must match at least one sample in ",
         "`condition`.", call. = FALSE)
  }

  # drop = FALSE keeps a one-sample group as a one-column table instead of
  # collapsing it to a plain vector.
  mean1 <- rowMeans(as.matrix(expression_profile[, in_group1, drop = FALSE]))
  mean2 <- rowMeans(as.matrix(expression_profile[, in_group2, drop = FALSE]))
  fold_change <- mean1 / mean2

  pathway_names <- pathway_info[[1]]
  gene_id <- pathway_info[[2]]
  gene_sets <- pathway_info[[3]]

  # Position of every requested gene in the expression data; NA marks genes
  # that were not measured. match() picks the first hit for duplicated names.
  expression_row <- match(gene_id, gene_names)
  is_measured <- !is.na(expression_row)
  gene_fc <- unname(fold_change[expression_row])

  # Start from "no change" everywhere, then fill in one pathway per column.
  profile_matrix <- matrix(1, nrow = length(gene_id),
                           ncol = length(pathway_names))
  for (j in seq_along(pathway_names)) {
    in_pathway <- is_measured & gene_id %in% gene_sets[[pathway_names[[j]]]]
    profile_matrix[in_pathway, j] <- gene_fc[in_pathway]
  }

  # Row sums decide which genes survive: NA/NaN, infinite and non-positive
  # sums are all discarded.
  row_total <- rowSums(profile_matrix)
  keep <- is.finite(row_total) & row_total > 0

  profile <- as.data.frame(profile_matrix)
  rownames(profile) <- gene_id
  colnames(profile) <- pathway_names

  # drop = FALSE keeps a data frame even when there is only one pathway.
  profile[keep, , drop = FALSE]
}
github-gs/QPA documentation built on Sept. 11, 2019, 9:46 a.m.