R/internal.R

Defines functions avgSP SA_module_M parseRelation ACS_ADS_DE ARS_to_size textMine writeTextOut writeTextOut TextMine scatter Relocate Split Est_mean E_tot fisher gsa.fisher pADS_pathway ADS_pathway pACS_pathway ACS_pathway perm_pathway margin_pathway pADS_global ADS_global pACS_global ACS_global perm_global permDS EDS DS permDSG permDSF permDSY EDSG EDSF EDSY DSG DSF DSY permCS ECS CS permCSG permCSF permCSY ECSG ECSF ECSY CSG CSF CSY SelectGamma PtoZ check.compatibility check.pData check.groupData check.rawData

##########################
###### Check functions ###
##########################

check.rawData <- function(data){
  G <- nrow(data)
  N <- ncol(data)
  if(!is.numeric(data)) {
    stop("expression data not numeric")
  }
  if(is.null(row.names(data))) {
    stop("gene symbol missing")
  }
  if(N <= 3) {
    stop("too few samples")
  }
  if(sum(duplicated(row.names(data)))>0){
  	stop("duplicate gene symbols")
  }
}

check.groupData <- function(group){
  l <- nlevels(group)
  if( l != 2 ) {
    stop("not a two-class comparison")
  }
}

check.pData <- function(pData){
  G <- nrow(pData)
  if(!is.numeric(pData)) {
    stop("pvalue data not numeric")
  }
  if(is.null(row.names(pData))) {
    stop("gene symbol missing")
  }
  if(ncol(pData) <2) {
    stop("missing either p-value or effect size")
  }
}


check.compatibility <- function(data, group, case.label, ctrl.label){
  G <- nrow(data)
  N1 <- ncol(data)
  N2 <- length(group)
  if(N1 != N2) {
    stop("expression data and class label have unmatched sample size")
  }
  if(!all(group %in% c(case.label,ctrl.label))){
    stop("including class labels other than the case and control")
  }
  if(sum(group==case.label) <= 1 ||sum(group==ctrl.label) <= 1){
    stop("not enough samples in either case or control group")
  }
}

##########################
###### BayesP part ###
##########################

PtoZ <- function(p2, lfc) {
  sgn <- sign(lfc)
  z <- ifelse(sgn>0, qnorm(p2/2), qnorm(p2/2,lower.tail = F))
  return(z)
}

SelectGamma <- function(p){
  ## Gamma is DE proportion  = 1-pi0
  m <- length(p)
  lambda <- seq(0,0.95,by=0.01)
  pi0 <- sapply(lambda, function(x) sum(p>x)/(m*(1-x))  )

  # fit a natural cubic spline
  library(splines)
  dat <- data.frame(pi0=pi0, lambda=lambda)
  lfit <- lm(pi0 ~ ns(lambda, df = 3), data=dat)
  pi0hat <- predict(lfit, data.frame(lambda=1))
  gamma <- 1 - pi0hat
  return(gamma)
}

##################################
###### ACS scores ################
##################################

CSY <- function(d1,d2,index1){
  Sens <- calcSensC(d1[index1,],d2[index1,])
  Spec <- calcSpecC(d1[-index1,],d2[-index1,])

  CS <- Sens + Spec - 1
  if(is.nan(CS)) {
    CS <- 0
  }
  return(CS)
}

CSF <- function(d1,d2,index1,index2){
  Sens <- calcSensC(d1[index1,],d2[index1,])
  Prec <- calcPrecC(d1[index2,],d2[index2,])

  CS <- (2*Sens*Prec)/(Sens+Prec)
  if(is.nan(CS)) {
    CS <- 0
  }
  return(CS)
}

CSG <- function(d1,d2,index1,index2){
  Sens <- calcSensC(d1[index1,],d2[index1,])
  Spec <- calcSpecC(d1[-index1,],d2[-index1,])

  CS <- sqrt(Sens*Spec)
  if(is.nan(CS)) {
    CS <- 0
  }
  return(CS)
}


ECSY <- function(d1,d2){
  ESens <- calcESensC(d1,d2)
  ESpec <- calcESpecC(d1,d2)

  ECS <- ESens + ESpec - 1
  if(is.nan(ECS)) {
    ECS <- 0
  }
  return(ECS)
}

ECSF <- function(d1,d2){
  ESens <- calcESensC(d1,d2)
  EPrec <- calcEPrecC(d1,d2)

  ECS <- (2*ESens*EPrec)/(ESens+EPrec)
  if(is.nan(ECS)) {
    ECS <- 0
  }
  return(ECS)
}

ECSG <- function(d1,d2){
  ESens <- calcESensC(d1,d2)
  ESpec <- calcESpecC(d1,d2)

  ECS <- sqrt(ESens*ESpec)
  if(is.nan(ECS)) {
    ECS <- 0
  }
  return(ECS)
}

permCSY <- function(d1,d2){
  Sens <- calcSensC(d1,d2)
  Spec <- calcSpecC(d1,d2)

  permCS <- Sens + Spec - 1
  if(is.nan(permCS)) {
    permCS <- 0
  }
  return(permCS)
}

permCSF <- function(d1,d2){
  Sens <- calcSensC(d1,d2)
  Prec <- calcPrecC(d1,d2)

  permCS <- (2*Sens*Prec)/(Sens+Prec)
  if(is.nan(permCS)) {
    permCS <- 0
  }
  return(permCS)
}

permCSG <- function(d1,d2){
  Sens <- calcSensC(d1,d2)
  Spec <- calcSpecC(d1,d2)

  permCS <- sqrt(Sens*Spec)
  if(is.nan(permCS)) {
    permCS <- 0
  }
  return(permCS)
}


CS <- function(dat1,dat2,deIndex1,deIndex2,measure="Fmeasure"){
  ## same for both global and pathway
  if(measure=="youden"){
    CS <- CSY(dat1,dat2,deIndex1)
  } else if(measure=="Fmeasure"){
    CS <- CSF(dat1,dat2,deIndex1,deIndex2)
  } else if(measure=="geo.mean"){
    CS <- CSG(dat1,dat2,deIndex1)
  }
  return(CS)
}


ECS <- function(dat1,dat2,measure="Fmeasure"){
  ## same for both global and pathway
  if(measure=="youden"){
    ECS <- ECSY(dat1,dat2)
  } else if(measure=="Fmeasure"){
    ECS <- ECSF(dat1,dat2)
  } else if(measure=="geo.mean"){
    ECS <- ECSG(dat1,dat2)
  }
  return(ECS)
}

permCS <- function(dat1,dat2,measure="Fmeasure"){
  ## same for both global and pathway
  if(measure=="youden"){
    pemrCS <- permCSY(dat1,dat2)
  } else if(measure=="Fmeasure"){
    permCS <- permCSF(dat1,dat2)
  } else if(measure=="geo.mean"){
    permCS <- permCSG(dat1,dat2)
  }
  return(permCS)
}


##################################
###### ADS scores ################
##################################


DSY <- function(d1,d2,index1){
  Sens <- calcSensD(d1[index1,],d2[index1,])
  Spec <- calcSpecD(d1[-index1,],d2[-index1,])

  DS <- Sens + Spec - 1
  if(is.nan(DS)) {
    DS <- 0
  }
  return(DS)
}

DSF <- function(d1,d2,index1,index2){
  Sens <- calcSensD(d1[index1,],d2[index1,])
  Prec <- calcPrecD(d1[index2,],d2[index2,])

  DS <- (2*Sens*Prec)/(Sens+Prec)
  if(is.nan(DS)) {
    DS <- 0
  }
  return(DS)
}

DSG <- function(d1,d2,index1,index2){
  Sens <- calcSensD(d1[index1,],d2[index1,])
  Spec <- calcSpecD(d1[-index1,],d2[-index1,])

  DS <- sqrt(Sens*Spec)
  if(is.nan(DS)) {
    DS <- 0
  }
  return(DS)
}


EDSY <- function(d1,d2){
  ESens <- calcESensD(d1,d2)
  ESpec <- calcESpecD(d1,d2)

  EDS <- ESens + ESpec - 1
  if(is.nan(EDS)) {
    EDS <- 0
  }
  return(EDS)
}

EDSF <- function(d1,d2){
  ESens <- calcESensD(d1,d2)
  EPrec <- calcEPrecD(d1,d2)

  EDS <- (2*ESens*EPrec)/(ESens+EPrec)
  if(is.nan(EDS)) {
    EDS <- 0
  }
  return(EDS)
}

EDSG <- function(d1,d2){
  ESens <- calcESensD(d1,d2)
  ESpec <- calcESpecD(d1,d2)

  EDS <- sqrt(ESens*ESpec)
  if(is.nan(EDS)) {
    EDS <- 0
  }
  return(EDS)
}

permDSY <- function(d1,d2){
  Sens <- calcSensD(d1,d2)
  Spec <- calcSpecD(d1,d2)

  permDS <- Sens + Spec - 1
  if(is.nan(permDS)) {
    permDS <- 0
  }
  return(permDS)
}

permDSF <- function(d1,d2){
  Sens <- calcSensD(d1,d2)
  Prec <- calcPrecD(d1,d2)

  permDS <- (2*Sens*Prec)/(Sens+Prec)
  if(is.nan(permDS)) {
    permDS <- 0
  }
  return(permDS)
}

permDSG <- function(d1,d2){
  Sens <- calcSensD(d1,d2)
  Spec <- calcSpecD(d1,d2)

  permDS <- sqrt(Sens*Spec)
  if(is.nan(permDS)) {
    permDS <- 0
  }
  return(permDS)
}


DS <- function(dat1,dat2,deIndex1,deIndex2,measure="Fmeasure"){
  ## same for both global and pathway
  if(measure=="youden"){
    DS <- DSY(dat1,dat2,deIndex1)
  } else if(measure=="Fmeasure"){
    DS <- DSF(dat1,dat2,deIndex1,deIndex2)
  } else if(measure=="geo.mean"){
    DS <- DSG(dat1,dat2,deIndex1)
  }
  return(DS)
}


EDS <- function(dat1,dat2,measure="Fmeasure"){
  ## same for both global and pathway
  if(measure=="youden"){
    EDS <- EDSY(dat1,dat2)
  } else if(measure=="Fmeasure"){
    EDS <- EDSF(dat1,dat2)
  } else if(measure=="geo.mean"){
    EDS <- EDSG(dat1,dat2)
  }
  return(EDS)
}

permDS <- function(dat1,dat2,measure="Fmeasure"){
  ## same for both global and pathway
  if(measure=="youden"){
    pemrDS <- permDSY(dat1,dat2)
  } else if(measure=="Fmeasure"){
    permDS <- permDSF(dat1,dat2)
  } else if(measure=="geo.mean"){
    permDS <- permDSG(dat1,dat2)
  }
  return(permDS)
}



##################################
#### Global (expected value from marginal,
#### permutate genes to get p-value)
##################################


perm_global <- function(dat1,dat2,measure="Fmeasure",B){
  G <- nrow(dat1)

  #rawCS <- CS(dat1,dat2,deIndex1,deIndex2,measure)
  #expCS <- ECS(dat1,dat2,measure)
  #rawDS <- DS(dat1,dat2,deIndex1,deIndex2,measure)
  #expDS <- EDS(dat1,dat2,measure)

  out <- matrix(NA,B,4)
  colnames(out) <- c("permCS","permECS","permDS","permEDS")

  for(b in 1:B){
    dat1perm <- dat1[sample(1:G,G,replace = F),]
    dat2perm <- dat2[sample(1:G,G,replace = F),]
    out[b,"permCS"] <- permCS(dat1perm,dat2perm,measure)
    out[b,"permECS"] <- ECS(dat1perm,dat2perm,measure)
    out[b,"permDS"] <- permDS(dat1perm,dat2perm,measure)
    out[b,"permEDS"] <- EDS(dat1perm,dat2perm,measure)
  }
  return(out)
}

ACS_global <- function(dat1,dat2,deIndex1,deIndex2,
                       measure="Fmeasure"){
  cs <- CS(dat1,dat2,deIndex1,deIndex2,measure)
  ecs <- ECS(dat1,dat2,measure)
  acs <- (cs - ecs)/(1-ecs)
  return(acs)
}

pACS_global <- function(dat1,dat2,deIndex1,deIndex2,
                        measure="Fmeasure",acs,permOut){

  permcs <- permOut[,"permCS"]
  permecs <- permOut[,"permECS"]
  permacs <- (permcs - permecs)/(1-permecs)

  p_acs <- (sum(permacs>=acs) + 1)/(length(permacs)+1)
  return(p_acs)
}

ADS_global <- function(dat1,dat2,deIndex1,deIndex2,
                       measure="Fmeasure"){
  ds <- DS(dat1,dat2,deIndex1,deIndex2,measure)
  eds <- EDS(dat1,dat2,measure)
  ads <- (ds - eds)/(1-eds)
  return(ads)
}

pADS_global <- function(dat1,dat2,deIndex1,deIndex2,
                        measure="Fmeasure",ads,permOut){
  permds <- permOut[,"permDS"]
  permeds <- permOut[,"permEDS"]
  permads <- (permds - permeds)/(1-permeds)

  p_ads <- (sum(permads>=ads) + 1)/(length(permads)+1)
  return(p_ads)
}


##################################
#### Pathway (expected value from global,
#### permute genes to get p-value)
##################################


margin_pathway <-  function(dat1,dat2,
                            select.pathway.list,
                            measure="Fmeasure"){
  select.pathways <- names(select.pathway.list)
  data_genes <- rownames(dat1)
  pathway.size <- sapply(select.pathway.list,function(x) {
    length(intersect(data_genes,x))})
  K <- length(select.pathways)
  G <- nrow(dat1)

  out <- matrix(NA,nrow=K,ncol=2)
  rownames(out) <- select.pathways
  colnames(out) <- c("ECS","EDS")

  R <- 20 ##fairly enough

  for(k in 1:K){
    #print(k)
    ecsk <- edsk <- rep(NA,R)
    pathsizek <- pathway.size[k]
    for(j in 1:R){
      index <- sample(1:G,pathsizek,replace=F)
      dat1.select <- dat1[index,]
      dat2.select <- dat2[index,]
      ecsk[j] <- ECS(dat1.select,dat2.select,measure)
      edsk[j] <- EDS(dat1.select,dat2.select,measure)
    }
    out[k,"ECS"] <- mean(ecsk)
    out[k,"EDS"] <- mean(edsk)
  }

  return(out)

}

perm_pathway <- function(dat1,dat2,
                         select.pathway.list,
                         measure="Fmeasure",B,parallel=F,n.cores=4){

  select.pathways <- names(select.pathway.list)
  data_genes <- rownames(dat1)
  pathway.size <- sapply(select.pathway.list,function(x) {
    length(intersect(data_genes,x))})
  K <- length(select.pathways)
  G <- nrow(dat1)

  #out <- array(1,dim=c(B,K,4),dimnames=
  #list(1:B,select.pathways,
  #c("permCS","permECS","permDS","permEDS")))

  #rawCS <- CS(dat1,dat2,deIndex1,deIndex2,measure)
  #expCS <- ECS(dat1,dat2,measure)
  #rawDS <- DS(dat1,dat2,deIndex1,deIndex2,measure)
  #expDS <- EDS(dat1,dat2,measure)

  out <- array(1,dim=c(B,K,2),dimnames=
                 list(1:B,select.pathways,c("permCS","permDS")))

  for(k in 1:K){
    #print(k)
    pathsizek <- pathway.size[k]
    if(parallel == T){
      permFunc = function(b){
        dat1perm <- dat1[sample(1:G,G,replace = F),]
        dat2perm <- dat2[sample(1:G,G,replace = F),]
        index <- sample(1:G,pathsizek,replace=F)
        dat1perm.select <- dat1perm[index,]
        dat2perm.select <- dat2perm[index,]
        permCS_res <- permCS(dat1perm.select,dat2perm.select,measure)
        permDS_res <- permDS(dat1perm.select,dat2perm.select,measure)
        return(list(permCS_res = permCS_res, permDS_res = permDS_res))
      }
      out.ls = mclapply(1:B, permFunc, mc.cores = n.cores)
      for(b in 1:B){
        out[b,k,"permCS"] <- out.ls[[b]]$permCS_res
        out[b,k,"permDS"] <- out.ls[[b]]$permDS_res
      }
    }else{
      for(b in 1:B){
        dat1perm <- dat1[sample(1:G,G,replace = F),]
        dat2perm <- dat2[sample(1:G,G,replace = F),]
        index <- sample(1:G,pathsizek,replace=F)
        dat1perm.select <- dat1perm[index,]
        dat2perm.select <- dat2perm[index,]

        out[b,k,"permCS"] <- permCS(dat1perm.select,dat2perm.select,measure)
        #out[b,k,"permECS"] <- ECS(dat1perm.select,dat2perm.select,measure)
        out[b,k,"permDS"] <- permDS(dat1perm.select,dat2perm.select,measure)
        #out[b,k,"permEDS"] <- EDS(dat1perm.select,dat2perm.select,measure)

      }
    }
  }
  return(out)
}

ACS_pathway <- function(dat1,dat2,deIndex1,deIndex2,
                        select.pathway.list,
                        measure="Fmeasure",marginOut){

  select.pathways <- names(select.pathway.list)
  data_genes <- rownames(dat1)
  pathway.size <- sapply(select.pathway.list,function(x) {
    length(intersect(data_genes,x))})
  K <- length(select.pathways)
  G <- nrow(dat1)

  acs <- rep(NA,K)
  names(acs) <- select.pathways

  for(k in 1:K){
    path_genek <- select.pathway.list[[k]]
    genek <- intersect(path_genek,data_genes)
    dat1_k <- dat1[genek,]
    dat2_k <- dat2[genek,]

    if(length(intersect(names(deIndex1), genek))<=3 ){
      deIndex1_k <- 1:nrow(dat1_k)
    } else {
      deIndex1_k <- which(rownames(dat1_k)%in%intersect(names(deIndex1), genek))
    }

    if(length(intersect(names(deIndex2), genek))<=3 ){
      deIndex2_k <- 1:nrow(dat2_k)
    } else {
      deIndex2_k <- which(rownames(dat2_k)%in%intersect(names(deIndex2), genek))
    }

    cs <- CS(dat1_k,dat2_k,deIndex1_k,deIndex2_k,measure)
    ecs <- marginOut[k,"ECS"]
    acs[k] <- (cs - ecs)/(1-ecs)
  }
  return(acs)
}

pACS_pathway <- function(dat1,dat2,deIndex1,deIndex2,
                         select.pathway.list,
                         measure="Fmeasure",acs,permOut,marginOut){

  select.pathways <- names(select.pathway.list)
  K <- length(select.pathways)

  p_acs <- rep(NA,K)
  names(p_acs) <- select.pathways

  for(k in 1:K){

    permcs <- permOut[,k,"permCS"]
    ecs <- marginOut[k,"ECS"]
    #permecs <- permOut[,k,"permECS"]
    permacs <- (permcs - ecs)/(1-ecs)

    p_acs[k] <- (sum(permacs>=acs[k]) + 1)/(length(permacs)+1)
  }

  return(p_acs)

}


ADS_pathway <- function(dat1,dat2,deIndex1,deIndex2,
                        select.pathway.list,
                        measure="Fmeasure",marginOut){

  select.pathways <- names(select.pathway.list)
  data_genes <- rownames(dat1)
  pathway.size <- sapply(select.pathway.list,function(x) {
    length(intersect(data_genes,x))})
  K <- length(select.pathways)
  G <- nrow(dat1)

  ads <- rep(NA,K)
  names(ads) <- select.pathways

  for(k in 1:K){
    path_genek <- select.pathway.list[[k]]
    genek <- intersect(path_genek,data_genes)
    dat1_k <- dat1[genek,]
    dat2_k <- dat2[genek,]

    if(length(intersect(names(deIndex1), genek))<=3 ){
      deIndex1_k <- 1:nrow(dat1_k)
    } else {
      deIndex1_k <- which(rownames(dat1_k)%in%intersect(names(deIndex1), genek))
    }

    if(length(intersect(names(deIndex2), genek))<=3 ){
      deIndex2_k <- 1:nrow(dat2_k)
    } else {
      deIndex2_k <- which(rownames(dat2_k)%in%intersect(names(deIndex2), genek))
    }

    ds <- DS(dat1_k,dat2_k,deIndex1_k,deIndex2_k,measure)
    eds <- marginOut[k,"EDS"]
    ads[k] <- (ds - eds)/(1-eds)
  }
  return(ads)
}

pADS_pathway <- function(dat1,dat2,deIndex1,deIndex2,
                         select.pathway.list,
                         measure="Fmeasure",ads,permOut,marginOut){

  select.pathways <- names(select.pathway.list)
  K <- length(select.pathways)

  p_ads <- rep(NA,K)
  names(p_ads) <- select.pathways

  for(k in 1:K){

    permds <- permOut[,k,"permDS"]
    #permeds <- permOut[,k,"permEDS"]
    eds <- marginOut[k,"EDS"]
    permads <- (permds - eds)/(1-eds)

    p_ads[k] <- (sum(permads>=ads[k]) + 1)/(length(permads)+1)

  }

  return(p_ads)

}

##########################
##Pathway enrich analysis#
##########################

gsa.fisher <- function(x, background, pathway) {
  ####x is the list of query genes
  ####backgroud is a list of background genes that query genes from
  ####pathway is a list of different pathway genes
  count_table<-matrix(0,2,2)
  x<-toupper(x)
  background<-toupper(background)
  index<-which(toupper(background) %in% toupper(x)==FALSE)
  background_non_gene_list<-background[index]
  x<-toupper(x)
  pathway<-lapply(pathway,function(x) intersect(toupper(background),toupper(x)))
  get.fisher <- function(path) {
    res <- NA
    ####in the gene list and in the pathway
    count_table[1,1]<-sum(x %in% path)
    #count_table[1,1]<-sum(is.na(charmatch(x,path))==0)
    ####in the gene list but not in the pathway
    count_table[1,2]<-length(x)-count_table[1,1]
    ####not in the gene list but in the pathway
    count_table[2,1]<-sum(background_non_gene_list%in% path)
    ####not in the gene list and not in the pathway
    count_table[2,2]<-length(background_non_gene_list)-count_table[2,1]
    matched_gene<-x[x %in% path]
    match_num<-length(matched_gene)
    overlap_info<-array(0,dim=4)
    names(overlap_info)<-c("DE in Geneset","DE not in Genese","NonDE in Geneset","NonDE out of Geneset")
    overlap_info[1]=count_table[1,1]
    overlap_info[2]=count_table[1,2]
    overlap_info[3]=count_table[2,1]
    overlap_info[4]=count_table[2,2]
    if(length(count_table)==4){
      res <- fisher.test(count_table, alternative="greater")$p}
    return(list(p_value=res,match_gene=matched_gene,match_num=match_num,
                fisher_table=overlap_info))
  }
  p_val<-rep(0,length(pathway))

  match_gene_list<-list(length(pathway))
  match_gene <- array(0,dim=length(pathway))
  num1<-array(0,dim=length(pathway))
  num2<-matrix(0,nrow=length(pathway),ncol=4)
  colnames(num2)<-c("DE in Geneset","DE not in Genese","NonDE in Geneset","NonDE out of Geneset")
  for(i in 1:length(pathway)){
    result<-get.fisher(pathway[[i]])
    p_val[i]<-result$p_value
    match_gene_list[[i]]<-result$match_gene
    match_gene[i]<-paste(match_gene_list[[i]],collapse="/")
    num1[i]<-result$match_num
    num2[i,]<-result$fisher_table
  }
  names(p_val) <- names(pathway)
  q_val <- p.adjust(p_val, "BH")

  summary<-data.frame(pvalue=p_val,
                      qvalue=q_val,
                      DE_in_Set=num2[,1],
                      DE_not_in_Set=num2[,2],
                      NonDE_in_Set=num2[,3],
                      NonDE_not_in_Set=num2[,4])

  a<-format(summary,digits=3)

  return(a)
}

fisher <- function(x){
  n <- length(x)
  y <- -2*log(x)
  Tf <- sum(y)
  return(1-pchisq(Tf,2*n))
}


##########################
###### SA functions ######
##########################

## energy function

E_tot <- function(delta.mat,a,delta.est){
  ## vector "a" of length n
  ## vector "delta_est" of length K+1: start from theta_0, then ordered from k=1 to K
  n <- nrow(delta.mat)
  K <- length(unique(a))
  #theta_0 <- delta.est[1]
  theta_0 <- 0
  E <- sum(sapply(1:n, function(x) {
    sum(sapply(1:n, function(y){
      if(a[x]==a[y]){
        (delta.mat[x,y] - delta.est[as.character(a[x])])^2
      } else{
        (delta.mat[x,y] - theta_0)^2
      }
    },simplify=T))
  }, simplify=T))

  return(E)
}

## Estimate of delta.est (the means)

Est_mean <- function(delta.mat,a){
  # the first element is always the off-diagonal parts
  n <- nrow(delta.mat)
  K <- length(unique(a))
  total <- rep(0,K+1)
  size <- rep(0,K+1)
  deltamean <- rep(0,K+1)
  names(deltamean) <- c(0,sort(unique(a)))
  for(k in 1:K){
    a_k <- sort(unique(a))[k]
    total[1+k] <- sum(delta.mat[a==a_k,a==a_k])
    size[1+k] <- sum(a==a_k)^2
    deltamean[1+k] <- total[1+k]/size[1+k]
  }
  deltamean[1] <- (sum(delta.mat) - sum(total[-1]))/(n*n - sum(size[-1]))
  return(deltamean)
}

## Trial = split or relocate

Split <- function(a) {
  n <- length(a)
  ua <- unique(a)
  if(length(ua)==n) {
    return(a)
  } else{
    #ua.pick <- sample(x=ua,size=1)
    #a[names(sample(x=which(a==ua.pick),size=1))] <- max(ua)+1
    a.pick <- sample(x=a,size=1)
    pick.ind <- sample(x=which(a==a.pick),size=1)
    a[pick.ind] <- max(a)+1
    return(a)
  }
}

Relocate <- function(a){
  n <- length(a)
  ua <- unique(a)
  if(length(ua)==1) {
    return(a)
  } else{
    pick.ind <- sample(x=1:n,size=1)
    #a.pick <- a[pick.ind]
    #a[pick.ind] <- sample(x=a[-which(a==a.pick)],size=1)
    a[pick.ind] <- sample(x=a[-pick.ind],size=1)
    return(a)
  }
}


##########################
###### Scatterness #######
##########################


scatter = function(dat,cluster.assign,sil_cut=0.1){
  sd_check = apply(dat, 1, sd)

  if(ncol(dat) == 1|!all(sd_check != 0) ){
    dist.dat = as.matrix(dist(dat))
  }else{
    dist.dat = 1 - cor(t(dat))
  }
  sil = silhouette(cluster.assign, dist=dist.dat,diss=T)

  new.dist = dist.dat
  cluster.assign2 = cluster.assign

  while(min(sil[,3]) < sil_cut){
    cluster.assign2<-cluster.assign2[-which(sil[,3] == min(sil[,3]))]
    for (i in unique(cluster.assign2)){
      if(length(which(cluster.assign2==i))==1){
        cluster.assign2<-cluster.assign2[-which(cluster.assign2==i)]
      }
    }
    temp<-cluster.assign2
    for(d in 1:length(cluster.assign2)){
      cluster.assign2[d]<-rank(unique(temp))[which(unique(temp)==temp[d])]
    }#rename cluster index, so it is integer from 1 to k

    new.dist<-new.dist[rownames(new.dist)%in%names(cluster.assign2),
                       colnames(new.dist)%in%names(cluster.assign2)]
    sil <- silhouette(cluster.assign2, dist=new.dist, diss=T)#recalculate silhoutte
  }

  scatter.index = which(!names(cluster.assign)%in%names(cluster.assign2))
  return(scatter.index)
}

##########################
###### Text mining #######
##########################
TextMine <- function(hashtb, pathways, pathway, result, scatter.index=NULL,permutation="all"){
  cat("Performing Text Mining Analysis...\n")
  hashtbAll = hashtb
  k <- length(unique(result))
  if(!is.null(scatter.index)){
    cluster = result
    cluster[scatter.index] = k+1
    hashtb = hashtb[hashtb [,3]%in%which(pathways %in% pathway),]
    tmk = list()
    nperm = 1000
    if (nrow(hashtb) == 0){
      for (i in 1:(k-1)){
        tmk[[i]] = matrix(NA,nrow = 1,ncol = 4)
      }
    }
    else{
      for (i in 1:k){
        e = cluster[cluster == i]
        e = which(pathways %in% names(e))
        hashcl = hashtb[hashtb [,3]%in%e,]
        hashcl = hashcl[duplicated(hashcl[,2]) | duplicated(hashcl[,2], fromLast=TRUE),]
        if (nrow(hashcl) != 0){
          hashf = hashcl
          hashf[,2] = 1
          hashf = aggregate(hashf[,c("row","value")],by = hashf["phrase"],FUN=sum)

          rownames(hashf) = hashf[,"phrase"]
          hashf = hashf[,-1]
          colnames(hashf) = c("count","sum")
          hashap = hashcl
          hashap[,c(2,3,4)] = 0
          mperm = matrix(nrow = nrow(hashf),ncol = nperm)
          for (j in 1:nperm){
            if (permutation=="all"){
              subtb = hashtbAll[hashtbAll[,3]%in%sample(1:length(pathways),length(e)),]
            }else if(permutation=="enriched"){
              subtb = hashtb[hashtb[,3]%in%sample(unique(hashtb[,3]),length(e)),]
            }else{
              stop("Permutation should be 'all' or 'enriched' ")
            }
            subtb = rbind(subtb,hashap)
            subtb = subtb[subtb$phrase %in% hashap$phrase,]
            subtb[,2] = 1
            subtb = aggregate(subtb[,c("row","value")],by = subtb["phrase"],FUN=sum)
            rownames(subtb) = subtb[,"phrase"]
            subtb = subtb[,-1]
            colnames(subtb) = c("count","sum")
            mperm[,j] = subtb[,2]
          }
          hashf[,"p-value"] = apply(cbind(hashf[,2],mperm),1,
                                    function(x)((nperm + 2)-rank(x)[1])/(nperm + 1))
          hashf[,"q-vlaue"] = p.adjust(hashf[,"p-value"],method = "BH")
          tmk[[i]] = hashf[order(hashf[,3],-hashf[,2]),]
        }
        else {tmk[[i]] = matrix(NA,nrow = 1,ncol = 4)}
      }
      tmk[[k+1]] = matrix(NA,nrow = 1,ncol = 4)
    }
  }else{
    hashtb = hashtb[hashtb [,2]%in%which(pathways %in% pathway),]
    tmk = list()
    nperm = 1000
    if (nrow(hashtb) == 0){
      for (i in 1:(k-1)){
        tmk[[i]] = matrix(NA,nrow = 1,ncol = 4)
      }
    }
    else{
      for (i in 1:k){
        e = cluster[cluster == i]
        e = which(pathways %in% names(e))
        hashcl = hashtb[hashtb [,3]%in%e,]
        hashcl = hashcl[duplicated(hashcl[,2]) | duplicated(hashcl[,2], fromLast=TRUE),]
        if (nrow(hashcl) != 0){
          hashf = hashcl
          hashf[,2] = 1
          hashf = aggregate(hashf[,c("row","value")],by = hashf["phrase"],FUN=sum)

          rownames(hashf) = hashf[,"phrase"]
          hashf = hashf[,-1]
          colnames(hashf) = c("count","sum")
          hashap = hashcl
          hashap[,c(2,3,4)] = 0
          mperm = matrix(nrow = nrow(hashf),ncol = nperm)
          for (j in 1:nperm){
            if (permutation=="all"){
              subtb = hashtbAll[hashtbAll[,3]%in%sample(1:length(pathways),length(e)),]
            }else if(permutation=="enriched"){
              subtb = hashtb[hashtb[,3]%in%sample(unique(hashtb[,3]),length(e)),]
            }else{
              stop("Permutation should be 'all' or 'enriched' ")
            }
            subtb = rbind(subtb,hashap)
            subtb = subtb[subtb$phrase %in% hashap$phrase,]
            subtb[,2] = 1
            subtb = aggregate(subtb[,c("row","value")],by = subtb["phrase"],FUN=sum)
            rownames(subtb) = subtb[,"phrase"]
            subtb = subtb[,-1]
            colnames(subtb) = c("count","sum")

            mperm[,j] = subtb[,2]
          }
          hashf[,"p-value"] = apply(cbind(hashf[,2],mperm),1,
                                    function(x)((nperm + 2)-rank(x)[1])/(nperm + 1))
          hashf[,"q-vlaue"] = p.adjust(hashf[,"p-value"],method = "BH")
          tmk[[i]] = hashf[order(hashf[,3],-hashf[,2]),]
        }
        else {tmk[[i]] = matrix(NA,nrow = 1,ncol = 4)}
      }
    }
  }
  return(tmk)
} # End of Text Mining


writeTextOut <- function(tm_filtered,k,pathway.summary) {
  cat("Cluster 1\n", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
  cat("Key words,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
  write.table(t(rownames(tm_filtered[[1]])[1:15]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F,
              append = T, row.names=F,col.names=F,na="")
  cat("q_value,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
  write.table(t(tm_filtered[[1]][1:15,4]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F,
              append = T, row.names=F,col.names=F,na="")
  cat("count,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
  write.table(t(tm_filtered[[1]][1:15,1]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F,
              append = T, row.names=F,col.names=F,na="")
  write.table(pathway.summary[[1]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T,
              append = T, row.names=F,col.names=F)
for (i in 2:k){
    cat(paste("\nCluster ", i, "\n", sep = ""), file = paste("Clustering_Summary_K",k,".csv",sep=""), append = T)
    cat("Key words,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(t(rownames(tm_filtered[[i]])[1:15]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F,
                append = T, row.names=F,col.names=F,na="")
    cat("q_value,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(t(tm_filtered[[i]][1:15,4]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F,
                append = T, row.names=F,col.names=F,na="")
    cat("count,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(t(tm_filtered[[i]][1:15,1]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F,
                append = T, row.names=F,col.names=F,na="")
    write.table(pathway.summary[[i]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T,
                append = T, row.names=F,col.names=F)

  }
}

writeTextOut <- function(tm_filtered,k,pathway.summary,scatter.index=NULL) {
  if(is.null(dim(tm_filtered[[1]]))==TRUE|dim(tm_filtered[[1]])[1] == 0){
    print(paste("No phrase pass q-value threshold in cluster 1"))
    cat("Cluster 1\n", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(pathway.summary[[1]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T, append = T, row.names=F,col.names=F)
  } else {
    cat("Cluster 1\n", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    cat("Key words,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(t(as.character(rownames(tm_filtered[[1]])[1:15])), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F, append = T, row.names=F,col.names=F,na="")
    cat("q_value,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(t(tm_filtered[[1]][1:15,4]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F, append = T, row.names=F,col.names=F,na="")
    cat("count,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
    write.table(t(tm_filtered[[1]][1:15,1]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F, append = T, row.names=F,col.names=F,na="")
    write.table(pathway.summary[[1]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T, append = T, row.names=F,col.names=F)

  }

  for (i in 2:k){
    if(is.null(dim(tm_filtered[[i]]))==TRUE|dim(tm_filtered[[i]])[1] == 0){
      print(paste("No phrase pass q-value threshold in cluster ",k,sep = ""))
      cat(paste("\nCluster ", i, "\n", sep = ""), file = paste("Clustering_Summary_K",k,".csv",sep=""), append = T)
      write.table(pathway.summary[[i]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T, append = T, row.names=F,col.names=F)
    } else {
      cat(paste("\nCluster ", i, "\n", sep = ""), file = paste("Clustering_Summary_K",k,".csv",sep=""), append = T)
      cat("Key words,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
      write.table(t(as.character(rownames(tm_filtered[[i]])[1:15])), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F, append = T, row.names=F,col.names=F,na="")
      cat("q_value,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
      write.table(t(tm_filtered[[i]][1:15,4]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F, append = T, row.names=F,col.names=F,na="")
      cat("count,", file = paste("Clustering_Summary_K",k,".csv",sep=""),append=T)
      write.table(t(tm_filtered[[i]][1:15,1]), paste("Clustering_Summary_K",k,".csv",sep=""), sep=',',quote=F, append = T, row.names=F,col.names=F,na="")
      write.table(pathway.summary[[i]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T, append = T, row.names=F,col.names=F)
    }

  }
  if(!is.null(scatter.index)){
    cat(paste("\nSingleton Term", "\n", sep = ""), file = paste("Clustering_Summary_K",k,".csv",sep=""), append = T)
    write.table(pathway.summary[[k+1]], paste("Clustering_Summary_K",k,".csv",sep=""), sep=",",quote=T,
                append = T, row.names=F,col.names=F)
  }
}

textMine <- function(hashtb,pathways,cluster.assign,scatter.index=NULL,thres=0.05,permutation="all"){
  tmk <- TextMine(hashtb=hashtb, pathways= pathways,
                  pathway=names(cluster.assign), result=cluster.assign, scatter.index,permutation=permutation)
  C <- length(unique(cluster.assign))
  tm_filtered <- list()
  for (i in 1:C){
    tm_filtered[[i]] <- tmk[[i]][which((as.numeric(tmk[[i]][,4]) < thres)), ]
  }
  if(!is.null(scatter.index)){
    tm_filtered[[C+1]] = tmk[[C+1]]
    cluster = cluster.assign
    cluster[scatter.index] = C+1
    pathway.summary <- lapply(1:(C+1), function(x) names(which(cluster==x)))
    writeTextOut(tm_filtered,C,pathway.summary,scatter.index=scatter.index)
  }else{
    pathway.summary <- lapply(1:C, function(x) names(which(cluster.assign==x)))
    writeTextOut(tm_filtered,C,pathway.summary)
  }
  return(tm_filtered)
}

##########################
###### ACS/ADS_DE plot ###
##########################
ARS_to_size <- function(ARSp,factor=2){
  if(ARSp > 0.05) {
    return(1)
  } else {
    return(-log10(ARSp)*factor)
  }
}

ACS_ADS_DE <- function(ds1,ds2,DEevid1,DEevid2,ACSp,ADSp,cluster=NULL,
                       highlight.pathways=NULL,lb=0,ub=1,size.scale=4){

  P <- length(ACSp)
  ACS_size=sapply(ACSp,ARS_to_size)
  ADS_size=sapply(ADSp,ARS_to_size)


  if(!is.null(cluster)){
    RB = rainbow(length(unique(cluster)))
    color = c()
    for (i in 1:length(cluster)) {
      if(cluster[i] != "scatter"){
        color[i] = RB[as.numeric(cluster[i])]
      }else{
        color[i] = "grey50"
      }
    }
  }else if(!is.null(highlight.pathways)){
    color = rep("grey50",P)
    color[highlight.pathways] = "red"
  }else{
    color = rep("black",P)
  }


  if(!is.null(highlight.pathways)){
    index = 1:P
    index[-highlight.pathways] = ""
  }else{
    index = rep("",P)
  }
  data_ACS <- data.frame(ds1_score=DEevid1,ds2_score=DEevid2,
                         ACS_size=ACS_size,index=index,
                         color_pos=color)
  data_ADS <- data.frame(ds1_score=DEevid1,ds2_score=DEevid2,
                         ADS_size=ADS_size,index=index,
                         color_neg=color)

  p_pos <-ggplot(data_ACS, aes(x=ds2_score, y=ds1_score,label=index)) +
    geom_point(size = data_ACS$ACS_size*size.scale, shape=16, color = data_ACS$color_pos)+
    geom_text(size=8*size.scale,parse=TRUE,color="black",hjust = -0.05,vjust=-0.05) +
    theme_bw() +
    coord_fixed(ylim=c(lb,ub),xlim=c(lb,ub)) +
    labs(x="",y="") +
    scale_x_continuous(name="",breaks=seq(0,1,by=0.5),limits=c(0,1)) +
    scale_y_continuous(name="",breaks=seq(0,1,by=0.5),limits=c(0,1)) +
    theme(#legend.title = element_blank(),
      axis.line = element_line(colour = "black"),
      #axis.line=element_blank(),
      axis.text.x = element_text(size = 40,face = "bold"),
      axis.text.y = element_text(size = 40,face = "bold"),
      panel.border = element_blank(),
      panel.grid.major = element_line(linetype = 'solid',#size = 2,
                                      colour = "white"),
      panel.grid.minor = element_line(linetype = 'solid',
                                      colour = "white"),
      panel.background = element_rect(fill = "#FFF1E1")) +
    annotate("text", x = (ub-0.15), y = lb, fontface=2,
             label = paste(ds2,sep=""),
             size=8*size.scale,colour="blue",hjust=0.6,vjust=0.1) +
    annotate("text", x = lb, y = (ub-0.15), fontface=2,
             label=paste(ds1,sep=""),
             size=8*size.scale,colour="blue",vjust=0,hjust=0.2)

  p_neg <-ggplot(data_ADS, aes(x=ds1_score, y=ds2_score,label=index)) + #label=index
    geom_point(size = data_ADS$ADS_size*size.scale, shape=16, color = data_ADS$color_neg)+
    geom_text(size=8*size.scale,parse=TRUE,color="black",hjust = -0.05,vjust=-0.05) +
    theme_bw() +
    coord_fixed(ylim=c(lb,ub),xlim=c(lb,ub)) +
    labs(x="",y="") +
    scale_x_continuous(name="",breaks=seq(0,1,by=0.5),limits=c(0,1)) +
    scale_y_continuous(name="",breaks=seq(0,1,by=0.5),limits=c(0,1)) +
    theme(#legend.title = element_blank(),
      axis.line = element_line(colour = "black"),
      #axis.line=element_blank(),
      axis.text.x = element_text(size = 40,face = "bold"),
      axis.text.y = element_text(size = 40,face = "bold"),
      panel.border = element_blank(),
      panel.grid.major = element_line(linetype = 'solid',#size = 2,
                                      colour = "white"),
      panel.grid.minor = element_line(linetype = 'solid',
                                      colour = "white"),
      panel.background = element_rect(fill = "#EBF5FF")) +
    annotate("text", x = (ub-0.15), y = lb, fontface=2,
             label = paste(ds1,sep=""),
             size=8*size.scale,colour="blue",hjust=0.6,vjust=0.1) +
    annotate("text", x = lb, y = (ub-0.15), fontface=2,
             label=paste(ds2,sep=""),
             size=8*size.scale,colour="blue",vjust=0,hjust=0.2)

  #ggsave(filename=paste(ds2,"_",ds1,"_ACS_figure",".pdf",sep=""),p_pos,
  #width = 10, height = 10)

  #ggsave(filename=paste(ds1,"_",ds2,"_ADS_figure",".pdf",sep=""),p_neg,
  #width = 10, height = 10)

  plist <- list(p_pos,p_neg)
  return(plist)
}

##########################
######    parseXML     ###
##########################
parseRelation <- function(pathwayID, keggSpecies="hsa", binary = T, sep = "-") {
  # download xml file
  download.kegg(pathway.id = pathwayID, keggSpecies, kegg.dir = ".", file.type="xml")
  # generate relation matrix
  KEGG.pathID2name = lapply(KEGGREST::keggList("pathway",keggSpecies),function(x) strsplit(x," - ")[[1]][-length(strsplit(x," - ")[[1]])])
  names(KEGG.pathID2name) = gsub(paste0("path:",keggSpecies),"",names(KEGG.pathID2name))

  pathName = unlist(KEGG.pathID2name[pathwayID])

  xmlFile = paste0(getwd(), "/",keggSpecies, pathwayID,".xml")
  pathway = KEGGgraph::parseKGML(xmlFile)
  pathway = KEGGgraph::splitKEGGgroup(pathway)

  entries = KEGGgraph::nodes(pathway)
  types = sapply(entries, KEGGgraph::getType)
  relations = unique(KEGGgraph::edges(pathway)) ## to avoid duplicated edges
  relationNum = length(relations)
  entryNames = as.list(sapply(entries, KEGGgraph::getName))
  if(any(types == "group") || any(types=="map")){
    entryNames = entryNames[!(types %in% c("group","map"))]
  }
  entryIds = names(entryNames)
  entryNames = lapply(1:length(entryNames), function(i) paste(entryNames[[i]],collapse=sep))
  names(entryNames) = entryIds

  entryNames.unique = unique(entryNames)
  entryNum = length(entryNames.unique)

  relation.mat = matrix(0, entryNum, entryNum)
  rownames(relation.mat) = colnames(relation.mat) = entryNames.unique

  ## if no relation edge, just return
  if(relationNum == 0){
    print(paste0("There is no topological connected gene nodes in ", pathName))
    return(relation.mat)
  }

  entry1 = KEGGgraph::getEntryID(relations)[,1]
  entry2 = KEGGgraph::getEntryID(relations)[,2]
  for(i in 1:length(relations)){
    if(entry1[i] %in% names(entryNames) && entry2[i] %in% names(entryNames)){
      relation.mat[entryNames[[entry1[i]]],entryNames[[entry2[i]]]]=1
    }
    else{
      print(paste("connections not included:",entry1[i], entry2[i], sep=" "))
    }
  }

  file.remove(xmlFile)
  return(relation.mat)
}
##########################
###### KEGG module SA ####
##########################
SA_module_M = function(sp.mat, xmlG, M, nodes, B = 1000,
                       G.ini.list=NULL, reps_eachM = 100,topG_from_previous=10,
                       Tm0=10,mu=0.95,epsilon=1e-5,
                       N=1000,run=10000,seed=12345,sub.num=1){
  #Null distribution for M
  set.seed(seed)
  null.sp.dist = rep(NA,B)
  for(b in 1:B){
    permute.set <- sample(xmlG,M)
    permute.mat <- sp.mat[match(permute.set,row.names(sp.mat)),
                          match(permute.set,colnames(sp.mat))]
    null.sp.dist[b] <- mean(c(permute.mat[lower.tri(permute.mat)]))
  }
  null.sp.mean = mean(null.sp.dist)
  null.sp.median = median(null.sp.dist)
  if(is.null(G.ini.list)){
    p.mean.ls = c()
    G.module.ls = list()
    SP.ls = c()
    for (l in 1:reps_eachM) {
      ##Initialize
      if(length(nodes) == M){
        G.module = nodes
      }else{
        G.module = sample(nodes, M)
      }
      SPc = avgSP(G.module, sp.mat)
      r = 0
      count = 0
      Tm = Tm0
      while((length(nodes)>M) & (r < run) & (count < N) & (Tm >= epsilon)) {
        #pi = exp(-GPc/Tm) ## Boltzmann dist #may need a different Tm or -logP to be comparable?
        #print(SPc)
        ##New trial
        r = r+1
        a.node = sample(setdiff(nodes,G.module),sub.num)
        G.module_new = G.module
        G.module_new[sample(M,sub.num)] = a.node

        SPn = avgSP(G.module_new, sp.mat)

        if(SPn < SPc | SPc == Inf) {
          ##accept
          SPc = SPn;
          G.module = G.module_new;
        }else{
          count = count + 1;
          p = exp((SPc-SPn)/Tm)
          #print(p)
          r = min(1,p); ## acceptance prob.
          u <- runif(1);
          if(u>r) {
            ##not accept
            Tm <- Tm*mu
          } else {
            SPc = SPn;
            G.module = G.module_new;
          }
        }
      }
      G.module.ls[[l]] = G.module
      p.mean.ls[l] = (sum(null.sp.dist<= SPc) + 1)/(B+1)
      SP.ls[l] = SPc
    }
  }else{
    each.times = round(reps_eachM/topG_from_previous)
    case.index = expand.grid(1:length(G.ini.list),1:each.times)
    p.mean.ls = c()
    G.module.ls = list()
    SP.ls = c()
    for (l in 1:nrow(case.index)) {
      G.ini = G.ini.list[[case.index[l,1]]]
      ##Initialize
      if(length(nodes) == M){
        G.module = nodes
      }else{
        x = M-length(G.ini)
        G.module = c(G.ini,sample(setdiff(nodes,G.ini),x))
      }
      SPc = avgSP(G.module, sp.mat)
      r = 0
      count = 0
      Tm = Tm0
      while((length(nodes)>M) & (r < run) & (count < N) & (Tm >= epsilon)) {
        #pi = exp(-GPc/Tm) ## Boltzmann dist #may need a different Tm or -logP to be comparable?
        #print(SPc)
        ##New trial
        r = r+1
        a.node = sample(setdiff(nodes,G.module),sub.num)
        G.module_new = G.module
        G.module_new[sample(M,sub.num)] = a.node

        SPn = avgSP(G.module_new, sp.mat)

        if(SPn < SPc | SPc == Inf) {
          ##accept
          SPc = SPn;
          G.module = G.module_new;
        }else{
          count = count + 1;
          p = exp((SPc-SPn)/Tm)
          #print(p)
          r = min(1,p); ## acceptance prob.
          u <- runif(1);
          if(u>r) {
            ##not accept
            Tm <- Tm*mu
          } else {
            SPc = SPn;
            G.module = G.module_new;
          }
        }
      }
      G.module.ls[[l]] = G.module
      p.mean.ls[l] = (sum(null.sp.dist<= SPc) + 1)/(B+1)
      SP.ls[l] = SPc
    }

  }
  p.sd.ls = sqrt(p.mean.ls*(1-p.mean.ls)/B)
  r.p = rank(p.mean.ls,ties.method = "random")
  index = match(1:topG_from_previous,r.p)
  best.index = which(r.p == 1)

  minG = G.module.ls[[best.index]]
  sp = SP.ls[best.index]
  p.mean = p.mean.ls[best.index]
  p.sd = p.sd.ls[best.index]

  top.G = G.module.ls[index]
  top.pmean = p.mean.ls[index]
  top.psd = p.sd.ls[index]
  top.sp = SP.ls[index]

  return(list(minG = minG,sp = sp,p.mean = p.mean,p.sd = p.sd,
              top.G = top.G,top.sp = top.sp,top.pmean = top.pmean,top.psd = top.psd,
              null.sp.mean = null.sp.mean,null.sp.median = null.sp.median))
}

avgSP = function(G.module, sp.mat){
  m = length(G.module)
  set.mat = sp.mat[match(G.module,row.names(sp.mat)),
                   match(G.module,colnames(sp.mat))]
  G.sp = mean(c(set.mat[lower.tri(set.mat)]))
  return(G.sp)
}
CAMO-R/Rpackage documentation built on July 20, 2023, 6:04 a.m.