R/runPANDA.R
In SCORPION: Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks

#' @import cli
runPANDA <- function(motif = NULL,expr=NULL,ppi=NULL,alpha=0.1,hamming=0.001, n.cores = 1,
                  iter=NA,output=c('regulatory','coexpression','cooperative'),
                  zScale=TRUE,progress=TRUE,randomize=c("None", "within.gene", "by.gene"), assoc.method="pearson",
                  scale.by.present=FALSE,edgelist=FALSE,remove.missing.ppi=FALSE,
                  remove.missing.motif=FALSE,remove.missing.genes=FALSE,mode="intersection"){

  randomize <- match.arg(randomize)
  if(progress)
    cli::cli_alert_success('Initializing and validating')

  if (is.null(expr)){
    # Use only the motif data here for the gene list
    num.conditions <- 0
    if (randomize!="None"){
      cli::cli_alert_danger("Randomization ignored because gene expression is not used.")
      randomize <- "None"
    }
  } else {
    if(mode=='legacy'){
      if(remove.missing.genes){
        # remove genes from expression data that are not in the motif data
        n <- nrow(expr)
        expr <- expr[which(rownames(expr)%in%motif[,2]),]
        cli::cli_alert_danger(sprintf("%s genes removed that were not present in motif", n-nrow(expr)))
      }
      if(remove.missing.motif){
        # remove genes from motif data that are not in the expression data
        n <- nrow(motif)
        motif <- motif[which(motif[,2]%in%rownames(expr)),]
        cli::cli_alert_danger(sprintf("%s motif edges removed that targeted genes missing in expression data", n-nrow(motif)))
      }
      # Use the motif data AND the expr data (if provided) for the gene list
      # Keep everything sorted alphabetically
      expr <- expr[order(rownames(expr)),]
    } else if(mode=='union'){
      gene.names=unique(union(rownames(expr),unique(motif[,2])))
      tf.names  =unique(union(unique(ppi[,1]),unique(motif[,1])))
      num.TFs    <- length(tf.names)
      num.genes  <- length(gene.names)
      # gene expression matrix
      expr1=as.data.frame(matrix(0,num.genes,ncol(expr)))
      rownames(expr1)=gene.names
      expr1[which(gene.names%in%rownames(expr)),]=expr[]
      expr=expr1
      #PPI matrix
      tfCoopNetwork <- matrix(0,num.TFs,num.TFs)
      colnames(tfCoopNetwork)=tf.names
      rownames(tfCoopNetwork)=tf.names
      Idx1 <- match(ppi[,1], tf.names);
      Idx2 <- match(ppi[,2], tf.names);
      Idx <- (Idx2-1)*num.TFs+Idx1;
      tfCoopNetwork[Idx] <- ppi[,3];
      Idx <- (Idx1-1)*num.TFs+Idx2;
      tfCoopNetwork[Idx] <- ppi[,3];
      #Motif matrix
      regulatoryNetwork=matrix(0,num.TFs,num.genes)
      colnames(regulatoryNetwork) = gene.names
      rownames(regulatoryNetwork) = tf.names
      Idx1=match(motif[,1], tf.names);
      Idx2=match(motif[,2], gene.names);
      Idx=(Idx2-1)*num.TFs+Idx1;
      regulatoryNetwork[Idx]=motif[,3]
    } else if(mode=='intersection'){
      gene.names = sort(intersect(rownames(expr), motif[,2]))
      tf.names  = sort(intersect(unique(c(ppi[,1], ppi[,2])), motif[,1]))
      num.TFs   = length(tf.names)
      num.genes  = length(gene.names)

      # Gene expression matrix
      expr = expr[gene.names,]

      # PPI matrix
      ppi <- ppi[ppi[,1] %in% tf.names,]
      ppi <- ppi[ppi[,2] %in% tf.names,]
      tfCoopNetwork <- Matrix::sparseMatrix(i = as.numeric(factor(ppi[,1], tf.names)),
                                            j = as.numeric(factor(ppi[,2], tf.names)),
                                            x = ppi[,3],
                                            dims = c(num.TFs, num.TFs),
                                            dimnames = list(tf.names, tf.names))
      tfCoopNetwork <- as.matrix(tfCoopNetwork)
      ppi <- tfCoopNetwork[lower.tri(tfCoopNetwork)] + tfCoopNetwork[upper.tri(tfCoopNetwork)]
      ppi[tfCoopNetwork[lower.tri(tfCoopNetwork)] == tfCoopNetwork[upper.tri(tfCoopNetwork)]] = ppi[tfCoopNetwork[lower.tri(tfCoopNetwork)] == tfCoopNetwork[upper.tri(tfCoopNetwork)]]/2
      tfCoopNetwork[upper.tri(tfCoopNetwork)] <- ppi
      tfCoopNetwork[lower.tri(tfCoopNetwork)] <- ppi
      tfCoopNetwork <- Matrix(tfCoopNetwork)

      #Motif matrix
      motif <- motif[motif[,1] %in% tf.names,]
      motif <- motif[motif[,2] %in% gene.names,]
      regulatoryNetwork <- Matrix::sparseMatrix(i = as.numeric(factor(motif[,1], tf.names)),
                                               j = as.numeric(factor(motif[,2], gene.names)),
                                               x = motif[,3],
                                               dims = c(num.TFs, num.genes),
                                               dimnames = list(tf.names, gene.names))
    }

    num.conditions <- ncol(expr)
    if (randomize=='within.gene'){
      expr <- t(apply(expr, 1, sample))
      if(progress)
        cli::cli_alert_info("Randomizing by reordering each gene's expression")
    } else if (randomize=='by.gene'){
      rownames(expr) <- sample(rownames(expr))
      expr           <- expr[order(rownames(expr)),]
      if(progress)
        cli::cli_alert_info("Randomizing by reordering each gene labels")
    }
  }

  if (mode=='legacy'){
    # Create vectors for TF names and Gene names from motif dataset
    tf.names   <- sort(unique(motif[,1]))
    gene.names <- sort(unique(rownames(expr)))
    num.TFs    <- length(tf.names)
    num.genes  <- length(gene.names)
  }

  # Bad data checking
  if (num.genes==0){
    cli::cli_alert_danger("Error validating data.  No matched genes")
    cli::cli_abort("Please ensure that gene names in expression data match gene names in motif data")

  }

  if(num.conditions==0) {
    cli::cli_alert_warning('No expression data given.  PANDA will run based on an identity co-regulation matrix')
    geneCoreg <- diag(num.genes)
  } else if(num.conditions<3) {
    cli::cli_alert_warning('Not enough expression conditions detected to calculate correlation. Co-regulation network will be initialized to an identity matrix.')
    geneCoreg <- diag(num.genes)
  } else {
    if(scale.by.present){
      num.positive=(expr>0)%*%t((expr>0))
      if(assoc.method == 'pcr'){
        geneCoreg <- as.matrix(pcNet(expr)) * (num.positive/num.conditions)
      } else {
        geneCoreg <- cor(t(expr), method=assoc.method, use="pairwise.complete.obs")
      }

    } else {
      if(assoc.method == 'pcr'){
        geneCoreg <- as.matrix(pcNet(expr))
      } else {
        if(assoc.method == 'spearman'){
          geneCoreg <- Matrix(t(apply(expr,1,rank)))
        }
        geneCoreg <- expr - rowMeans(expr)
        geneCoreg <- geneCoreg/sqrt(rowSums(geneCoreg^2))
        geneCoreg <- tcrossprod(geneCoreg)
      }
    }
    if(progress)
      cli::cli_alert_success('Verified sufficient samples')
  }
  if (any(is.na(geneCoreg@x))){ #check for NA and replace them by zero
    diag(geneCoreg)=1
    geneCoreg@x[is.na(geneCoreg@x)]=0
  }

  if (any(duplicated(motif))) {
    cli::cli_alert_warning("Duplicate edges have been found in the motif data. Weights will be summed.")
    motif <- aggregate(motif[,3], by=list(motif[,1], motif[,2]), FUN=sum)
  }

  ## Run PANDA ##
  tic=proc.time()[3]

  if(progress)
    cli::cli_alert_info('Normalizing networks')
  regulatoryNetwork = normalizeNetwork(regulatoryNetwork)
  tfCoopNetwork     = normalizeNetwork(tfCoopNetwork)
  geneCoreg         = normalizeNetwork(geneCoreg)

  if(progress)
    cli::cli_alert_info('Learning Network')

  minusAlpha = 1-alpha
  step=0
  hamming_cur=1
  if(progress)
    cli::cli_alert_info("Using tanimoto similarity")
    cli::cli_progress_bar("Assimilating data", type = 'iterator')
  while(hamming_cur>hamming){
    if ((!is.na(iter))&&step>=iter){
      cli::cli_alert_warning(paste0("Reached maximum iterations, iter =",iter))
      break
    }
    Responsibility=tanimoto(tfCoopNetwork, regulatoryNetwork)
    Availability=tanimoto(regulatoryNetwork, geneCoreg)
    RA = 0.5*(Responsibility+Availability)

    hamming_cur=sum(abs(regulatoryNetwork-RA))/(num.TFs*num.genes)
    regulatoryNetwork=minusAlpha*regulatoryNetwork + alpha*RA

    ppi=tanimoto(regulatoryNetwork, t(regulatoryNetwork))
    ppi=update.diagonal(ppi, num.TFs, alpha, step)
    tfCoopNetwork=minusAlpha*tfCoopNetwork + alpha*ppi

    CoReg2=tanimoto(t(regulatoryNetwork), regulatoryNetwork)
    CoReg2=update.diagonal(CoReg2, num.genes, alpha, step)
    geneCoreg=minusAlpha*geneCoreg + alpha*CoReg2

    if(progress)
      #message("Iteration ", step,": hamming distance = ", round(hamming_cur,5))

    cli::cli_progress_message(paste0("Iteration ", step,": hamming distance = ", round(hamming_cur,5)))
    step=step+1
  }

  toc=proc.time()[3] - tic
  if(progress)
    cli::cli_alert_success(paste0("Successfully ran SCORPION on ", num.genes, " Genes and ", num.TFs, " TFs"))
    cli::cli_alert_info(paste0("Time elapsed: ", round(toc,2), " seconds"))
    cli::cli_end()

  O <- prepResult(zScale, output, regulatoryNetwork, geneCoreg, tfCoopNetwork, edgelist, motif)
  return(O)
}

Any scripts or data that you put into this service are public.

SCORPION documentation built on Sept. 22, 2022, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SCORPION
Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks

R/runPANDA.R
In SCORPION: Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks

Defines functions runPANDA

Try the SCORPION package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SCORPION Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks

R/runPANDA.R In SCORPION: Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks

Defines functions runPANDA

Try the SCORPION package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SCORPION
Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks

R/runPANDA.R
In SCORPION: Single Cell Oriented Reconstruction of PANDA Individual Optimized Networks