R/cphelper.R

Defines functions cp_addurls_GO cp_compClust cp_filterforNA cp_clusterDPAEuclideanDist cp_clusterHClustEuclideanDistDeepslit cp_clusterHClustEuclideanDist

Documented in cp_addurls_GO cp_clusterDPAEuclideanDist cp_clusterHClustEuclideanDist cp_clusterHClustEuclideanDistDeepslit cp_compClust cp_filterforNA

### Cluster ----
# what are possible input/outputs?
# dendrogram

#' cluster using hclust
#' @family clusterProfiler
#' @param x data matrix
#' @param nrCluster number of clusters
#' @param method default complete
#' @param JK jack knife resmapling default TRUE
#' @export
#'
cp_clusterHClustEuclideanDist <- function(x,
                                          nrCluster,
                                          method = "complete",
                                          JK = TRUE){
  distJK <- if (JK) {
     prora::dist_JK(x)
  } else {
    stats::dist(x)
  }

  bb <- stats::hclust(distJK, method = method)
  k <- stats::cutree(bb, k = nrCluster)
  dend <- stats::as.dendrogram(bb)
  clusterAssignment <- data.frame(protein_Id = rownames(x), Cluster =  k)
  return(list(dendrogram = dend, clusterAssignment = clusterAssignment, nrCluster = nrCluster))
}

#' cluster using hclust and deepsplit
#' @family clusterProfiler
#' @export
#' @param x matrix with data
#' @param method linkage method (hclust argument)
#' @param JK should jack knife resampling be used (defaul TRUE)
#' @return list
#' \itemize{
#' \item dendrogram - instance of class dendrogram
#' \item cluster assignments
#' \item nrCluster number of clusters
#' }
cp_clusterHClustEuclideanDistDeepslit <- function(x,  method = "complete", JK = TRUE){
  if (nrow(x) <= 2) {
    dend <- NULL
    nrCluster <- 1
    clusterAssignment <- data.frame( protein_Id  = as.character(rownames(x)), Cluster = rep(1, nrow(x) ))
  } else {
    distJK <- if (JK) {
      prora::dist_JK(x)
    } else {
      stats::dist(x)
    }

    bb <- stats::hclust(distJK,method = method)
    #cat("MIN cluster size ", min(50, nrow(x)/10))
    k <- dynamicTreeCut::cutreeDynamic(
      bb,
      method = "hybrid",
      deepSplit = FALSE,
      distM = as.matrix(distJK),
      minClusterSize = min(50, nrow(x)/10) , verbose = 0)

    dend <- stats::as.dendrogram(bb)
    clusterAssignment <- data.frame(protein_Id = rownames(x), Cluster =  k)
    nrCluster <-  max(k)
  }
  return(list(dendrogram = dend, clusterAssignment = clusterAssignment, nrCluster = nrCluster))
}

#' Cluster using DPA
#' @family clusterProfiler
#' @param mdata, input data
#' @param Z, the number of standard deviations fixing the level of
#'            statistical confidence at which one decides to consider
#'            a cluster meaningful. Default value is set to 1.
#' @param JK jack knive resampling (default TRUE)
#' @return
#' \itemize{
#' \item dendrogram - instance of class dendrogram
#' \item cluster assignments
#' \item nrCluster number of clusters
#' }
#' @export
#'
cp_clusterDPAEuclideanDist <- function(mdata, Z = 1, JK = TRUE){
  distJK <- if (JK) {
    prora::dist_JK( mdata )
  } else {
    stats::dist(mdata)
  }
  DPAresult <- DPAclustR::runDPAclustering(as.matrix(distJK), Z = Z, metric = "precomputed")
  k <- DPAresult$labels
  maxD <- max(DPAresult$density)
  topography <- DPAresult$topography
  if ( nrow(topography) > 0 ) {
    bb <- DPAclustR::plot_dendrogram(k,
                                     topography,
                                     maxD,
                                     popmin = 0,
                                     method = "average")

    dend <- stats::as.dendrogram(bb)
  } else {
    dend <- NULL
  }
  clusterAssignment <- data.frame(protein_Id = rownames(mdata), Cluster =  k)
  return( list(dendrogram = dend, clusterAssignment = clusterAssignment, nrCluster = max(k)) )
}



#' remove proteins with more NA's than in 60\% of samples
#' @family clusterProfiler
#' @param mdata data matrix
#' @export
#'
cp_filterforNA <- function(mdata) {
  na <- apply(mdata, 1, function(x){sum(is.na(x))})
  nc <- ncol(mdata)
  mdata <- mdata[na < floor(0.5 * nc),]
  return(mdata)
}

#' same as clusterProfiler::compareCluster but with defaults and error handling
#' @family clusterProfiler
#' @param geneClusters a list of entrez gene id.
#' @param orgDB organizm db
#' @param universe see \code{\link[clusterProfiler]{compareCluster}}
#' @param ont see \code{\link[clusterProfiler]{compareCluster}}
#' @export
#'
cp_compClust <- function(geneClusters,
                         orgDB,
                         universe, ont="BP" ) {
  # map to entriz id's
  .ehandler = function(e) {
    warning("WARN :", e)
    # return string here
    as.character(e)
  }

  clustProf = tryCatch(clusterProfiler::compareCluster(
    geneClusters,
    fun = "enrichGO",
    OrgDb =  orgDB,
    ont = ont,
    universe = universe,
    pvalueCutoff = 1,
    qvalueCutoff = 0.2,
    pAdjustMethod = "BH"), error = .ehandler)
  return(clustProf)
}


#' add uris to to cp table outputs
#' @family clusterProfiler
#' @param data dataframe
#' @param caption table caption
#' @param coreEnrichment column with core enrichments
#' @param signif2 which columns contains digits to round to significance 2.
#' @param gocarts default http://amigo.geneontology.org/amigo/term/
#' @param genecarts default https://www.ncbi.nlm.nih.gov/gene/
#' @export
cp_addurls_GO <- function(data, caption, coreEnrichment = "core_enrichment",
                    signif2  = c('NES', 'pvalue', 'FDR'),
                    gocarts = "http://amigo.geneontology.org/amigo/term/",
                    genecarts = "https://www.ncbi.nlm.nih.gov/gene/"
                    ) {
  relevantResult <- data.frame(data)
  rownames(relevantResult) <- NULL
  relevantResult <- dplyr::rename(relevantResult, FDR = .data$p.adjust)
  relevantResult$ID <-
    prora::DT_makeURLfor(relevantResult$ID, path = gocarts)

  relevantResult$core_enrichment <-
    lapply( strsplit(relevantResult[[coreEnrichment]], split = "/"),
            prora::DT_makeURLfor,
            path = genecarts)

  dt <-  DT::datatable(relevantResult, caption = caption, escape = FALSE) %>%
    DT::formatSignif(signif2,2)
  return(dt)
}
protViz/prora documentation built on Dec. 12, 2021, 12:32 a.m.