R/cohesiveness.R

Defines functions cohesiveness plotCohesiveness

Documented in cohesiveness plotCohesiveness

############################################################################
############################################################################
#### Cohesiveness R script
####
# Author: Fco. Jose Campos-Laborie
##
## Cohesiveness statistic is intended as a feature selection method
## to assess the probability of "being close" of any group of samples
## within a feature.

#### INPUT:
# mx = matrix with rows as features and columns as samples
# cl = named vector with group of samples as values and names of samples as
# names of 'cl'.
# comb.p.val = "fisher" for Fisher's combined probability test or "z.score" for
# Stouffer's method.

cohesiveness <- function(mx, cl, comb.p.val = "fisher",
                         method = "complete", trim = 0.01)
{
  require(metap)

  if(!method %in% c("complete","trimmed"))
    stop("'method' must be 'complete' or 'trimmed'")
  # ordering the matrix
  mx <- mx[,names(cl)]
  # counter for biological features
  counter <- 0

  # wrapper for all biological features
  res <- t(apply(mx, 1, function(x) {
    counter <<- counter + 1
    cat("\rFeature: ",counter)
    # ranking the feature
    x1 <- rank(-x, ties.method = "random")
    names(x1) <- names(x)

    res <- sapply(unique(as.character(cl)), function(y) {
      x2 <- x1
      cl2 <- cl
      ## Calculating gaps between elements of each group
      d <- diff(sort(x2[cl2 == y]))
      ## Average, 'r' and 'n'
      if(method == "complete"){
        avrg.samp <- mean(d)
        r <- length(x2[cl2 == y])
        n <- length(x2)
      }
      if(method == "trimmed"){
        avrg.samp <- mean(d, trim = trim)
        r <- length(x2[cl2 == y])
        n <- length(x2)
        r <- r - ceiling(trim*r*2)
        n <- n - ceiling(trim*r*2)
      }
      ## Calculating theoretical average and sd
      avrg <- (n+1)/(r+1)
      variance <- ((n+1)*(n-r)*r)/(((r+1)^2)*(r+2))
      std <- sqrt(variance) ## standard deviation
      ## Z-score
      # z <- (avrg.samp - avrg)/(std/sqrt(r))

      z <- 1-(avrg.samp - 1)/(n/(r-1) - 1)

      return(c(z, pnorm(z)))})
  }))
  colnames(res) <- c(rbind(paste(unique(as.character(cl)),"cohesiveness",sep = "_"),
                           paste(unique(as.character(cl)),"p.value",sep = "_")))

  if(comb.p.val == "fisher"){
    ### Fisher's combination
    summ <- apply(res[,seq(2,dim(res)[2],2)], 1, function(x) -2*sum(log(x)))
    p.val <- 1 - pchisq(summ, df = dim(res)[2])
  }
  else if(comb.p.val == "z.score"){
    ### Stouffer's Z-score
    summ <- t(apply(res[,seq(2,dim(res)[2],2)], 1, function(x) unlist(sumz(x)[c("z","p")])))
    p.val <- summ[,"p"]
    summ <- summ[,"z"]
  }
  ## Multiple correction of p-values
  fdr <- p.adjust(p.val, method = "fdr")

  res <- data.frame(res, summ, p.val, fdr)
  return(res)
}


##################
## plot function
plotCohesiveness <- function(mx, cl, id, coh,
                             category = NA, color = "darkred")
{
  ##
  if(!is.numeric(mx) | !is.matrix(mx))
    stop("'mx' must be a numeric matrix.")
  par(mar = c(6,4,6,4))
  layout(matrix(c(1,2,3,4), nrow = 2, byrow = F))
  ##
  if(is.na(category))
    category <- as.character(unique(cl)[which(coh[id,grepl("cohesiveness",colnames(coh))] ==
                                                max(coh[id,grepl("cohesiveness",colnames(coh))]))[1]])
  ##
  category <- as.character(category)
  cat <- gsub(category, pattern = c(" "), replacement = ".")
  cat <- gsub(cat, pattern = c("-"), replacement = ".")
  cat <- gsub(cat, pattern = c("("), replacement = ".", fixed = T)
  cat <- gsub(cat, pattern = c(")"), replacement = ".", fixed = T)
  value <- coh[id, paste(cat, "cohesiveness", sep = "_")]
  n <- length(table(cl))

  ## Ranking plot
  true <- cl[order(mx[id,])] == category
  yP <- ifelse(true, 0.25, 0.48)
  yP2 <- ifelse(true, 0.75, 0.52)
  col <- ifelse(true, adjustcolor(color,0.9), adjustcolor("black",0.5))
  d <- data.frame(y = yP, yend = yP2)

  p1 <- ggplot(d, aes(y = y, yend = yend, x = seq_len(dim(d)[1]), xend = seq_len(dim(d)[1]))) +
    geom_segment(stat = "identity", col = col) + ylim(0, 1) + xlab("Ranking of samples") +
    ggtitle(paste("Ranked", id, "profile", sep = " ")) + ylab("") +
    theme(panel.background = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())

  ## Omic signal plot
  d <- data.frame(value = sort(mx[id,]), pos = seq_len(dim(mx)[2]),
                  class = cl[order(mx[id,])] == category)

  p2 <- ggplot(d, aes(x = pos, y = value)) +
    geom_point(size = ifelse(true, 2, 1),
               col = ifelse(true, adjustcolor("darkred", 0.9), adjustcolor("grey65", 0.5))) +
    xlab("Ranking of samples") + ylab("Omic signal") +
    ggtitle(paste("Ordered", id, "profile", sep = " ")) +
    theme_minimal()

  ## Boxplot
  true <- names(table(cl)) == category
  col <- ifelse(true, color, "grey25")

  d <- melt(c(by(mx[id,], cl, c)))

  p3 <- ggplot(d, aes(x = L1, y = value)) +
    geom_boxplot() +
    geom_jitter(height = 0, width = 0.1, col = adjustcolor(rep(col, table(cl)), 0.7)) +
    ylab("Omic signal") + xlab("") +
    scale_fill_manual(values = adjustcolor(col, 0.7), breaks = names(table(cl)),
                      name = "Category", labels = names(table(cl))) +
    ggtitle(paste(id, ": boxplot per category", sep = "")) +
    theme_light()

  ## Barplot
  col <- ifelse(true, color, "grey62")
  d2 <- data.frame(id = as.numeric(coh[id, grepl("cohesiveness", colnames(coh))]),
                   labels = names(table(cl)), col = col)

  p4 <- ggplot(d2, aes(y = id, x = labels)) + geom_bar(stat = "identity", fill = col) +
    ylim(0,1) + xlab("Categories") + ylab("Cohesiveness") +
    theme_light()

  suppressWarnings(gridExtra::grid.arrange(p1, p2, p3, p4,
                                           layout_matrix = matrix(c(1, 3, 2, 4), byrow = TRUE, nrow = 2))
  )
}
fjcamlab/cohesiveness documentation built on May 6, 2019, 12:09 p.m.