############################################################################
############################################################################
#### Cohesiveness R script
####
# Author: Fco. Jose Campos-Laborie
##
## Cohesiveness statistic is intended as a feature selection method
## to assess the probability of "being close" of any group of samples
## within a feature.
#### INPUT:
# mx = matrix with rows as features and columns as samples
# cl = named vector with group of samples as values and names of samples as
# names of 'cl'.
# comb.p.val = "fisher" for Fisher's combined probability test or "z.score" for
# Stouffer's method.
cohesiveness <- function(mx, cl, comb.p.val = "fisher",
method = "complete", trim = 0.01)
{
require(metap)
if(!method %in% c("complete","trimmed"))
stop("'method' must be 'complete' or 'trimmed'")
# ordering the matrix
mx <- mx[,names(cl)]
# counter for biological features
counter <- 0
# wrapper for all biological features
res <- t(apply(mx, 1, function(x) {
counter <<- counter + 1
cat("\rFeature: ",counter)
# ranking the feature
x1 <- rank(-x, ties.method = "random")
names(x1) <- names(x)
res <- sapply(unique(as.character(cl)), function(y) {
x2 <- x1
cl2 <- cl
## Calculating gaps between elements of each group
d <- diff(sort(x2[cl2 == y]))
## Average, 'r' and 'n'
if(method == "complete"){
avrg.samp <- mean(d)
r <- length(x2[cl2 == y])
n <- length(x2)
}
if(method == "trimmed"){
avrg.samp <- mean(d, trim = trim)
r <- length(x2[cl2 == y])
n <- length(x2)
r <- r - ceiling(trim*r*2)
n <- n - ceiling(trim*r*2)
}
## Calculating theoretical average and sd
avrg <- (n+1)/(r+1)
variance <- ((n+1)*(n-r)*r)/(((r+1)^2)*(r+2))
std <- sqrt(variance) ## standard deviation
## Z-score
# z <- (avrg.samp - avrg)/(std/sqrt(r))
z <- 1-(avrg.samp - 1)/(n/(r-1) - 1)
return(c(z, pnorm(z)))})
}))
colnames(res) <- c(rbind(paste(unique(as.character(cl)),"cohesiveness",sep = "_"),
paste(unique(as.character(cl)),"p.value",sep = "_")))
if(comb.p.val == "fisher"){
### Fisher's combination
summ <- apply(res[,seq(2,dim(res)[2],2)], 1, function(x) -2*sum(log(x)))
p.val <- 1 - pchisq(summ, df = dim(res)[2])
}
else if(comb.p.val == "z.score"){
### Stouffer's Z-score
summ <- t(apply(res[,seq(2,dim(res)[2],2)], 1, function(x) unlist(sumz(x)[c("z","p")])))
p.val <- summ[,"p"]
summ <- summ[,"z"]
}
## Multiple correction of p-values
fdr <- p.adjust(p.val, method = "fdr")
res <- data.frame(res, summ, p.val, fdr)
return(res)
}
##################
## plot function
plotCohesiveness <- function(mx, cl, id, coh,
category = NA, color = "darkred")
{
##
if(!is.numeric(mx) | !is.matrix(mx))
stop("'mx' must be a numeric matrix.")
par(mar = c(6,4,6,4))
layout(matrix(c(1,2,3,4), nrow = 2, byrow = F))
##
if(is.na(category))
category <- as.character(unique(cl)[which(coh[id,grepl("cohesiveness",colnames(coh))] ==
max(coh[id,grepl("cohesiveness",colnames(coh))]))[1]])
##
category <- as.character(category)
cat <- gsub(category, pattern = c(" "), replacement = ".")
cat <- gsub(cat, pattern = c("-"), replacement = ".")
cat <- gsub(cat, pattern = c("("), replacement = ".", fixed = T)
cat <- gsub(cat, pattern = c(")"), replacement = ".", fixed = T)
value <- coh[id, paste(cat, "cohesiveness", sep = "_")]
n <- length(table(cl))
## Ranking plot
true <- cl[order(mx[id,])] == category
yP <- ifelse(true, 0.25, 0.48)
yP2 <- ifelse(true, 0.75, 0.52)
col <- ifelse(true, adjustcolor(color,0.9), adjustcolor("black",0.5))
d <- data.frame(y = yP, yend = yP2)
p1 <- ggplot(d, aes(y = y, yend = yend, x = seq_len(dim(d)[1]), xend = seq_len(dim(d)[1]))) +
geom_segment(stat = "identity", col = col) + ylim(0, 1) + xlab("Ranking of samples") +
ggtitle(paste("Ranked", id, "profile", sep = " ")) + ylab("") +
theme(panel.background = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Omic signal plot
d <- data.frame(value = sort(mx[id,]), pos = seq_len(dim(mx)[2]),
class = cl[order(mx[id,])] == category)
p2 <- ggplot(d, aes(x = pos, y = value)) +
geom_point(size = ifelse(true, 2, 1),
col = ifelse(true, adjustcolor("darkred", 0.9), adjustcolor("grey65", 0.5))) +
xlab("Ranking of samples") + ylab("Omic signal") +
ggtitle(paste("Ordered", id, "profile", sep = " ")) +
theme_minimal()
## Boxplot
true <- names(table(cl)) == category
col <- ifelse(true, color, "grey25")
d <- melt(c(by(mx[id,], cl, c)))
p3 <- ggplot(d, aes(x = L1, y = value)) +
geom_boxplot() +
geom_jitter(height = 0, width = 0.1, col = adjustcolor(rep(col, table(cl)), 0.7)) +
ylab("Omic signal") + xlab("") +
scale_fill_manual(values = adjustcolor(col, 0.7), breaks = names(table(cl)),
name = "Category", labels = names(table(cl))) +
ggtitle(paste(id, ": boxplot per category", sep = "")) +
theme_light()
## Barplot
col <- ifelse(true, color, "grey62")
d2 <- data.frame(id = as.numeric(coh[id, grepl("cohesiveness", colnames(coh))]),
labels = names(table(cl)), col = col)
p4 <- ggplot(d2, aes(y = id, x = labels)) + geom_bar(stat = "identity", fill = col) +
ylim(0,1) + xlab("Categories") + ylab("Cohesiveness") +
theme_light()
suppressWarnings(gridExtra::grid.arrange(p1, p2, p3, p4,
layout_matrix = matrix(c(1, 3, 2, 4), byrow = TRUE, nrow = 2))
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.