## load required packages.
#' @import tsne
#' @import pheatmap
#' @import MASS
#' @import cluster
#' @import mclust
#' @import flexmix
#' @import lattice
#' @import fpc
#' @import amap
#' @import RColorBrewer
#' @import locfit
#' @import vegan
#' @import Rtsne
#' @import scran
#' @import DESeq2
#' @import randomForest
NULL
## class definition
#' @export SCseq
#' @exportClass SCseq
SCseq <- setClass("SCseq", slots = c(expdata = "data.frame", ndata = "data.frame", fdata = "data.frame", distances = "matrix", tsne = "data.frame", cluster = "list", background = "list", out = "list", cpart = "vector", fcol = "vector", filterpar = "list", clusterpar = "list", outlierpar ="list" ))
setValidity("SCseq",
function(object) {
msg <- NULL
if ( ! is.data.frame(object@expdata) ){
msg <- c(msg, "input data must be data.frame")
}else if ( nrow(object@expdata) < 2 ){
msg <- c(msg, "input data must have more than one row")
}else if ( ncol(object@expdata) < 2 ){
msg <- c(msg, "input data must have more than one column")
}else if (sum( apply( is.na(object@expdata),1,sum ) ) > 0 ){
msg <- c(msg, "NAs are not allowed in input data")
}else if (sum( apply( object@expdata,1,min ) ) < 0 ){
msg <- c(msg, "negative values are not allowed in input data")
}
if (is.null(msg)) TRUE
else msg
}
)
setMethod("initialize",
signature = "SCseq",
definition = function(.Object, expdata ){
.Object@expdata <- expdata
.Object@ndata <- expdata
.Object@fdata <- expdata
validObject(.Object)
return(.Object)
}
)
#changed default
#' @exportMethod filterdata
setGeneric("filterdata", function(object, mintotal=3000, minexpr=5, minnumber=1, maxexpr=Inf, downsample=FALSE, sfn=FALSE, hkn=FALSE, dsn=1, rseed=17000, CGenes=NULL, FGenes=NULL, ccor=.4) standardGeneric("filterdata"))
setMethod("filterdata",
signature = "SCseq",
definition = function(object,mintotal,minexpr,minnumber,maxexpr,downsample,sfn,hkn,dsn,rseed,CGenes,FGenes,ccor) {
if ( ! is.numeric(mintotal) ) stop( "mintotal has to be a positive number" ) else if ( mintotal <= 0 ) stop( "mintotal has to be a positive number" )
if ( ! is.numeric(minexpr) ) stop( "minexpr has to be a non-negative number" ) else if ( minexpr < 0 ) stop( "minexpr has to be a non-negative number" )
if ( ! is.numeric(minnumber) ) stop( "minnumber has to be a non-negative integer number" ) else if ( round(minnumber) != minnumber | minnumber < 0 ) stop( "minnumber has to be a non-negative integer number" )
if ( ! ( is.numeric(downsample) | is.logical(downsample) ) ) stop( "downsample has to be logical (TRUE/FALSE)" )
if ( ! ( is.numeric(sfn) | is.logical(sfn) ) ) stop( "sfn has to be logical (TRUE/FALSE)" )
if ( ! ( is.numeric(hkn) | is.logical(hkn) ) ) stop( "hkn has to be logical (TRUE/FALSE)" )
if ( ! is.numeric(dsn) ) stop( "dsn has to be a positive integer number" ) else if ( round(dsn) != dsn | dsn <= 0 ) stop( "dsn has to be a positive integer number" )
if ( ! is.numeric(ccor) ) stop( "ccor has to be a non-negative number between 0 and 1" ) else if ( ccor < 0 | ccor > 1 ) stop( "ccor has to be a non-negative number between 0 and 1 " )
object@filterpar <- list(mintotal=mintotal, minexpr=minexpr, minnumber=minnumber, maxexpr=maxexpr, downsample=downsample, dsn=dsn, sfn=sfn, CGenes=CGenes, FGenes=FGenes)
object@ndata <- object@expdata[,apply(object@expdata,2,sum,na.rm=TRUE) >= mintotal]
if ( downsample ){
set.seed(rseed)
object@ndata <- downsample(object@expdata,n=mintotal,dsn=dsn)
}
if ( sfn ){
d <- computeSumFactors(as.matrix( object@ndata ))
object@ndata <- as.data.frame(t(t(object@ndata)/d)) + .1
}
if ( hkn ){
minE <- 1
minN <- .5 * ncol(object@ndata)
g <- apply(object@expdata[,names(object@ndata)] >= minE,1,sum) >= minN
nf <- apply(object@ndata,2,sum)
x <- as.data.frame(t(t(object@ndata)/nf))*min(nf)
x <- x[g,]
m <- apply(x,1,mean)
v <- apply(x,1,var )
ml <- log2(m)
vl <- log2(v)
f <- ml > -Inf & vl > -Inf
ml <- ml[f]
vl <- vl[f]
mm <- -8
repeat{
fit <- lm(vl ~ ml + I(ml^2))
if( coef(fit)[3] >= 0 | mm >= -1){
break
}
mm <- mm + .5
f <- ml > mm
ml <- ml[f]
vl <- vl[f]
}
lvar <- function(x) 2**(coef(fit)[1] + log2(x)*coef(fit)[2] + coef(fit)[3] * log2(x)**2)
vln <- log2(v) - log2(sapply(m,FUN=lvar))
n <- names(vln)[vln<0]
p <- x/apply(x,1,sum) + 1e-10
E <- -apply(p*log(p),1,sum)
f <- m > quantile(m,.25) & m < quantile(m,.9)
f <- f & names(m) %in% n
f <- f & E > quantile(E[f],.9)
#plot(log2(m),log2(v),col="grey",pch=20)
#lines(log2(m[order(m)]),log2(lvar(m[order(m)])),col="red",lwd=2)
#points(log2(m)[f],log2(v)[f],col="red",pch=20)
nn <- names(m)[f]
nf <- apply(object@expdata[nn,names(object@ndata)],2,mean)
object@ndata <- as.data.frame(t(t(object@expdata[,names(nf)])/nf)*min(nf) + .1)
}
if ( ! ( downsample | sfn | hkn ) ){
x <- object@ndata
object@ndata <- as.data.frame( t(t(x)/apply(x,2,sum))*min(apply(x,2,sum,na.rm=TRUE)) + .1 )
}
x <- object@ndata
object@fdata <- x[apply(x>=minexpr,1,sum,na.rm=TRUE) >= minnumber,]
x <- object@fdata
object@fdata <- x[apply(x,1,max,na.rm=TRUE) < maxexpr,]
if ( downsample ){
x <- object@expdata[,names(object@ndata)]
object@ndata <- as.data.frame( t(t(x)/apply(x,2,sum))*min(apply(x,2,sum,na.rm=TRUE)) + .1 )
}
if ( !is.null(FGenes) ){
for ( g in FGenes ){
object@fdata <- object@fdata[! rownames(object@fdata) %in% g,]
}
}
if ( !is.null(CGenes) ){
d <- as.data.frame(t(t(object@expdata)/apply(object@expdata,2,sum)))
d <- d[rownames(object@fdata),names(object@fdata)]
f <- rep(FALSE,nrow(d))
for ( g in CGenes ){
if ( ! g %in% rownames(d) ) next
z <- apply(d,1,function(x,y,cthr) sigcor(x,y,cthr),y=t(d[g,]),cthr=ccor)
f <- f | ( !is.na(z) & ( z > ccor | z < -ccor) )
}
object@fdata <- object@fdata[!f,]
}
return(object)
}
)
sigcor <- function(x,y,cthr=.4){
if ( min(var(x),var(y)) == 0 ) return(NA)
fit <- lm(x ~ y)
pv <- as.data.frame(summary(fit)[4])[2,4]
y <- as.data.frame(summary(fit)[4])[2,1]
if ( is.na(pv) | is.na(y) ) return( NA )
z <- sign(y)*sqrt(summary(fit)$r.square)
if ( is.na(z) ) return(NA)
if ( pv < .01 & abs(z) >= cthr ) return(z) else return(NA)
}
downsample <- function(x,n,dsn){
x <- round( x[,apply(x,2,sum,na.rm=TRUE) >= n], 0)
nn <- min( apply(x,2,sum) )
for ( j in 1:dsn ){
z <- data.frame(GENEID=rownames(x))
rownames(z) <- rownames(x)
initv <- rep(0,nrow(z))
for ( i in 1:dim(x)[2] ){
y <- aggregate(rep(1,nn),list(sample(rep(rownames(x),x[,i]),nn)),sum)
na <- names(x)[i]
names(y) <- c("GENEID",na)
rownames(y) <- y$GENEID
z[,na] <- initv
k <- intersect(rownames(z),y$GENEID)
z[k,na] <- y[k,na]
z[is.na(z[,na]),na] <- 0
}
rownames(z) <- as.vector(z$GENEID)
ds <- if ( j == 1 ) z[,-1] else ds + z[,-1]
}
ds <- ds/dsn + .1
return(ds)
}
dist.gen <- function(x,method="euclidean", ...) if ( method %in% c("spearman","pearson","kendall") ) as.dist( 1 - cor(t(x),method=method,...) ) else if ( method %in% c("eupearson") ) sqrt(2*as.dist( 1 - cor(t(x),method="pearson",...) )) else if ( method %in% c("logpearson") ) as.dist( 1 - cor(log2(t(x)),method="pearson",...) ) else if ( method == "binary" ) dist( 1*(x > .1) ) else if ( method == "relentr") dist(x,FUN=relentr) else if ( method == "cosine" ) dist(x,FUN=coss) else dist(x,method=method,...)
relentr <- function(x,y){ px <- x/sum(x); py <- y/sum(y); H <- sum(px*log2(px/py) + py*log2(py/px)); return(H) }
coss <- function(x,y){ sum(x*y)/sqrt(sum(x*x))/sqrt(sum(y*y)) }
dist.gen.pairs <- function(x,y,...) dist.gen(t(cbind(x,y)),...)
binompval <- function(p,N,n){
pval <- pbinom(n,round(N,0),p,lower.tail=TRUE)
pval[!is.na(pval) & pval > 0.5] <- 1-pval[!is.na(pval) & pval > 0.5]
return(pval)
}
setGeneric("plotgap", function(object) standardGeneric("plotgap"))
setMethod("plotgap",
signature = "SCseq",
definition = function(object){
if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before plotgap")
if ( sum(is.na(object@cluster$gap$Tab[,3])) > 0 ) stop("run clustexp with do.gap = TRUE first")
plot(object@cluster$gap,ylim=c( min(object@cluster$gap$Tab[,3] - object@cluster$gap$Tab[,4]), max(object@cluster$gap$Tab[,3] + object@cluster$gap$Tab[,4])))
}
)
setGeneric("plotjaccard", function(object) standardGeneric("plotjaccard"))
setMethod("plotjaccard",
signature = "SCseq",
definition = function(object){
if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before plotjaccard")
if ( length(unique(object@cluster$kpart)) < 2 ) stop("only a single cluster: no Jaccard's similarity plot")
barplot(object@cluster$jaccard,names.arg=1:length(object@cluster$jaccard),ylab="Jaccard's similarity")
}
)
setGeneric("plotoutlierprobs", function(object) standardGeneric("plotoutlierprobs"))
setMethod("plotoutlierprobs",
signature = "SCseq",
definition = function(object){
if ( length(object@cpart) == 0 ) stop("run findoutliers before plotoutlierprobs")
p <- object@cluster$kpart[ order(object@cluster$kpart,decreasing=FALSE)]
x <- object@out$cprobs[names(p)]
fcol <- object@fcol
for ( i in 1:max(p) ){
y <- -log10(x + 2.2e-16)
y[p != i] <- 0
if ( i == 1 ) b <- barplot(y,ylim=c(0,max(-log10(x + 2.2e-16))*1.1),col=fcol[i],border=fcol[i],names.arg=FALSE,ylab="-log10prob") else barplot(y,add=TRUE,col=fcol[i],border=fcol[i],names.arg=FALSE,axes=FALSE)
}
abline(-log10(object@outlierpar$probthr),0,col="black",lty=2)
d <- b[2,1] - b[1,1]
y <- 0
for ( i in 1:max(p) ) y <- append(y,b[sum(p <=i),1] + d/2)
axis(1,at=(y[1:(length(y)-1)] + y[-1])/2,lab=1:max(p))
box()
}
)
setGeneric("plotbackground", function(object) standardGeneric("plotbackground"))
setMethod("plotbackground",
signature = "SCseq",
definition = function(object){
if ( length(object@cpart) == 0 ) stop("run findoutliers before plotbackground")
m <- apply(object@fdata,1,mean)
v <- apply(object@fdata,1,var)
fit <- locfit(v~lp(m,nn=.7),family="gamma",maxk=500)
plot(log2(m),log2(v),pch=20,xlab="log2mean",ylab="log2var",col="grey")
lines(log2(m[order(m)]),log2(object@background$lvar(m[order(m)],object)),col="red",lwd=2)
lines(log2(m[order(m)]),log2(fitted(fit)[order(m)]),col="orange",lwd=2,lty=2)
legend("topleft",legend=substitute(paste("y = ",a,"*x^2 + ",b,"*x + ",d,sep=""),list(a=round(coef(object@background$vfit)[3],2),b=round(coef(object@background$vfit)[2],2),d=round(coef(object@background$vfit)[1],2))),bty="n")
}
)
setGeneric("plotsensitivity", function(object) standardGeneric("plotsensitivity"))
setMethod("plotsensitivity",
signature = "SCseq",
definition = function(object){
if ( length(object@cpart) == 0 ) stop("run findoutliers before plotsensitivity")
plot(log10(object@out$thr), object@out$stest, type="l",xlab="log10 Probability cutoff", ylab="Number of outliers")
abline(v=log10(object@outlierpar$probthr),col="red",lty=2)
}
)
setGeneric("diffgenes", function(object,cl1,cl2,mincount=1) standardGeneric("diffgenes"))
setMethod("diffgenes",
signature = "SCseq",
definition = function(object,cl1,cl2,mincount){
part <- object@cpart
cl1 <- c(cl1)
cl2 <- c(cl2)
if ( length(part) == 0 ) stop("run findoutliers before diffgenes")
if ( ! is.numeric(mincount) ) stop("mincount has to be a non-negative number") else if ( mincount < 0 ) stop("mincount has to be a non-negative number")
if ( length(intersect(cl1, part)) < length(unique(cl1)) ) stop( paste("cl1 has to be a subset of ",paste(sort(unique(part)),collapse=","),"\n",sep="") )
if ( length(intersect(cl2, part)) < length(unique(cl2)) ) stop( paste("cl2 has to be a subset of ",paste(sort(unique(part)),collapse=","),"\n",sep="") )
f <- apply(object@ndata[,part %in% c(cl1,cl2)],1,max) > mincount
x <- object@ndata[f,part %in% cl1]
y <- object@ndata[f,part %in% cl2]
if ( sum(part %in% cl1) == 1 ) m1 <- x else m1 <- apply(x,1,mean)
if ( sum(part %in% cl2) == 1 ) m2 <- y else m2 <- apply(y,1,mean)
if ( sum(part %in% cl1) == 1 ) s1 <- sqrt(x) else s1 <- sqrt(apply(x,1,var))
if ( sum(part %in% cl2) == 1 ) s2 <- sqrt(y) else s2 <- sqrt(apply(y,1,var))
d <- ( m1 - m2 )/ apply( cbind( s1, s2 ),1,mean )
names(d) <- rownames(object@ndata)[f]
if ( sum(part %in% cl1) == 1 ){
names(x) <- names(d)
x <- x[order(d,decreasing=TRUE)]
}else{
x <- x[order(d,decreasing=TRUE),]
}
if ( sum(part %in% cl2) == 1 ){
names(y) <- names(d)
y <- y[order(d,decreasing=TRUE)]
}else{
y <- y[order(d,decreasing=TRUE),]
}
return(list(z=d[order(d,decreasing=TRUE)],cl1=x,cl2=y,cl1n=cl1,cl2n=cl2))
}
)
plotdiffgenes <- function(z,gene=g){
if ( ! is.list(z) ) stop("first arguments needs to be output of function diffgenes")
if ( length(z) < 5 ) stop("first arguments needs to be output of function diffgenes")
if ( sum(names(z) == c("z","cl1","cl2","cl1n","cl2n")) < 5 ) stop("first arguments needs to be output of function diffgenes")
if ( length(gene) > 1 ) stop("only single value allowed for argument gene")
if ( !is.numeric(gene) & !(gene %in% names(z$z)) ) stop("argument gene needs to be within rownames of first argument or a positive integer number")
if ( is.numeric(gene) ){ if ( gene < 0 | round(gene) != gene ) stop("argument gene needs to be within rownames of first argument or a positive integer number") }
x <- if ( is.null(dim(z$cl1)) ) z$cl1[gene] else t(z$cl1[gene,])
y <- if ( is.null(dim(z$cl2)) ) z$cl2[gene] else t(z$cl2[gene,])
plot(1:length(c(x,y)),c(x,y),ylim=c(0,max(c(x,y))),xlab="",ylab="Expression",main=gene,cex=0,axes=FALSE)
axis(2)
box()
u <- 1:length(x)
rect(u - .5,0,u + .5,x,col="red")
v <- c(min(u) - .5,max(u) + .5)
axis(1,at=mean(v),lab=paste(z$cl1n,collapse=","))
lines(v,rep(mean(x),length(v)))
lines(v,rep(mean(x)-sqrt(var(x)),length(v)),lty=2)
lines(v,rep(mean(x)+sqrt(var(x)),length(v)),lty=2)
u <- ( length(x) + 1 ):length(c(x,y))
v <- c(min(u) - .5,max(u) + .5)
rect(u - .5,0,u + .5,y,col="blue")
axis(1,at=mean(v),lab=paste(z$cl2n,collapse=","))
lines(v,rep(mean(y),length(v)))
lines(v,rep(mean(y)-sqrt(var(y)),length(v)),lty=2)
lines(v,rep(mean(y)+sqrt(var(y)),length(v)),lty=2)
abline(v=length(x) + .5)
}
setGeneric("plottsne", function(object,final=TRUE) standardGeneric("plottsne"))
setMethod("plottsne",
signature = "SCseq",
definition = function(object,final){
if ( length(object@tsne) == 0 ) stop("run comptsne before plottsne")
if ( final & length(object@cpart) == 0 ) stop("run findoutliers before plottsne")
if ( !final & length(object@cluster$kpart) == 0 ) stop("run clustexp before plottsne")
part <- if ( final ) object@cpart else object@cluster$kpart
plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,cex=1.5,col="lightgrey")
for ( i in 1:max(part) ){
if ( sum(part == i) > 0 ) text(object@tsne[part == i,1],object@tsne[part == i,2],i,col=object@fcol[i],cex=.75,font=4)
}
}
)
setGeneric("plotlabelstsne", function(object,labels=NULL) standardGeneric("plotlabelstsne"))
setMethod("plotlabelstsne",
signature = "SCseq",
definition = function(object,labels){
if ( is.null(labels ) ) labels <- names(object@ndata)
if ( length(object@tsne) == 0 ) stop("run comptsne before plotlabelstsne")
plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,cex=1.5,col="lightgrey")
text(object@tsne[,1],object@tsne[,2],labels,cex=.5)
}
)
setGeneric("plotsymbolstsne", function(object,types=NULL) standardGeneric("plotsymbolstsne"))
setMethod("plotsymbolstsne",
signature = "SCseq",
definition = function(object,types){
if ( is.null(types) ) types <- names(object@fdata)
if ( length(object@tsne) == 0 ) stop("run comptsne before plotsymbolstsne")
if ( length(types) != ncol(object@fdata) ) stop("types argument has wrong length. Length has to equal to the column number of object@ndata")
coloc <- rainbow(length(unique(types)))
syms <- c()
plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,col="grey")
for ( i in 1:length(unique(types)) ){
f <- types == sort(unique(types))[i]
syms <- append( syms, ( (i-1) %% 25 ) + 1 )
points(object@tsne[f,1],object@tsne[f,2],col=coloc[i],pch=( (i-1) %% 25 ) + 1,cex=1)
}
legend("topleft", legend=sort(unique(types)), col=coloc, pch=syms)
}
)
setGeneric("plotexptsne", function(object,g,n="",logsc=FALSE) standardGeneric("plotexptsne"))
setMethod("plotexptsne",
signature = "SCseq",
definition = function(object,g,n="",logsc=FALSE){
if ( length(object@tsne) == 0 ) stop("run comptsne before plottsne")
if ( length(intersect(g,rownames(object@ndata))) < length(unique(g)) ) stop("second argument does not correspond to set of rownames slot ndata of SCseq object")
if ( !is.numeric(logsc) & !is.logical(logsc) ) stop("argument logsc has to be logical (TRUE/FALSE)")
if ( n == "" ) n <- g[1]
l <- apply(object@ndata[g,] - .1,2,sum) + .1
if (logsc) {
f <- l == 0
l <- log2(l)
l[f] <- NA
}
mi <- min(l,na.rm=TRUE)
ma <- max(l,na.rm=TRUE)
ColorRamp <- colorRampPalette(rev(brewer.pal(n = 7,name = "RdYlBu")))(100)
ColorLevels <- seq(mi, ma, length=length(ColorRamp))
v <- round((l - mi)/(ma - mi)*99 + 1,0)
layout(matrix(data=c(1,3,2,4), nrow=2, ncol=2), widths=c(5,1,5,1), heights=c(5,1,1,1))
par(mar = c(3,5,2.5,2))
plot(object@tsne,xlab="Dim 1",ylab="Dim 2",main=n,pch=20,cex=0,col="grey")
kk <- order(v,decreasing=F)
points(object@tsne[kk,1],object@tsne[kk,2],col=ColorRamp[v[kk]],pch=20,cex=1.5)
par(mar = c(3,2.5,2.5,2))
image(1, ColorLevels,
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
col=ColorRamp,
xlab="",ylab="",
xaxt="n")
layout(1)
}
)
plot.err.bars.y <- function(x, y, y.err, col="black", lwd=1, lty=1, h=0.1){
arrows(x,y-y.err,x,y+y.err,code=0, col=col, lwd=lwd, lty=lty)
arrows(x-h,y-y.err,x+h,y-y.err,code=0, col=col, lwd=lwd, lty=lty)
arrows(x-h,y+y.err,x+h,y+y.err,code=0, col=col, lwd=lwd, lty=lty)
}
clusGapExt <-function (x, FUNcluster, K.max, B = 100, verbose = interactive(), method="euclidean",random=TRUE,diss=FALSE,
...)
{
stopifnot(is.function(FUNcluster), length(dim(x)) == 2, K.max >=
2, (n <- nrow(x)) >= 1, (p <- ncol(x)) >= 1)
if (B != (B. <- as.integer(B)) || (B <- B.) <= 0)
stop("'B' has to be a positive integer")
if (is.data.frame(x))
x <- as.matrix(x)
ii <- seq_len(n)
W.k <- function(X, kk) {
clus <- if (kk > 1)
FUNcluster(X, kk, ...)$cluster
else rep.int(1L, nrow(X))
0.5 * sum(vapply(split(ii, clus), function(I) {
if ( diss ){
xs <- X[I,I, drop = FALSE]
sum(xs/nrow(xs))
}else{
xs <- X[I, , drop = FALSE]
sum(dist.gen(xs,method=method)/nrow(xs))
}
}, 0))
}
logW <- E.logW <- SE.sim <- numeric(K.max)
if (verbose)
cat("Clustering k = 1,2,..., K.max (= ", K.max, "): .. ",
sep = "")
for (k in 1:K.max){
cat("k =",k,"\n")
logW[k] <- log(W.k(x, k))
}
if (verbose)
cat("done\n")
if (random){
xs <- scale(x, center = TRUE, scale = FALSE)
m.x <- rep(attr(xs, "scaled:center"), each = n)
V.sx <- svd(xs)$v
rng.x1 <- apply(xs %*% V.sx, 2, range)
logWks <- matrix(0, B, K.max)
if (verbose)
cat("Bootstrapping, b = 1,2,..., B (= ", B, ") [one \".\" per sample]:\n",
sep = "")
for (b in 1:B) {
z1 <- apply(rng.x1, 2, function(M, nn) runif(nn, min = M[1],
max = M[2]), nn = n)
z <- tcrossprod(z1, V.sx) + m.x
##z <- apply(x,2,function(m) runif(length(m),min=min(m),max=max(m)))
##z <- apply(x,2,function(m) sample(m))
for (k in 1:K.max) {
logWks[b, k] <- log(W.k(z, k))
}
if (verbose)
cat(".", if (b%%50 == 0)
paste(b, "\n"))
}
if (verbose && (B%%50 != 0))
cat("", B, "\n")
E.logW <- colMeans(logWks)
SE.sim <- sqrt((1 + 1/B) * apply(logWks, 2, var))
}else{
E.logW <- rep(NA,K.max)
SE.sim <- rep(NA,K.max)
}
structure(class = "clusGap", list(Tab = cbind(logW, E.logW,
gap = E.logW - logW, SE.sim), n = n, B = B, FUNcluster = FUNcluster))
}
clustfun <- function(x,clustnr=20,bootnr=50,metric="pearson",do.gap=FALSE,sat=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000,FUNcluster="kmedoids",distances=NULL,link="single")
{
if ( clustnr < 2) stop("Choose clustnr > 1")
if ( FUNcluster %in% c("kmedoids","clara","hclust") ){
di <- t(x)
diM <- dist.gen(di,method=metric)
}else{
di <- as.matrix(dist.gen(t(x),method=metric))
}
if ( FUNcluster %in% c("clara") ){
diCL <- as.data.frame( cmdscale(as.matrix(diM),k=nrow(di)-1) )
rownames(diCL) <- names(x)
}
if ( nrow(di) - 1 < clustnr ) clustnr <- nrow(di) - 1
if ( do.gap | sat | cln > 0 ){
gpr <- NULL
f <- if ( cln == 0 ) TRUE else FALSE
if ( do.gap ){
set.seed(rseed)
if ( FUNcluster == "kmeans" ) gpr <- clusGapExt(as.matrix(di), FUN = kmeans, K.max = clustnr, B = B.gap, iter.max=100)
if ( FUNcluster == "kmedoids" ) gpr <- clusGapExt(as.matrix(di), FUN = function(x,k) pam(dist.gen(x,method=metric),k), K.max = clustnr, B = B.gap, method=metric)
if ( FUNcluster == "clara" ) gpr <- clusGapExt(diCL, FUN = function(x,k) clara(x,k), K.max = clustnr, B = B.gap, method="euclidean")
if ( FUNcluster == "hclust" ) gpr <- clusGapExt(as.matrix(di), FUN = function(x,k){ y <- disthclustCBI(dist.gen(x,method=metric),k,link=link,scaling=FALSE,method="ward.D2"); y$cluster <- y$partition; y }, K.max = clustnr, B = B.gap)
if ( f ) cln <- maxSE(gpr$Tab[,3],gpr$Tab[,4],method=SE.method,SE.factor)
}
if ( sat ){
if ( ! do.gap ){
if ( FUNcluster == "kmeans" ) gpr <- clusGapExt(as.matrix(di), FUN = kmeans, K.max = clustnr, B = B.gap, iter.max=100, random=FALSE)
##if ( FUNcluster == "kmedoids" ) gpr <- clusGapExt(as.matrix(di), FUN = function(x,k) pam(dist.gen(x,method=metric),k), K.max = clustnr, B = B.gap, random=FALSE, method=metric)
if ( FUNcluster == "kmedoids" ) gpr <- clusGapExt(as.matrix(diM), FUN = function(x,k) pam(as.dist(x),k), K.max = clustnr, B = B.gap, random=FALSE, method=metric,diss=TRUE)
if ( FUNcluster == "clara" ) gpr <- clusGapExt(as.matrix(diCL), FUN = function(x,k) clara(x,k), K.max = clustnr, B = B.gap, random=FALSE, method="euclidean")
if ( FUNcluster == "hclust" ) gpr <- clusGapExt(as.matrix(di), FUN = function(x,k){ y <- disthclustCBI(dist.gen(x,method=metric),k,link=link,scaling=FALSE,method="ward.D2"); y$cluster <- y$partition; y }, K.max = clustnr, B = B.gap, random=FALSE)
}
g <- gpr$Tab[,1]
y <- g[-length(g)] - g[-1]
mm <- numeric(length(y))
nn <- numeric(length(y))
for ( i in 1:length(y)){
mm[i] <- mean(y[i:length(y)])
nn[i] <- sqrt(var(y[i:length(y)]))
}
if ( f ) cln <- max(min(which( y - (mm + nn) < 0 )),1)
}
if ( cln <= 1 ) {
clb <- list(result=list(partition=rep(1,dim(x)[2])),bootmean=1)
names(clb$result$partition) <- names(x)
return(list(x=x,clb=clb,gpr=gpr,di=if ( FUNcluster %in% c("kmedoids","clara","hclust") ) diM else di))
}
if ( FUNcluster == "kmeans" ) clb <- clusterboot(di,B=bootnr,distances=FALSE,bootmethod="boot",clustermethod=kmeansCBI,krange=cln,scaling=FALSE,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
##if ( FUNcluster == "kmedoids" ) clb <- clusterboot(diM,B=bootnr,bootmethod="boot",clustermethod=pamkCBI,k=cln,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
if ( FUNcluster == "kmedoids" ) clb <- clusterboot(diM,B=bootnr,bootmethod="boot",clustermethod=pamkdCBI,scaling=FALSE,diss=TRUE,k=cln,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
if ( FUNcluster == "clara" ) clb <- clusterboot(diCL,B=bootnr,bootmethod="boot",clustermethod=pamkdCBI,usepam=FALSE,scaling=FALSE,krange=cln,multipleboot=FALSE,bscompare=TRUE,seed=rseed)
if ( FUNcluster == "hclust" ) clb <- clusterboot(diM,B=bootnr,distances=TRUE,bootmethod="boot",clustermethod=disthclustCBI,method="ward.D2",k=cln,link=link,scaling=FALSE,multipleboot=FALSE,bscompare=TRUE,seed=rseed,diss=TRUE)
return(list(x=x,clb=clb,gpr=gpr,di=if ( FUNcluster %in% c("kmedoids","clara","hclust") ) diM else di))
}
}
#' @exportMethod clustexp
setGeneric("clustexp", function(object,clustnr=20,bootnr=50,metric="pearson",do.gap=FALSE,sat=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000,FUNcluster="kmedoids",FSelect=TRUE) standardGeneric("clustexp"))
setMethod("clustexp",
signature = "SCseq",
definition = function(object,clustnr,bootnr,metric,do.gap,sat,SE.method,SE.factor,B.gap,cln,rseed,FUNcluster,FSelect) {
if ( ! is.numeric(clustnr) ) stop("clustnr has to be a positive integer") else if ( round(clustnr) != clustnr | clustnr <= 0 ) stop("clustnr has to be a positive integer")
if ( ! is.numeric(bootnr) ) stop("bootnr has to be a positive integer") else if ( round(bootnr) != bootnr | bootnr <= 0 ) stop("bootnr has to be a positive integer")
if ( ! ( metric %in% c( "spearman","pearson","eupearson","logpearson","kendall","euclidean","maximum","manhattan","canberra","binary","minkowski","relentr") ) ) stop("metric has to be one of the following: spearman, pearson, eupearson, logpearson, kendall, euclidean, maximum, manhattan, canberra, binary, minkowski","relentr")
if ( ! ( SE.method %in% c( "firstSEmax","Tibs2001SEmax","globalSEmax","firstmax","globalmax") ) ) stop("SE.method has to be one of the following: firstSEmax, Tibs2001SEmax, globalSEmax, firstmax, globalmax")
if ( ! is.numeric(SE.factor) ) stop("SE.factor has to be a non-negative integer") else if ( SE.factor < 0 ) stop("SE.factor has to be a non-negative integer")
if ( ! ( is.numeric(do.gap) | is.logical(do.gap) ) ) stop( "do.gap has to be logical (TRUE/FALSE)" )
if ( ! ( is.numeric(sat) | is.logical(sat) ) ) stop( "sat has to be logical (TRUE/FALSE)" )
if ( ! is.numeric(B.gap) ) stop("B.gap has to be a positive integer") else if ( round(B.gap) != B.gap | B.gap <= 0 ) stop("B.gap has to be a positive integer")
if ( ! is.numeric(cln) ) stop("cln has to be a non-negative integer") else if ( round(cln) != cln | cln < 0 ) stop("cln has to be a non-negative integer")
if ( ! is.numeric(rseed) ) stop("rseed has to be numeric")
if ( !do.gap & !sat & cln == 0 ) stop("cln has to be a positive integer or either do.gap or sat has to be TRUE")
if ( ! ( FUNcluster %in% c("kmeans","hclust","kmedoids","clara") ) ) stop("FUNcluster has to be one of the following: kmeans, hclust,kmedoids","clara")
if ( ! ( is.numeric(FSelect) | is.logical(FSelect) ) ) stop( "FSelect has to be logical (TRUE/FALSE)" )
object@clusterpar <- list(clustnr=clustnr,bootnr=bootnr,metric=metric,do.gap=do.gap,sat=sat,SE.method=SE.method,SE.factor=SE.factor,B.gap=B.gap,cln=cln,rseed=rseed,FUNcluster=FUNcluster,FSelect=FSelect)
if ( FSelect ){
x <- object@fdata
m <- apply(x,1,mean)
v <- apply(x,1,var )
ml <- log2(m)
vl <- log2(v)
f <- ml > -Inf & vl > -Inf
ml <- ml[f]
vl <- vl[f]
mm <- -8
repeat{
fit <- lm(vl ~ ml + I(ml^2))
if( coef(fit)[3] >= 0 | mm >= -1){
break
}
mm <- mm + .5
f <- ml > mm
ml <- ml[f]
vl <- vl[f]
}
lvar <- function(x) 2**(coef(fit)[1] + log2(x)*coef(fit)[2] + coef(fit)[3] * log2(x)**2)
vln <- log2(v) - log2(sapply(m,FUN=lvar))
n <- names(vln)[vln>0]
##n <- names(head(vln[order(vln,decreasing=T)],round(length(vln)/5,0)))
}else{
n <- rownames(object@fdata)
}
y <- clustfun(object@fdata[n,],clustnr,bootnr,metric,do.gap,sat,SE.method,SE.factor,B.gap,cln,rseed,FUNcluster)
object@cluster <- list(kpart=y$clb$result$partition, jaccard=y$clb$bootmean, gap=y$gpr, clb=y$clb, features=n)
object@distances <- as.matrix( y$di )
set.seed(111111)
object@fcol <- sample(rainbow(max(y$clb$result$partition)))
return(object)
}
)
#' @exportMethod findoutliers
setGeneric("findoutliers", function(object,outminc=5,outlg=2,probthr=1e-3,thr=2**-(1:40),outdistquant=.95) standardGeneric("findoutliers"))
setMethod("findoutliers",
signature = "SCseq",
definition = function(object,outminc,outlg,probthr,thr,outdistquant) {
if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before findoutliers")
if ( ! is.numeric(outminc) ) stop("outminc has to be a non-negative integer") else if ( round(outminc) != outminc | outminc < 0 ) stop("outminc has to be a non-negative integer")
if ( ! is.numeric(outlg) ) stop("outlg has to be a non-negative integer") else if ( round(outlg) != outlg | outlg < 0 ) stop("outlg has to be a non-negative integer")
if ( ! is.numeric(probthr) ) stop("probthr has to be a number between 0 and 1") else if ( probthr < 0 | probthr > 1 ) stop("probthr has to be a number between 0 and 1")
if ( ! is.numeric(thr) ) stop("thr hast to be a vector of numbers between 0 and 1") else if ( min(thr) < 0 | max(thr) > 1 ) stop("thr hast to be a vector of numbers between 0 and 1")
if ( ! is.numeric(outdistquant) ) stop("outdistquant has to be a number between 0 and 1") else if ( outdistquant < 0 | outdistquant > 1 ) stop("outdistquant has to be a number between 0 and 1")
object@outlierpar <- list( outminc=outminc,outlg=outlg,probthr=probthr,thr=thr,outdistquant=outdistquant )
### calibrate background model
m <- log2(apply(object@fdata,1,mean))
v <- log2(apply(object@fdata,1,var))
f <- m > -Inf & v > -Inf
m <- m[f]
v <- v[f]
mm <- -8
repeat{
fit <- lm(v ~ m + I(m^2))
##if( coef(fit)[3] >= 0 | mm >= 3){
if( coef(fit)[3] >= 0 | mm >= -1){
break
}
mm <- mm + .5
f <- m > mm
m <- m[f]
v <- v[f]
}
object@background <- list()
object@background$vfit <- fit
object@background$lvar <- function(x,object) 2**(coef(object@background$vfit)[1] + log2(x)*coef(object@background$vfit)[2] + coef(object@background$vfit)[3] * log2(x)**2)
object@background$lsize <- function(x,object) x**2/(max(x + 1e-6,object@background$lvar(x,object)) - x)
### identify outliers
out <- c()
stest <- rep(0,length(thr))
cprobs <- c()
outgene <- list()
for ( n in 1:max(object@cluster$kpart) ){
x <- object@fdata[,object@cluster$kpart == n, drop = F]
if ( ncol(x) == 1 ){
cprobs <- append(cprobs,setNames(.5, colnames(x)))
} else {
x <- x[apply(x,1,max) > outminc,]
z <- t( apply(x,1,function(x){ apply( cbind( pnbinom(round(x,0),mu=mean(x),size=object@background$lsize(mean(x),object)) , 1 - pnbinom(round(x,0),mu=mean(x),size=object@background$lsize(mean(x),object)) ),1, min) } ) )
if(nrow(z) == 1) {
cp <- setNames(rep(1, ncol(z)),colnames(z))
} else {
cp <- apply(z,2,function(x){
y <- p.adjust(x,method="BH")
y <- y[order(y,decreasing=FALSE)]
return(y[outlg])
})
}
f <- cp < probthr
cprobs <- append(cprobs,cp)
if ( sum(f) > 0 ) out <- append(out,names(x)[f])
for ( j in 1:length(thr) ) stest[j] <- stest[j] + sum( cp < thr[j] )
fg <- apply(z,1,min) < probthr
outgene[[n]] <- if ( sum(fg) > 0 ) z[fg,] else 0
}
}
object@out <-list(out=out,stest=stest,thr=thr,cprobs=cprobs,outgene=outgene)
### cluster outliers
clp2p.cl <- c()
cols <- names(object@fdata)
cpart <- object@cluster$kpart
di <- as.data.frame(object@distances)
for ( i in 1:max(cpart) ) {
tcol <- cols[cpart == i]
if ( sum(!(tcol %in% out)) > 1 ) clp2p.cl <- append(clp2p.cl,as.vector(t(di[tcol[!(tcol %in% out)],tcol[!(tcol %in% out)]])))
}
clp2p.cl <- clp2p.cl[clp2p.cl>0]
cadd <- list()
if ( length(out) > 0 ){
if (length(out) == 1){
cadd <- list(out)
}else{
n <- out
m <- as.data.frame(di[out,out])
for ( i in 1:length(out) ){
if ( length(n) > 1 ){
o <- order(apply(cbind(m,1:dim(m)[1]),1,function(x) min(x[1:(length(x)-1)][-x[length(x)]])),decreasing=FALSE)
m <- m[o,o]
n <- n[o]
f <- m[,1] < quantile(clp2p.cl,outdistquant) | m[,1] == min(clp2p.cl)
ind <- 1
if ( sum(f) > 1 ) for ( j in 2:sum(f) ) if ( apply(m[f,f][j,c(ind,j)] > quantile(clp2p.cl,outdistquant) ,1,sum) == 0 ) ind <- append(ind,j)
cadd[[i]] <- n[f][ind]
g <- ! n %in% n[f][ind]
n <- n[g]
m <- m[g,g]
if ( sum(g) == 0 ) break
}else if (length(n) == 1){
cadd[[i]] <- n
break
}
}
}
for ( i in 1:length(cadd) ){
cpart[cols %in% cadd[[i]]] <- max(cpart) + 1
}
}
### determine final clusters
for ( i in min(cpart):max(cpart) ){
if ( sum(cpart == i) == 0 ) next
f <- cols[cpart == i]
d <- object@fdata[object@cluster$features,]
if ( length(f) == 1 ){
cent <- d[,f]
md <- f
}else{
if ( object@clusterpar$FUNcluster %in% c("kmedoids","clara","hclust") ){
##x <- apply(as.matrix(dist.gen(t(d[,f]),method=object@clusterpar$metric)),2,mean)
x <- apply(object@distances[f,f],2,mean)
md <- f[which(x == min(x))[1]]
cent <- d[,md]
}else{
cent <- apply(d[,f],1,mean)
}
}
if ( i == min(cpart) ) dcent <- data.frame(cent) else dcent <- cbind(dcent,cent)
if ( object@clusterpar$FUNcluster %in% c("kmedoids","clara","hclust") ){
if ( i == min(cpart) ) tmp <- data.frame(object@distances[,md]) else tmp <- cbind(tmp,object@distances[,md])
}else{
if ( i == min(cpart) ) tmp <- data.frame(apply(d,2,dist.gen.pairs,y=cent,method=object@clusterpar$metric)) else tmp <- cbind(tmp,apply(d,2,dist.gen.pairs,y=cent,method=object@clusterpar$metric))
}
}
cpart <- apply(tmp,1,function(x) order(x,decreasing=FALSE)[1])
for ( i in max(cpart):1){if (sum(cpart==i)==0) cpart[cpart>i] <- cpart[cpart>i] - 1 }
object@cpart <- cpart
set.seed(111111)
object@fcol <- sample(rainbow(max(cpart)))
return(object)
}
)
#' @exportMethod comptsne
setGeneric("comptsne", function(object,rseed=15555,sammonmap=FALSE,initial_cmd=TRUE,fast=FALSE,perplexity=30,...) standardGeneric("comptsne"))
setMethod("comptsne",
signature = "SCseq",
definition = function(object,rseed,sammonmap,initial_cmd,fast,perplexity,...){
if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before comptsne")
set.seed(rseed)
di <- if ( object@clusterpar$FUNcluster %in% c("kmedoids","clara","hclust")) as.dist(object@distances) else dist.gen(as.matrix(object@distances))
if ( sammonmap ){
object@tsne <- as.data.frame(sammon(di,k=2)$points)
}else{
if ( fast ){
ts <- if ( initial_cmd ) Rtsne(di,dims=2,initial_config=cmdscale(di,k=2),perplexity=perplexity,...)$Y else Rtsne(di,dims=2,perplexity=perplexity,...)$Y
}else{
ts <- if ( initial_cmd ) tsne(di,k=2,initial_config=cmdscale(di,k=2),perplexity=perplexity,...) else tsne(di,k=2,perplexity=perplexity,...)
}
object@tsne <- as.data.frame(ts)
}
return(object)
}
)
setGeneric("clustdiffgenes", function(object,pvalue=.01) standardGeneric("clustdiffgenes"))
setMethod("clustdiffgenes",
signature = "SCseq",
definition = function(object,pvalue){
if ( length(object@cpart) == 0 ) stop("run findoutliers before clustdiffgenes")
if ( ! is.numeric(pvalue) ) stop("pvalue has to be a number between 0 and 1") else if ( pvalue < 0 | pvalue > 1 ) stop("pvalue has to be a number between 0 and 1")
cdiff <- list()
part <- object@cpart
for ( i in 1:max(part) ){
if ( sum(part == i) == 0 ) next
B <- names(part)[part == i]
A <- names(part)[part != i]
de <- diffexpnb(object@fdata, A=A, B=B, method="pooled",norm=FALSE, DESeq=FALSE, vfit=object@background$vfit, locreg=FALSE)
d <- data.frame(mean.ncl=de$res$baseMeanA,mean.cl=de$res$baseMeanB,fc=de$res$foldChange,pv=de$res$pv,padj=de$res$padj)
rownames(d) <- rownames(de$res)
d <- d[order(d$pv,decreasing=FALSE),]
cdiff[[paste("cl",i,sep=".")]] <- d[d$pv < pvalue,]
}
return(cdiff)
}
)
setGeneric("plotsaturation", function(object,disp=FALSE) standardGeneric("plotsaturation"))
setMethod("plotsaturation",
signature = "SCseq",
definition = function(object,disp){
if ( length(object@cluster$gap) == 0 ) stop("run clustexp before plotsaturation")
g <- object@cluster$gap$Tab[,1]
y <- g[-length(g)] - g[-1]
mm <- numeric(length(y))
nn <- numeric(length(y))
for ( i in 1:length(y)){
mm[i] <- mean(y[i:length(y)])
nn[i] <- sqrt(var(y[i:length(y)]))
}
cln <- max(min(which( y - (mm + nn) < 0 )),1)
x <- 1:length(y)
if (disp){
x <- 1:length(g)
plot(x,g,pch=20,col="grey",xlab="k",ylab="log within cluster dispersion")
f <- x == cln
points(x[f],g[f],col="blue")
}else{
plot(x,y,pch=20,col="grey",xlab="k",ylab="Change in log within cluster dispersion")
points(x,mm,col="red",pch=20)
plot.err.bars.y(x,mm,nn,col="red")
points(x,y,col="grey",pch=20)
f <- x == cln
points(x[f],y[f],col="blue")
}
}
)
setGeneric("plotsilhouette", function(object) standardGeneric("plotsilhouette"))
setMethod("plotsilhouette",
signature = "SCseq",
definition = function(object){
if ( length(object@cluster$kpart) == 0 ) stop("run clustexp before plotsilhouette")
if ( length(unique(object@cluster$kpart)) < 2 ) stop("only a single cluster: no silhouette plot")
kpart <- object@cluster$kpart
distances <- if ( object@clusterpar$FUNcluster %in% c("kmedoids","clara","hclust") ) as.dist(object@distances) else dist.gen(object@distances)
si <- silhouette(kpart,distances)
plot(si)
}
)
compmedoids <- function(x,part,metric="pearson"){
m <- c()
for ( i in sort(unique(part)) ){
f <- names(x)[part == i]
if ( length(f) == 1 ){
m <- append(m,f)
}else{
y <- apply(as.matrix(dist.gen(t(x[,f]),method=metric)),2,mean)
m <- append(m,f[which(y == min(y))[1]])
}
}
m
}
setGeneric("clustheatmap", function(object,final=FALSE,hmethod="single") standardGeneric("clustheatmap"))
setMethod("clustheatmap",
signature = "SCseq",
definition = function(object,final,hmethod){
if ( final & length(object@cpart) == 0 ) stop("run findoutliers before clustheatmap")
if ( !final & length(object@cluster$kpart) == 0 ) stop("run clustexp before clustheatmap")
x <- object@fdata
part <- if ( final ) object@cpart else object@cluster$kpart
na <- c()
j <- 0
for ( i in 1:max(part) ){
if ( sum(part == i) == 0 ) next
j <- j + 1
na <- append(na,i)
d <- x[,part == i]
if ( sum(part == i) == 1 ) cent <- d else cent <- apply(d,1,mean)
if ( j == 1 ) tmp <- data.frame(cent) else tmp <- cbind(tmp,cent)
}
names(tmp) <- paste("cl",na,sep=".")
ld <- if ( object@clusterpar$FUNcluster %in% c("kmedoids","clara","hclust") ) dist.gen(t(tmp),method=object@clusterpar$metric) else dist.gen(as.matrix(dist.gen(t(tmp),method=object@clusterpar$metric)))
if ( max(part) > 1 ) cclmo <- hclust(ld,method=hmethod)$order else cclmo <- 1
q <- part
for ( i in 1:max(part) ){
q[part == na[cclmo[i]]] <- i
}
part <- q
di <- if ( object@clusterpar$FUNcluster %in% c("kmedoids","clara","hclust") ) object@distances else as.data.frame( as.matrix( dist.gen(t(object@distances)) ) )
pto <- part[order(part,decreasing=FALSE)]
ptn <- c()
for ( i in 1:max(pto) ){ pt <- names(pto)[pto == i]; z <- if ( length(pt) == 1 ) pt else pt[hclust(as.dist(t(di[pt,pt])),method=hmethod)$order]; ptn <- append(ptn,z) }
col <- object@fcol
mi <- min(di,na.rm=TRUE)
ma <- max(di,na.rm=TRUE)
layout(matrix(data=c(1,3,2,4), nrow=2, ncol=2), widths=c(5,1,5,1), heights=c(5,1,1,1))
ColorRamp <- colorRampPalette(brewer.pal(n = 7,name = "RdYlBu"))(100)
ColorLevels <- seq(mi, ma, length=length(ColorRamp))
if ( mi == ma ){
ColorLevels <- seq(0.99*mi, 1.01*ma, length=length(ColorRamp))
}
par(mar = c(3,5,2.5,2))
image(as.matrix(di[ptn,ptn]),col=ColorRamp,axes=FALSE)
abline(0,1)
box()
tmp <- c()
for ( u in 1:max(part) ){
ol <- (0:(length(part) - 1)/(length(part) - 1))[ptn %in% names(x)[part == u]]
points(rep(0,length(ol)),ol,col=col[cclmo[u]],pch=15,cex=.75)
points(ol,rep(0,length(ol)),col=col[cclmo[u]],pch=15,cex=.75)
tmp <- append(tmp,mean(ol))
}
axis(1,at=tmp,lab=cclmo)
axis(2,at=tmp,lab=cclmo)
par(mar = c(3,2.5,2.5,2))
image(1, ColorLevels,
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
col=ColorRamp,
xlab="",ylab="",
xaxt="n")
layout(1)
return(cclmo)
}
)
plot3dtsne <- function(object,perplexity=30,fast=FALSE,x=NULL,g=NULL,logsc=FALSE,final=TRUE,ret=FALSE,tp=1){
require(rgl)
if ( is.null(x) ){
di <- dist.gen(as.matrix(object@distances))
if (fast){
tt <- Rtsne(di,dims=3,initial_config=cmdscale(di,k=3),perplexity=perplexity)$Y
}else{
tt <- tsne(di,k=3,initial_config=cmdscale(di,k=3),perplexity=perplexity)
}
}else{
tt <- x
}
if ( final ) part <- object@cpart else part <- object@cluster$kpart
plot3d(tt[,1], tt[,2], tt[,3], xlab = "", ylab = "", zlab = "", alpha = tp, col = "grey", pch="16", type="p", size = 8, point_antialias = TRUE)
if ( is.null(g) ){
for ( i in sort(unique(part)) ){ f <- part == i; text3d(tt[f,1], tt[f,2], tt[f,3], rep(i,sum(f)), font=10, alpha=tp, size=9, depth_test = "always", color=object@fcol[i])}
}
if ( !is.null(g) ){
l <- apply(object@ndata[g,] - .1,2,sum) + .1
if (logsc) {
f <- l == 0
l <- log2(l)
l[f] <- NA
}
mi <- min(l,na.rm=TRUE)
ma <- max(l,na.rm=TRUE)
ColorRamp <- colorRampPalette(rev(brewer.pal(n = 7,name = "RdYlBu")))(100)
ColorLevels <- seq(mi, ma, length=length(ColorRamp))
v <- round((l - mi)/(ma - mi)*99 + 1,0)
kk <- order(v,decreasing=F)
apply(cbind(tt[kk,],ColorRamp[v[kk]]),1,function(x) points3d(x[1],x[2],x[3],col=x[4],pch="16",size=8))
image(1, ColorLevels,
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
col=ColorRamp,
xlab="",ylab="",
xaxt="n")
}
if (ret) return(tt)
}
name2id <- function(x,id) {
n <- c()
for ( j in x ){ n <- append(n,id[grep(j,id)])
}
n
}
varRegression <- function(x,vars){
z <- apply(log2(x),1,
function(x,vars){
d <- cbind(vars, x); names(d) <- c(names(vars),"gene");
f <- as.formula(paste("gene ", " ~ ", paste(names(vars),collapse="+"),sep=""));
m <- lm( f, data=d)
residuals(m) + coef(m)[1]
},vars=vars)
z <- as.data.frame(t(2**z))
names(z) <- names(x)
rownames(z) <- rownames(x)
z
}
CCcorrect <- function(x,vset=NULL,CGenes=NULL,ccor=.4,nComp=NULL,pvalue=.01,quant=.01,mode="pca"){
x <- log2(x)
if ( is.null(nComp) ) nComp <- min(ncol(x),nrow(x))
## x <- object@fdata
## mode pca or ica
X <- as.matrix(t(x))
if ( mode == "pca" ){
mu <- colMeans(X)
Xpca <- prcomp(X)
y <- Xpca$rotation
}
if ( mode == "ica" ){
require(ica)
Xica <- icafast(t(X),nComp)
E <- t(X) - tcrossprod(Xica$S,Xica$M)
y <- as.data.frame(Xica$S)
rownames(y) <- rownames(x)
}
n <- c()
if ( !is.null(vset) ){
for ( k in vset){
m <- intersect(rownames(y),k)
if ( length(m) > 0 ){
for ( i in 1:nComp){
q <- quantile(y[,i],1-quant)
nq <- rownames(y)[y[,i] > q]
nqm <- intersect(m,nq)
pvg <- fisher.test(matrix(c(ncol(y)-length(nq),length(nq),length(m) - length(nqm),length(nqm)),ncol=2),alternative="g")$p.value
q <- quantile(y[,i],quant)
nq <- rownames(y)[y[,i] < q]
nqm <- intersect(m,nq)
pvl <- fisher.test(matrix(c(ncol(y)-length(nq),length(nq),length(m) - length(nqm),length(nqm)),ncol=2),alternative="g")$p.value
if ( min(pvg,pvl) < pvalue ){
n <- append(n,i)
##break
}
}
}
}
}
if ( !is.null(CGenes) ){
for ( g in CGenes ){
if ( g %in% rownames(x) ){
z <- apply(t(X),1,function(x,y) sigcor(x,y,cthr=ccor),y=t(x[g,]))
m <- rownames(x)[ !is.na(z) & ( z > ccor | z < -ccor ) ]
if ( length(m) > 0 ){
for ( i in 1:nComp){
q <- quantile(y[,i],1-quant)
nq <- rownames(y)[y[,i] > q]
nqm <- intersect(m,nq)
pvg <- fisher.test(matrix(c(ncol(y)-length(nq),length(nq),length(m) - length(nqm),length(nqm)),ncol=2),alternative="g")$p.value
q <- quantile(y[,i],.01)
nq <- rownames(y)[y[,i] < q]
nqm <- intersect(m,nq)
pvl <- fisher.test(matrix(c(ncol(y)-length(nq),length(nq),length(m) - length(nqm),length(nqm)),ncol=2),alternative="g")$p.value
if ( min(pvg,pvl) < pvalue ){
n <- append(n,i)
##break
}
}
}
}
}
}
n <- unique(n)
f <- ! 1:nComp %in% n
if ( mode == "pca" ){
Xhat = Xpca$x[,(1:nComp)[f]] %*% t(Xpca$rotation[,(1:nComp)[f]])
Xhat = scale(Xhat, center = -mu, scale = FALSE)
Xhat <- as.data.frame(t(Xhat))
}
if ( mode == "ica" ){
Xhat <- tcrossprod(Xica$S[,(1:nComp)[! 1:nComp %in% n]],Xica$M[,(1:nComp)[! 1:nComp %in% n]]) + E
Xhat <- as.data.frame(Xhat)
}
names(Xhat) <- names(x)
rownames(Xhat) <- rownames(x)
Xhat <- as.data.frame(2**Xhat)
#for ( i in 1:ncol(Xhat) ) Xhat[Xhat[,i] < .1,i] <- .1
if ( mode == "pca" ){
return(list(xcor=Xhat,n=n,pca=Xpca))
}
if ( mode == "ica" ){
return(list(xcor=Xhat,n=n,ica=Xica))
}
}
## class definition
#' @export Ltree
#' @exportClass Ltree
Ltree <- setClass("Ltree", slots = c(sc = "SCseq", ldata = "list", entropy = "vector", trproj = "list", par = "list", prback = "data.frame", prbacka = "data.frame", ltcoord = "matrix", prtree = "list", sigcell = "vector", cdata = "list", trl = "list" ))
setValidity("Ltree",
function(object) {
msg <- NULL
if ( class(object@sc)[1] != "SCseq" ){
msg <- c(msg, "input data must be of class SCseq")
}
if (is.null(msg)) TRUE
else msg
}
)
setMethod("initialize",
signature = "Ltree",
definition = function(.Object, sc ){
.Object@sc <- sc
validObject(.Object)
return(.Object)
}
)
#' @exportMethod compentropy
setGeneric("compentropy", function(object) standardGeneric("compentropy"))
setMethod("compentropy",
signature = "Ltree",
definition = function(object){
probs <- t(t(object@sc@ndata)/apply(object@sc@ndata,2,sum))
object@entropy <- -apply(probs*log(probs)/log(nrow(object@sc@ndata)),2,sum)
return(object)
}
)
compproj <- function(pdiloc,lploc,cnloc,mloc,d=NULL){
pd <- data.frame(pdiloc)
npd <- names(pd)
k <- paste("X",sort(rep(1:nrow(pdiloc),length(mloc))),sep="")
pd <- cbind(k,as.data.frame(t(matrix(as.vector(apply(pdiloc,1,function(x) rep(x,length(mloc)))),nrow=ncol(pdiloc)))))
names(pd) <- c("k",npd)
pd <- pd[order(pd$k),]
if ( is.null(d) ){
cnv <- t(matrix(rep(t(cnloc),nrow(pdiloc)),nrow=ncol(pdiloc)))
pdcl <- paste("X",lploc[as.numeric(sub("X","",pd$k))],sep="")
rownames(cnloc) <- paste("X",mloc,sep="")
pdcn <- cnloc[pdcl,]
v <- cnv - pdcn
}else{
v <- d$v
pdcn <- d$pdcn
}
w <- pd[,names(pd) != "k"] - pdcn
h <- apply(cbind(v,w),1,function(x){
x1 <- x[1:(length(x)/2)];
x2 <- x[(length(x)/2 + 1):length(x)];
x1s <- sqrt(sum(x1**2)); x2s <- sqrt(sum(x2**2)); y <- sum(x1*x2)/x1s/x2s; return( if (x1s == 0 | x2s == 0 ) NA else y ) })
rma <- as.data.frame(matrix(h,ncol=nrow(pdiloc)))
names(rma) <- unique(pd$k)
pdclu <- lploc[as.numeric(sub("X","",names(rma)))]
pdclp <- apply(t(rma),1,function(x) if (sum(!is.na(x)) == 0 ) NA else mloc[which(abs(x) == max(abs(x),na.rm=TRUE))[1]])
pdclh <- apply(t(rma),1,function(x) if (sum(!is.na(x)) == 0 ) NA else x[which(abs(x) == max(abs(x),na.rm=TRUE))[1]])
pdcln <- names(lploc)[as.numeric(sub("X","",names(rma)))]
names(rma) <- pdcln
rownames(rma) <- paste("X",mloc,sep="")
res <- data.frame(o=pdclu,l=pdclp,h=pdclh)
rownames(res) <- pdcln
return(list(res=res[names(lploc),],rma=as.data.frame(t(rma[,names(lploc)])),d=list(v=v,pdcn=pdcn)))
}
pdishuffle <- function(pdi,lp,cn,m,all=FALSE){
if ( all ){
d <- as.data.frame(pdi)
y <- t(apply(pdi,1,function(x) runif(length(x),min=min(x),max=max(x))))
names(y) <- names(d)
rownames(y) <- rownames(d)
return(y)
}else{
fl <- TRUE
for ( i in unique(lp)){
if ( sum(lp == i) > 1 ){
y <-data.frame( t(apply(as.data.frame(pdi[,lp == i]),1,sample)) )
}else{
y <- as.data.frame(pdi[,lp == i])
}
names(y) <- names(lp)[lp == i]
rownames(y) <- names(lp)
z <- if (fl) y else cbind(z,y)
fl <- FALSE
}
z <- t(z[,names(lp)])
return(z)
}
}
#' @exportMethod projcells
setGeneric("projcells", function(object,cthr=0,nmode=FALSE,fmode=FALSE) standardGeneric("projcells"))
setMethod("projcells",
signature = "Ltree",
definition = function(object,cthr,nmode,fmode){
if ( ! is.numeric(cthr) ) stop( "cthr has to be a non-negative number" ) else if ( cthr < 0 ) stop( "cthr has to be a non-negative number" )
if ( ! length(object@sc@cpart == 0) ) stop( "please run findoutliers on the SCseq input object before initializing Ltree" )
if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
if ( !is.numeric(fmode) & !is.logical(fmode) ) stop("argument fmode has to be logical (TRUE/FALSE)")
object@par$cthr <- cthr
object@par$nmode <- nmode
object@par$fmode <- fmode
lp <- object@sc@cpart
ld <- object@sc@distances
n <- aggregate(rep(1,length(lp)),list(lp),sum)
n <- as.vector(n[order(n[,1],decreasing=FALSE),-1])
m <- (1:length(n))[n>cthr]
f <- lp %in% m
lp <- lp[f]
ld <- ld[f,f]
pdil <- object@sc@tsne[f,]
cnl <- aggregate(pdil,by=list(lp),median)
cnl <- cnl[order(cnl[,1],decreasing=FALSE),-1]
pdi <- suppressWarnings( cmdscale(as.matrix(ld),k=min(length(object@sc@cluster$features),ncol(ld)-1)) )
cn <- as.data.frame(pdi[compmedoids(object@sc@fdata[object@sc@cluster$features,names(lp)],lp,metric=object@sc@clusterpar$metric),])
rownames(cn) <- 1:nrow(cn)
x <- compproj(pdi,lp,cn,m)
res <- x$res
if ( nmode ){
rma <- x$rma
if ( fmode ){
d <- object@sc@out$rfvotes
z <- paste("X",as.numeric(apply(d[names(lp),],1,function(x) names(d)[order(x,decreasing=TRUE)][2])),sep="")
}
else{
med <- compmedoids(object@sc@fdata[object@sc@cluster$features,names(lp)],lp,metric=object@sc@clusterpar$metric)
z <- paste("X",t(as.vector(apply(cbind(lp,ld[,med]),1,function(x){ f <- lp[med] != x[1]; lp[med][f][which(x[-1][f] == min(x[-1][f]))[1]] }))),sep="")
}
k <- apply(cbind(z,rma),1,function(x) (x[-1])[names(rma) == x[1]])
rn <- res
rn$l <- as.numeric(sub("X","",z))
rn$h <- as.numeric(k)
res <- rn
x$res <- res
}
object@ldata <- list(lp=lp,ld=ld,m=m,pdi=pdi,pdil=pdil,cn=cn,cnl=cnl)
object@trproj <- x
return(object)
}
)
#' @exportMethod projback
setGeneric("projback", function(object,pdishuf=2000,nmode=FALSE,fast=FALSE,rseed=17000) standardGeneric("projback"))
setMethod("projback",
signature = "Ltree",
definition = function(object,pdishuf,nmode,fast,rseed){
if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
if ( !is.numeric(fast) & !is.logical(fast) ) stop("argument fast has to be logical (TRUE/FALSE)")
if ( ! is.numeric(pdishuf) ) stop( "pdishuf has to be a non-negative integer number" ) else if ( round(pdishuf) != pdishuf | pdishuf < 0 ) stop( "pdishuf has to be a non-negative integer number" )
if ( length(object@trproj) == 0 ) stop("run projcells before projback")
object@par$pdishuf <- pdishuf
object@par$rseed <- rseed
object@par$fast <- fast
if ( ! nmode & ! fast ){
set.seed(rseed)
for ( i in 1:pdishuf ){
cat("pdishuffle:",i,"\n")
x <- compproj(pdishuffle(object@ldata$pdi,object@ldata$lp,object@ldata$cn,object@ldata$m,all=TRUE),object@ldata$lp,object@ldata$cn,object@ldata$m,d=object@trproj$d)$res
y <- if ( i == 1 ) t(x) else cbind(y,t(x))
}
##important
object@prback <- as.data.frame(t(y))
x <- object@prback
x$n <- as.vector(t(matrix(rep(1:(nrow(x)/nrow(object@ldata$pdi)),nrow(object@ldata$pdi)),ncol=nrow(object@ldata$pdi))))
object@prbacka <- aggregate(data.frame(count=rep(1,nrow(x))),by=list(n=x$n,o=x$o,l=x$l),sum)
}
return( object )
}
)
#' @exportMethod lineagetree
setGeneric("lineagetree", function(object,pthr=0.01,nmode=FALSE,fast=FALSE) standardGeneric("lineagetree"))
setMethod("lineagetree",
signature = "Ltree",
definition = function(object,pthr,nmode,fast){
if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
if ( !is.numeric(fast) & !is.logical(fast) ) stop("argument fast has to be logical (TRUE/FALSE)")
if ( length(object@trproj) == 0 ) stop("run projcells before lineagetree")
if ( max(dim(object@prback)) == 0 & ! nmode & ! fast ) stop("run projback before lineagetree")
if ( ! is.numeric(pthr) ) stop( "pthr has to be a non-negative number" ) else if ( pthr < 0 ) stop( "pthr has to be a non-negative number" )
object@par$pthr <- pthr
cnl <- object@ldata$cnl
pdil <- object@ldata$pdil
cn <- object@ldata$cn
pdi <- object@ldata$pdi
m <- object@ldata$m
lp <- object@ldata$lp
res <- object@trproj$res
rma <- object@trproj$rma
prback <- object@prback
cm <- as.matrix(dist(cnl))*0
linl <- list()
linn <- list()
for ( i in 1:length(m) ){
for ( j in i:length(m) ){
linl[[paste(m[i],m[j],sep=".")]] <- c()
linn[[paste(m[i],m[j],sep=".")]] <- c()
}
}
sigcell <- c()
for ( i in 1:nrow(res) ){
a <- which( m == res$o[i])
if ( sum( lp == m[a] ) == 1 ){
k <- t(cnl)[,a]
k <- NA
sigcell <- append(sigcell, FALSE)
}else{
b <- which(m == res$l[i] )
h <- res$h[i]
if ( nmode | fast ){
sigcell <- append(sigcell, FALSE)
}else{
f <- prback$o == m[a] & prback$l == m[b]
if ( sum(f) > 0 ){
ql <- quantile(prback[f,"h"],pthr,na.rm=TRUE)
qh <- quantile(prback[f,"h"],1 - pthr,na.rm=TRUE)
}else{
ql <- 0
qh <- 0
}
sigcell <- if (is.na(h) ) append(sigcell, NA) else if ( h > qh | h < min(0,ql) ) append(sigcell, TRUE) else append(sigcell, FALSE)
}
if ( !is.na(res$h[i]) ){
w <- t(pdil)[,i] - t(cnl)[,a]
v <- t(cnl)[,b] - t(cnl)[,a]
wo <- t(pdi)[,i] - t(cn)[,a]
vo <- t(cn)[,b] - t(cn)[,a]
df <-( h*sqrt(sum(wo*wo)) )/sqrt(sum(vo*vo))*v
k <- df + t(cnl)[,a]
cm[a,b] <- cm[a,b] + 1
so <- m[sort(c(a,b))]
dfl <- sign(h)*sqrt( sum( df*df ) )/sqrt(sum(v*v))
if ( a > b ) dfl <- 1 - dfl
linn[[paste(so[1],so[2],sep=".")]] <- append( linn[[paste(so[1],so[2],sep=".")]], rownames(pdi)[i] )
linl[[paste(so[1],so[2],sep=".")]] <- append( linl[[paste(so[1],so[2],sep=".")]], dfl )
}else{
k <- t(cnl)[,a]
for ( j in unique(lp[lp != m[a]]) ){
b <- which(j == m)
so <- m[sort(c(a,b))]
dfl <- 0
if ( a > b ) dfl <- 1 - dfl
linn[[paste(so[1],so[2],sep=".")]] <- append( linn[[paste(so[1],so[2],sep=".")]], rownames(pdi)[i] )
linl[[paste(so[1],so[2],sep=".")]] <- append( linl[[paste(so[1],so[2],sep=".")]], dfl )
}
}
}
lt <- if ( i == 1 ) data.frame(k) else cbind(lt,k)
}
lt <- t(lt)
cm <- as.data.frame(cm)
names(cm) <- paste("cl",m,sep=".")
rownames(cm) <- paste("cl",m,sep=".")
lt <- as.data.frame(lt)
rownames(lt) <- rownames(res)
object@ltcoord <- as.matrix(lt)
object@prtree <- list(n=linn,l=linl)
object@cdata$counts <- cm
names(sigcell) <- rownames(res)
object@sigcell <- sigcell
return( object )
}
)
setGeneric("comppvalue", function(object,pethr=0.01,nmode=FALSE,fast=FALSE) standardGeneric("comppvalue"))
setMethod("comppvalue",
signature = "Ltree",
definition = function(object,pethr,nmode,fast){
if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
if ( !is.numeric(fast) & !is.logical(fast) ) stop("argument fast has to be logical (TRUE/FALSE)")
if ( length(object@prtree) == 0 ) stop("run lineagetree before comppvalue")
if ( ! is.numeric(pethr) ) stop( "pethr has to be a non-negative number" ) else if ( pethr < 0 ) stop( "pethr has to be a non-negative number" )
object@par$pethr <- pethr
cm <- object@cdata$counts
cmpv <- cm*NA
cmpvd <- cm*NA
cmbr <- cm*NA
cmpvn <- cm*NA
cmpvnd <- cm*NA
cmfr <- cm/apply(cm,1,sum)
if ( nmode ){
N <- apply(cm,1,sum) + 1
N0 <- sum(N) - N
n0 <- t(matrix(rep(N,length(N)),ncol=length(N)))
p <- n0/N0
n <- cm
k <- cbind(N,p,n)
cmpv <- t(apply(k,1,function(x){N <- x[1]; p <- x[2:( ncol(cm) + 1 )]; n <- x[( ncol(cm) + 2 ):( 2*ncol(cm) + 1)]; apply(cbind(n,p),1,function(x,N) binom.test(x[1],N,min(1,x[2]),alternative="g")$p.value,N=N)}))
cmpvd <- t(apply(k,1,function(x){N <- x[1]; p <- x[2:( ncol(cm) + 1 )]; n <- x[( ncol(cm) + 2 ):( 2*ncol(cm) + 1)]; apply(cbind(n,p),1,function(x,N) binom.test(x[1],N,min(1,x[2]),alternative="l")$p.value,N=N)}))
cmpvn <- cmpv
cmpvnd <- cmpvd
cmbr <- as.data.frame(n0/N0*N)
names(cmbr) <- names(cm)
rownames(cmbr) <- rownames(cm)
}else if ( fast ){
set.seed(object@par$rseed)
p <- sort(unique(object@ldata$lp))
for ( i in 1:length(p) ){
mm <- 1
dloc <- object@trproj$d
dloc$v <- as.data.frame(matrix(rep(as.matrix(dloc$v),mm),ncol=ncol(dloc$v)))
dloc$pdcn <- as.data.frame(matrix(rep(as.matrix(dloc$pdcn),mm),ncol=ncol(dloc$pdcn)))
lploc <- rep(p[i] + object@ldata$lp*0,mm)
names(lploc) <- 1:length(lploc)
x <- compproj(pdishuffle(matrix(rep(t(object@ldata$pdi),mm),ncol=ncol(object@ldata$pdi)),object@ldata$lp,object@ldata$cn,object@ldata$m,all=TRUE),lploc,object@ldata$cn,object@ldata$m,d=dloc)$res
z <-merge(data.frame(p=p),aggregate(rep(1,nrow(x)),by=list(x$l),sum),by.x="p",by.y="Group.1",all.x=T)
z$x[is.na(z$x)] <- 0
pr <- z$x/sum(z$x)
if ( i == 1 ) d <- data.frame(z) else d[,i] <- pr
}
N <- apply(cm,1,sum) + 1
k <- cbind(N,d,cm)
cmpv <- t(apply(k,1,function(x){N <- x[1]; p <- x[2:( ncol(cm) + 1 )]; n <- x[( ncol(cm) + 2 ):( 2*ncol(cm) + 1)]; apply(cbind(n,p),1,function(x,N) binom.test(x[1],N,min(1,x[2]),alternative="g")$p.value,N=N)}))
cmpvd <- t(apply(k,1,function(x){N <- x[1]; p <- x[2:( ncol(cm) + 1 )]; n <- x[( ncol(cm) + 2 ):( 2*ncol(cm) + 1)]; apply(cbind(n,p),1,function(x,N) binom.test(x[1],N,min(1,x[2]),alternative="l")$p.value,N=N)}))
cmpvn <- cmpv
cmpvnd <- cmpvd
cmbr <- as.data.frame(d*N)
names(cmbr) <- names(cm)
rownames(cmbr) <- rownames(cm)
}else{
for ( i in 1:nrow(cm) ){
for ( j in 1:ncol(cm) ){
f <- object@prbacka$o == object@ldata$m[i] & object@prbacka$l == object@ldata$m[j]
x <- object@prbacka$count[f]
if ( sum(f) < object@par$pdishuf ) x <- append(x,rep(0, object@par$pdishuf - sum(f)))
cmbr[i,j] <- if ( sum(f) > 0 ) mean(x) else 0
cmpv[i,j] <- if ( quantile(x,1 - pethr) < cm[i,j] ) 0 else 0.5
cmpvd[i,j] <- if ( quantile(x,pethr) > cm[i,j] ) 0 else 0.5
cmpvn[i,j] <- sum( x >= cm[i,j])/length(x)
cmpvnd[i,j] <- sum( x <= cm[i,j])/length(x)
}
}
}
diag(cmpv) <- .5
diag(cmpvd) <- .5
diag(cmpvn) <- NA
diag(cmpvnd) <- NA
object@cdata$counts.br <- cmbr
object@cdata$pv.e <- cmpv
object@cdata$pv.d <- cmpvd
object@cdata$pvn.e <- cmpvn
object@cdata$pvn.d <- cmpvnd
m <- object@ldata$m
linl <- object@prtree$l
ls <- as.data.frame(matrix(rep(NA,length(m)**2),ncol=length(m)))
names(ls) <- rownames(ls) <- paste("cl",m,sep=".")
for ( i in 1:( length(m) - 1 )){
for ( j in (i + 1):length(m) ){
na <- paste(m[i],m[j],sep=".")
if ( na %in% names(linl) & min(cmpv[i,j],cmpv[j,i],na.rm=TRUE) < pethr ){
y <- sort(linl[[na]])
nn <- ( 1 - max(y[-1] - y[-length(y)]) )
}else{
nn <- 0
}
ls[i,j] <- nn
}
}
object@cdata$linkscore <- ls
return(object)
}
)
setGeneric("plotlinkpv", function(object) standardGeneric("plotlinkpv"))
setMethod("plotlinkpv",
signature = "Ltree",
definition = function(object){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotlinkpv")
pheatmap(-log2(object@cdata$pvn.e + 1/object@par$pdishuf/2))
}
)
setGeneric("plotlinkscore", function(object) standardGeneric("plotlinkscore"))
setMethod("plotlinkscore",
signature = "Ltree",
definition = function(object){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotlinkscore")
pheatmap(object@cdata$linkscore,cluster_rows=FALSE,cluster_cols=FALSE)
}
)
setGeneric("plotmapprojections", function(object) standardGeneric("plotmapprojections"))
setMethod("plotmapprojections",
signature = "Ltree",
definition = function(object){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmapprojections")
cent <- object@sc@fdata[object@sc@cluster$features,compmedoids(object@sc@fdata[object@sc@cluster$features,],object@sc@cpart,metric=object@sc@clusterpar$metric)]
dc <- as.data.frame(as.matrix(dist.gen(t(cent),method=object@sc@clusterpar$metric)))
names(dc) <- sort(unique(object@sc@cpart))
rownames(dc) <- sort(unique(object@sc@cpart))
trl <- spantree(dc[object@ldata$m,object@ldata$m])
u <- object@ltcoord[,1]
v <- object@ltcoord[,2]
cnl <- object@ldata$cnl
plot(u,v,cex=1.5,col="grey",pch=20,xlab="Dim 1",ylab="Dim 2")
for ( i in unique(object@ldata$lp) ){ f <- object@ldata$lp == i; text(u[f],v[f],i,cex=.75,font=4,col=object@sc@fcol[i]) }
points(cnl[,1],cnl[,2])
text(cnl[,1],cnl[,2],object@ldata$m,cex=2)
for ( i in 1:length(trl$kid) ){
lines(c(cnl[i+1,1],cnl[trl$kid[i],1]),c(cnl[i+1,2],cnl[trl$kid[i],2]),col="black")
}
}
)
#' @exportMethod compspantree
setGeneric(
"compspantree",
function(object)
standardGeneric("compspantree")
)
setMethod(
"compspantree",
signature = "Ltree",
definition = function(object) {
cent <- object@sc@fdata[object@sc@cluster$features,compmedoids(object@sc@fdata[object@sc@cluster$features,],object@sc@cpart,metric=object@sc@clusterpar$metric)]
dc <- as.data.frame(as.matrix(dist.gen(t(cent),method=object@sc@clusterpar$metric)))
names(dc) <- sort(unique(object@sc@cpart))
rownames(dc) <- sort(unique(object@sc@cpart))
trl <- spantree(dc[object@ldata$m,object@ldata$m])
# medoids <- compmedoids(object@sc@fdata, object@sc@cpart)
# cent <- object@sc@fdata[,medoids]
# dc <- as.data.frame(1 - cor(cent))
# names(dc) <- sort(unique(object@sc@cpart))
# rownames(dc) <- sort(unique(object@sc@cpart))
# trl <- vegan::spantree(dc[object@ldata$m,object@ldata$m])
object@trl <- list(
dc = dc,
cent = cent,
trl = trl
)
object
}
)
setGeneric("plotmap", function(object) standardGeneric("plotmap"))
setMethod("plotmap",
signature = "Ltree",
definition = function(object){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmap")
cent <- object@sc@fdata[object@sc@cluster$features,compmedoids(object@sc@fdata[object@sc@cluster$features,],object@sc@cpart,metric=object@sc@clusterpar$metric)]
dc <- as.data.frame(as.matrix(dist.gen(t(cent),method=object@sc@clusterpar$metric)))
names(dc) <- sort(unique(object@sc@cpart))
rownames(dc) <- sort(unique(object@sc@cpart))
trl <- spantree(dc[object@ldata$m,object@ldata$m])
u <- object@ldata$pdil[,1]
v <- object@ldata$pdil[,2]
cnl <- object@ldata$cnl
plot(u,v,cex=1.5,col="grey",pch=20,xlab="Dim 1",ylab="Dim 2")
for ( i in unique(object@ldata$lp) ){ f <- object@ldata$lp == i; text(u[f],v[f],i,cex=.75,font=4,col=object@sc@fcol[i]) }
points(cnl[,1],cnl[,2])
text(cnl[,1],cnl[,2],object@ldata$m,cex=2)
for ( i in 1:length(trl$kid) ){
lines(c(cnl[i+1,1],cnl[trl$kid[i],1]),c(cnl[i+1,2],cnl[trl$kid[i],2]),col="black")
}
}
)
setGeneric("plottree", function(object,showCells=TRUE,nmode=FALSE,scthr=0) standardGeneric("plottree"))
setMethod("plottree",
signature = "Ltree",
definition = function(object,showCells,nmode,scthr){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmap")
if ( !is.numeric(nmode) & !is.logical(nmode) ) stop("argument nmode has to be logical (TRUE/FALSE)")
if ( !is.numeric(showCells) & !is.logical(showCells) ) stop("argument showCells has to be logical (TRUE/FALSE)")
if ( ! is.numeric(scthr) ) stop( "scthr has to be a non-negative number" ) else if ( scthr < 0 | scthr > 1 ) stop( "scthr has to be a number between 0 and 1" )
ramp <- colorRamp(c("darkgreen", "yellow", "brown"))
mcol <- rgb( ramp(seq(0, 1, length = 101)), max = 255)
co <- object@cdata
fc <- (co$counts/( co$counts.br + .5))*(co$pv.e < object@par$pethr)
fc <- fc*(fc > t(fc)) + t(fc)*(t(fc) >= fc)
fc <- log2(fc + (fc == 0))
k <- -log10(sort(unique(as.vector(t(co$pvn.e))[as.vector(t(co$pv.e))<object@par$pethr])) + 1/object@par$pdishuf)
if (length(k) == 1) k <- c(k - k/100,k)
mlpv <- -log10(co$pvn.e + 1/object@par$pdishuf)
diag(mlpv) <- min(mlpv,na.rm=TRUE)
dcc <- t(apply(round(100*(mlpv - min(k))/(max(k) - min(k)),0) + 1,1,function(x){y <- c(); for ( n in x ) y <- append(y,if ( n < 1 ) NA else mcol[n]); y }))
cx <- c()
cy <- c()
va <- c()
m <- object@ldata$m
for ( i in 1:( length(m) - 1 ) ){
for ( j in ( i + 1 ):length(m) ){
if ( min(co$pv.e[i,j],co$pv.e[j,i],na.rm=TRUE) < object@par$pethr ){
if ( mlpv[i,j] > mlpv[j,i] ){
va <- append(va,dcc[i,j])
}else{
va <- append(va,dcc[j,i])
}
cx <- append(cx,i)
cy <- append(cy,j)
}
}
}
cnl <- object@ldata$cnl
u <- object@ltcoord[,1]
v <- object@ltcoord[,2]
layout( cbind(c(1, 1), c(2, 3)),widths=c(5,1,1),height=c(5,5,1))
par(mar = c(12,5,1,1))
if ( showCells ){
plot(u,v,cex=1.5,col="grey",pch=20,xlab="Dim 1",ylab="Dim 2")
if ( !nmode ) points(u[object@sigcell],v[object@sigcell],col="black")
}else{
plot(u,v,cex=0,xlab="Dim 1",ylab="Dim 2")
}
if ( length(va) > 0 ){
f <- order(va,decreasing=TRUE)
for ( i in 1:length(va) ){
if ( object@cdata$linkscore[cx[i],cy[i]] > scthr ){
if ( showCells ){
lines(cnl[c(cx[i],cy[i]),1],cnl[c(cx[i],cy[i]),2],col=va[i],lwd=2)
}else{
lines(cnl[c(cx[i],cy[i]),1],cnl[c(cx[i],cy[i]),2],col=va[i],lwd=5*object@cdata$linkscore[cx[i],cy[i]])
}
}
}
}
en <- aggregate(object@entropy,list(object@sc@cpart),median)
en <- en[en$Group.1 %in% m,]
mi <- min(en[,2],na.rm=TRUE)
ma <- max(en[,2],na.rm=TRUE)
w <- round((en[,2] - mi)/(ma - mi)*99 + 1,0)
ramp <- colorRamp(c("red","orange", "pink","purple", "blue"))
ColorRamp <- rgb( ramp(seq(0, 1, length = 101)), max = 255)
ColorLevels <- seq(mi, ma, length=length(ColorRamp))
if ( mi == ma ){
ColorLevels <- seq(0.99*mi, 1.01*ma, length=length(ColorRamp))
}
for ( i in m ){
f <- en[,1] == m
points(cnl[f,1],cnl[f,2],cex=5,col=ColorRamp[w[f]],pch=20)
}
text(cnl[,1],cnl[,2],m,cex=1.25,font=4,col="white")
par(mar = c(5, 4, 1, 2))
image(1, ColorLevels,
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
col=ColorRamp,
xlab="",ylab="",
xaxt="n")
coll <- seq(min(k), max(k), length=length(mcol))
image(1, coll,
matrix(data=coll, ncol=length(mcol),nrow=1),
col=mcol,
xlab="",ylab="",
xaxt="n")
layout(1)
}
)
setGeneric("plotdistanceratio", function(object) standardGeneric("plotdistanceratio"))
setMethod("plotdistanceratio",
signature = "Ltree",
definition = function(object){
if ( length(object@ldata) <= 0 ) stop("run projcells before plotdistanceratio")
l <- as.matrix(dist(object@ldata$pdi))
z <- (l/object@ldata$ld)
hist(log2(z),breaks=100,xlab=" log2 emb. distance/distance",main="")
}
)
setGeneric("getproj", function(object,i) standardGeneric("getproj"))
setMethod("getproj",
signature = "Ltree",
definition = function(object,i){
if ( length(object@ldata) <= 0 ) stop("run projcells before plotdistanceratio")
if ( ! i %in% object@ldata$m ) stop(paste("argument i has to be one of",paste(object@ldata$m,collapse=",")))
x <- object@trproj$rma[names(object@ldata$lp)[object@ldata$lp == i],]
x <- x[,names(x) != paste("X",i,sep="")]
f <- !is.na(x[,1])
x <- x[f,]
if ( nrow(x) > 1 ){
y <- x
y <- as.data.frame(t(apply(y,1,function(x) (x - mean(x))/sqrt(var(x)))))
}
names(x) = sub("X","cl.",names(x))
names(y) = sub("X","cl.",names(y))
return(list(pr=x,prz=y))
}
)
setGeneric("projenrichment", function(object) standardGeneric("projenrichment"))
setMethod("projenrichment",
signature = "Ltree",
definition = function(object){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotmap")
ze <- ( object@cdata$pv.e < object@par$pethr | object@cdata$pv.d < object@par$pethr) * (object@cdata$counts + .1)/( object@cdata$counts.br + .1 )
pheatmap(log2(ze + ( ze == 0 ) ),cluster_rows=FALSE,cluster_cols=FALSE)
}
)
setGeneric("compscore", function(object,nn=1,scthr=0) standardGeneric("compscore"))
setMethod("compscore",
signature = "Ltree",
definition = function(object,nn,scthr){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before compscore")
if ( ! is.numeric(nn) ) stop( "nn has to be a non-negative integer number" ) else if ( round(nn) != nn | nn < 0 ) stop( "nn has to be a non-negative integer number" )
if ( ! is.numeric(scthr) ) stop( "scthr has to be a non-negative number" ) else if ( scthr < 0 | scthr > 1 ) stop( "scthr has to be a number between 0 and 1" )
scm <- as.matrix(object@cdata$linkscore)
diag(scm) <- rep(0,ncol(scm))
for ( i in 1:ncol(scm)) scm[i:nrow(scm),i] <- scm[i,i:ncol(scm)]
x <- object@cdata$counts*(object@cdata$pv.e < object@par$pethr)*(scm > scthr)>0
y <- x | t(x)
if ( max(y) > 0 ){
z <- apply(y,1,sum)
nl <- list()
n <- list()
for ( i in 1:nn ){
if ( i == 1 ){
n[[i]] <- as.list(apply(y,1,function(x) grep(TRUE,x)))
nl <- data.frame( apply(y,1,sum) )
}
if ( i > 1 ){
v <- rep(0,nrow(nl))
n[[i]] <- list()
for ( j in 1:length(n[[i-1]]) ){
cl <- n[[i-1]][[j]]
if ( length(cl) == 0 ){
n[[i]][[paste("d",j,sep="")]] <- NA
v[j] <- 0
}else{
k <- if ( length(cl) > 1 ) apply(y[cl,],2,sum) > 0 else if ( length(cl) == 1 ) y[cl,]
n[[i]][[paste("d",j,sep="")]] <- sort(unique(c(cl,grep(TRUE,k))))
v[j] <- length(n[[i]][[paste("d",j,sep="")]])
}
}
names(n[[i]]) <- names(z)
nl <- cbind(nl,v)
}
}
x <- nl[,i]
names(x) <- rownames(nl)
}else{
x <- rep(0,length(object@ldata$m))
names(x) <- paste("cl",object@ldata$m,sep=".")
}
v <- aggregate(object@entropy,list(object@sc@cpart),median)
v <- v[v$Group.1 %in% object@ldata$m,]
w <- as.vector(v[,-1])
names(w) <- paste("cl.",v$Group.1,sep="")
w <- w - min(w)
return(list(links=x,entropy=w,StemIDscore=x*w))
}
)
setGeneric("plotscore", function(object,nn=1,scthr=0) standardGeneric("plotscore"))
setMethod("plotscore",
signature = "Ltree",
definition = function(object,nn,scthr){
if ( length(object@cdata) <= 0 ) stop("run comppvalue before plotscore")
if ( ! is.numeric(scthr) ) stop( "scthr has to be a non-negative number" ) else if ( scthr < 0 | scthr > 1 ) stop( "scthr has to be a number between 0 and 1" )
x <- compscore(object,nn,scthr)
layout(1:3)
barplot(x$links,names.arg=sub("cl\\.","",object@ldata$m),xlab="Cluster",ylab="Number of links",cex.names=1)
barplot(x$entropy,names.arg=sub("cl\\.","",object@ldata$m),xlab="Cluster",ylab="Delta-Entropy",cex.names=1)
barplot(x$StemIDscore,names.arg=sub("cl\\.","",object@ldata$m),xlab="Cluster",ylab="Number of links * Delta-Entropy",cex.names=1)
layout(1)
}
)
setGeneric("branchcells", function(object,br) standardGeneric("branchcells"))
setMethod("branchcells",
signature = "Ltree",
definition = function(object,br){
if ( length(object@ldata) <= 0 ) stop("run projcells before branchcells")
msg <- paste("br needs to be list of length two containing two branches, where each has to be one of", paste(names(object@prtree$n),collapse=","))
if ( !is.list(br) ) stop(msg) else if ( length(br) != 2 ) stop(msg) else if ( ! br[[1]] %in% names(object@prtree$n) | ! br[[2]] %in% names(object@prtree$n) ) stop(msg)
n <- list()
scl <- object@sc
k <- c()
cl <- intersect( as.numeric(strsplit(br[[1]],"\\.")[[1]]), as.numeric(strsplit(br[[2]],"\\.")[[1]]))
if ( length(cl) == 0 ) stop("the two branches in br need to have one cluster in common.")
for ( i in 1:length(br) ){
f <- object@sc@cpart[ object@prtree$n[[br[[i]]]] ] %in% cl
if ( sum(f) > 0 ){
n[[i]] <- names(object@sc@cpart[ object@prtree$n[[br[[i]]]] ])[f]
k <- append(k, max( scl@cpart ) + 1)
scl@cpart[n[[i]]] <- max( scl@cpart ) + 1
}else{
stop(paste("no cells on branch",br[[i]],"fall into clusters",cl))
}
}
set.seed(111111)
scl@fcol <- sample(rainbow(max(scl@cpart)))
z <- diffgenes(scl,k[1],k[2])
return( list(n=n,scl=scl,k=k,diffgenes=z) )
}
)
pamk <- function (data, krange = 2:10, criterion = "asw", usepam = TRUE,
scaling = FALSE, alpha = 0.001, diss = inherits(data, "dist"),
critout = FALSE, ns = 10, seed = NULL, ...)
{
ddata <- as.matrix(data)
if (!identical(scaling, FALSE))
sdata <- scale(ddata, scale = scaling)
else sdata <- ddata
cluster1 <- 1 %in% krange
critval <- numeric(max(krange))
pams <- list()
for (k in krange) {
if (usepam)
pams[[k]] <- pam(as.dist(sdata), k, diss = TRUE)
else pams[[k]] <- clara(as.dist(sdata), k, diss = TRUE)
if (k != 1)
critval[k] <- switch(criterion, asw = pams[[k]]$silinfo$avg.width,
multiasw = distcritmulti(sdata, pams[[k]]$clustering,
seed = seed, ns = ns)$crit.overall, ch = ifelse(diss,
cluster.stats(sdata, pams[[k]]$clustering)$ch,
calinhara(sdata, pams[[k]]$clustering)))
if (critout)
cat(k, " clusters ", critval[k], "\n")
}
k.best <- if ( length(krange) == 1 ) krange else (1:max(krange))[which.max(critval)]
if (cluster1) {
if (diss)
cluster1 <- FALSE
else {
cxx <- dudahart2(sdata, pams[[2]]$clustering, alpha = alpha)
critval[1] <- cxx$p.value
cluster1 <- cxx$cluster1
}
}
if (cluster1)
k.best <- 1
out <- list(pamobject = pams[[k.best]], nc = k.best, crit = critval)
out
}
pamkdCBI <- function (data, krange = 2:10, k = NULL, criterion = "asw", usepam = TRUE,
scaling = TRUE, diss = inherits(data, "dist"), ...)
{
if (!is.null(k))
krange <- k
c1 <- pamk(as.dist(data), krange = krange, criterion = criterion,
usepam = usepam, scaling = scaling, diss = diss, ...)
partition <- c1$pamobject$clustering
cl <- list()
nc <- c1$nc
for (i in 1:nc) cl[[i]] <- partition == i
out <- list(result = c1, nc = nc, clusterlist = cl, partition = partition,
clustermethod = "pam/estimated k", criterion = criterion)
out
}
#' @export
rfcorrect <- function(sc,rfseed=12345,nbtree=NULL,final=TRUE,nbfactor=5,...){
set.seed(rfseed)
part <- if (final) sc@cpart else sc@cluster$kpart
if ( is.null(nbtree) ) nbtree = ncol(sc@fdata[sc@cluster$features,])*nbfactor
rf <- randomForest::randomForest(sc@distances,as.factor(part),nbtree=nbtree,...)
cpo <- part
cpart <- as.numeric(as.vector(rf$predicted))
names(cpart ) <- names(cpo)
for ( i in max(cpart):1){if (sum(cpart==i)==0) cpart[cpart>i] <- cpart[cpart>i] - 1 }
if ( final ) sc@cpart <- cpart else sc@cluster$kpart <- cpart
d <- as.data.frame(rf$votes)
scpo <- sort(unique(cpo))
scpa <- sort(unique(cpart))
for ( i in 1:ncol(d) ) names(d)[i] <- scpa[which(names(d)[i] == scpo)]
sc@out$rfvotes <- d
sc
}
##
#' @title Function for differential expression analysis
#'
#' @description This function performs differential expression analysis between two sets of single cell transcriptomes. The inference is based on a noise model or relies on the \code{DESeq2} approach.
#' @param x expression data frame with genes as rows and cells as columns. Gene IDs should be given as row names and cell IDs should be given as column names. This can be a reduced expression table only including the features (genes) to be used in the analysis. This input has to be provided if \code{g} (see below) is given and corresponds to a valid gene ID, i. e. one of the rownames of \code{x}. The default value is \code{NULL}. In this case, cluster identities are highlighted in the plot.
#' @param A vector of cell IDs corresponding column names of \code{x}. Differential expression in set \code{A} versus set \code{B} will be evaluated.
#' @param B vector of cell IDs corresponding column names of \code{x}. Differential expression in set \code{A} versus set \code{B} will be evaluated.
#' @param DESeq logical value. If \code{TRUE}, then \pkg{DESeq2} is used for the inference of differentially expressed genes. In this case, it is recommended to provide non-normalized input data \code{x}. Default value is \code{FALSE}
#' @param method either "per-condition" or "pooled". If DESeq is not used, this parameter determines, if the noise model is fitted for each set separately ("per-condition") or for the pooled set comprising all cells in \code{A} and \code{B}. Default value is "pooled".
#' @param norm logical value. If \code{TRUE} then the total transcript count in each cell is normalized to the minimum number of transcripts across all cells in set \code{A} and \code{B}. Default value is \code{FALSE}.
#' @param vfit function describing the background noise model. Inference of differentially expressed genes can be performed with a user-specified noise model describing the expression variance as a function of the mean expression. Default value is \code{NULL}.
#' @param locreg logical value. If \code{FALSE} then regression of a second order polynomial is perfomed to determine the relation of variance and mean. If \code{TRUE} a local regression is performed instead. Default value is \code{FALSE}.
#' @param ... additional arguments to be passed to the low level function \code{DESeqDataSetFromMatrix}.
#' @return If \code{DESeq} equals \code{TRUE}, the function returns the output of \pkg{DESeq2}. In this case list of the following two components is returned:
#' \item{cds}{object returned by the \pkg{DESeq2} function \code{DESeqDataSetFromMatrix}.}
#' \item{res}{data frame containing the results of the \pkg{DESeq2} analysis.}
#'Otherwise, a list of three components is returned:
#' \item{vf1}{a data frame of three columns, indicating the mean \code{m}, the variance \code{v} and the fitted variance \code{vm} for set \code{A}.}
#' \item{vf2}{a data frame of three columns, indicating the mean \code{m}, the variance \code{v} and the fitted variance \code{vm} for set \code{B}.}
#' \item{res}{a data frame with the results of the differential gene expression analysis with the structure of the \code{DESeq} output, displaying mean expression of the two sets, fold change and log2 fold change between the two sets, the p-value for differential expression (\code{pval}) and the Benjamini-Hochberg corrected false discovery rate (\code{padj}).}
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#'
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#'
#' thr <- .3
#'
#' A <- rownames(fb$probs)[fb$probs[,"t6"] > .3]
#' B <- rownames(fb$probs)[fb$probs[,"t13"] > .3]
#' de <- diffexpnb(v,A=A,B=B)
#' @export
diffexpnb <- function(x,A,B,DESeq=FALSE,method="pooled",norm=FALSE,vfit=NULL,locreg=FALSE,...){
if ( ! method %in% c("per-condition","pooled") ) stop("invalid method: choose pooled or per-condition")
x <- x[,c(A,B)]
if ( DESeq ){
require(DESeq2)
# run on sc@expdata
des <- data.frame( row.names = colnames(x), condition = factor(c( rep(1,length(A)), rep(2,length(B)) )), libType = rep("single-end", dim(x)[2]))
cds <- DESeqDataSetFromMatrix(countData=round(x,0),colData=des,design =~ condition,...)
cds <- DESeq(cds,fitType='local')
res <- results(cds)
list(des=des,cds=cds,res=res)
}else{
if (norm) x <- as.data.frame( t(t(x)/apply(x,2,sum))*min(apply(x,2,sum,na.rm=TRUE)) )
fit <- list()
m <- list()
v <- list()
for ( i in 1:2 ){
g <- if ( i == 1 ) A else B
m[[i]] <- if ( length(g) > 1 ) apply(x[,g],1,mean) else x[,g]
v[[i]] <- if ( length(g) > 1 ) apply(x[,g],1,var) else apply(x,1,var)
if ( method == "pooled"){
mg <- apply(x,1,mean)
vg <- apply(x,1,var)
vl <- log2(vg)
ml <- log2(mg)
}else{
vl <- log2(v[[i]])
ml <- log2(m[[i]])
}
if ( locreg ){
f <- order(ml,decreasing=FALSE)
u <- 2**ml[f]
y <- 2**vl[f]
lf <- locfit(y~lp(u,nn=.7),family="gamma",maxk=500)
fit[[i]] <- approxfun(u, fitted(lf), method = "const")
}else{
if ( is.null(vfit) ){
f <- ml > -Inf & vl > -Inf
ml <- ml[f]
vl <- vl[f]
mm <- -8
repeat{
fit[[i]] <- lm(vl ~ ml + I(ml^2))
if( coef(fit[[i]])[3] >= 0 | mm >= -1){
break
}
mm <- mm + .5
f <- ml > mm
ml <- ml[f]
vl <- vl[f]
}
}else{
fit[[i]] <- vfit
}
}
}
if ( locreg ){
vf <- function(x,i) fit[[i]](x)
}else{
vf <- function(x,i) 2**(coef(fit[[i]])[1] + log2(x)*coef(fit[[i]])[2] + coef(fit[[i]])[3] * log2(x)**2)
}
sf <- function(x,i) x**2/(max(x + 1e-6,vf(x,i)) - x)
psp <- 1e-99
pv <- apply(data.frame(m[[1]],m[[2]]),1,function(x){ p12 <- (dnbinom(0:round(x[1]*length(A) + x[2]*length(B),0),mu=mean(x)*length(A),size=length(A)*sf(mean(x),1)) + psp)*(dnbinom(round(x[1]*length(A) + x[2]*length(B),0):0,mu=mean(x)*length(B),size=length(B)*sf(mean(x),2)) + psp); sum(p12[p12 <= p12[round(x[1]*length(A),0) + 1]])/sum(p12)} )
res <- data.frame(baseMean=(m[[1]] + m[[2]])/2,baseMeanA=m[[1]],baseMeanB=m[[2]],foldChange=m[[2]]/m[[1]],log2FoldChange=log2(m[[2]]/m[[1]]),pval=pv,padj=p.adjust(pv,method="BH"))
vf1 <- data.frame(m=m[[1]],v=v[[1]],vm=vf(m[[1]],1))
vf2 <- data.frame(m=m[[2]],v=v[[2]],vm=vf(m[[2]],2))
rownames(res) <- rownames(x)
rownames(vf1) <- rownames(x)
rownames(vf2) <- rownames(x)
list(vf1=data.frame(m=m[[1]],v=v[[1]],vm=vf(m[[1]],1)),vf2=data.frame(m=m[[2]],v=v[[2]],vm=vf(m[[2]],2)),res=res)
}
}
#' @title Function for plotting differentially expressed genes
#'
#' @description This is a plotting function for visualizing the output of the \code{diffexpnb} function.
#' @param x output of the function \code{diffexpnb}.
#' @param pthr real number between 0 and 1. This number represents the p-value cutoff applied for displaying differentially expressed genes. Default value is 0.05. The parameter \code{padj} (see below) determines if this cutoff is applied to the uncorrected p-value or to the Benjamini-Hochberg corrected false discovery rate.
#' @param padj logical value. If \code{TRUE}, then genes with a Benjamini-Hochberg corrected false discovery rate lower than \code{pthr} are displayed. If \code{FALSE}, then genes with a p-value lower than \code{pthr} are displayed.
#' @param lthr real number between 0 and Inf. Differentially expressed genes are displayed only for log2 fold-changes greater than \code{lthr}. Default value is 0.
#' @param mthr real number between -Inf and Inf. Differentially expressed genes are displayed only for log2 mean expression greater than \code{mthr}. Default value is -Inf.
#' @param Aname name of expression set \code{A}, which was used as input to \code{diffexpnb}. If provided, this name is used in the axis labels. Default value is \code{NULL}.
#' @param Bname name of expression set \code{B}, which was used as input to \code{diffexpnb}. If provided, this name is used in the axis labels. Default value is \code{NULL}.
#' @param show_names logical value. If \code{TRUE} then gene names displayed for differentially expressed genes. Default value is \code{FALSE}.
#' @return None
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#'
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#'
#' thr <- .3
#'
#' A <- rownames(fb$probs)[fb$probs[,"t6"] > .3]
#' B <- rownames(fb$probs)[fb$probs[,"t13"] > .3]
#' de <- diffexpnb(v,A=A,B=B)
#' plotdiffgenesnb(de,pthr=.05)
#' @export
plotdiffgenesnb <- function(x,pthr=.05,padj=TRUE,lthr=0,mthr=-Inf,Aname=NULL,Bname=NULL,show_names=TRUE){
y <- as.data.frame(x$res)
if ( is.null(Aname) ) Aname <- "baseMeanA"
if ( is.null(Bname) ) Bname <- "baseMeanB"
plot(log2(y$baseMean),y$log2FoldChange,pch=20,xlab=paste("log2 ( ( #mRNA[",Aname,"] + #mRNA[",Bname,"] )/2 )",sep=""),ylab=paste("log2 #mRNA[",Bname,"] - log2 #mRNA[",Aname,"]",sep=""),col="grey")
abline(0,0)
if ( ! is.null(pthr) ){
if ( padj ) f <- y$padj < pthr else f <- y$pval < pthr
points(log2(y$baseMean)[f],y$log2FoldChange[f],col="red",pch=20)
}
if ( !is.null(lthr) ) f <- f & abs( y$log2FoldChange ) > lthr
if ( !is.null(mthr) ) f <- f & log2(y$baseMean) > mthr
if ( show_names ){
if ( sum(f) > 0 ) text(log2(y$baseMean)[f],y$log2FoldChange[f],rownames(y)[f],cex=.5)
}
}
#' @title Function filtering expression data
#'
#' @description This function discards lowly expressed genes from the expression data frame.
#' @param x expression data frame with genes as rows and cells as columns. Gene IDs should be given as row names and cell IDs should be given as column names.
#' @param n ordered vector of cell IDs to be included. Cell IDs need to be column names of \code{x}. If not provided, then all cell IDs are included in arbitray order. Default value is \code{NULL}.
#' @param minexpr positive real number. This is the minimum expression required for at least \code{minnumber} cells. All genes that do not fulfill this criterion are removed. The default value is 2.
#' @param minnumber positive integer number. This is the minimum number of cells in which a gene needs to be expressed at least at a level of \code{minexpr}. All genes that do not fulfill this criterion are removed. The default value is 1.
#' @return Reduced expression data frame with genes as rows and cells as columns in the same order as in \code{n}.
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#'
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#' trc <- dptTraj(x,y,fb,trthr=.25,distance="euclidean",sigma=1000)
#' n <- trc[["t6"]]
#' fs <- filterset(v,n,minexpr=2,minnumber=1)
#' @export
filterset <- function(x,n=NULL,minexpr=2,minnumber=1){
if ( is.null(n) ) n <- names(x)
x[apply(x[,n] >= minexpr,1,sum) >= minnumber,n]
}
#' @title Topological ordering of pseudo-temporal expression profiles
#'
#' @description This function computes a topological ordering of pseudo-temporal expression profiles of all genes by using 1-dimensional self-organizing maps.
#' @param x expression data frame with genes as rows and cells as columns. Gene IDs should be given as row names and cell IDs should be given as column names. The pseudo-temporal expression profile of each gene is defined by the order of cell IDs, i. e. columns, in \code{x}.
#' @param nb positive integer number. Number of nodes of the self-organizing map. Default value is 1000.
#' @param k positive integer number. Pseudo-temporal expression profiles are either derived using a running mean of expression values across the ordered cells with window-size \code{k}, or by a local regression (if \code{locreg} is \code{TRUE}). Default value is 5.
#' @param locreg logical value. If \code{TRUE}, then pseudo-temporal expression profiles are derived by a local regression of expression values across the ordered cells using the function \code{loess} from the package \pkg{stats}. Default value is \code{TRUE}.
#' @param alpha positive real number. This is the parameter, which controls the degree of smoothing. Larger values return smoother profiles. Default value is 0.5.
#' @return A list of the following three components:
#' \item{som}{a \code{som} object returned by the function \code{som} of the package \pkg{som}}
#' \item{x}{pseudo-temporal expression profiles, i. e. the input expression data frame \code{x} after smoothing by running mean or local regression, respectivey, and normalization. The sum of smoothened gene expression values across all cells is normalized to 1.}
#' \item{zs}{data frame of z-score transformed pseudo-temporal expression profiles.}
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#'
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#' trc <- dptTraj(x,y,fb,trthr=.25,distance="euclidean",sigma=1000)
#' n <- trc[["t6"]]
#' fs <- filterset(v,n,minexpr=2,minnumber=1)
#' s1d <- getsom(fs,nb=1000,k=5,locreg=TRUE,alpha=.5)
#' @export
getsom <- function(x,nb=1000,k=5,locreg=TRUE,alpha=.5){
require(som)
require(caTools)
if ( locreg ){
x <- t(apply(x,1,function(x,alpha){ v <- 1:length(x); predict(loess( x ~ v, span=alpha ))},alpha=alpha))
x <- t(apply(x,1,function(x){ x[x<0] <- .1; x }))
}else{
x <- t(apply(x,1,runmean,k=k))
}
x <- x/apply(x,1,sum)
zs <- ( x - apply(x,1,mean) )/sqrt ( apply(x,1,var) )
return( list(som=som(zs,1,nb),x=x,z=zs) )
}
#' @title Processing of self-organizing maps for pseudo-temporal expression profiles
#'
#' @description This function processes the self-organizing maps produced by the function \code{getsom}.
#' @param s1d output of function \code{getsom}
#' @param corthr correlation threshold, i. e. a real number between 0 and 1. The z-score of the average normalized pseudo-temporal expression profiles within each node of the self-organizing map is computed, and the correlation of these z-scores between neighbouring nodes is computed. If the correlation is greater than \code{corthr}, neighbouring nodes are merged. Default value is 0.85.
#' @param minsom positive integer number. Nodes of the self-organizing map with less than \code{minsom} transcripts are discarded. Default value is 3.
#' @return A list of the following seven components:
#' \item{k}{vector of Pearson's correlation coefficient between node \code{i} and node \code{i+1} of the populated nodes of the self-organizing map.}
#' \item{nodes}{vector with assignment of genes to nodes of the final self-organizing map (after merging). Components are node numbers and component names are gene IDs.}
#' \item{nodes.e}{data frame with average normalized pseudo-temporal expression profile for each node, ordered by node number.}
#' \item{nodes.z}{data frame with z-score transformed average normalized pseudo-temporal expression profile for each node, ordered by node number.}
#' \item{all.e}{data frame with normalized pseudo-temporal expression profile for all genes in the self-organizing map, ordered by node number.}
#' \item{all.z}{data frame with z-score transformed normalized pseudo-temporal expression profile for all genes in the self-organizing map, ordered by node number.}
#' \item{all.b}{data frame with binarized pseudo-temporal expression profile for all genes in the self-organizing map, ordered by node number. Expression is 1 in cells with z-score > 1 and -1 in cells with z-score < -1, and 0 otherwise.}
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#'
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#' trc <- dptTraj(x,y,fb,trthr=.25,distance="euclidean",sigma=1000)
#' n <- trc[["t6"]]
#' fs <- filterset(v,n,minexpr=2,minnumber=1)
#' s1d <- getsom(fs,nb=1000,k=5,locreg=TRUE,alpha=.5)
#' ps <- procsom(s1d,corthr=.85,minsom=3)
#' @export
procsom <- function(s1d,corthr=.85,minsom=3){
f <- order(s1d$som$visual$y,decreasing=FALSE)
x <- s1d$x[f,]
y <- t(apply(x,1,function(x){ z <- (x - mean(x))/sqrt(var(x)); z[z < 1 & z > -1] <- 0; z[z > 1] <- 1; z[z < -1] <- -1; z }))
xz <- (x - apply(x,1,mean))/sqrt(apply(x,1,var))
ma <- aggregate(s1d$x,by=list(x=s1d$som$visual$y),mean)
ma <- ma[order(ma$x,decreasing=FALSE),]
ma <- ma[-1]/apply(ma[,-1],1,sum)
maz <- ( ma - apply(ma,1,mean))/sqrt(apply(ma,1,var))
k <- c(1)
for ( i in 2:nrow(maz) ) k <- append(k, cor(t(maz[(i-1):i,]),method="spearman")[1,2] )
kp <- 1
for ( i in 2:length(k) ){
if ( k[i] < corthr ){
kp[i] <- kp[i-1] + 1
}else{
kp[i] <- kp[i-1]
}
}
u <- sort(unique(s1d$som$visual$y))
v <- s1d$som$visual$y
for ( i in unique(kp) ){
v[ s1d$som$visual$y %in% u[kp == i] ] <- i
}
ma <- aggregate(s1d$x[f,],by=list(x=v[f]),mean)
ma <- ma[order(ma$x,decreasing=FALSE),]
ma <- ma[-1]/apply(ma[,-1],1,sum)
maz <- ( ma - apply(ma,1,mean))/sqrt(apply(ma,1,var))
q <- v[f]
names(q) <- rownames(s1d$som$data)[f]
g <- aggregate(rep(1,length(q)),by=list(x=q),sum)
h <- q %in% g[g[,2]>minsom,1]
ah <- sort(unique(q)) %in% g[g[,2]>minsom,1]
q <- q[h]
for ( i in max(q):1){if (sum(q==i)==0) q[q>i] <- q[q>i] - 1 }
ma <- ma[ah,]
maz <- maz[ah,]
x <- x[h,]
xz <- xz[h,]
y <- y[h,]
return(list(k=k,nodes=q,nodes.e=ma,nodes.z=maz,all.e=x,all.z=xz,all.b=y))
}
#' @title Heatmap of expression profiles
#'
#' @description This function allows plotting of normalized or z-score transformed pseudo-temporal expression profiles and permits highlighting of partitioning along the x-axis and the y-axis
#' @param x data frame with input data to show. Columns will be displayed on the x-axis and rows on the y-axis in the order given in \code{x}. For example, columns can correspond to cells in pseudo-temporal order and rows contain gene expression, i. e. rows can represent pseudo-temporal gene expression profiles.
#' @param xpart optional vector with integer values indicating partitioning of the data points along the x-axis. For instance, \code{xpart} can be a cluster assignment of cell IDs. The order of the components has to be the same as for the columns in \code{x}. Default value is \code{NULL}.
#' @param xcol optional vector with valid color names. The number of components has to be equal to the number of different values on \code{xpart}. If provided, these colors are used to highlight partitioning along the x-axis based on \code{xpart}. Default value is \code{NULL}.
#' @param xlab logical value. If \code{TRUE} then the average position is indicated for each partition value along the x-axis. Default value is \code{TRUE}.
#' @param xgrid logical value. If \code{TRUE} then the partitioning along the x-axis is indicated by vertical lines representing the boundaries of all positions with a given value in \code{xpart}.
#' @param ypart optional vector with integer values indicating partitioning of the data points along the y-axis. For instance, \code{ypart} can be the assignment of gene IDs to nodes of a sel-organizing map. The order of the components has to be the same as for the rows in \code{x}. Default value is \code{NULL}.
#' @param ycol optional vector with valid color names. The number of components has to be equal to the number of different values on \code{ypart}. If provided, these colors are used to highlight partitioning along the y-axis based on \code{ypart}. Default value is \code{NULL}.
#' @param ylab logical value. If \code{TRUE} then the average position is indicated for each partition value along the y-axis. Default value is \code{TRUE}.
#' @param ygrid logical value. If \code{TRUE} then the partitioning along the y-axis is indicated by horizontal lines representing the boundaries of all positions with a given value in \code{ypart}.
#' @return None
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#'
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#' trc <- dptTraj(x,y,fb,trthr=.25,distance="euclidean",sigma=1000)
#' n <- trc[["t6"]]
#' fs <- filterset(v,n,minexpr=2,minnumber=1)
#' s1d <- getsom(fs,nb=1000,k=5,locreg=TRUE,alpha=.5)
#' ps <- procsom(s1d,corthr=.85,minsom=3)
#' plotheatmap(ps$all.e,xpart=y[n],xcol=fcol,ypart=ps$nodes,xgrid=FALSE,ygrid=TRUE,xlab=FALSE)
#' @export
plotheatmap <- function(x,xpart=NULL,xcol=NULL,xlab=TRUE,xgrid=FALSE,ypart=NULL,ycol=NULL,ylab=TRUE,ygrid=FALSE){
require(RColorBrewer)
mi <- min(x,na.rm=TRUE)
ma <- max(x,na.rm=TRUE)
layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(5,1), heights=c(5,1))
ColorRamp <- rev(colorRampPalette(brewer.pal(n = 7,name = "RdYlBu"))(100))
ColorLevels <- seq(mi, ma, length=length(ColorRamp))
if ( mi == ma ){
ColorLevels <- seq(0.99*mi, 1.01*ma, length=length(ColorRamp))
}
par(mar = c(3,5,2.5,2))
image(t(as.matrix(x)),col=ColorRamp,axes=FALSE)
box()
set.seed(20)
if ( !is.null(xpart) ){
tmp <- c()
for ( u in unique(xpart) ){
ol <- (0:(length(xpart) - 1)/(length(xpart) - 1))[xpart == u]
if ( !is.null(xcol) ) points(ol,rep(0,length(ol)),col=xcol[u],pch=15,cex=.75)
tmp <- append(tmp,mean(ol))
delta <- .5/(length(xpart) - 1)
if ( xgrid & max(ol) < 1) abline(v=max(ol) + delta,col="grey",lty=2)
}
if ( xlab ) axis(1,at=tmp,lab=unique(xpart))
}
set.seed(20)
if ( !is.null(ypart) ){
tmp <- c()
for ( u in unique(ypart) ){
ol <- (0:(length(ypart) - 1)/(length(ypart) - 1))[ypart == u]
if ( !is.null(ycol) ) points(rep(0,length(ol)),ol,col=ycol[u + 1],pch=15,cex=.75)
tmp <- append(tmp,mean(ol))
delta <- .5/(length(ypart) - 1)
if ( ygrid & max(ol) < 1) abline(a=max(ol) + delta,b=0,col="grey",lty=2)
}
if ( ylab ) axis(2,at=tmp,lab=unique(ypart))
}
par(mar = c(20,2.5,2.5,2))
image(1, ColorLevels,
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
col=ColorRamp,
xlab="",ylab="",
xaxt="n")
layout(1)
}
#' @title Plotting of pseudo-temporal expression profiles
#'
#' @description This function allows plotting pseudo-temporal expression profiles for single genes or groups of genes.
#' @param x expression data frame with genes as rows and cells as columns. Gene IDs should be given as row names and cell IDs should be given as column names.
#' @param y clustering partition. A vector with an integer cluster number for each cell. The order of the cells has to be the same as for the columns of \code{x}.
#' @param g a gene ID corresponding to one of the rownames of \code{x}. In the latter case, the input argument \code{x} needs to be provided. A vector of gene IDs can also be provided. In this case, the aggregated expression across all gene IDs is plotted.
#' @param n ordered vector of cell IDs to be included. Cell IDs need to be column names of \code{x}.
#' @param col optional vector of valid color names for all clusters in \code{y} ordered by increasing cluster number. Default value is \code{NULL}.
#' @param name optional character string. This argument corresponds to a title for the plot. Default value is \code{NULL}. If not provided, and \code{g} is given, then \code{name} will equal \code{g} or \code{g[1]}, respectively, if \code{g} is a vector of gene IDs.
#' @param cluster logical value. If \code{TRUE} then the partitioning along the x-axis is indicated be vertical lines representing the boundaries of all positions with a given value in \code{y}. The average position across all cells in a cluster will be indicated on the x-axis.
#' @param k positive integer number. Pseudo-temporal expression profiles are either derived using a running mean of expression values across the ordered cells with window-size \code{k}, or by a local regression (if \code{locreg} is \code{TRUE}). Default value is 5.
#' @param locreg logical value. If \code{TRUE}, then pseudo-temporal expression profiles are derived by a local regression of expression values across the ordered cells using the function \code{loess} from the package \pkg{stats}. Default value is \code{TRUE}.
#' @param alpha positive real number. This is the parameter, which controls the degree of smoothing. Larger values return smoother profiles. Default value is 0.5.
#' @param types optional vector with IDs for different subsets of cells in \code{y}, e. g. different batches. All cells with the same ID will be displayed by the same symbol and color. Default value is \code{NULL}
#' @examples
#' x <- intestine$x
#' y <- intestine$y
#' v <- intestine$v
#' fcol <- intestine$col
#' tar <- c(6,9,13)
#' fb <- fateBias(x,y,tar,z=NULL,minnr=5,minnrh=10,nbfactor=5,use.dist=FALSE,seed=NULL,nbtree=NULL)
#' trc <- dptTraj(x,y,fb,trthr=.25,distance="euclidean",sigma=1000)
#' n <- trc[["t6"]]
#' fs <- filterset(v,n,minexpr=2,minnumber=1)
#' s1d <- getsom(fs,nb=1000,k=5,locreg=TRUE,alpha=.5)
#' ps <- procsom(s1d,corthr=.85,minsom=3)
#' # plot average profile of all genes of node 1 in the self-organizing map
#' g <- names(ps$nodes)[ps$nodes == 1]
#' plotexpression(v,y,g,n,k=25,col=fcol,name="Node 1",cluster=FALSE,locreg=TRUE,alpha=.5,types=NULL)
#' @export
plotexpression <- function(x,y,g,n,col=NULL,name=NULL,cluster=FALSE,k=5,locreg=FALSE,alpha=.5,types=NULL){
require(caTools)
cl <- unique(y[n])
set.seed(111111)
if ( is.null(col) ) col <- sample(rainbow(max(y)))
xlim <- c(1,length(n))
if ( !is.null(types) ) xlim[1] <- 1.25 * xlim[1]
z <- if ( length(g) == 1 ) x[g,n] else t(apply(x[g,n],2,sum))
if ( is.null(name) ) name <- g[1]
plot(1:length(n),t(z),cex=0,axes=FALSE,xlab="",ylab="Expression",main=name,xlim=xlim)
if ( ! is.null(types) ){
coloc <- rainbow(length(unique(types)))
syms <- c()
for ( i in 1:length(unique(types)) ){
f <- types == sort(unique(types))[i]
syms <- append( syms, ( (i-1) %% 25 ) + 1 )
points((1:length(n))[f],t(z)[f],col=coloc[i],pch=( (i-1) %% 25 ) + 1,cex=1)
}
}else{
points((1:length(n)),t(z),col="grey",pch=20,cex=3)
}
for ( i in 1:length(cl) ){
f <- y[n] == cl[i]
if ( is.null(types) ){
text((1:length(n))[f],t(z)[f],cl[i],font=4,col=col[cl[i]])
}
zc <- if ( i == 1 ) sum(f) else append(zc, zc[i-1] + sum(f))
xc <- if ( i == 1 ) sum(f)/2 else append(xc, zc[i-1] + sum(f)/2)
if ( cluster ) abline(v=zc[i],col="grey",lty=2)
}
u <- 1:length(n)
if ( locreg ){
v <- t(z)
zc <- predict(loess( v ~ u, span=alpha ))
zc[zc<0] <- .1
lines(u,zc)
}else{
lines(u,runmean(t(z),k=k))
}
if ( !is.null(types) ) legend("topleft", legend=sort(unique(types)), col=coloc, pch=syms)
axis(2)
box()
if ( cluster ) axis(1,at=xc,lab=cl)
}
cellsfromtree <- function(ltr,z){
prtr <- ltr@prtree
f <- c()
g <- c()
for ( i in 1:( length(z) - 1 ) ){
rf <- if ( z[i+1] > z[i] ) FALSE else TRUE
k <- if ( rf ) paste(z[i + 1],z[i],sep=".") else paste(z[i],z[i+1],sep=".")
p <- prtr$l[[k]]
n <- prtr$n[[k]]
if ( rf ){
##h <- p < Inf & p > -Inf
if ( i == 1 & i + 1 == length(z) ) h <- p < Inf & p > -Inf
if ( i == 1 & i + 1 < length(z) ) h <- p < Inf & p >= 0
if ( i > 1 & i + 1 == length(z) ) h <- p <= 1 & p > -Inf
if ( i > 1 & i + 1 < length(z) ) h <- p <= 1 & p >= 0
}else{
##h <- p > -Inf & p < Inf
if ( i == 1 & i + 1 == length(z) ) h <- p > -Inf & p < Inf
if ( i == 1 & i + 1 < length(z) ) h <- p > -Inf & p <= 1
if ( i > 1 & i + 1 == length(z) ) h <- p >= 0 & p < Inf
if ( i > 1 & i + 1 < length(z) ) h <- p >= 0 & p <= 1
}
v <- n[h][order(p[h],decreasing=FALSE)]
if ( rf ) v <- rev(v)
v <- v[! v %in% f ]
f <- append(f,v)
g <- append(g,rep(i,length(v)))
}
return(list(f=f,g=g))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.