greg <- function(
x, # matrix object (dgCMatrix or dense), or a file name
cell_sample,
cell_type,
cell_names = NULL, # cell names, to be matched with names in x
u_0 = NULL,
s2_0 = 1/12,
sep="\t", verbose = 1 )
{
if(length(cell_sample) != length(cell_type))
stop("length(cell_sample) != length(cell_type)")
n_cells <- length(cell_sample)
if(!is.null(cell_names) && length(cell_names) != n_cells )
stop("length(cell_names) != length(cell_sample)")
u_sample <- unique(as.character(cell_sample))
u_ctype <- unique(as.character(cell_type))
n_sample <- length(u_sample)
n_ctype <- length(u_ctype)
# cell
N <- array(0,dim=c(n_sample,n_ctype),dimnames=list(u_sample,u_ctype))
if( is.matrix(x) || is(x,"sparseMatrix" ))
{
src <- paste("Matrix",deparse(substitute(x)))
# cell_names && no colnames: error
# cell_names && colnames: intersect
# no cell_nmaes && no colnames: assume integer, length must match
# no cell_names && colnames: ditto
if( is.null(cell_names))
{
if( ncol(x) != n_cells )
stop("number of cells annotated doesn't match number of cells in x")
la2lx <- 1:n_cells
}
else
{
la2lx <- pmatch(cell_names, colnames(x))
n_nomatch <- sum(is.na(la2lx))
if(n_nomatch != 0)
warning(n_nomatch," cells are not matched.")
}
n_gene <- nrow(x)
y <- array( c(0,Inf), dim=c(2,n_sample,n_gene,n_ctype),
dimnames=list(c("u","v"),u_sample,rownames(x),u_ctype))
for(k in u_ctype )
{
for(i in u_sample )
{
message(i," ",k)
l <- intersect(
grep(i, cell_sample, fixed=T),
grep(k, cell_type, fixed=T))
N[i,k] <- Nik <- length(l)
l <- na.omit(la2lx[l])
if( Nik == 1 )
{
y[1,i,,k] <- x[,l]
}
else
{
Sx <- rowSums( x[,l] )
Sxx <- rowSums( x[,l]^2 )
y[1,i,,k] <- Sx/Nik
y[2,i,,k] <- ((Sxx-Sx*Sx/Nik)/(Nik-1))/Nik
}
} # k
} # i
}
else if( is.character(x) && length(x)==1 ) # interpret x as a filename
{
src <- paste("file",x)
f <- gzfile(x,open="rt")
h <- strsplit( readLines(f,1), sep, fixed=TRUE )[[1]]
n_headers <- length(h)
if(verbose)
message(sprintf("n_headers = %d, n_cells = %d",n_headers,n_cells))
if( is.null(cell_names))
{
if( n_headers != n_cells + 1 )
stop("number of cells annotated doesn't match number of cells in x")
la2lx <- 1:n_cells
}
else
{
la2lx <- pmatch( cell_names, h[-1])
n_nomatch <- sum(is.na(la2lx))
if(n_nomatch != 0)
warning(n_nomatch," cells are not matched.")
}
chunk <- 1200
n_gene <- 0
y <- array(c(0,Inf),dim=c(2,n_sample, chunk, n_ctype),
dimnames=list(c("u","v"),u_sample,1:chunk,u_ctype))
cidx <- sapply( u_ctype,function(k)
sapply( u_sample,function(i)
na.omit(la2lx[intersect(grep(k,cell_type,fixed=T),
grep(i,cell_sample,fixed=T))]),simplify=FALSE))
N <- sapply( u_ctype, function(k)
sapply( u_sample, function(i) length(cidx[i,k][[1]])))
repeat
{
gene <- scan(f,"",1,quiet=TRUE)
if(length(gene)==0) break # EOF
counts <- scan(f,0.0,n_cells,quiet=TRUE)
n_gene <- n_gene + 1
dimnames(y)[[3]][n_gene] <- gene
for(k in u_ctype )
for(i in u_sample)
{
yc <- counts[cidx[i,k][[1]]]
y[1,i,n_gene,k] <- mean(yc)
SE2 <- (stats::var(yc))/N[i,k]
if(is.na(SE2)) SE2 <- Inf
y[2,i,n_gene,k] <- SE2
}
if( verbose && ( n_gene %% 50 == 0) )
message(sprintf("\rprocessed: %6d genes (alloc. chunk %7d)",n_gene,chunk),appendLF=FALSE)
if( n_gene == chunk )
{
chunk <- 2*chunk
y <- abind( y, array(c(0,Inf),dim=c(2,n_sample,chunk,n_ctype)),along=3)
gc()
}
}
if(verbose)
message(sprintf("\rprocessed: %6d genes",n_gene),appendLF=TRUE)
close(f)
y <- y[,,1:n_gene,]
gc()
}
else if( class(x) == "ac" || class(x) == "greg") # to convert old format
{
src <- paste(class(x), deparse(substitute(x)))
y <- x$y
N <- x$N
}
min_nz <- min( y[1,,,][y[1,,,] > 0],na.rm=T )
gene_mean <- apply(y[1,,,],2,mean,na.rm=T)
gene_cv <- apply(y[1,,,],2,sd,na.rm=T)/gene_mean
# aggregate and gene scale factors
gene_scale <- array(dim=c(n_gene,n_ctype))
aggr_scale <- array(dim=c(n_sample,n_ctype))
if( is.null(u_0)) u_0 <- 0.5*min_nz
for(k in 1:n_ctype)
{
fit <- malm11( y[,,,k], w=ifelse(N[,k] > 1,1,0),
u_0 = u_0, v_0 = s2_0/N[,k] )
gene_scale[,k] <- exp(fit$beta0)
aggr_scale[,k] <- exp(fit$alpha)
}
structure(
list(
N=N,
y=y,
source=src,
min_nz = min_nz,
gene_mean = gene_mean,
gene_cv = gene_cv,
gene_scale = gene_scale,
aggr_scale = aggr_scale
),
class="ac")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.