#' @name SingleCellsNGS
#' @title SingleCellsNGS
#' @docType package
#' @description An S4 class to visualize Expression data from a single cell NGS experiment.
#' @slot data a data.frame containing the expression values for each gene x sample (gene = row)
#' @slot samples a data.frame describing the columnanmes in the data column
#' @slot annotation a data.frame describing the rownames in the data rows
#' @slot outpath the default outpath for the plots and tables from this package
#' @slot name the name for this package (all filesnames contain that)
#' @slot rownamescol the column name in the annotation table that represents the rownames of the data table
#' @slot sampleNamesCol the column name in the samples table that represents the colnames of the data table
#' @slot stats the stats list for all stats created in the object
setClass(
Class='SingleCellsNGS',
representation = representation (
## NGS = 'binary'
),
contains='NGSexpressionSet',
prototype(outpath ='', name = 'SingleCellsNGS',
rownamescol=NA_character_,
sampleNamesCol=NA_character_,
stats=list() )
)
extends("SingleCellsNGS","NGSexpressionSet")
#' @name SingleCellsNGS
#' @aliases SingleCellsNGS,SingleCellsNGS-method
#' @rdname SingleCellsNGS-methods
#' @docType methods
#' @description create a new SingleCellsNGS object This object is mainly used for plotting the
#' @description data
#' @param dat data frame or matrix containing all expression data
#' @param Samples A sample description table
#' @param Analysis If the samples table contains a Analysis column you can subset the data already here to the Analysis here (String). This table has to contain a column filenames that is expected to connect the sample table to the dat column names.
#' @param name The name of this object is going to be used in the output of all plots and tables - make it specific
#' @param namecol The samples table column that contains the (to be set) names of the samples in the data matrix
#' @param namerow This is the name of the gene level annotation column in the dat file to use as ID
#' @param outpath Where to store the output from the analysis
#' @param annotation The annotation table from e.g. affymetrix csv data
#' @param newOrder The samples column name for the new order (default 'Order')
#' @return A new SingleCellsNGS object
#' @title description of function SingleCellsNGS
#' @export
setGeneric('SingleCellsNGS', ## Name
function ( dat, Samples, Analysis = NULL, name='WorkingSet', namecol='GroupName', namerow= 'GeneID', usecol='Use' , outpath = '') { ## Argumente der generischen Funktion
standardGeneric('SingleCellsNGS') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('SingleCellsNGS', signature = c ('data.frame'),
definition = function ( dat, Samples, Analysis = NULL, name='WorkingSet', namecol='GroupName', namerow= 'GeneID', usecol='Use' , outpath = '') {
x <- StefansExpressionSet( dat, Samples, Analysis = Analysis, name=name, namecol=namecol, namerow= namerow, usecol= usecol, outpath = outpath)
as(x,'SingleCellsNGS')
} )
#' @name old_LTHSC
#' @title Read counts for the expression data described in PMID:26430063
#' @description The data was re-mapped against mouse mm10 using HISAT
#' @description and quantified using the R subreads package and then restricted the old_LTHSC single cells only.
#' @docType data
#' @usage old_LTHSC
#' @format SingleCellsNGS
'old_LTHSC'
#' @name normalize
#' @aliases normalize,NGSexpressionSet-method
#' @rdname normalize-methods
#' @docType methods
#' @description
#' normalize the expression data by subsampling as described in PMID 24531970
#' @param x The SingleCellsNGS object
#' @param reads the required read depth
#' @param name the name of the new object
#' @return the normalized data set (original data stored in slot 'raw'
#' @title description of function normalize
#' @export
setGeneric('normalize', ## Name
function ( object , ..., reads= 600, name='normalized' ) { ## Argumente der generischen Funktion
standardGeneric('normalize') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('normalize', signature = c ('SingleCellsNGS'),
definition = function ( object, ..., reads=600, name='normalized' ) {
if ( ! object@snorm ) {
if ( length( object@samples$counts ) == 0 ) {
object@samples$counts <- apply( object@data, 2, sum)
}
object <- drop.samples( object, object@samples[which(object@samples$counts < reads), object@sampleNamesCol ]
, name=name )
if ( ! object@snorm ){
object@raw <- object@data
object@snorm = TRUE
}
## resample the data
n <- nrow(object@raw)
object@data[] <- 0
for ( i in 1:ncol(object@raw) ) {
d <- sample(rep ( 1:n, object@raw[,i]) , reads)
t <- table(d)
object@data[ as.numeric(names(t)),i] <- as.numeric(t)
}
}
object
}
)
#' @name simpleAnova
#' @aliases simpleAnova,SingleCellsNGS-method
#' @rdname simpleAnova-methods
#' @docType methods
#' @description This function calculates an annova to identify significant changes in the StefansExpressionSet
#' @description has a higher sensitivity for multi group analyses to identify group specific changes
#' @description or general trends in the dataset. This function adds the results into the stats slot
#' @description of the SingleCellsNGS object. In contrast to the StefansExpressionSet version of the function,
#' @description the cells showing no expression are axcluded from the stats.
#' @param x the SingleCellsNGS object
#' @param groupCol the samples table column that contains the grouping information
#' @param padjMethod the p value correction method as described in \code{\link[stats]{p.adjust}}
#' @title description of function simpleAnova
#' @export
setMethod('simpleAnova', signature = c ( 'SingleCellsNGS') ,
definition = function ( x, groupCol='GroupName', padjMethod='BH' ) {
if ( is.null(x@stats[[paste('simpleAnova', groupCol)]] )) {
x <- normalize(x)
significants <- apply ( x@data ,1, function(a) {
ids <- which(a > 0 )
not <- names(which (table(x@samples[ids,groupCol ]) < 10 ))
ids <- ids[ is.na(match(x@samples[ids,groupCol], not))==T]
if ( length(table(x@samples[ids,groupCol ]) ) > 1 ) {
try(anova( lm (a[ids] ~ x@samples[ids,groupCol ]))$"Pr(>F)"[1])
}
else {
1
}
} )
adj.p <- p.adjust( significants, method = padjMethod)
res <- cbind(significants,adj.p )
res <- data.frame(cbind( rownames(res), res ))
colnames(res) <- c('genes', 'pvalue', paste('padj',padjMethod) )
res[,2] <- as.numeric(as.vector(res[,2]))
res[,3] <- as.numeric(as.vector(res[,3]))
if ( length (x@stats) == 0 ){
x@stats <- list ( 'simpleAnova' = res )
}
else {
x@stats[[length(x@stats)+1]] <- res
names(x@stats)[length(x@stats)] = paste('simpleAnova', groupCol)
}
}
x
}
)
#' @name z.score
#' @aliases z.score,SingleCellsNGS-method
#' @rdname z.score-methods
#' @docType methods
#' @description Use most of'StefansExpressionSet'functionallity with minor changes to NGS data (like normalizing)
#' @description This package is mainly meant to plot the data - all important quantification or gene
#' @description annotation is performed using DEseq functionallity. Read a set of bam files and perform
#' @description the quantification (better do that without using this function!)
#' @param x the SingleCellsNGS object
#' @title description of function z.score
#' @export
setMethod('z.score', signature = c ('SingleCellsNGS'),
definition = function ( m ) {
if ( ! m@zscored ) {
m@raw <- m@data
ma <- as.matrix(m@data)
i = 0
ret <- t(
apply(ma,1, function (x) {
i = i+1
n <- which(x==0)
if ( length(x) - length(n) > 1 ){
if (length(n) == 0 ){
x <- scale(as.vector(t(x)))
}
else {
x[-n] <- scale(as.vector(t(x[-n])))
x[n] <- -20
}
}
else {
x[] = -20
}
x}
)
)
m@data <- data.frame(ret)
colnames(m@data)<- colnames(m@raw)
m@zscored = TRUE
}
m
})
#' @name defineHeatmapColors
#' @aliases defineHeatmapColors,SingleCellsNGS-method
#' @rdname defineHeatmapColors-methods
#' @docType methods
#' @description uses ggplot2 to plot heatmaps
#' @param merged the merged data object with the Expression column that should be colored
#' @param colrs and optional colors vector( gray + bluered for data and rainbow for samples)
#' @title description of function gg.heatmap.list
#' @return a list with the modified merged table and the colors vector
#' @export
setGeneric('defineHeatmapColors', ## Name
function ( melted,..., colrs=NULL) { ## Argumente der generischen Funktion
standardGeneric('defineHeatmapColors') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('defineHeatmapColors', signature = c ( 'data.frame') ,
definition = function (melted, colrs=NULL ){
if ( is.factor( melted$Expression )) {
## here might be some row grouping going on!
d <- levels(melted$Expression)[melted$Expression]
prob.id <- which(is.na(as.numeric(d))==T)
treat.separate <- unique(d[prob.id])
n <- as.numeric(d[-prob.id])
brks= c( -20.1, as.vector(quantile(n[which(n != -20)],seq(0,1,by=0.1)) ))
brks = unique(brks)
d[-prob.id] <- brks [cut( n, breaks= brks)]
melted$Expression <- factor( d, levels= c(brks, treat.separate ) )
colors= c(
'gray',
gplots::bluered(length(brks) -2 ), ## the expression
rainbow( length(treat.separate) ) ## the sample descriptions
)
}
else {
n <- as.numeric(melted$Expression )
brks= c( -20.1, as.vector(quantile(n[which(n != -20)],seq(0,1,by=0.1)) ))
brks = unique(brks)
melted$Expression <- factor( brks [cut( n, breaks= brks)] , levels= c(brks) )
colors= c(
'gray',
gplots::bluered(length(brks) -2 ) ## the expression
)
}
list (melted = melted, colors = colors)
}
)
#' @name gg.heatmap.list
#' @aliases gg.heatmap.list,SingleCellsNGS-method
#' @rdname gg.heatmap.list-methods
#' @docType methods
#' @description uses ggplot2 to plot heatmaps
#' @param dat the StefansExpressionSet object
#' @param glist a list of probesets to plot (or all)
#' @param colrs a list of colors for the sample level boxes (or rainbow colors)
#' @param groupCol the column group in the samples table that contains the grouping strings
#' @param colCol the column group in the samples table that contains the color groups
#' @title description of function gg.heatmap.list
#' @export
setMethod('gg.heatmap.list', signature = c ( 'SingleCellsNGS') ,
definition = function (dat,glist=NULL, colrs=NULL, groupCol='GroupID', colCol=NULL) {
if ( ! is.null(glist) ) {
isect <- reduce.Obj ( dat, glist)
}else {
isect <- dat
}
if ( is.null(colrs) ){
colrs = rainbow( length(unique(isect@samples[,colCol])))
}
if ( ! isect@zscored ) {isect <- z.score(isect)}
dat.ss <- melt.StefansExpressionSet ( isect, probeNames=isect@rownamescol, groupcol=groupCol,colCol=colCol)
#dat.ss <- dat[which(is.na(match(dat$Gene.Symbol,isect))==F),]
colnames(dat.ss) <- c( 'Gene.Symbol', 'Sample', 'Expression', 'Group',
paste('ColorGroup', 1:10) )[1:ncol(dat.ss)]
r <- defineHeatmapColors( dat.ss )
dat.ss <- r$melted
ord.genes <- rownames(isect@data)[hclust(dist(isect@data),method="ward.D2")$order]
if ( ! is.null(colCol) ){
ord.genes <- c( ord.genes,colCol )
}
dat.ss$Gene.Symbol <- with(dat.ss,factor(Gene.Symbol,levels =
unique(as.character(ord.genes))))
dat.ss$Sample <- with(dat.ss,factor(Sample,levels =
unique(as.character(Sample))))
dat.ss$Group <- with(dat.ss,factor(Group,levels =
unique(as.character(Group))))
dat.ss$colrss <- colrs[dat.ss$Group]
ss <-dat.ss[which(dat.ss$Gene.Symbol==dat.ss$Gene.Symbol[1]),]
p = ( ggplot2::ggplot(dat.ss, ggplot2::aes(x=Sample,y=Gene.Symbol))
+ ggplot2::geom_tile(ggplot2::aes(fill=Expression))
+ ggplot2::scale_fill_manual( values = r$colors )
+ ggplot2::theme(
legend.position= 'bottom',
axis.text.x=ggplot2::element_blank(),
axis.ticks.length=ggplot2::unit(0.00,"cm")
)+ ggplot2::labs( y='') )
if ( ncol(dat.ss) == 6 ){
p <- p + ggplot2::facet_grid( colrss ~ Group,scales="free", space='free')
}else if ( ncol(dat.ss) == 5 ) {
p <- p + ggplot2::facet_grid( . ~ Group,scales="free", space='free')
}
list ( plot = p, not.in = setdiff( glist, rownames(isect@data)) )
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.