HandlingClusterizations | R Documentation |
These functions manage the clusterizations and their
associated cluster COEX
data.frame
s.
A clusterization is any partition of the cells where to each cell it is assigned a label; a group of cells with the same label is called cluster.
For each cluster is also possible to define a COEX
value for each gene,
indicating its increased or decreased expression in the cluster compared
to the whole background. A data.frame
with these values listed in a
column for each cluster is stored separately for each clusterization in
the clustersCoex
member.
The formulae for this In/Out COEX
are similar to those used in the
calculateCoex()
method, with the role of the second gene taken by the
In/Out status of the cells with respect to each cluster.
## S4 method for signature 'COTAN'
estimateNuLinearByCluster(objCOTAN, clName = "", clusters = NULL)
## S4 method for signature 'COTAN'
getClusterizations(objCOTAN, dropNoCoex = FALSE, keepPrefix = FALSE)
## S4 method for signature 'COTAN'
getClusterizationName(objCOTAN, clName = "", keepPrefix = FALSE)
## S4 method for signature 'COTAN'
getClusterizationData(objCOTAN, clName = "")
getClusters(objCOTAN, clName = "")
## S4 method for signature 'COTAN'
getClustersCoex(objCOTAN)
## S4 method for signature 'COTAN'
addClusterization(
objCOTAN,
clName,
clusters,
coexDF = data.frame(),
override = FALSE
)
## S4 method for signature 'COTAN'
addClusterizationCoex(objCOTAN, clName, coexDF)
## S4 method for signature 'COTAN'
dropClusterization(objCOTAN, clName)
DEAOnClusters(objCOTAN, clName = "", clusters = NULL)
pValueFromDEA(coexDF, numCells, adjustmentMethod = "none")
logFoldChangeOnClusters(
objCOTAN,
clName = "",
clusters = NULL,
floorLambdaFraction = 0.05
)
distancesBetweenClusters(
objCOTAN,
clName = "",
clusters = NULL,
coexDF = NULL,
useDEA = TRUE,
distance = NULL
)
UMAPPlot(
df,
clusters = NULL,
elements = NULL,
title = "",
colors = NULL,
numNeighbors = 0L,
minPointsDist = NaN
)
cellsUMAPPlot(
objCOTAN,
clName = "",
clusters = NULL,
dataMethod = "",
genesSel = "HVG_Seurat",
numGenes = 2000L,
colors = NULL,
numNeighbors = 0L,
minPointsDist = NA
)
clustersMarkersHeatmapPlot(
objCOTAN,
groupMarkers = list(),
clName = "",
clusters = NULL,
coexDF = NULL,
kCuts = 3L,
adjustmentMethod = "bonferroni",
condNameList = NULL,
conditionsList = NULL
)
clustersSummaryData(
objCOTAN,
clName = "",
clusters = NULL,
condName = "",
conditions = NULL
)
clustersSummaryPlot(
objCOTAN,
clName = "",
clusters = NULL,
condName = "",
conditions = NULL,
plotTitle = ""
)
clustersTreePlot(
objCOTAN,
kCuts,
clName = "",
clusters = NULL,
useDEA = TRUE,
distance = NULL,
hclustMethod = "ward.D2"
)
findClustersMarkers(
objCOTAN,
n = 10L,
markers = NULL,
clName = "",
clusters = NULL,
coexDF = NULL,
adjustmentMethod = "bonferroni"
)
geneSetEnrichment(clustersCoex, groupMarkers = list())
reorderClusterization(
objCOTAN,
clName = "",
clusters = NULL,
coexDF = NULL,
reverse = FALSE,
keepMinusOne = TRUE,
useDEA = TRUE,
distance = NULL,
hclustMethod = "ward.D2"
)
objCOTAN |
a |
clName |
The name of the clusterization. If not given the last available clusterization will be used, as it is probably the most significant! |
clusters |
A clusterization to use. If given it will take precedence
on the one indicated by |
dropNoCoex |
When |
keepPrefix |
When |
coexDF |
a |
override |
When |
numCells |
the number of overall cells in all clusters |
adjustmentMethod |
p-value multi-test adjustment method. Defaults to
|
floorLambdaFraction |
Indicates the lower bound to the average count
sums inside or outside the cluster for each gene as fraction of the
relevant |
useDEA |
Boolean indicating whether to use the DEA to define the distance; alternatively it will use the average Zero-One counts, that is faster but less precise. |
distance |
type of distance to use. Default is |
df |
The |
elements |
a named |
title |
a string giving the plot title. Will default to UMAP Plot if not specified |
colors |
an |
numNeighbors |
Overrides the |
minPointsDist |
Overrides the |
dataMethod |
selects the method to use to create the
|
genesSel |
Decides whether and how to perform gene-selection. It can be a straight list of genes or a string indicating one of the following selection methods:
|
numGenes |
the number of genes to select using the above method. Will be ignored when no selection have been asked or when an explicit list of genes was passed in |
groupMarkers |
an optional named |
kCuts |
the number of estimated cluster (this defines the height for the tree cut) |
condNameList |
a |
conditionsList |
a |
condName |
The name of a condition in the |
conditions |
The conditions to use. If given it will take precedence
on the one indicated by |
plotTitle |
The title to use for the returned plot |
hclustMethod |
It defaults is |
n |
the number of extreme |
markers |
a |
clustersCoex |
the |
reverse |
a flag to the output order |
keepMinusOne |
a flag to decide whether to keep the cluster |
estimateNuLinearByCluster()
does a linear estimation of nu:
cells' counts averages normalized cluster by cluster
getClusterizations()
extracts the list of the clusterizations
defined in the COTAN
object.
getClusterizationName()
normalizes the given clusterization name
or, if none were given, returns the name of last available clusterization
in the COTAN
object. It can return the clusterization internal name
if needed
getClusterizationData()
extracts the asked clusterization and
its associated COEX
data.frame
from the COTAN
object
getClusters()
extracts the asked clusterization from the COTAN
object
getClustersCoex()
extracts the full clusterCoex
member list
addClusterization()
adds a clusterization to the current COTAN
object, by adding a new column in the metaCells
data.frame
and adding a
new element in the clustersCoex
list
using the passed in COEX
data.frame
or an empty data.frame
if none were passed in.
addClusterizationCoex()
adds a clusterization COEX
data.frame
to the current COTAN
object. It requires the named
clusterization to be already present.
dropClusterization()
drops a clusterization from the current
COTAN
object, by removing the corresponding column in the metaCells
data.frame
and the corresponding COEX
data.frame
from the
clustersCoex
list
.
DEAOnClusters()
is used to run the Differential Expression
analysis using the COTAN
contingency tables on each cluster in the
given clusterization
pValueFromDEA()
is used to convert to p-value the Differential
Expression analysis using the COTAN
contingency tables on each cluster
in the given clusterization
logFoldChangeOnClusters()
is used to get the log difference of the
expression levels for each cluster in the given clusterization against
the rest of the data-set
distancesBetweenClusters()
is used to obtain a distance between
the clusters. Depending on the value of the useDEA
flag will base the
distance on the DEA columns or the averages of the Zero-One matrix.
UMAPPlot()
plots the given data.frame
containing genes
information related to clusters after applying the umap::umap()
transformation
cellsUMAPPlot()
returns a ggplot2
plot where the given
clusters are placed on the base of their relative distance. Also if
needed calculates and stores the DEA
of the relevant clusterization.
clustersMarkersHeatmapPlot()
returns the heatmap plot of a summary
score for each cluster and each gene marker in the given
clusterization. It also returns the numerosity and percentage of each
cluster on the right and a clusterization dendogram on the left, as
returned by the function clustersTreePlot()
. The heatmap cells' colors
express the DEA, that is whether a gene is enriched or depleted in the
cluster, while the stars are aligned to the corresponding adjusted
p-
value: ***
for p < 0.1\%
, **
for p < 1\%
, *
for
p < 5\%
, .
for p < 10\%
clustersSummaryData()
calculates various statistics about each
cluster (with an optional further condition
to separate the cells).
clustersSummaryPlot()
calculates various statistics about each
cluster via clustersSummaryData()
and puts them together into a plot.
clustersTreePlot()
returns the dendogram
plot where the given
clusters are placed on the base of their relative distance. Also if
needed calculates and stores the DEA
of the relevant clusterization.
findClustersMarkers()
takes in a COTAN
object and a
clusterization and produces a data.frame
with the n
most positively
enriched and the n
most negatively enriched genes for each cluster. The
function also provides whether and the found genes are in the given
markers
list or not. It also returns the adjusted p-value for
multi-tests using the stats::p.adjust()
geneSetEnrichment()
returns a cumulative score of enrichment in a
cluster over a gene set. In formulae it calculates
\frac{1}{n}\sum_i(1-e^{-\theta X_i})
, where the X_i
are the
positive values from DEAOnClusters()
and \theta = -\frac{1}{0.1}
\ln(0.25)
reorderClusterization()
takes in a clusterizations and reorder
its labels so that in the new order near labels indicate near clusters
according to a DEA (or Zero-One) based distance
estimateNuLinearByCluster()
returns the updated COTAN
object
getClusterizations()
returns a vector of clusterization names,
usually without the CL_
prefix
getClusterizationName()
returns the normalized clusterization
name or NULL
if no clusterizations are present
getClusterizationData()
returns a list
with 2 elements:
"clusters"
the named cluster labels array
"coex"
the associated COEX
data.frame
. This will be an empty
data.frame
when not specified for the relevant clusterization
getClusters()
returns the named cluster labels array
getClustersCoex()
returns the list with a COEX
data.frame
for
each clusterization. When not empty, each data.frame
contains a COEX
column for each cluster.
addClusterization()
returns the updated COTAN
object
addClusterizationCoex()
returns the updated COTAN
object
dropClusterization()
returns the updated COTAN
object
DEAOnClusters()
returns the co-expression data.frame
for the
genes in each cluster
pValueFromDEA()
returns a data.frame
containing the p-values
corresponding to the given COEX
adjusted for multi-test
logFoldChangeOnClusters()
returns the log-expression-change
data.frame
for the genes in each cluster
distancesBetweenClusters()
returns a dist
object
UMAPPlot()
returns a ggplot2
object
cellsUMAPPlot()
returns a list with 2 objects:
"plot"
a ggplot2
object representing the umap
plot
"cellsPCA"
the data.frame
PCA used to create the plot
clustersMarkersHeatmapPlot()
returns a list with:
"heatmapPlot"
the complete heatmap plot
"dataScore"
the data.frame
with the score values
"pValueDF"
the data.frame
with the corresponding adjusted
p-
values
clustersSummaryData()
returns a data.frame
with the following
statistics: The calculated statistics are:
"clName"
the cluster labels
"condName"
the relevant condition (that sub-divides the clusters)
"CellNumber"
the number of cells in the group
"MeanUDE"
the average "UDE" in the group of cells
"MedianUDE"
the median "UDE" in the group of cells
"ExpGenes25"
the number of genes expressed in at the least 25% of the
cells in the group
"ExpGenes"
the number of genes expressed at the least once in any of
the cells in the group
"CellPercentage"
fraction of the cells with respect to the total cells
clustersSummaryPlot()
returns a list
with a data.frame
and a
ggplot
objects
"data"
contains the data,
"plot"
is the returned plot
clustersTreePlot()
returns a list with 2 objects:
"dend"
a ggplot2
object representing the dendrogram
plot
"objCOTAN"
the updated COTAN
object
findClustersMarkers()
returns a data.frame
containing n
genes
for each cluster scoring top/bottom COEX
scores. The data.frame
also
contains:
"CL"
the cluster
"Gene"
the gene
"Score"
the COEX
score of the gene
"adjPVal"
the p-values associated to the COEX
adjusted for multi-testing
"DEA"
the differential expression of the gene
"IsMarker"
whether the gene is among the given markers
"logFoldCh"
the log-fold-change of the gene expression inside versus
outside the cluster from logFoldChangeOnClusters()
geneSetEnrichment()
returns a data.frame
with the cumulative
score
reorderClusterization()
returns a list
with 2 elements:
"clusters"
the newly reordered cluster labels array
"coex"
the associated COEX
data.frame
"permMap"
the reordering mapping
data("test.dataset")
objCOTAN <- COTAN(raw = test.dataset)
objCOTAN <- proceedToCoex(objCOTAN, cores = 6L, calcCoex = FALSE,
optimizeForSpeed = TRUE, saveObj = FALSE)
data("test.dataset.clusters1")
clusters <- test.dataset.clusters1
coexDF <- DEAOnClusters(objCOTAN, clusters = clusters)
groupMarkers <- list(G1 = c("g-000010", "g-000020", "g-000030",
"g-000150", "g-000160", "g-000170"),
G2 = c("g-000300", "g-000330", "g-000450",
"g-000460", "g-000470"),
G3 = c("g-000510", "g-000530", "g-000550",
"g-000570", "g-000590"))
geneClusters <- rep(1:3, each = 240)[1:600]
names(geneClusters) <- getGenes(objCOTAN)
umapPlot <- UMAPPlot(coexDF, clusters = NULL, elements = groupMarkers)
plot(umapPlot)
objCOTAN <- addClusterization(objCOTAN, clName = "first_clusterization",
clusters = clusters, coexDF = coexDF)
lfcDF <- logFoldChangeOnClusters(objCOTAN, clusters = clusters)
umapPlot2 <- UMAPPlot(lfcDF, clusters = geneClusters)
plot(umapPlot2)
objCOTAN <- estimateNuLinearByCluster(objCOTAN, clusters = clusters)
clSummaryPlotAndData <-
clustersSummaryPlot(objCOTAN, clName = "first_clusterization",
plotTitle = "first clusterization")
plot(clSummaryPlotAndData[["plot"]])
if (FALSE) {
objCOTAN <- dropClusterization(objCOTAN, "first_clusterization")
}
clusterizations <- getClusterizations(objCOTAN, dropNoCoex = TRUE)
stopifnot(length(clusterizations) == 1)
cellsUmapPlotAndDF <- cellsUMAPPlot(objCOTAN, dataMethod = "LogNormalized",
clName = "first_clusterization",
genesSel = "HVG_Seurat")
plot(cellsUmapPlotAndDF[["plot"]])
enrichment <- geneSetEnrichment(clustersCoex = coexDF,
groupMarkers = groupMarkers)
clHeatmapPlotAndData <- clustersMarkersHeatmapPlot(objCOTAN, groupMarkers)
conditions <- as.integer(substring(getCells(objCOTAN), 3L))
conditions <- factor(ifelse(conditions <= 600, "L", "H"))
names(conditions) <- getCells(objCOTAN)
clHeatmapPlotAndData2 <-
clustersMarkersHeatmapPlot(objCOTAN, groupMarkers, kCuts = 2,
condNameList = list("High/Low"),
conditionsList = list(conditions))
clName <- getClusterizationName(objCOTAN)
clusterDataList <- getClusterizationData(objCOTAN, clName = clName)
clusters <- getClusters(objCOTAN, clName = clName)
allClustersCoexDF <- getClustersCoex(objCOTAN)
summaryData <- clustersSummaryData(objCOTAN)
treePlotAndObj <- clustersTreePlot(objCOTAN, 2)
objCOTAN <- treePlotAndObj[["objCOTAN"]]
plot(treePlotAndObj[["dend"]])
clMarkers <- findClustersMarkers(objCOTAN, markers = list(),
clusters = clusters)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.