# pipe.CellTypeProportions.R -- do various ways of calling immune cell subset proportions from gene transcription
`pipe.CellTypeProportions` <- function( sampleID, annotationFile="Annotation.txt", optionsFile="Options.txt",
speciesID=getCurrentSpecies(), results.path=NULL, geneUniverse=NULL, genePercentage=NULL,
recalculate=c("none", "all", "missing", "profile", "deconvolution", "nls", "GenSA","steep"),
mode=c("weighted.mean","average","best"), makePlots=c( "final", "none", "all"),
verbose=TRUE, ...) {
if ( length(sampleID) > 1) {
cat( "\nWarning: expected a single sample ID..")
sampleID <- sampleID[1]
}
# get needed paths, etc. from the options file
optT <- readOptionsTable( optionsFile)
annT <- readAnnotationTable( annotationFile)
# set up for this species
if ( speciesID != getCurrentSpecies()) {
setCurrentSpecies(speciesID)
}
prefix <- getCurrentSpeciesFilePrefix()
if ( is.null( results.path)) {
results.path <- getOptionValue( optT, "results.path", notfound=".", verbose=F)
}
celltype.path <- file.path( results.path, "CellTypeProportions", sampleID)
if ( ! file.exists( celltype.path)) dir.create( celltype.path, recursive=TRUE)
# we will use the transcriptome as our input data
transcriptFile <- file.path( results.path, "transcript", paste( sampleID, prefix, "Transcript.txt", sep="."))
if ( ! file.exists( transcriptFile)) {
cat( "\nError: required transcriptome result file not found: ", transcriptFile)
return( NULL)
}
# grab the gene expression values for later RMS deviation calcuations
unitsColumn <- getExpressionUnitsColumn( optionsFile, verbose=verbose)
units <- sub( "_[MU]$", "", unitsColumn)
sampleMatrix <- expressionFileSetToMatrix( transcriptFile, sampleID)
# set up the current defined cell type details
CellTypeSetup( reload=TRUE)
# get the details of the cell type data
cellTypeColors <- getCellTypeColors()
cellTypeNames <- names(cellTypeColors)
N_CellTypes <- length( cellTypeColors)
reference <- getCellTypeReference()
cellTypeMatrix <- getCellTypeMatrix()
# let the caller reduce the gene universe by percentage instead of by naming genes
if ( ! is.null( genePercentage)) {
if ( ! is.null( geneUniverse)) {
cat( "\n Warning: only one of 'geneUniverse' or 'genePercentage' can be non-NULL")
return( NULL)
}
genePercentage <- as.numeric( genePercentage)
if ( genePercentage > 100) {
cat( "\n Warning: 'genePercentage' must be in the range (0 to 1) or (2 to 100)")
return( NULL)
} else if ( genePercentage > 1.0) {
genePercentage <- genePercentage / 100
}
# get the target matrix, and decide which genes we will keep, least DE get discarded
de <- apply( cellTypeMatrix, 1, function(x) diff( range( x, na.rm=T)) / max( mean(x,na.rm=T,1)))
ord <- order( de, decreasing=T)
# decide how many genes we keep
nGenesKeep <- round( nrow(cellTypeMatrix) * genePercentage)
# that gives us our universe of genes
keep <- ord[ 1:nGenesKeep]
geneUniverse <- rownames(cellTypeMatrix)[ keep]
cat( "\nPercentage of genes to use in fitting =", genePercentage*100, "\nThus reducing to a universe of", nGenesKeep, "most DE genes.\n")
}
# name for the file of results
celltypeDetailsFile <- file.path( celltype.path, paste( sampleID, prefix, reference, "Proportions.csv", sep="."))
celltypeStatsFile <- file.path( celltype.path, paste( sampleID, prefix, reference, "Fit.Statistics.csv", sep="."))
celltypePlotFile <- file.path( celltype.path, paste( sampleID, prefix, reference, "Proportions.Final.pdf", sep="."))
pcts1 <- pcts2 <- pcts3 <- pcts4 <- pcts5 <- pcts6 <- rep.int( NA, N_CellTypes)
pcts7 <- pcts8 <- pcts9 <- pcts10 <- pcts11 <- pcts12 <- rep.int( NA, N_CellTypes)
names(pcts1) <- names(pcts2) <- names(pcts3) <- names(pcts4) <- names(pcts5) <- names(pcts6) <- cellTypeNames
names(pcts7) <- names(pcts8) <- names(pcts9) <- names(pcts10) <- names(pcts11) <- names(pcts12) <- cellTypeNames
TestNames <- c( "Fit.Steep", "Fit.NLS", "Fit.GenSA", "Deconv.NLS", "Deconv.GenSA", "Deconv.Steep",
"Fit.Steep.QN", "Fit.NLS.QN", "Fit.GenSA.QN", "Deconv.NLS.QN", "Deconv.GenSA.QN", "Deconv.Steep.QN")
recalculate <- match.arg( recalculate)
makePlots <- match.arg( makePlots)
deconvPlot <- (makePlots %in% c( "final", "all"))
cellAns <- NULL
if ( file.exists(celltypeDetailsFile)) {
cellAns <- read.csv( celltypeDetailsFile, as.is=T)
# make sure existing data is in the currently expected order
cellWhere <- match( cellTypeNames, cellAns$CellType)
cellAns <- cellAns[ cellWhere, ]
if ( TestNames[1] %in% colnames(cellAns)) pcts1 <- as.numeric( cellAns[[ TestNames[1]]])
if ( TestNames[2] %in% colnames(cellAns)) pcts2 <- as.numeric( cellAns[[ TestNames[2]]])
if ( TestNames[3] %in% colnames(cellAns)) pcts3 <- as.numeric( cellAns[[ TestNames[3]]])
if ( TestNames[4] %in% colnames(cellAns)) pcts4 <- as.numeric( cellAns[[ TestNames[4]]])
if ( TestNames[5] %in% colnames(cellAns)) pcts5 <- as.numeric( cellAns[[ TestNames[5]]])
if ( TestNames[6] %in% colnames(cellAns)) pcts6 <- as.numeric( cellAns[[ TestNames[6]]])
if ( TestNames[7] %in% colnames(cellAns)) pcts7 <- as.numeric( cellAns[[ TestNames[7]]])
if ( TestNames[8] %in% colnames(cellAns)) pcts8 <- as.numeric( cellAns[[ TestNames[8]]])
if ( TestNames[9] %in% colnames(cellAns)) pcts9 <- as.numeric( cellAns[[ TestNames[9]]])
if ( TestNames[10] %in% colnames(cellAns)) pcts10 <- as.numeric( cellAns[[ TestNames[10]]])
if ( TestNames[11] %in% colnames(cellAns)) pcts11 <- as.numeric( cellAns[[ TestNames[11]]])
if ( TestNames[12] %in% colnames(cellAns)) pcts12 <- as.numeric( cellAns[[ TestNames[12]]])
}
if (recalculate != "none" || is.null(cellAns)) {
cat( "\nProcessing Sample: ", sampleID, "\n")
# we have 2 different methods, using 3 different fit tools, and 2 different reference normalization modes. Do them all and average.
myColor <- annT$Color[ match( sampleID, annT$SampleID)]
if ( is.null(myColor) || is.na(myColor) || myColor == "") myColor <- "orchid1"
# 1) Steepest Descent of the 27-dimensional immune profile
if ( is.null(cellAns) || (recalculate %in% c("all","profile","steep")) || (recalculate == "missing" && all(is.na(pcts1)))) {
cat( "\n1. Cell Type Profile: Fit Dimension Proportions by Steepest Descent:\n")
ans1 <- fitCellTypeProfileFromFile( f=transcriptFile, sid=sampleID, col=myColor, max.iterations=200,
makePlots=makePlots, plot.path=celltype.path, algorithm='steep',
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans1)) {
pcts1 <- ans1$CellProportions
cellWhere <- match( cellTypeNames, names(pcts1))
pcts1 <- pcts1[ cellWhere]
}
}
# 2) Nonlinear Least Squares of the 27-dimensional immune profile
if ( is.null(cellAns) || (recalculate %in% c("all","profile","nls")) || (recalculate == "missing" && all(is.na(pcts2)))) {
cat( "\n2. Cell Type Profile: Fit Dimension Proportions by Nonlinear Least Squares (NLS):\n")
ans2 <- fitCellTypeProfileFromFile( f=transcriptFile, sid=sampleID, col=myColor,
makePlots=makePlots, plot.path=celltype.path, algorithm='nls',
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans2)) {
pcts2 <- ans2$CellProportions
cellWhere <- match( cellTypeNames, names(pcts2))
pcts2 <- pcts2[ cellWhere]
}
}
# 3) GenSA of the 27-dimensional immune profile
if ( is.null(cellAns) || (recalculate %in% c("all","profile","GenSA")) || (recalculate == "missing" && all(is.na(pcts3)))) {
cat( "\n3. Cell Type Profile: Fit Dimension Proportions by Simulated Annealing (GenSA):\n")
ans3 <- fitCellTypeProfileFromFile( f=transcriptFile, sid=sampleID, col=myColor,
makePlots=makePlots, plot.path=celltype.path, algorithm='GenSA',
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans3)) {
pcts3 <- ans3$CellProportions
cellWhere <- match( cellTypeNames, names(pcts3))
pcts3 <- pcts3[ cellWhere]
}
}
# 4) Transcriptome Deconvolution, by NLS using the 'port' algorithm...
if ( is.null(cellAns) || (recalculate %in% c("all","deconvolution","nls")) || (recalculate == "missing" && all(is.na(pcts4)))) {
cat( "\n4. Cell Type Deconvolution: Fit Gene", units, "by Nonlinear Least Squares (NLS):\n")
ans4 <- fileSet.TranscriptDeconvolution( files=transcriptFile, fids=sampleID, algorithm="port",
useLog=FALSE, plot=deconvPlot, plot.path=celltype.path, plot.col=cellTypeColors,
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans4)) {
# deconvolution tool returns weights. Trun them to percentages
pcts4 <- ans4$BestFit * 100
cellWhere <- match( cellTypeNames, rownames(pcts4))
pcts4 <- pcts4[ cellWhere, 1]
}
}
# 5) Transcriptome Deconvolution, by NLS using the 'GenSA' simulated annealing algorithm...
if ( is.null(cellAns) || (recalculate %in% c("all","deconvolution","GenSA")) || (recalculate == "missing" && all(is.na(pcts5)))) {
cat( "\n5. Cell Type Deconvolution: Fit Gene", units, "by Simulated Annealing (GenSA):\n")
ans5 <- fileSet.TranscriptDeconvolution( files=transcriptFile, fids=sampleID, algorithm="GenSA",
useLog=FALSE, plot=deconvPlot, plot.path=celltype.path, plot.col=cellTypeColors,
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans5)) {
# deconvolution tool returns weights. Trun them to percentages
pcts5 <- ans5$BestFit * 100
cellWhere <- match( cellTypeNames, rownames(pcts5))
pcts5 <- pcts5[ cellWhere, 1]
}
}
# 6) Transcriptome Deconvolution, by Steepest Descent...
if ( is.null(cellAns) || (recalculate %in% c("all","deconvolution","steep")) || (recalculate == "missing" && all(is.na(pcts6)))) {
cat( "\n6. Cell Type Deconvolution: Fit Gene", units, "by Steepest Descent:\n")
ans6 <- fileSet.TranscriptDeconvolution( files=transcriptFile, fids=sampleID, algorithm="steep",
useLog=FALSE, plot=deconvPlot, plot.path=celltype.path, plot.col=cellTypeColors,
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans6)) {
# deconvolution tool returns weights. Trun them to percentages
pcts6 <- ans6$BestFit * 100
cellWhere <- match( cellTypeNames, rownames(pcts6))
pcts6 <- pcts6[ cellWhere, 1]
}
}
}
# save a copy of the original cell type matrix
cellTypeMatrix.orig <- cellTypeMatrix
# now modify the shape of the cell type data to reflect the actual transcriptome we are fitting against
reshapeCellTypeMatrix( f=transcriptFile, intensityColumn=unitsColumn, verbose=verbose)
cellTypeMatrix <- getCellTypeMatrix()
cellTypeMatrix.qn <- cellTypeMatrix
# now call all 6 tools again, using this normalized reference
if (recalculate != "none" || is.null(cellAns)) {
# 7) Steepest Descent of the 27-dimensional immune profile
if ( is.null(cellAns) || (recalculate %in% c("all","profile","steep")) || (recalculate == "missing" && all(is.na(pcts7)))) {
cat( "\n7. Cell Type Profile: Fit QN Dimension Proportions by Steepest Descent:\n")
ans7 <- fitCellTypeProfileFromFile( f=transcriptFile, sid=sampleID, col=myColor, max.iterations=200,
makePlots=makePlots, plot.path=celltype.path, algorithm='steep',
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans7)) {
pcts7 <- ans7$CellProportions
cellWhere <- match( cellTypeNames, names(pcts7))
pcts7 <- pcts7[ cellWhere]
}
}
# 8) Nonlinear Least Squares of the 27-dimensional immune profile
if ( is.null(cellAns) || (recalculate %in% c("all","profile","nls")) || (recalculate == "missing" && all(is.na(pcts8)))) {
cat( "\n8. Cell Type Profile: Fit QN Dimension Proportions by Nonlinear Least Squares (NLS):\n")
ans8 <- fitCellTypeProfileFromFile( f=transcriptFile, sid=sampleID, col=myColor,
makePlots=makePlots, plot.path=celltype.path, algorithm='nls',
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans8)) {
pcts8 <- ans8$CellProportions
cellWhere <- match( cellTypeNames, names(pcts8))
pcts8 <- pcts8[ cellWhere]
}
}
# 9) GenSA of the 27-dimensional immune profile
if ( is.null(cellAns) || (recalculate %in% c("all","profile","GenSA")) || (recalculate == "missing" && all(is.na(pcts9)))) {
cat( "\n9. Cell Type Profile: Fit QN Dimension Proportions by Simulated Annealing (GenSA):\n")
ans9 <- fitCellTypeProfileFromFile( f=transcriptFile, sid=sampleID, col=myColor,
makePlots=makePlots, plot.path=celltype.path, algorithm='GenSA',
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans9)) {
pcts9 <- ans9$CellProportions
cellWhere <- match( cellTypeNames, names(pcts9))
pcts9 <- pcts9[ cellWhere]
}
}
# 10) Transcriptome Deconvolution, by NLS using the 'port' algorithm...
if ( is.null(cellAns) || (recalculate %in% c("all","deconvolution","nls")) || (recalculate == "missing" && all(is.na(pcts10)))) {
cat( "\n10. Cell Type Deconvolution: Fit QN Gene", units, "by Nonlinear Least Squares (NLS):\n")
ans10 <- fileSet.TranscriptDeconvolution( files=transcriptFile, fids=sampleID, algorithm="port",
useLog=FALSE, plot=deconvPlot, plot.path=celltype.path, plot.col=cellTypeColors,
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans10)) {
# deconvolution tool returns weights. Trun them to percentages
pcts10 <- ans10$BestFit * 100
cellWhere <- match( cellTypeNames, rownames(pcts10))
pcts10 <- pcts10[ cellWhere, 1]
}
}
# 11) Transcriptome Deconvolution, by NLS using the 'GenSA' simulated annealing algorithm...
if ( is.null(cellAns) || (recalculate %in% c("all","deconvolution","GenSA")) || (recalculate == "missing" && all(is.na(pcts11)))) {
cat( "\n11. Cell Type Deconvolution: Fit QN Gene", units, "by Simulated Annealing (GenSA):\n")
ans11 <- fileSet.TranscriptDeconvolution( files=transcriptFile, fids=sampleID, algorithm="GenSA",
useLog=FALSE, plot=deconvPlot, plot.path=celltype.path, plot.col=cellTypeColors,
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans11)) {
# deconvolution tool returns weights. Trun them to percentages
pcts11 <- ans11$BestFit * 100
cellWhere <- match( cellTypeNames, rownames(pcts11))
pcts11 <- pcts11[ cellWhere, 1]
}
}
# 12) Transcriptome Deconvolution, by Steepest Descent...
if ( is.null(cellAns) || (recalculate %in% c("all","deconvolution","steep")) || (recalculate == "missing" && all(is.na(pcts12)))) {
cat( "\n12. Cell Type Deconvolution: Fit QN Gene", units, "by Steepest Descent:\n")
ans12 <- fileSet.TranscriptDeconvolution( files=transcriptFile, fids=sampleID, algorithm="steep",
useLog=FALSE, plot=deconvPlot, plot.path=celltype.path, plot.col=cellTypeColors,
geneUniverse=geneUniverse, verbose=verbose)
if ( ! is.null(ans12)) {
# deconvolution tool returns weights. Trun them to percentages
pcts12 <- ans12$BestFit * 100
cellWhere <- match( cellTypeNames, rownames(pcts12))
pcts12 <- pcts12[ cellWhere, 1]
}
}
}
# restore the non-normalized cell type details after all QN steps are done
CellTypeSetup( reload=TRUE)
# done with all methods, now merge
cellM <- matrix( NA, nrow=N_CellTypes, ncol=12)
colnames(cellM) <- TestNames
cellM[ ,1] <- pcts1
cellM[ ,2] <- pcts2
cellM[ ,3] <- pcts3
cellM[ ,4] <- pcts4
cellM[ ,5] <- pcts5
cellM[ ,6] <- pcts6
cellM[ ,7] <- pcts7
cellM[ ,8] <- pcts8
cellM[ ,9] <- pcts9
cellM[ ,10] <- pcts10
cellM[ ,11] <- pcts11
cellM[ ,12] <- pcts12
# catch any that are all zero
isAllZero <- which( apply( cellM, 2, function(x) all( x == 0, na.rm=T)))
if ( length(isAllZero)) cellM[ , isAllZero] <- NA
# let's independently eavaluate the RMS and COD fit for every method
evalAns1 <- rms.cod.evaluation( obsMatrix=sampleMatrix, calcMatrix=cellTypeMatrix.orig, calcPcts=cellM[ ,1:6], geneUniverse=geneUniverse)
evalAns2 <- rms.cod.evaluation( obsMatrix=sampleMatrix, calcMatrix=cellTypeMatrix.qn, calcPcts=cellM[ ,7:12], geneUniverse=geneUniverse)
rmsdAns <- c( evalAns1$rmsd, evalAns2$rmsd)
codAns <- c( evalAns1$cod, evalAns2$cod)
if (verbose) {
cat( "\nGene expression RMS Deviation by method:\n")
print( rmsdAns)
cat( "Gene expression COD (Coefficient of Determination) by method:\n")
print( codAns)
}
# final answer is either the average or the one method with lowest RMS
# final answer will now be actual 'proportion', not 'percentage'. Not summing to 100% anymore.
mode <- match.arg( mode)
if ( mode == "weighted.mean") {
# since COD is usually in 0..1, it is a better metric than RMS for doing the weights
# but the COD can in fact go negative... Can we deal with that cleanly? As there 'could'
# be cases where ALL are negative, but some clearly better than others...
cod <- codAns
cod[ is.na(cod)] <- min( c(0,cod), na.rm=T)
codRange <- range( cod)
# set the range to allow all CoD to have some vote
if ( codRange[1] > 0) {
codRange[1] <- codRange[1] / 2
} else {
codRange[1] <- codRange[1] - abs(codRange[1]/2)
}
normCOD <- (cod - codRange[1])/diff(codRange)
wts <- normCOD ^ 3
wts <- round(wts, digits=4)
names(wts) <- names(codAns)
if (verbose) {
cat( "\nFit Weights by 'Weighted Mean':\n")
print( wts);
}
cellMean <- apply( cellM, 1, weighted.mean, w=wts, na.rm=T)
cellMean <- round( cellMean, digits=5)
if (verbose) cat( "\nFinal answer is weighted mean of all methods, using relative CoD squared as weights")
} else if ( mode == "average") {
wts <- rep.int( 1, 12)
names(wts) <- names(codAns)
if (verbose) {
cat( "\nFit Weights by 'Average':\n")
print( wts);
}
cellMean <- apply( cellM, 1, mean, na.rm=T)
cellMean <- round( cellMean, digits=5)
if (verbose) cat( "\nFinal answer is average of all methods")
} else {
best <- which.max(codAns)[1]
wts <- rep.int( 0, 12)
wts[ best] <- 1
names(wts) <- names(codAns)
if (verbose) {
cat( "\nFit Weights by 'Best':\n")
print( wts);
}
cellMean <- round( cellM[ , best], digits=5)
if (verbose) cat( "\nFinal answer is method with best (highest) CoD: ", names(codAns)[best])
}
cellAns <- data.frame( "CellType"=cellTypeNames, "Final.Proportions"=cellMean, round(cellM,digits=3), stringsAsFactors=F)
write.table( cellAns, celltypeDetailsFile, sep=",", quote=T, row.names=F)
statsM <- matrix( c( rmsdAns, codAns, wts), nrow=12, ncol=3)
rownames(statsM) <- names(rmsdAns)
statsAns <- data.frame( "Statistic"=c("RMSD", "CoD", "Weight"), t(statsM), stringsAsFactors=F)
write.table( statsAns, celltypeStatsFile, sep=",", quote=T, row.names=F)
# make a plot image of the final average
if ( makePlots != "none") {
tmpM <- cbind( cellM, cellMean)
rownames(tmpM) <- cellTypeNames
colnames(tmpM) <- sub( "\\.", "\n", c( colnames(cellM), "Final.Proportions"))
plotAns <- plotTranscriptProportions( tmpM, col=cellTypeColors, label=paste( sampleID, ": Final average of all methods", sep=""), ...)
text( c(-0.05,plotAns$bar.centers), rep.int(101.5,14), c("CoD=", as.character(round(codAns,dig=3)),""), cex=0.65, col=1, xpd=NA)
text( c(-0.05,plotAns$bar.centers), rep.int(-1.5,14), c("Wt=", as.character(round(wts,dig=3)),""), cex=0.65, col=1, xpd=NA)
dev.flush()
printPlot( celltypePlotFile, width=12, height=9)
}
out <- data.frame( "SampleID"=sampleID, cellAns, stringsAsFactors=F)
return( invisible( out))
}
`rms.cod.evaluation` <- function( obsMatrix, calcMatrix, calcPcts, geneUniverse=NULL, DEBUG=FALSE) {
# measure the Root Mean Square (RMS) deviation and the CoD of each set of fitted cell type percentages
# first get the set of genes to use
obsGenes <- shortGeneName( rownames( obsMatrix), keep=1)
calcGenes <- rownames( calcMatrix)
useGenes <- sort( intersect( obsGenes, calcGenes))
if ( ! is.null( geneUniverse)) {
geneUniverse <- as.GeneUniverse( geneUniverse)
useGenes <- intersect( useGenes, geneUniverse)
}
NG <- length( useGenes)
whereObs <- match( useGenes, obsGenes)
whereCalc <- match( useGenes, calcGenes)
# force the reference to scale match the observed, to be exactly what the deconvolution tools saw
NcellTypes <- nrow(calcPcts)
Nmethods <- ncol( calcPcts)
obsValue <- obsMatrix[ whereObs, 1]
obsSum <- sum( obsValue, na.rm=T)
calcValues <- calcMatrix[ whereCalc, ]
calcSums <- apply( calcValues, 2, sum, na.rm=T)
calcScales <- obsSum / calcSums
for ( j in 1:NcellTypes) calcValues[ , j] <- calcValues[ ,j] * calcScales[j]
if (DEBUG) {
cat( "\n Debug: Total Observed Expression: ", obsSum)
cat( "\n Debug: Total Calculated Expression: \n")
print( apply( calcValues, 2, sum, na.rm=T))
}
# now calc the deviation for each proportions answer
rmsOut <- codOut <- rep.int( NA, Nmethods)
names(rmsOut) <- names(codOut) <- colnames(calcPcts)
for ( j in 1:Nmethods) {
if ( all( is.na( calcPcts[ ,j]))) next
# build a calculated transcriptome
thisCalcValue <- rep.int( 0, NG)
for ( i in 1:NcellTypes) {
# turn the given percentages back to weights
thisCellValue <- calcValues[ , i] * calcPcts[ i, j] / 100
thisCellValue[ is.na( thisCellValue)] <- 0
thisCalcValue <- thisCalcValue + thisCellValue
}
resSq <- (obsValue - thisCalcValue) ^ 2
rms <- sqrt( mean( resSq))
rmsOut[j] <- round( rms, digits=3)
SSres <- sum( resSq)
obsSq <- (obsValue - mean(obsValue)) ^ 2
SStot <- sum( obsSq)
cod <- 1 - (SSres/SStot)
codOut[j] <- round( cod, digits=3)
}
return( list( "rmsd"=rmsOut, "cod"=codOut))
}
`pipe.CellTypeCompare` <- function( sampleIDset, groups=sampleIDset, levels=sort(unique(as.character(groups))),
annotationFile="Annotation.txt", optionsFile="Options.txt",
speciesID=getCurrentSpecies(), results.path=NULL, cellProportionsColumn="Final.Proportions",
prefix=NULL,
minPerGroup=3, test=t.test, plot=TRUE, plot.mode=c("bars", "auto", "pie", "lines"),
label="", wt.fold=1, wt.pvalue=2, min.percent=0.1,
significance.scaling=TRUE, verbose=TRUE, ...) {
# get needed paths, etc. from the options file
optT <- readOptionsTable( optionsFile)
annT <- readAnnotationTable( annotationFile)
# set up for this species
if ( speciesID != getCurrentSpecies()) {
setCurrentSpecies(speciesID)
}
if ( is.null( results.path)) {
results.path <- getOptionValue( optT, "results.path", notfound=".", verbose=F)
}
celltype.path <- file.path( results.path, "CellTypeProportions")
if ( ! file.exists( celltype.path)) {
cat( "\nExpected folder of Cell Type Proportion calls not found: ", celltype.path)
return(NULL)
}
# set up if needed
CellTypeSetup()
cellTypeColors <- getCellTypeColors()
N_CellTypes <- length( cellTypeColors)
reference <- getCellTypeReference()
# gather the cell type proportions data from every sample
NS <- length( sampleIDset)
if ( length(groups) != NS) stop( "Length of 'groups' must equal number of samples")
# allow passsing in of explicit prefix to allow mixed organism copmares..
if ( is.null( prefix)) {
prefix <- getCurrentSpeciesFilePrefix()
}
prefix <- rep( prefix, length.out=NS)
ctpM <- matrix( NA, nrow=N_CellTypes, ncol=NS)
colnames(ctpM) <- sampleIDset
rownames(ctpM) <- names( cellTypeColors)
for ( i in 1:NS) {
sid <- sampleIDset[i]
# name for file of results
my.celltype.path <- file.path( celltype.path, sid)
celltypeDetailsFile <- file.path( my.celltype.path, paste( sid, prefix[i], reference, "Proportions.csv", sep="."))
if ( ! file.exists(celltypeDetailsFile)) {
cat( "\nCell Type Proportions results not found for sample: ", sid, "|", celltypeDetailsFile)
next
}
cellAns <- read.csv( celltypeDetailsFile, as.is=T)
myCellPcts <- cellAns[[ cellProportionsColumn]]
if ( is.null( myCellPcts)) {
cat( "\nError: No column of cell type proportions file has name '", cellProportionColumn, "'", sep="")
return(NULL)
}
# stash these values, allowing some to be missing
myCellNames <- cellAns$CellType
where <- match( rownames(ctpM), myCellNames)
ctpM[ , i] <- myCellPcts[ where]
}
# now with this matrix of cell type percentages, call the comparison tool
plot.mode <- match.arg( plot.mode)
ans <- compareTranscriptProportions( ctpM, groups=groups, levels=levels, col=cellTypeColors,
minPerGroup=minPerGroup, test=test, plot=plot, plot.mode=plot.mode, label=label,
wt.fold=wt.fold, wt.pvalue=wt.pvalue, min.percent=min.percent,
significance.scaling=significance.scaling, ...)
# if we did plot, decide what to call it...
dev.type <- getPlotDeviceType( optT)
levelString <- paste( levels, collapse=".v.")
if ( length(levels) > 3) levelString <- paste( paste( levels[1:3], collapse=".v."), ".v.etc", sep="")
plotFile <- paste( "Compare.", prefix[1], ".", reference, "_", levelString, "_", toupper(plot.mode), ".Plot.", dev.type, sep="")
printPlot( file.path( celltype.path, plotFile))
# package up the final details
out <- vector( mode="list")
out[[1]] <- ctpM
names(out)[1] <- "Proportions.Matrix"
outFile <- file.path( celltype.path, paste( "All", prefix[1], reference, "Proportion.Details.csv", sep="."))
write.csv( ctpM, outFile)
if ( length(levels) < 3) {
out[[2]] <- ans
names(out)[2] <- "Comparison.Results"
outFile <- file.path( celltype.path, paste( prefix[1], ".", reference, ".Compare_", levelString, "_Details.csv", sep=""))
write.csv( ans, outFile, row.names=F)
} else {
for (j in 1:length(ans)) {
out[[j+1]] <- ans[[j]]
names(out)[j+1] <- names(ans)[j]
outFile <- file.path( celltype.path, paste( prefix[1], ".", reference, ".Compare_", names(ans)[j], "_Details.csv", sep=""))
write.csv( ans[[j]], outFile, row.names=F)
}
}
return( invisible( out))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.