#' Rank patients by their contribution to the X %*% t(X) matrix
#'
#' Computes \code{tcrossprod(TCGA_data)}, divides each column sum of the
#' result by the number of rows to obtain one score per row of
#' \code{TCGA_data}, and returns the rows ordered from the highest to the
#' lowest score.
#'
#' @param TCGA_data Preprocessed TCGA dataset to be used. Its row names are
#'   used as the patient names; if it has no row names, the row positions
#'   (\code{"1"}, \code{"2"}, ...) are used instead.
#' @param numReturn The number of top patients to be returned. Default is
#'   entire set. If it exceeds the number of patients a warning is issued and
#'   at most 50 patients (never more than are available) are returned.
#'
#' @return A list with two elements: \code{patientSums}, a data frame with
#'   columns \code{Patients} and \code{Rank} sorted by decreasing \code{Rank}
#'   and truncated to \code{numReturn} rows, and \code{mean}, the mean score
#'   over all patients.
#' @export
#'
#' @examples
#' data(OV)
#' X = TCGA_cleanData(OV)
#' topPatients = TCGA_getImportantPatients(X)
TCGA_getImportantPatients <- function(TCGA_data, numReturn = NULL) {
  # X %*% t(X): one row and one column per row of the input
  XXt <- tcrossprod(TCGA_data)
  nPatients <- nrow(XXt)

  # per-patient score: column sum divided by the number of patients
  scores <- colSums(XXt) / nPatients

  # fall back to positional labels when the input carries no row names
  patientNames <- rownames(TCGA_data)
  if (is.null(patientNames)) {
    patientNames <- as.character(seq_len(nPatients))
  }

  # Reorder names and scores together. Sorting only the score column would
  # leave the names in input order and pair them with the wrong scores.
  ranking <- order(scores, decreasing = TRUE)
  data <- data.frame(
    Patients = patientNames[ranking],
    Rank = unname(scores[ranking])
  )

  if (is.null(numReturn)) {
    # no specific number requested: return everything
    numReturn <- nPatients
  } else if (numReturn > nPatients) {
    # Requested more than exist: fall back to 50, but never beyond the
    # available rows (indexing past the end would append NA rows).
    fallback <- min(50, nPatients)
    warning(paste(
      "Cannot return", numReturn, "because there are only", nPatients,
      "to plot. Instead returning", fallback
    ))
    numReturn <- fallback
  }

  # seq_len() (unlike 1:n) yields an empty selection when numReturn is 0
  list(patientSums = data[seq_len(numReturn), ], mean = mean(scores))
}
#' Barplot of the top-ranked patients
#'
#' Draws a barplot of the first \code{numPlotted} rows of \code{patientSums},
#' using the second column as bar heights and the first column as bar labels.
#'
#' @param patientSums patientSums element of the result from running
#'   \code{TCGA_getImportantPatients}: the first column holds the patient IDs
#'   and the second column the values to plot.
#' @param numPlotted Number of patients to be plotted (default is 15). If it
#'   exceeds the number of rows in \code{patientSums} a warning is issued and
#'   at most 15 patients (never more than are available) are plotted.
#' @param mean mean result from running \code{TCGA_getImportantPatients}
#'   (optional). When supplied, a red horizontal line is drawn at this value
#'   and a legend is added.
#' @param fullID Do you want the full patient ID plotted? (Default is FALSE,
#'   which labels each bar with characters 6 through 12 of the ID.)
#'
#' @return Barplot of patient importance
#' @export
#'
#' @examples
#' data(OV)
#' X = TCGA_cleanData(OV)
#' topPatients = TCGA_getImportantPatients(X)
#' TCGA_plotImportantPatients(topPatients$patientSums, 20, topPatients$mean)
TCGA_plotImportantPatients <- function(patientSums, numPlotted = 15, mean = NULL, fullID = FALSE) {
  # check that the number of patients is reasonable to plot
  available <- nrow(patientSums)
  if (numPlotted > available) {
    # Fall back to 15, but never beyond the rows that exist; indexing past
    # the end would produce NA bars and NA labels.
    fallback <- min(15, available)
    warning(paste(
      "Cannot plot", numPlotted, "because there are only", available,
      "to plot. Instead plotting", fallback
    ))
    numPlotted <- fallback
  }

  # set up labels and data
  label <- "Barplot of Patients That Contibute Most to XX^t Matrix"
  shown <- seq_len(numPlotted)
  x <- patientSums[shown, 2]
  ids <- as.character(patientSums[shown, 1])
  # short labels keep only characters 6-12 of each ID unless fullID is set
  namesPlot <- if (fullID) ids else substr(ids, 6, 12)

  if (is.null(mean)) {
    # no mean provided: bars only (this branch draws no axes)
    barplot(x, main = label, names.arg = namesPlot, las = 2, axes = FALSE)
  } else {
    # mean supplied: add a horizontal reference line and a legend
    barplot(x, main = label, names.arg = namesPlot, las = 2)
    abline(h = mean, col = "red")
    legend("topright", legend = c("Mean of all patients"), fill = c("red"))
  }
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.