R/FindImportantPatients.R

Defines functions TCGA_getImportantPatients

Documented in TCGA_getImportantPatients

#' Rank patients by their contribution to the X %*% t(X) matrix
#'
#' Computes X %*% t(X) for the supplied data, scores every patient (row of
#' \code{TCGA_data}) by the mean of the matching column of that matrix, and
#' returns the patients ordered from the highest to the lowest score.
#'
#' @param TCGA_data Preprocessed TCGA dataset to be used, with one row per
#'   patient. Row names are used as patient names; if there are none, the row
#'   indices are used instead.
#' @param numReturn The number of top patients to be returned. Default is
#'   entire set. If it exceeds the number of patients a warning is issued and
#'   at most 50 patients (never more than are available) are returned.
#'
#' @return A list with two elements: \code{patientSums}, a data frame with the
#'   columns \code{Patients} (patient name) and \code{Rank} (column mean of
#'   X %*% t(X) for that patient) sorted by decreasing \code{Rank}, and
#'   \code{mean}, the mean of \code{Rank} over all patients.
#' @export
#'
#' @examples
#' data(OV)
#' X = TCGA_cleanData(OV)
#' topPatients = TCGA_getImportantPatients(X)
TCGA_getImportantPatients = function( TCGA_data, numReturn = NULL )
{
  # get X %*% t(X)
  XXt = tcrossprod( TCGA_data )

  # per-patient score: column sum of XXt divided by the number of patients
  scores = colSums( XXt ) / nrow( XXt )

  # patient names; fall back to row indices when the input has no row names
  patients = rownames( TCGA_data )
  if ( is.null( patients ) )
  {
    patients = as.character( seq_len( nrow( XXt ) ) )
  }

  # combine names and scores, then reorder whole rows so that every patient
  # stays attached to its own score (sorting the score column alone would
  # mislabel the patients)
  data = data.frame( Patients = patients, Rank = unname( scores ), stringsAsFactors = FALSE )
  data = data[order( data$Rank, decreasing = TRUE ), , drop = FALSE]
  rownames( data ) = NULL

  # mean over all patients, independent of how many rows are returned
  overallMean = mean( scores )

  available = nrow( data )

  # if there is no specified number to return, return all
  if ( is.null( numReturn ) )
  {
    numReturn = available
  }
  # if numReturn is too large, warn the user and fall back to at most 50 rows,
  # capped at what is available so no NA padding rows are produced
  else if ( numReturn > available )
  {
    fallback = min( 50, available )
    warning( paste( "Cannot return", numReturn, "because there are only", available, "to plot. Instead returning", fallback ) )
    numReturn = fallback
  }

  return( list( patientSums = data[seq_len( numReturn ), , drop = FALSE], mean = overallMean ) )
}

#' Barplot of the most important patients
#'
#' Draws a barplot of the first \code{numPlotted} rows of \code{patientSums},
#' optionally with a horizontal red line marking \code{mean}.
#'
#' @param patientSums patientSums element of the result from running
#'   TCGA_getImportantPatients: patient IDs in the first column and their
#'   scores in the second column
#' @param numPlotted Number of patients to be plotted (default is 15). If it
#'   exceeds the number of rows in \code{patientSums} a warning is issued and
#'   at most 15 patients (never more than are available) are plotted.
#' @param mean mean element of the result from running
#'   TCGA_getImportantPatients (optional)
#' @param fullID Do you want the full patient ID plotted? (Default is FALSE,
#'   in which case only characters 6 to 12 of each ID are used as bar labels)
#'
#' @return Barplot of patient importance
#' @export
#'
#' @examples
#' data(OV)
#' X = TCGA_cleanData(OV)
#' topPatients = TCGA_getImportantPatients(X)
#' TCGA_plotImportantPatients(topPatients$patientSums, 20, topPatients$mean)
TCGA_plotImportantPatients = function ( patientSums, numPlotted = 15, mean = NULL, fullID = FALSE )
{
  # check that the number of patients is reasonable to plot; the fallback is
  # capped at the available rows so no NA bars or NA labels are drawn
  available = nrow( patientSums )
  if ( numPlotted > available )
  {
    fallback = min( 15, available )
    warning( paste( "Cannot plot", numPlotted, "because there are only", available, "to plot. Instead plotting", fallback ) )
    numPlotted = fallback
  }

  # set up labels and data
  shown = seq_len( numPlotted )
  label = "Barplot of Patients That Contibute Most to XX^t Matrix"
  x = patientSums[shown, 2]
  ids = as.character( patientSums[shown, 1] )

  # bar labels: the full ID on request, otherwise characters 6 to 12 of it
  if ( fullID )
  {
    namesPlot = ids
  }
  else
  {
    namesPlot = substr( ids, 6, 12 )
  }

  # plot if no mean is provided
  # NOTE(review): axes are suppressed only in this branch; kept as in the
  # original, confirm whether that is intended
  if ( is.null( mean ) )
  {
    barplot( x, main = label, names.arg = namesPlot, las = 2, axes = FALSE )
  }

  # if mean is supplied, add a horizontal line for the mean
  else
  {
    barplot( x, main = label, names.arg = namesPlot, las = 2 )
    abline( h = mean, col = "red" )
    legend( "topright", legend = c( "Mean of all patients" ), fill = c( "red" ) )
  }
}
rshudde/RJclust documentation built on Dec. 8, 2019, 4:06 p.m.