Nothing
#' Load all cancer meta-analysis datasets
#'
#' Returns a list of lists containing all cancer meta-analysis datasets
#'
#'
#' @param tumour.only Logical indicating if we should only load tumour samples
#' (TRUE, the default)
#' @param with.survival.only Logical indicating if we should only load samples
#' with survival data (TRUE, the default)
#' @param truncate.survival A numeric value specifying survival truncation in
#' years. Defaults to 100 years which effectively means no truncation
#' @param datasets.to.load A vector of datasets to be loaded. If 'all', then
#' all available datasets are loaded
#' @param data.types A vector of molecular datatypes to load. Defaults to
#' c('mRNA')
#' @param datasets.file A file in data.directory containing a listing of all
#' usable datasets
#' @param data.directory A directory containing all data-files to be loaded
#' @param verbose Logical indicating whether or not status messages should be
#' given
#' @param subset A list with a Field and Entry component specifying a subset of
#' patients to be selected whose annotation Field matches Entry
#' @return Returns a meta-analysis list of lists
#' @author Syed Haider & Paul C. Boutros
#' @keywords IO
#' @examples
#'
#' data.dir <- get.program.defaults()[["test.data.dir"]];
#' x1 <- load.cancer.datasets(
#' datasets.to.load = c('Breastdata1'),
#' data.types = c("mRNA"),
#' data.directory = data.dir
#' );
#'
#' @export load.cancer.datasets
load.cancer.datasets <- function(tumour.only = TRUE, with.survival.only = TRUE, truncate.survival = 100, datasets.to.load = 'all', data.types = c('mRNA'), datasets.file = 'datasets.txt', data.directory = '.', verbose = FALSE, subset = NULL) {
# read in the listing of all datasets
datasets <- read.table(
file = paste(data.directory, "/", datasets.file, sep = ''),
header = TRUE,
sep = "\t",
row.names = NULL,
as.is = TRUE
);
# handle the user requesting all datasets
if ('all' %in% datasets.to.load) { datasets.to.load <- datasets$dataset; }
# make sure all requested datasets exist
if ( !all( datasets.to.load %in% datasets$dataset ) ) { stop('Non-existant dataset requested'); }
# initialize data objects
all.data <- list();
all.survobj <- list();
all.probesets <- vector();
# load all requested datasets one-by-one
for (dataset in datasets.to.load) {
# give the user some status info if requested
if (verbose) { cat('\nreading annotations for dataset: ', dataset); }
# set up the path to the dataset's files
dataset.directory <- paste(data.directory, dataset, '/', sep = '');
annotation <- read.table(
paste(dataset.directory, datasets$annotation[datasets$dataset == dataset], sep = ''),
header = TRUE,
row.names = 1,
sep = "\t"
);
# only extract tumour samples
if (tumour.only) {
annotation <- annotation[annotation$Tumour == "Yes",];
}
# if a subset of samples has been specified, discard any samples not belonging to the subset
if (!is.null(subset)) {
annotation <- annotation[annotation[[subset$Field]] == subset$Entry, ];
}
# select the appropriate survtime and survstat variable for each dataset
if (!("survstat" %in% colnames(annotation))) {
annotation$survstat <- annotation[,datasets$survstat[datasets$dataset == dataset]];
}
if (!("survtime" %in% colnames(annotation))) {
annotation$survtime <- annotation[,datasets$survtime[datasets$dataset == dataset]];
}
if (!("survtime.unit" %in% colnames(annotation))) {
annotation$survtime.unit <- annotation[,datasets$survtime.unit[datasets$dataset == dataset]];
}
# handle survtime <= 0
annotation$survtime[annotation$survtime <= 0] <- 1e-05;
# only keep samples with survival data
if (with.survival.only) {
annotation <- annotation[!is.na(annotation$survtime) & !is.na(annotation$survstat),];
}
# make R like rownames
rownames(annotation) <- make.names(as.character(rownames(annotation)));
common.samples <- rownames(annotation);
for (data.type in data.types) {
# ensure the data type has a corresponding column with exact name in datasets.txt
tryCatch(
expr = {
all.data[[data.type]][[dataset]] <- read.table(
paste(dataset.directory, datasets[[data.type]][datasets$dataset == dataset], sep = ""),
header = TRUE,
row.names = 1,
sep = "\t"
)
},
error = function(ex) {
stop("\nWell... you asked for data.type: [", data.type, "] but i cant find a column named as this data.type in datasets.txt - hence dieing");
}
);
# remove samples (columns) which have NA for all genes
all.data[[data.type]][[dataset]] <- all.data[[data.type]][[dataset]][,
apply(
all.data[[data.type]][[dataset]],
2,
function(x) any(!is.na(x))
)
];
# compile a list of samples that are common across all molecular features & survival
common.samples <- intersect(
common.samples,
colnames(all.data[[data.type]][[dataset]])
);
}
# limit to common samples only
annotation <- annotation[common.samples,];
# only keep the dataset if it has at least one sample
if (nrow(annotation) >= 1) {
for (data.type in data.types) {
# ensure equivalent sorting of the annotation and data objects
tryCatch(
expr = { all.data[[data.type]][[dataset]] <- all.data[[data.type]][[dataset]][, common.samples] },
error = function(ex) {
cat("\n\nSome columns in the data file do not match the rows in the annotation file\n\n");
}
);
all.probesets <- unique( c(all.probesets, rownames(all.data[[data.type]][[dataset]])) );
}
# save survival objects
all.survobj[[dataset]] <- SIMMS::create.survobj(annotation = annotation, truncate.survival = truncate.survival);
}
else {
stop("\n\nDataset: [", dataset, "] does not have any valid data to process. Please remove from feature selection, training and validation dataset vectors\n\n");
}
}
# return the final object
return(
list(
all.data = all.data,
all.survobj = all.survobj,
all.probesets = all.probesets
)
);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.