Nothing
# Functions that allow to filter the dataset by some criteria
# SUBSET functions - allow to define criteria for the information to keep
# returns dataset with selected set of samples
# samples - vector with indexes or names of the samples to select
"subset_samples" = function(dataset, samples, rebuild.factors = TRUE) {
dataset$metadata = dataset$metadata[samples,,drop= FALSE]
if (rebuild.factors) dataset$metadata = rebuild_factors_df(dataset$metadata)
dataset$data = dataset$data[,samples, drop=FALSE]
dataset
}
# selects set of samples by the value of a metadata variable
"subset_samples_by_metadata_values" = function(dataset, metadata.varname, values)
{
indexes = which(dataset$metadata[,metadata.varname] %in% values)
subset_samples(dataset, indexes)
}
# selects a random subset of nsamples from the dataset
# returns a new dataset with the selected samples
"subset_random_samples" = function(dataset, nsamples)
{
indexes = sample(num_samples(dataset), nsamples)
subset_samples(dataset, indexes)
}
"subset_x_values" = function(dataset, variables, by.index = FALSE) {
if (!by.index) {
variables = as.character(variables)
indexes = which(rownames(dataset$data) %in% variables)
}
else indexes = variables
dataset$data = dataset$data[indexes,,drop=FALSE]
dataset
}
"subset_x_values_by_interval" = function(dataset, min.value, max.value)
{
x.values = get_x_values_as_num(dataset)
indexes = which(x.values >= min.value & x.values <= max.value)
subset_x_values(dataset, indexes, by.index = TRUE)
}
"subset_by_samples_and_xvalues" = function(dataset, samples, variables = NULL, by.index = FALSE,
variable.bounds = NULL, rebuild.factors = TRUE)
{
if (!by.index) {
if (is.null(variables)) {
if (is.null(variable.bounds))
stop("One of variables or variable.bounds parameters needs to be defined")
else {
x.values = get_x_values_as_num(dataset)
x.indexes = which(x.values >= variable.bounds[1] & x.values <= variable.bounds[2])
}
}
else {
variables = as.character(variables)
x.indexes = which(rownames(dataset$data) %in% variables)
}
}
else x.indexes = variables
dataset$metadata = dataset$metadata[samples,,drop= FALSE]
if (rebuild.factors) dataset$metadata = rebuild_factors_df(dataset$metadata)
dataset$data = dataset$data[x.indexes,samples, drop=FALSE]
dataset
}
"subset_metadata" = function(dataset, variables)
{
dataset$metadata = dataset$metadata[,variables, drop = FALSE]
dataset
}
# REMOVE functions
# Used to remove SAMPLES, DATA VARIABLES or METADATA
# removes selected samples, data points or metadata variables from a dataset
# type: "sample", "data" or "metadata"
# data.to.remove - defines names or indexes of data to be removed
# by.index - specifies if data.to.remove has indexes or names
# rebuild.factors - if T, in metadata categorical variables are recalculated so that levels that
# disapear are removed (this does not maintain order of the levels)
# xaxis.num - indicates if data.to.remove for data variables is given as numeric vector (if T) or text
"remove_data" = function(dataset, data.to.remove, type = "sample", by.index = FALSE, rebuild.factors = TRUE) {
if (type == "sample")
dataset = remove_samples(dataset, data.to.remove, rebuild.factors)
else if(type == "data")
dataset = remove_data_variables(dataset, data.to.remove, by.index)
else if(type == "metadata")
dataset = remove_metadata_variables(dataset, data.to.remove)
else stop("Type of data to remove is undefined")
dataset
}
"remove_samples" = function(dataset, samples.to.remove, rebuild.factors = TRUE) {
if (is.numeric(samples.to.remove))
res = subset_samples(dataset, -samples.to.remove, rebuild.factors = rebuild.factors)
else {
indexes.to.remove = which(colnames(dataset$data) %in% samples.to.remove)
res = subset_samples(dataset, -indexes.to.remove, rebuild.factors = rebuild.factors)
}
res
}
"remove_data_variables" = function(dataset, variables.to.remove, by.index = FALSE) {
if (length(variables.to.remove) == 0) {
warning("No variables to remove")
return (dataset)
}
if (!by.index) {
variables.to.remove = as.character(variables.to.remove)
indexes.to.remove = which(rownames(dataset$data) %in% variables.to.remove)
}
else indexes.to.remove = variables.to.remove
subset_x_values(dataset, -indexes.to.remove, by.index = TRUE)
}
"remove_x_values_by_interval" = function(dataset, min.value, max.value)
{
x.values = get_x_values_as_num(dataset)
indexes.to.remove = which(x.values >= min.value & x.values <= max.value)
subset_x_values(dataset, -indexes.to.remove, by.index = TRUE)
}
"remove_metadata_variables" = function(dataset, variables.to.remove)
{
if (!is.numeric(variables.to.remove))
indexes.to.remove = which(colnames(dataset$metadata) %in% variables.to.remove)
else indexes.to.remove = variables.to.remove
if (!is.null(indexes.to.remove) & length(indexes.to.remove) > 0)
dataset$metadata = dataset$metadata[,-indexes.to.remove, drop = FALSE]
else warning("No metadata variables removed since no fields matched the criteria")
dataset
}
# functions to remove samples / variables with NAs
"remove_samples_by_nas" = function(dataset, max.nas = 0, by.percent = FALSE)
{
if (by.percent== TRUE) max.nas = max.nas * num_x_values(dataset) / 100 #100 * max.nas / num_x_values(dataset)
res = apply(dataset$data, 2, function(x) sum(is.na(x)))
to.remove = which(res > max.nas)
remove_samples(dataset, to.remove)
}
"remove_samples_by_na_metadata" = function(dataset, metadata.var)
{
to.remove = which(is.na(dataset$metadata[,metadata.var]))
remove_samples(dataset, to.remove)
}
"remove_variables_by_nas" = function(dataset, max.nas = 0, by.percent = FALSE)
{
if (by.percent== TRUE) max.nas = max.nas * num_samples(dataset) / 100 #100 * max.nas / num_samples(dataset)
res = apply(dataset$data, 1, function(x) { sum(is.na(x)) } )
to.remove = which(res > max.nas)
remove_data_variables(dataset, to.remove, by.index = TRUE)
}
# aggregate samples
# can be used to merge replicates
# indexes - vector where each index will indicate different groups
# c(1,1,2,2,3,3) would mean that the 6 samples would be aggregated into 3 groups
# with the first with samples 1 and 2, the second with samples 3 and 4, and so on
# aggreg.fn - function used to aggregate data values: "mean", "median", "sum", "max", "min", ...
# metadata variables are handled as data variables if numeric; if factor, the most common value is taken
# meta.to.remove - metadata variables to be removed
"aggregate_samples" = function(dataset, indexes, aggreg.fn = "mean", meta.to.remove = c()) {
groups = unique(indexes)
newdata = matrix(NA, nrow(dataset$data), length(groups))
rownames(newdata) = rownames(dataset$data)
colnames(newdata) = vector(mode = "character", length=length(groups) )
newmeta = data.frame()
for (i in 1:length(dataset$metadata)) {
if (is.numeric(dataset$metadata[[i]]))
newmeta[[i]] = vector(mode= "numeric", length=0 )
else if (is.factor(dataset$metadata[[i]]))
newmeta[[i]] = factor(levels=levels(dataset$metadata[[i]]) )
#labels = labels(dataset$metadata[[i]]) )
else newmeta[[i]] = vector(mode="character", length=length(groups))
}
names(newmeta) = names(dataset$metadata)
for (g in 1:length(groups)) {
to.merge = which(indexes == groups[g])
newdata[,g] = apply(dataset$data[,to.merge,drop=FALSE], 1, aggreg.fn)
for (i in 1:length(dataset$metadata)) {
if (is.numeric(dataset$metadata[[i]])) {
func= match.fun(aggreg.fn)
newmeta[g,i] = func(dataset$metadata[to.merge,i])
}
# newmeta[g,i] = eval(parse(text=paste(aggreg.fn,"(",dataset$metadata[to.merge,i],")",sep="")))
else if (is.factor(dataset$metadata[[i]]))
newmeta[g,i] = levels(newmeta[[i]])[which.max(table(dataset$metadata[to.merge,i]))]
else
newmeta[g,i] = dataset$metadata[to.merge[1],i]
}
first.index = to.merge[1]
colnames(newdata)[g] = colnames(dataset$data)[first.index]
rownames(newmeta)[g] = colnames(dataset$data)[first.index]
}
newdataset = list()
newdataset$data = newdata
newdataset$metadata = newmeta
newdataset$labels = dataset$labels
newdataset$type = dataset$type
newdataset$description = dataset$description
newdataset = remove_metadata_variables(newdataset, meta.to.remove)
newdataset
}
# Merge data and metadata into a data frame (rows are samples; columns are x.values and metadata)
# samples - if defined, allows to specify which samples will be kept; if undefined, all samples
# x.values - if defined, allows to specify which x.values to keep; if undefined, all will be kept
# metadata.vars - if defined, allows to specify which metadata to keep; if undefined, all will be kept
"merge_data_metadata" = function(dataset, samples = NULL, metadata.vars = NULL, x.values = NULL,
by.index = FALSE)
{
if (!is.null(samples) )
dataset = subset_samples(dataset, samples, rebuild.factors = TRUE)
if (!is.null(x.values) )
dataset = subset_x_values(dataset, x.values, by.index = by.index)
if (!is.null(metadata.vars))
dataset = subset_metadata(dataset, metadata.vars)
df = as.data.frame(t(dataset$data))
df = cbind(df, dataset$metadata)
df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.