#
# tb.treatments = table(c(df.annotation$condition, df.annotation$condition_optional))
# tb.treatments = tb.treatments[names(tb.treatments) != ""]
#
# # tissue and treatment distributions
# tb.tissues <- table(df.annotation$tissue)
# v.conditionGroups = names(tb.treatments)
#
# # loading gene expression matrix
# m.expression <- read.table(filename.geneExpression, row.names = 1, header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
# m.expression <- as.matrix(m.expression)
#
# tb.conditions = table(df.annotation$condition)
# tb.conditions[names(table(df.annotation$condition_optional))] = tb.conditions[names(table(df.annotation$condition_optional))] + table(df.annotation$condition_optional)
#
#' Load the differential expression datasets and their annotations
#'
#' Reads the gene list, the experiment identifiers, the fold-change and
#' p-value tables, the experiment condition/tissue annotation, the
#' transcription factor annotation and the gene group table from
#' tab-separated files and returns everything as one named list.
#'
#' @param filename.genes Path to a headerless tab-separated file. Its first
#'   column is used as the row names of both expression matrices.
#' @param filename.experiment_ids Path to a headerless tab-separated file.
#'   Its first column (coerced to character) is used as the column names of
#'   both expression matrices.
#' @param filename.foldChange_differentialExpression Path to a headerless
#'   tab-separated table of fold changes (rows follow `filename.genes`,
#'   columns follow `filename.experiment_ids`).
#' @param filename.pvalue_differentialExpression Path to a headerless
#'   tab-separated table of p-values with the same layout as the fold-change
#'   table.
#' @param filename.experiment_condition_tissue_annotation Path to a
#'   tab-separated file with a header row. It must contain the columns
#'   `series_id`, `condition`, `condition_optional`, `tissue` and
#'   `unique_ID`; rows whose `unique_ID` is `NA` are discarded.
#' @param filename.transcriptionfactor_annotation Path to a tab-separated
#'   file with a header row; its `TF_ID` column is matched against the genes.
#' @param filename.geneGroups Path to a tab-separated file with a header row,
#'   a `Gene_ID` column and one column per gene group, where a value of 1
#'   marks membership of the gene in that group.
#' @return A named list with the elements
#'   `m.foldChange_differentialExpression`,
#'   `m.pvalue_differentialExpression` (matrices, genes x experiments),
#'   `df.experiment_condition_annotation` (filtered annotation),
#'   `tb.condition_treatments` (counts per non-empty treatment label taken
#'   from `condition` and `condition_optional`),
#'   `tb.condition_tissues` (counts per tissue),
#'   `df.transcriptionFactorAnnotation` and `df.geneGroups` (each with an
#'   added `with_geneExpression` column holding "yes" or "no"),
#'   `tb.geneGroups` (column sums per gene group), `v.geneGroups` (group
#'   names), `l.geneGroups` (per group, the member genes that are also in
#'   the gene list) and `genes`.
#' @export
#' @examples
#' \dontrun{
#' datasets <- load_datasets()
#' }
load_datasets <- function(filename.genes = "data/genes.txt",
                          filename.experiment_ids = "data/experiment_ids.txt",
                          filename.foldChange_differentialExpression = "data/m.foldChange_differentialExpression.txt",
                          filename.pvalue_differentialExpression = "data/m.pvalue_differentialExpression.txt",
                          filename.experiment_condition_tissue_annotation = "data/df.experiment_condition_annotation.txt",
                          filename.transcriptionfactor_annotation = "data/df.transcriptionFactorAnnotation.txt",
                          filename.geneGroups = "data/df.enzymes_w_metabolic_domains.txt") {
  # --- gene and experiment identifiers -------------------------------------
  genes <- read.table(filename.genes, header = FALSE, sep = "\t",
                      stringsAsFactors = FALSE)[, 1]
  experiment_series_ids <- as.character(
    read.table(filename.experiment_ids, header = FALSE, sep = "\t",
               stringsAsFactors = FALSE)[, 1]
  )

  # --- experiment condition / tissue annotation ----------------------------
  df.annotation <- read.csv(filename.experiment_condition_tissue_annotation,
                            header = TRUE, sep = "\t", fill = TRUE,
                            stringsAsFactors = FALSE)
  v.colnames_mandatory <- c("series_id", "condition", "condition_optional",
                            "tissue", "unique_ID")
  if (!all(v.colnames_mandatory %in% names(df.annotation))) {
    stop(paste("could not find all mandatory columns in file:",
               paste(v.colnames_mandatory, collapse = ", ")))
  }
  df.annotation <- df.annotation[!is.na(df.annotation$unique_ID), ,
                                 drop = FALSE]

  # --- differential expression tables --------------------------------------
  df.foldChange_differentialExpression <- read.table(
    filename.foldChange_differentialExpression,
    header = FALSE, sep = "\t", stringsAsFactors = FALSE
  )
  df.pvalue_differentialExpression <- read.table(
    filename.pvalue_differentialExpression,
    header = FALSE, sep = "\t", stringsAsFactors = FALSE
  )

  if (length(genes) == 0) {
    stop("Error: no genes found")
  }
  if (length(experiment_series_ids) == 0) {
    stop("Error: no experiments found")
  }
  if (nrow(df.annotation) == 0) {
    stop("Error: no condition annotation found")
  }
  if (nrow(df.foldChange_differentialExpression) == 0) {
    stop("Error: no differential expression foldchange found")
  }
  if (nrow(df.pvalue_differentialExpression) == 0) {
    stop("Error: no differential expression pvalue found")
  }

  m.foldChange_differentialExpression <- data.matrix(
    df.foldChange_differentialExpression, rownames.force = NA
  )
  m.pvalue_differentialExpression <- data.matrix(
    df.pvalue_differentialExpression, rownames.force = NA
  )

  # Fail with a readable message instead of the generic "length of
  # 'dimnames' not equal to array extent" error raised by the assignments
  # below when the tables do not line up with the identifier files.
  if (nrow(m.foldChange_differentialExpression) != length(genes) ||
      nrow(m.pvalue_differentialExpression) != length(genes)) {
    stop("Error: number of rows in the differential expression tables does not match the number of genes")
  }
  if (ncol(m.foldChange_differentialExpression) != length(experiment_series_ids) ||
      ncol(m.pvalue_differentialExpression) != length(experiment_series_ids)) {
    stop("Error: number of columns in the differential expression tables does not match the number of experiments")
  }
  rownames(m.foldChange_differentialExpression) <- genes
  rownames(m.pvalue_differentialExpression) <- genes
  colnames(m.foldChange_differentialExpression) <- experiment_series_ids
  colnames(m.pvalue_differentialExpression) <- experiment_series_ids

  # --- treatment and tissue distributions ----------------------------------
  # Coerce to character so the table is always indexed by label, never by
  # position (which would happen if the condition labels were numeric).
  v.conditions_all <- as.character(
    c(df.annotation$condition, df.annotation$condition_optional)
  )
  # Blank fields can arrive either as "" or as NA (read.csv turns an
  # all-blank column into NA), so both have to be removed; otherwise the
  # lookup below yields a spurious <NA> entry.
  v.treatments <- unique(v.conditions_all)
  v.treatments <- v.treatments[!is.na(v.treatments) & v.treatments != ""]
  tb.condition_treatments <- table(v.conditions_all, dnn = "")[v.treatments]
  tb.condition_tissues <- table(df.annotation$tissue)

  # --- transcription factor annotation -------------------------------------
  df.transcriptionFactorAnnotation <- read.table(
    filename.transcriptionfactor_annotation,
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
  )
  if (nrow(df.transcriptionFactorAnnotation) == 0) {
    stop("Error: no transcription factor annotation found")
  }
  df.transcriptionFactorAnnotation["with_geneExpression"] <- "no"
  idx.tf_with_expression <- which(
    df.transcriptionFactorAnnotation$TF_ID %in% genes
  )
  df.transcriptionFactorAnnotation$with_geneExpression[idx.tf_with_expression] <- "yes"

  # --- gene groups ----------------------------------------------------------
  df.geneGroups <- read.table(filename.geneGroups, header = TRUE, sep = "\t",
                              stringsAsFactors = FALSE)
  if (nrow(df.geneGroups) == 0) {
    stop("Error: no gene group annotation found")
  }
  rownames(df.geneGroups) <- df.geneGroups$Gene_ID
  # drop = FALSE keeps a data frame even when only one gene group column
  # remains; without it the result collapses to a vector and colSums() fails.
  df.geneGroups <- df.geneGroups[, !names(df.geneGroups) %in% c("Gene_ID"),
                                 drop = FALSE]
  tb.geneGroups <- colSums(df.geneGroups)
  v.geneGroups <- colnames(df.geneGroups)

  # Members of each group, restricted to genes present in the gene list.
  # lapply() is also safe when there are no gene group columns at all.
  l.geneGroups <- lapply(v.geneGroups, function(group) {
    members <- rownames(df.geneGroups)[which(df.geneGroups[, group] == 1)]
    intersect(members, genes)
  })
  names(l.geneGroups) <- v.geneGroups

  # Added after the group statistics so the flag is not counted as a group.
  df.geneGroups["with_geneExpression"] <- "no"
  df.geneGroups$with_geneExpression[which(rownames(df.geneGroups) %in% genes)] <- "yes"

  list(
    m.foldChange_differentialExpression = m.foldChange_differentialExpression,
    m.pvalue_differentialExpression = m.pvalue_differentialExpression,
    df.experiment_condition_annotation = df.annotation,
    tb.condition_treatments = tb.condition_treatments,
    tb.condition_tissues = tb.condition_tissues,
    df.transcriptionFactorAnnotation = df.transcriptionFactorAnnotation,
    df.geneGroups = df.geneGroups,
    tb.geneGroups = tb.geneGroups,
    v.geneGroups = v.geneGroups,
    l.geneGroups = l.geneGroups,
    genes = genes
  )
}
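# NOTE: the two lines below are leftover text from the web page this file was
# copied from; they are kept as comments so the file remains valid R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.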