library(S4Vectors)
# OBJECTS
# METHODS
setGeneric("discriminateTargets", function(tar) standarGeneric("discriminateTargets"))
setMethod("discriminateTargets", signature("SimpleList"),
function(tar) {
# /*
# It will generates the list of genes that are in common in 2
# datasets. Dataset comes from targets object created by selectTargets
# function
# */
# /* tar
# Object created by selectTargets function implemented in pRelude.R
# */
tags <- colnames(tar[[1]][["sizes_table"]])[-length(colnames(tar[[1]][["sizes_table"]]))]
# /*
# How many combinations of length 2 do you can do with length(tags) elements?
# */
combinations <- comb(2, length(tags))
results <- SimpleList()
# // Iterating over the total number of posible combinations of length 2
for(i in 1:length(combinations[,1])){
paths <- names(tar)
# // Iterating over paths stored in tar
for(p in paths) {
files <- names(tar[[p]])[-length(names(tar[[p]]))]
genes_basket <- c()
out_tag <- paste(tags[combinations[i,1]], tags[combinations[i,2]], sep="")
# // Iterating over files for ever path sotred in tar
for(f in files) {
tag <- tags[combinations[i,1]]
genes <- tar[[p]][[f]][[tag]]
tag <- tags[combinations[i,2]]
genes <- c(genes, tar[[p]][[f]][[tag]])
genes <- eliminateHeavyDuplicates(names=genes, times=2)
genes <- gsub("\\.[[:alpha:]]$", "", genes)
genes <- genes[duplicatedWithMoreThan(names=genes, times=1, drop=FALSE)]
genes <- unique(sort(genes))
genes_basket <- c(genes_basket, genes)
}
results[[out_tag]] <- t(table(genes_basket))
results[[paste(out_tag, "_CRgenes", sep="")]] <- unique(sort(genes_basket))
# /* TODO LIST
# Select the Uncoupled regulated genes from L, S and X
# in combination of length 2.
# Select the coupled and uncoupled regulated genes for
# the LSX set
# */
}
}
results
})
setGeneric("getLists", function(fasta) standardGeneric("getLists"))
setMethod("getLists", signature("character"),
function(fasta) {
dna <- readDNAStringSet(fasta, format="fasta")
genes <- names(dna)
# /*
# Fasta sequences were removed
# to make memory storage efficient.
# */
rm(dna)
# /* extractTags comes from helpe.R
tags <- extractTags(genes)
# /*
# Calculating homologues and orthlogues genes
# */
genes <- unlist(lapply(genes, function(x) strsplit(x, split="\\|")[[1]][1]))
genes <- genes[grepl("\\.[[:alpha:]]$", genes)]
alltags <- gsub(".*\\.", "", genes)
homologs <- SimpleList()
homologs@listData <- as.list(table(alltags)) # */* Select total L, S and X genes to select singletons genes.
homologs <- combinations(tags, genes, homologs)
# /*
# select total genes total genes and singletons
# */
homologs
})
setGeneric("selectTargets", function(paths, percentage, notags) standardGeneric("selectTargets"))
setMethod("selectTargets", signature("character", "numeric", "numeric"),
function(paths, percentage, notags) {
# /* paths.
# It is a character vector with the complete paths were *.data files are.
# */
# /* percentage.
# It is a numeric vector of length 1 which tells the percentage of
# the top genes will be selected.
# */
# /* notags
# It is a numeric vector of length 1 which tells the number of tags included in tar object
# */
# /* What it does.
# Takes *.data files for each microRNA family.
# Select genes by tags.
# Sort them by context score from negative to positive.
# Takes the $(pecrentage)% of top genes for each subset of genes.
# Calculates the vector length of each set of genes, and the total genes
# for each tag.
# Finally, all results are placed in a SimpleList object.
# */
result <- SimpleList()
for(path in paths) {
result[[basename(path)]] <- SimpleList()
lof <- list.files(path=path, pattern="\\.data$")
min <- 0
max <- length(lof)
pb <- txtProgressBar(min=min, max=max, style=3)
for(f in lof) {
result[[basename(path)]][[f]] <- SimpleList()
table <- read.delim(paste(path, f, sep=""), header=FALSE, sep="\t", as.is=TRUE)
gene_tags <- table[,1]
gene_tags <- gene_tags[grepl("\\.[[:alnum:]]$", gene_tags)]
gene_tags <- gsub(".*\\.", "", gene_tags)
gene_tags <- names(sort(table(gene_tags), decreasing=TRUE))
for(tag in gene_tags){
sub_table <- table[grepl(paste("\\.", tag, "$", sep=""), table[,1]),]
sub_table <- sub_table[order(sub_table[,2], decreasing=FALSE),]
len_subtable <- length(sub_table[,1])
sub_table <- sub_table[,1]
sub_table <- sub_table[1:round(len_subtable*(percentage/100))]
result[[basename(path)]][[f]][[tag]] <- sub_table
}
min <- min + 1
setTxtProgressBar(pb, min)
}
}
result <- intersect.targets(tar=result, notags=notags)
result
})
##################################
##################################
##################################
setGeneric("getLists.deprecated", function(total_genes, genes, stage, homoeologos_fasta) standardGeneric("getLists.deprecated"))
setMethod("getLists.deprecated", signature("character", "character", "character", "character"),
function(total_genes, genes, stage, homoeologos_fasta) {
list <- SimpleList()
if (stage == "fu"){
lof <- list.files(path=genes, pattern="\\.data\\.fu$")
}
else if (stage == "fd") {
lof <- list.files(path=genes, pattern="\\.data\\.fd$")
}
else {
stop(paste(stage, " stage is not understood!", sep=""))
}
CRH <- c()
URH <- c()
NRH <- c()
SIN <- c()
gene_tags <- gsub(".*\\.", "", total_genes)
gene_tags <- rownames(sort(table(gene_tags), decreasing=TRUE))
gene_tags <- gene_tags[1:2]
gene_tags <- sort(gene_tags)
tgenes <- total_genes[grepl(paste("\\.[", gene_tags[1], gene_tags[2], "]$", sep=""), total_genes)]
tgenes <- gsub(paste("\\.[", gene_tags[1], gene_tags[2], "]$", sep=""), "", tgenes)
tgenes <- sort(tgenes)
tgenes <- eliminateHeavyDuplicates(tgenes, 2)
homoeologos <- tgenes[duplicated(tgenes)]
singletons <- tgenes[!(tgenes %in% homoeologos)]
singletons <- unique(sort(singletons))
for (f in lof){
tmp_fu <- read.delim(paste(genes, f, sep=""), header=FALSE, sep="\t", as.is=TRUE)
tmp_fu <- tmp_fu[,1]
tmp_fu <- tmp_fu[grepl(paste("\\.[", gene_tags[1], gene_tags[2], "]$", sep=""), tmp_fu)]
tmp_fu <- gsub(paste("\\.[", gene_tags[1], gene_tags[2], "]$", sep=""), "", tmp_fu)
tmp_fu <- sort(tmp_fu)
tmp_fu <- eliminateHeavyDuplicates(tmp_fu, 2)
tmp_CRH <- tmp_fu[duplicated(tmp_fu)]
tmp_URH <- tmp_fu[!(tmp_fu %in% tmp_CRH) & tmp_fu %in% homoeologos]
tmp_SIN <- tmp_fu[!(tmp_fu %in% tmp_CRH) & tmp_fu %in% singletons]
CRH <- c(CRH, tmp_CRH)
URH <- c(URH, tmp_URH)
SIN <- c(SIN, tmp_SIN)
}
URH <- URH[!(URH %in% CRH)]
tCRH <- table(CRH)
tURH <- table(URH)
tSIN <- table(SIN)
CRH <- unique(sort(CRH))
URH <- unique(sort(URH))
NRH <- homoeologos[!(homoeologos %in% CRH) & !(homoeologos %in% URH)]
# Preparing homoeolog genes with reported UTR sequence to filtering NHR. We are extracting the names from homoeologs fasta sequence
# Be carefully to place the gene name at the begining of the header.
homoeogenesWUTRs <- readDNAStringSet(homoeologos_fasta, format="fasta")
homoeogenesWUTRs <- names(homoeogenesWUTRs)
homoeogenesWUTRs <- lapply(homoeogenesWUTRs, function(x) strsplit(x=x, split="\\|")[[1]])
items <- c()
for(item in homoeogenesWUTRs){
items <- c(items, item[1])
}
homoeogenesWUTRs <- items
homoeogenesWUTRs <- gsub(paste("\\.[", gene_tags[1], gene_tags[2], "]$", sep=""), "", homoeogenesWUTRs)
homoeogenesWUTRs <- eliminateHeavyDuplicates(homoeogenesWUTRs, 2)
homoeogenesWUTRs <- homoeogenesWUTRs[duplicated(homoeogenesWUTRs)]
NRH <- NRH[NRH %in% homoeogenesWUTRs]
NRH <- eliminateHeavyDuplicates(NRH, 1)
URH <- URH[URH %in% homoeogenesWUTRs]
URH <- eliminateHeavyDuplicates(URH, 1)
list[["total_genes"]] <- total_genes
list[["homoeologos"]] <- homoeologos
list[["singletons"]] <- singletons
list[["CRH"]] <- CRH
list[["URH"]] <- URH
list[["NRH"]] <- NRH
list[["tCRH"]] <- tCRH
list[["tURH"]] <- tURH
list[["tSIN"]] <- tSIN
print("debbuging2")
list
})
setGeneric("loadTSresults", function(path, glist) standarGeneric("loadTSresults"))
setMethod("loadTSresults", signature("character", "SimpleList"),
# /* what it does
# Load ts-tools results. Takes the *.data files and stores the tables in a SimpleList
# object
# */
# /* path
# Character vector with the name of the path where the *.data files are
# */
# /* glist
# Glist object created by getLists.deprecated function where positions 2 are for homeologs
# */
function(path, glist) {
lof <- list.files(path, pattern="\\.data$")
print("Loaded files...")
print(lof)
tsresults <- SimpleList()
for (f in lof) {
data <- read.delim(paste(path, f, sep=""), header=FALSE, as.is=TRUE, sep="\t")
t <- with(data, {Homeo <- ifelse(gsub("\\.[LS]$", "", V1) %in% gL.fu[[2]], TRUE, FALSE)})
data <- cbind(data, Homeo=t)
tsresults[[gsub("\\.data$", "", f)]] <- data
}
tsresults
})
setGeneric("loadTargets", function(path, lof, collapse=TRUE) standarGeneric("loadTargets"))
setMethod("loadTargets", signature("character", "character"),
# /*
# Load the names of the target genes from each *.data.fu or *.data.fd file.
# Those files are the output of the target scan analysis and *.fu are the
# best prediction targets and *.fd are the worst prediction targets.
# */
# /* path
# Character vector with the name of the path where the *.data.f* are.
# */
# /* lof
# Character vector with the names of the *.data.f* files
# */
# /* collapse
# Logical vector. If TRUE gene names will be filtered and untagged, sorted
# and unique-ed
function(path, lof, collapse=TRUE) {
if(length(path) == 0 | length(lof) == 0){
stop("Path or list of files were not specified")
}
results <- SimpleList()
for (f in lof) {
tmp_data <- read.delim(paste(path, f, sep=""), header=FALSE, sep="\t", as.is=TRUE)[,1]
tmp_data <- gsub("\\|.*$", "", tmp_data)
if (collapse) {
tmp_data <- gsub("\\.[LSPX]$", "", tmp_data)
tmp_data <- tmp_data[tmp_data %!in% "unnamed"]
tmp_data <- unique(sort(tmp_data))
}
tmp_data <- tmp_data[tmp_data %!in% "unnamed"]
results[[ strsplit(f, split="\\.")[[1]][1] ]] <- tmp_data
}
results
})
setGeneric("filter.bad.targs", function(targs, badtargs) standarGeneric("filter.bad.targs"))
setMethod("filter.bad.targs", signature("SimpleList", "SimpleList"),
# /*
# Takes good targets and the bad predicted targets and eliminates the bad predicted
# targets presented in good targets list.
# */
# /* targs
# SimpleList with the good predicted targets separated by microRNA family
# */
# /* badtargs
# SimpleList with the bad predicted targets separated by microRNA family
# */
function(targs, badtargs) {
realbadtargs <- SimpleList()
alltargs <- c()
for (name in names(targs)) {
alltargs <- c(alltargs, targs[[name]])
}
for (name in names(badtargs)) {
realbadtargs[[name]] <- badtargs[[name]][badtargs[[name]] %!in% alltargs]
}
realbadtargs
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.