#----------------------------------------------------------------------------------------------------
library(TReNA)
library(plyr)
library(limma)
#----------------------------------------------------------------------------------------------------
assess_methodsAgainstDistributions <- function(mtx.sub, target.gene, tfs)
{
#fivenum(mtx.sub)# 0.000000 1.753137 12.346965 43.247467 1027.765854
# Transform with log2
mtx.tmp <- mtx.sub - min(mtx.sub) + 0.001
mtx.log2 <- log2(mtx.tmp)
#fivenum(mtx.log2) # [1] -9.9657843 0.8107618 3.6262014 5.4345771 10.005297
# Transform sub with asinh
mtx.asinh <- asinh(mtx.sub)
#fivenum(mtx.asinh) # [1] 0.000000 1.327453 3.208193 4.460219 7.628290
# Transform via VOOM transformation
mtx.voom <- voom(mtx.sub)$E
#fivenum(mtx.voom)
printf("--- Testing LASSO")
trena <- TReNA(mtx.assay=mtx.sub, solver="lasso", quiet=FALSE)
lasso1 <- solve(trena, target.gene, tfs)
lasso1 <- data.frame(gene = rownames(lasso1),
lasso.as.is = lasso1$beta)
trena <- TReNA(mtx.assay=mtx.log2, solver="lasso", quiet=FALSE)
lasso2 <- solve(trena, target.gene, tfs)
lasso2 <- data.frame(lasso.log2 = lasso2$beta,
gene = rownames(lasso2))
trena <- TReNA(mtx.assay=mtx.asinh, solver="lasso", quiet=FALSE)
lasso3 <- solve(trena, target.gene, tfs)
lasso3 <- data.frame(lasso.asinh = lasso3$beta,
gene = rownames(lasso3))
trena <- TReNA(mtx.assay=mtx.voom, solver="lasso", quiet=FALSE)
lasso4 <- solve(trena, target.gene, tfs)
lasso4 <- data.frame(lasso.voom = lasso4$beta,
gene = rownames(lasso4))
lasso1$gene <- as.character(lasso1$gene)
lasso2$gene <- as.character(lasso2$gene)
lasso3$gene <- as.character(lasso3$gene)
lasso4$gene <- as.character(lasso4$gene)
# Grab the top 10 genes from each
lasso1.top10 <- lasso1$gene[order(abs(lasso1$lasso.as.is), decreasing=TRUE)][1:10]
lasso2.top10 <- lasso2$gene[order(abs(lasso2$lasso.log2), decreasing=TRUE)][1:10]
lasso3.top10 <- lasso3$gene[order(abs(lasso3$lasso.asinh), decreasing=TRUE)][1:10]
lasso4.top10 <- lasso4$gene[order(abs(lasso4$lasso.voom), decreasing = TRUE)][1:10]
printf("--- Testing Bayes Spike")
trena <- TReNA(mtx.assay=mtx.sub, solver="bayesSpike", quiet=FALSE)
bs1 <- solve(trena, target.gene, tfs)
bs1 <- data.frame(bs.as.is = bs1$beta,
gene = rownames(bs1))
trena <- TReNA(mtx.assay=mtx.log2, solver="bayesSpike", quiet=FALSE)
bs2 <- solve(trena, target.gene, tfs)
bs2 <- data.frame(bs.log2 = bs2$beta,
gene = rownames(bs2))
trena <- TReNA(mtx.assay=mtx.asinh, solver="bayesSpike", quiet=FALSE)
bs3 <- solve(trena, target.gene, tfs)
bs3 <- data.frame(bs.asinh = bs3$beta,
gene = rownames(bs3))
trena <- TReNA(mtx.assay=mtx.voom, solver="bayesSpike", quiet=FALSE)
bs4 <- solve(trena, target.gene, tfs)
bs4 <- data.frame(bs.voom = bs4$beta,
gene = rownames(bs4))
bs1$gene <- as.character(bs1$gene)
bs2$gene <- as.character(bs2$gene)
bs3$gene <- as.character(bs3$gene)
bs4$gene <- as.character(bs4$gene)
# Grab the top 10 genes from each
bs1.top10 <- bs1$gene[order(abs(bs1$bs.as.is), decreasing=TRUE)][1:10]
bs2.top10 <- bs2$gene[order(abs(bs2$bs.log2), decreasing=TRUE)][1:10]
bs3.top10 <- bs3$gene[order(abs(bs3$bs.asinh), decreasing=TRUE)][1:10]
bs4.top10 <- bs4$gene[order(abs(bs4$bs.voom), decreasing = TRUE)][1:10]
printf("--- Testing Random Forest")
trena <- TReNA(mtx.assay=mtx.sub, solver="randomForest", quiet=FALSE)
rf.result <- solve(trena, target.gene, tfs)
rf1 <- data.frame(rf.as.is = rf.result$edges$IncNodePurity,
gene = rownames(rf.result$edges))
trena <- TReNA(mtx.assay=mtx.log2, solver="randomForest", quiet=FALSE)
rf.result.2 <- solve(trena, target.gene, tfs)
rf2 <- data.frame(rf.log2 = rf.result.2$edges$IncNodePurity,
gene = rownames(rf.result.2$edges))
trena <- TReNA(mtx.assay=mtx.asinh, solver="randomForest", quiet=FALSE)
rf.result.3 <- solve(trena, target.gene, tfs)
rf3 <- data.frame(rf.asinh = rf.result.3$edges$IncNodePurity,
gene = rownames(rf.result.3$edges))
trena <- TReNA(mtx.assay=mtx.voom, solver="randomForest", quiet=FALSE)
rf.result.4 <- solve(trena, target.gene, tfs)
rf4 <- data.frame(rf.voom = rf.result.4$edges$IncNodePurity,
gene = rownames(rf.result.4$edges))
rf1$gene <- as.character(rf1$gene)
rf2$gene <- as.character(rf2$gene)
rf3$gene <- as.character(rf3$gene)
rf4$gene <- as.character(rf4$gene)
# Grab the top 10 genes from each
rf1.top10 <- rf1$gene[order(abs(rf1$rf.as.is), decreasing=TRUE)][1:10]
rf2.top10 <- rf2$gene[order(abs(rf2$rf.log2), decreasing=TRUE)][1:10]
rf3.top10 <- rf3$gene[order(abs(rf3$rf.asinh), decreasing=TRUE)][1:10]
rf4.top10 <- rf4$gene[order(abs(rf4$rf.voom), decreasing=TRUE)][1:10]
# Use Square Root LASSO
printf("--- Testing Square Root LASSO")
trena <- TReNA(mtx.assay=mtx.sub, solver="sqrtlasso", quiet=FALSE)
sqrt.lasso1 <- solve(trena, target.gene, tfs)
sqrt.lasso1 <- data.frame(gene = rownames(sqrt.lasso1),
sqrt.lasso.as.is = sqrt.lasso1$beta)
trena <- TReNA(mtx.assay=mtx.log2, solver="sqrtlasso", quiet=FALSE)
sqrt.lasso2 <- solve(trena, target.gene, tfs)
sqrt.lasso2 <- data.frame(sqrt.lasso.log2 = sqrt.lasso2$beta,
gene = rownames(sqrt.lasso2))
trena <- TReNA(mtx.assay=mtx.asinh, solver="sqrtlasso", quiet=FALSE)
sqrt.lasso3 <- solve(trena, target.gene, tfs)
sqrt.lasso3 <- data.frame(sqrt.lasso.asinh = sqrt.lasso3$beta,
gene = rownames(sqrt.lasso3))
trena <- TReNA(mtx.assay=mtx.voom, solver="sqrtlasso", quiet=FALSE)
sqrt.lasso4 <- solve(trena, target.gene, tfs)
sqrt.lasso4 <- data.frame(sqrt.lasso.voom = sqrt.lasso4$beta,
gene = rownames(sqrt.lasso4))
sqrt.lasso1$gene <- as.character(sqrt.lasso1$gene)
sqrt.lasso2$gene <- as.character(sqrt.lasso2$gene)
sqrt.lasso3$gene <- as.character(sqrt.lasso3$gene)
sqrt.lasso4$gene <- as.character(sqrt.lasso4$gene)
# Grab the top 10 genes from each
sqrt.lasso1.top10 <- sqrt.lasso1$gene[order(abs(sqrt.lasso1$sqrt.lasso.as.is), decreasing=TRUE)][1:10]
sqrt.lasso2.top10 <- sqrt.lasso2$gene[order(abs(sqrt.lasso2$sqrt.lasso.log2), decreasing=TRUE)][1:10]
sqrt.lasso3.top10 <- sqrt.lasso3$gene[order(abs(sqrt.lasso3$sqrt.lasso.asinh), decreasing=TRUE)][1:10]
sqrt.lasso4.top10 <- sqrt.lasso4$gene[order(abs(sqrt.lasso4$sqrt.lasso.voom), decreasing = TRUE)][1:10]
# Take the union of all the genes
all.genes <- unique(c(lasso1.top10,
lasso2.top10,
lasso3.top10,
lasso4.top10,
bs1.top10,
bs2.top10,
bs3.top10,
bs4.top10,
rf1.top10,
rf2.top10,
rf3.top10,
rf4.top10,
sqrt.lasso1.top10,
sqrt.lasso2.top10,
sqrt.lasso3.top10,
sqrt.lasso4.top10)) # Returns 41 genes
# Pull out the specified genes and assemble a table
lasso1.sub <- subset(lasso1, lasso1$gene %in% all.genes)
lasso2.sub <- subset(lasso2, lasso2$gene %in% all.genes)
lasso3.sub <- subset(lasso3, lasso3$gene %in% all.genes)
lasso4.sub <- subset(lasso4, lasso4$gene %in% all.genes)
bs1.sub <- subset(bs1, bs1$gene %in% all.genes)
bs2.sub <- subset(bs2, bs2$gene %in% all.genes)
bs3.sub <- subset(bs3, bs3$gene %in% all.genes)
bs4.sub <- subset(bs4, bs4$gene %in% all.genes)
rf1.sub <- subset(rf1, rf1$gene %in% all.genes)
rf2.sub <- subset(rf2, rf2$gene %in% all.genes)
rf3.sub <- subset(rf3, rf3$gene %in% all.genes)
rf4.sub <- subset(rf4, rf4$gene %in% all.genes)
sqrt.lasso1.sub <- subset(sqrt.lasso1, sqrt.lasso1$gene %in% all.genes)
sqrt.lasso2.sub <- subset(sqrt.lasso2, sqrt.lasso2$gene %in% all.genes)
sqrt.lasso3.sub <- subset(sqrt.lasso3, sqrt.lasso3$gene %in% all.genes)
sqrt.lasso4.sub <- subset(sqrt.lasso4, sqrt.lasso4$gene %in% all.genes)
# Join it all in a table (requires plyr package)
tbl.all <- join_all(list(lasso1.sub,
lasso2.sub,
lasso3.sub,
lasso4.sub,
bs1.sub,
bs2.sub,
bs3.sub,
bs4.sub,
rf1.sub,
rf2.sub,
rf3.sub,
rf4.sub,
sqrt.lasso1.sub,
sqrt.lasso2.sub,
sqrt.lasso3.sub,
sqrt.lasso4.sub),
by = 'gene', type = 'full')
# Replace NA's in LASSO columns with 0s
tbl.all[is.na(tbl.all)] <- 0
# Add the gene correlation column
tbl.all$gene.cor <- NA
for(gene in tbl.all$gene){
tbl.all$gene.cor[[which(tbl.all$gene == gene)]] <-
rf.result$edges$gene.cor[[which(rownames(rf.result$edges) == gene)]]
}
# Order the rows and return it
tbl.all <- tbl.all[order(abs(tbl.all$gene.cor), decreasing = TRUE),]
invisible(tbl.all)
} #assess_methodsAgainstDistributions
#----------------------------------------------------------------------------------------------------
assess_ampAD154AllSolversAndDistributions <- function(){
# Load the matrix and create the transformed versions
load(system.file(package="TReNA", "extdata/ampAD.154genes.mef2cTFs.278samples.RData"))
target.gene <- "MEF2C"
tfs <- setdiff(rownames(mtx.sub), "MEF2C")
tbl.all <- assess_methodsAgainstDistributions(mtx.sub,target.gene,tfs)
} #assess_ampAD154AllSolversAndDistributions
#----------------------------------------------------------------------------------------------------
transform_ampADFromRaw <- function(){
library(edgeR)
file.path <- "./inst/extdata/MayoRNAseq_RNAseq_TCX_geneCounts2.tsv"
transposedCounts <- read.table(file.path, header = TRUE, check.names = FALSE, as.is = TRUE, sep = "\t", stringsAsFactors = TRUE)
rownames(transposedCounts) <- transposedCounts$ensembl_id
# Make it into a matrix of counts and drop the ENSEMBL IDs
mtx <- as.matrix(transposedCounts[, -1])
# Make a DGEList object
expr <- DGEList(mtx, group = rep(1, ncol(mtx)))
# Calculate the normalization factors for TMM
normFactors <- calcNormFactors(expr, method = ("TMM"))
# Compute counts per million (returns a matrix)
normalizedCpm <- cpm(normFactors)
# Organize as a data frame w/ENSEMBL IDs and data values
### Where are the IDs to map to ENSEMBL?
ids <- as.data.frame(row.names(normalizedCpm))
colnames(ids) <-c("ensembl_id")
row.names(normalizedCpm) <- NULL
df <- as.data.frame(normalizedCpm)
newDF <- cbind(ids, df)
# Return the normalized data frame
invisible(newDF)
} #transform_ampADFromRaw
#----------------------------------------------------------------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.