library(trena)
library(RUnit)
#----------------------------------------------------------------------------------------------------
printf <- function(...) print(noquote(sprintf(...)))
#----------------------------------------------------------------------------------------------------
if(!exists("tbl.mef2c")){
print(load(system.file(package="trena", "extdata", "tbl.mef2c.model.RData")))
}
#----------------------------------------------------------------------------------------------------
runTests <- function()
{
test_constructor()
test_.normalize_randomForest()
test_.normalize_pvals()
test_.normalize_betas()
test_.normalize_correlations()
test_normalizeModel()
test_addStats()
} # runTests
#----------------------------------------------------------------------------------------------------
test_constructor <- function()
{
printf("--- test_constructor")
pcaMaxCalculator <- PCAMax(tbl.mef2c)
checkEquals(is(pcaMaxCalculator), "PCAMax")
} # test_constructor
#----------------------------------------------------------------------------------------------------
test_.normalize_randomForest <- function()
{
printf("--- test_normalize_randomFores")
desired.max <- 10 # for random forest, the 3rd quartile is scaled up or down to 75% of this
vn <- trena:::.normalize_randomForest(tbl.mef2c$rfScore, normalizing.max=desired.max)
checkEqualsNumeric(fivenum(vn)[4], 0.75 * desired.max)
} # test_.mormalize_randomForest
#----------------------------------------------------------------------------------------------------
test_.normalize_pvals <- function()
{
printf("--- test_normalize_pvals")
desired.max <- 10
vn <- trena:::.normalize_pval(tbl.mef2c$lassoPValue, normalizing.max=desired.max)
checkEqualsNumeric(max(vn), desired.max)
} # test_.mormalize_pvals
#----------------------------------------------------------------------------------------------------
test_.normalize_betas <- function()
{
printf("--- test_normalize_betas")
vn.0 <- trena:::.normalize_betaValues(tbl.mef2c$betaLasso, normalizing.max=10)
vn.1 <- trena:::.normalize_betaValues(tbl.mef2c$betaRidge, normalizing.max=10)
checkEquals(as.numeric(lapply(list(vn.0, vn.1, vn.2), max)), c(10, 10, 10))
} # test_.normalize_betas
#----------------------------------------------------------------------------------------------------
test_.normalize_correlations <- function()
{
printf("--- test_normalize_betas")
vn.0 <- trena:::.normalize_correlationValues(tbl.mef2c$spearmanCoeff, normalizing.max=10)
vn.1 <- trena:::.normalize_correlationValues(tbl.mef2c$pearsonCoeff, normalizing.max=10)
checkTrue(max(vn.0) > 8)
checkTrue(max(vn.1) > 8)
} # test_.normalize_correlations
#----------------------------------------------------------------------------------------------------
test_pca <- function()
{
} # test_pca
#----------------------------------------------------------------------------------------------------
test_normalizeModel <- function()
{
printf("--- test_normalizeModel")
x <- PCAMax(tbl.mef2c)
mtx <- normalizeModel(x, normalizing.max=10)
checkEquals(dim(mtx), c(20,7))
coi <- c("betaLasso", "lassoPValue", "pearsonCoeff", "rfScore",
"betaRidge", "spearmanCoeff")
checkEquals(colnames(mtx), coi)
mtx.summary <- apply(mtx, 2, fivenum)
} # test_normalizeModel
#----------------------------------------------------------------------------------------------------
test_addStats <- function()
{
printf("--- test_addStats")
coi <- c(2,5:11,15:17)
coi.2 <- c(2,5:11,15)
x <- PCAMax(tbl.mef2c)
mtx <- normalizeModel(x, normalizing.max=10)
tbl.05 <- addStatsSimple(x, varianceToInclude=0.5, scalePCA=TRUE, quiet=FALSE)
tbl.05 <- tbl.05[, coi]
tbl.05 <- tbl.05[order(tbl.05$score, decreasing=TRUE),]
tbl.05.2 <- addStats(x, varianceToInclude=0.5, scalePCA=TRUE)
tbl.10 <- addStats(x, varianceToInclude=.99, scalePCA=FALSE)
checkEquals(head(tbl.05$tf.hgnc), c("GABPA", "SMAD5", "STAT4", "TCF12", "TBR1", "PKNOX2"))
checkEquals(head(tbl.10$tf.hgnc), c("GABPA", "SMAD5", "TCF12", "STAT4", "TBR1", "PKNOX2"))
# mtx.summary <- apply(mtx, 2, fivenum)
} # test_normalizeModel
#----------------------------------------------------------------------------------------------------
explore <- function ()
{
print(load("~/github/eqtlTrenaNotebooks/mef2c/trena/data/mtx.withDimers.cer.ros.tcx.RData"))
tfs <- subset(tbl.mef2c, abs(pearsonCoeff) > 0.7)$tf.hgnc
noquote(intersect(tfs, rownames(mtx.tcx)))
fit <- lm(formula = MEF2C ~ STAT4 + PKNOX2 + HLF + GABPA + SMAD5 + TCF12 + SATB2 + EMX1 + 0,
#fit <- lm(formula = MEF2C ~ STAT4 + TCF12 + GABPA + PKNOX2,
fit <- lm(formula = MEF2C~STAT4+TBR1+PKNOX2+HLF+EGR3+GABPA+PAX7+SMAD5+TCF12+SATB2+SATB1+EMX1+MKX+TSHZ3+MEF2D+DLX5,
data=as.data.frame(t(mtx.tcx)))
tbl.coef <- as.data.frame(summary(fit)$coefficients)
colnames(tbl.coef) <- c("estimate", "error", "t", "pval")
tbl.coef <- tbl.coef[order(tbl.coef$pval, decreasing=FALSE),]
} # explore
#----------------------------------------------------------------------------------------------------
simulatedData <- function()
{
printf("--- simulatedData")
set.seed(37)
mtx <- matrix(nrow=3, ncol=100, dimnames=list(paste0("g", 1:3), paste0("S", 1:100)))
mtx[1,] <- round(runif(100, -3, 3), 2)
mtx[2,] <- jitter(mtx[1,], amount=0.11)
mtx[3,] <- jitter(mtx[2,], amount=0.1)
cor(mtx[1,], mtx[2,])
cor(mtx[1,], mtx[3,])
cor(mtx[2,], mtx[3,])
#------------------------------------------------------------
# linear regression with base R's lm
#------------------------------------------------------------
fit <- lm(formula = g1 ~ g2 + g3 + 0, data=as.data.frame(t(mtx)))
tbl.coef <- as.data.frame(summary(fit)$coefficients)
colnames(tbl.coef) <- c("estimate", "error", "t", "pval")
tbl.coef
# estimate error t pval
# g2 0.95095814 0.1232547 7.7153921 1.021649e-11
# g3 0.04690956 0.1229210 0.3816237 7.035667e-01
# with some earlier matrix, got these results (closer to what I expected)
# estimate error t pval
# g2 0.5045317 0.05157506 9.782474 3.589792e-16
# g3 0.4896006 0.05161448 9.485722 1.582422e-15
#------------------------------------------------------------
# use our wrapper around lasso. ridge (get all predictors) with alpha=0
#------------------------------------------------------------
lasso <- LassoSolver(mtx, "g1", c("g2", "g3"), alpha=0, lambda=1)
run(lasso)
# beta intercept
# g2 0.5084780 0.001498278
# g3 0.4710166 0.001498278
rf <- RandomForestSolver(mtx,targetGene = "g1", candidateRegulators = c("g2","g3"))
run(rf)
# IncNodePurity
# g3 138.4487
# g2 131.0454
} # simulatedData
#----------------------------------------------------------------------------------------------------
mef2c.and.tcx <- function()
{
if(!exists("mtx.tcx"))
load("~/github/eqtlTrenaNotebooks/mef2c/trena/data/mtx.withDimers.cer.ros.tcx.RData")
tfs <- subset(tbl.mef2c, abs(pearsonCoeff) > 0.7)$tf.hgnc
noquote(intersect(tfs, rownames(mtx.tcx)))
#fit <- lm(formula = MEF2C ~ STAT4 + PKNOX2 + HLF + GABPA + SMAD5 + TCF12 + SATB2 + EMX1 + 0,
#fit <- lm(formula = MEF2C ~ STAT4 + TCF12 + GABPA + PKNOX2,
fit <- lm(formula = MEF2C~STAT4+TBR1+PKNOX2+HLF+EGR3+GABPA+PAX7+SMAD5+TCF12+SATB2+SATB1+EMX1+MKX+TSHZ3+MEF2D+DLX5,
data=as.data.frame(t(mtx.tcx)))
tbl.lm.coef <- as.data.frame(summary(fit)$coefficients)
colnames(tbl.lm.coef) <- c("estimate", "error", "t", "pval")
tbl.lm.coef <- round(tbl.lm.coef[order(tbl.lm.coef$pval, decreasing=FALSE),],2)
tbl.lm.coef
lm.scaler <- 100/max(tbl.lm.coef$estimate)
tbl.lm.coef$scaled <- round(lm.scaler * tbl.lm.coef$estimate,2)
tbl.lm.coef <- tbl.lm.coef[order(abs(tbl.lm.coef$scaled), decreasing=TRUE),]
# get rid of (Intercept) row
deleter <- grep("Intercept", rownames(tbl.lm.coef))
if(length(deleter) > 0)
tbl.lm.coef <- tbl.lm.coef[-deleter,]
genes <- c("MEF2C", tfs)
setdiff(genes, rownames(mtx.tcx))
lasso <- LassoSolver(mtx.tcx[genes,], "MEF2C", tfs, alpha=0, lambda=1)
tbl.lasso.coef <- round(run(lasso),2)
tbl.lasso.coef
lasso.factor <- 100/max(abs(tbl.lasso.coef$beta))
tbl.lasso.coef$scaled <- round(lasso.factor * tbl.lasso.coef$beta, 2)
tbl.lasso.coef <- tbl.lasso.coef[order(abs(tbl.lasso.coef$scaled), decreasing=TRUE),]
tbl.combined <- cbind(tbl.lm.coef, tbl.lasso.coef[rownames(tbl.lm.coef),])[, c(5, 8)]
colnames(tbl.combined) <- c("lm", "lasso")
# lm lasso
# PKNOX2 100.00 77.78
# TBR1 100.00 88.89
# STAT4 83.33 100.00
# EGR3 66.67 77.78
# HLF 66.67 77.78
# TCF12 -61.11 -88.89
# SMAD5 -55.56 -88.89
# GABPA -44.44 -88.89
# PAX7 38.89 66.67
# SATB1 -33.33 33.33
# EMX1 -33.33 44.44
# DLX5 27.78 77.78
# MKX 22.22 44.44
# TSHZ3 5.56 44.44
# SATB2 5.56 55.56
# MEF2D 0.00 55.56
indices <- match(rownames(tbl.lasso.coef), rownames(tbl.lm.coef))
} # mef2c.and.tcx
#----------------------------------------------------------------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.