explore/rna.protein.discordance/trenaModels/individualGenes/exploreModels.R

library(RUnit)
files <- list.files(pattern="trena.*RData")
dir <- "~/github/TrenaProjectErythropoiesis/inst/extdata/harmonized-rna-srm-14-timepoints"
mtx.rna <- get(load(file.path(dir, "mtx-rna-pkfm-27170x14.RData")))[,-14]
mtx.srm <- get(load(file.path(dir, "mtx-srm-copyNumber-100x13.RData")))

#------------------------------------------------------------------------------------------------------------------------
runTests <- function()
{
   test_assignRank()
   test_assignRankAllTables()

} # runTests
#------------------------------------------------------------------------------------------------------------------------
assignRank <- function(tbl, scoreName)
{
   stopifnot(length(unique(tbl$target)) == 1)
   stopifnot(scoreName %in% colnames(tbl))

      # for instance, for OGT, the largest rf value is in row 95
      # head(tbl$rf, n=3):  95  85  91
      # tbl$rfScore[head(new.order, n=3)]  146.5121 117.2359 103.3986

   new.order <- tbl[, scoreName]
   tbl.reordered <- tbl[new.order,]
   tbl.reordered$rank <- seq_len(nrow(tbl.reordered))

   tbl.reordered

} # assignRank
#------------------------------------------------------------------------------------------------------------------------
test_assignRank <- function()
{
    message(sprintf("--- test_assignRandk"))

   if(!exists("tbl.test"))
      tbl.test <- get(load("tbl.test.RData"))
   tbl.sub <- subset(tbl.test, target=="OGT")
   dim(tbl.sub)  # 117 16

   tbl.ranked <- assignRank(tbl.sub, "rf")
   checkEquals(nrow(tbl.ranked), nrow(tbl.sub))
   checkEquals(tbl.ranked$gene[1:3], c("MAZ", "NR2C2", "NCBP3"))
   checkEqualsNumeric(tbl.ranked$rfScore[1:3], c(146.5121, 117.2359, 103.3986), tol=0.001)

   tbl.ranked <- assignRank(tbl.sub, "pear.abs")
   checkEquals(nrow(tbl.ranked), nrow(tbl.sub))
   checkEquals(tbl.ranked$gene[1:3], c("CSTF2T", "ELAVL1", "UCHL5"))
   checkEqualsNumeric(tbl.ranked$pearsonCoeff[1:3], c(-0.9153902, -0.9117892, -0.9094098), tol=0.001)

   tbl.ranked <- assignRank(tbl.sub, "spear.pos")
   checkEquals(nrow(tbl.ranked), nrow(tbl.sub))
   checkEquals(tbl.ranked$gene[1:3], c("PBX2", "STAU1", "AKAP8L"))
   checkEqualsNumeric(tbl.ranked$spearmanCoeff[1:3], c(0.8769231, 0.8505495, 0.7978022), tol=0.001)

} # test_assignRank
#------------------------------------------------------------------------------------------------------------------------
assignRankAllTables <- function(tbl, scoreName)
{
   targetGenes <- unique(tbl$target)
   tbls <- list()
   for(targetGene in targetGenes){
      tbl.sub <- subset(tbl, target==targetGene)
      tbl.ranked <- assignRank(tbl.sub, scoreName)
      tbls[[targetGene]] <- tbl.ranked
      }
   tbl.all.ranked <- do.call(rbind, tbls)
   rownames(tbl.all.ranked) <- NULL
   invisible(tbl.all.ranked)

} # assignRankAllTables
#------------------------------------------------------------------------------------------------------------------------
test_assignRankAllTables <- function()
{
   message(sprintf("--- test_assignRankAllTables"))
   if(!exists("tbl.test"))
      tbl.test <- get(load("tbl.test.RData"))

   tbl.ranked <- assignRankAllTables(tbl.test, "spear.pos")
   checkEquals(nrow(tbl.test), nrow(tbl.ranked))
   checkEquals(subset(tbl.ranked, gene=="STAU1" & rank <=10)$target, c("BACH1", "OGT", "RCOR1"))

   tbl.ranked <- assignRankAllTables(tbl.test, "rf")
   x <- as.list(head(sort(table(subset(tbl.ranked, rank <= 5)$gene), decreasing=TRUE), n=3))
   checkEquals(x, list(KLF9=5, SP1=4, FOSL1=3))

} # test_assingRankAllTables
#------------------------------------------------------------------------------------------------------------------------
combine <- function()
{

    tbls <- list()
    i <- 0

    for(file in files){
      i <- i + 1
      printf("file: %s", file)
      load(file)
      tbl <- tbl.trena.both
      new.order <- order(abs(tbl$spearmanCoeff), decreasing=TRUE)
      tbl <- tbl[new.order,]
      if("rank" %in% colnames(tbl)){
          rank.col <- grep("rank", colnames(tbl))
          tbl <- tbl[, -rank.col]
      }
      tbl$spear.pos <- order(tbl$spearmanCoeff, decreasing=TRUE)
      tbl$spear.abs  <- order(abs(tbl$spearmanCoeff), decreasing=TRUE)
      tbl$pear.pos  <- order(tbl$pearsonCoeff, decreasing=TRUE)
      tbl$pear.abs  <- order(abs(tbl$pearsonCoeff), decreasing=TRUE)
      tbl$rf <- order(tbl$rfScore, decreasing=TRUE)
      tbl$lasso.pos <- order(tbl$betaLasso, decreasing=TRUE)
      tbl$lasso.abs <- order(abs(tbl$betaLasso), decreasing=TRUE)
      # if(tbl$target[1]=="OGT") browser()
      tbls[[i]] <- tbl
      }

    tbl <- do.call(rbind, tbls)
    dim(tbl)

    tbl.ogt <- subset(tbl, target=="OGT")
    sort(table(subset(tbl, rank <= 10 & class=="rbp")$gene), decreasing=TRUE)

} # combine
#------------------------------------------------------------------------------------------------------------------------
top.regulators <- function()
{
   if(!exists("tbl.test"))
     tbl.test <- get(load("tbl.test.RData"))

   max <- 10

   tbl.ranked <- assignRankAllTables(tbl.test, "rf")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)

   tbl.ranked <- assignRankAllTables(tbl.test, "spear.pos")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)

   tbl.ranked <- assignRankAllTables(tbl.test, "spear.abs")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)

   tbl.ranked <- assignRankAllTables(tbl.test, "pear.pos")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)

   tbl.ranked <- assignRankAllTables(tbl.test, "pear.abs")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)

   tbl.ranked <- assignRankAllTables(tbl.test, "lasso.abs")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)

   tbl.ranked <- assignRankAllTables(tbl.test, "lasso.pos")
   head(as.data.frame(sort(table(subset(tbl.ranked, rank <= max)$gene), decreasing=TRUE)), n=10)



} # top.6
#----------------------------------------------------------------------------------------------------
# marjorie:  The ProEB to EB transition is roughly from Day 10 to Day 14.
#
# More precisely (based on cytof) it is from Day 12 to Day 14, but a
# lot of proteins start to change at Day 11. So I think it is fair
# to look from Day10 to 14.
find.discordant.genes <- function()
{
  source("~/s/data/public/human/symToGeneID.R"); test_assignGeneIDs()

  proteins <- rownames(mtx.srm)
  length(proteins)
  length(intersect(
  setdiff(proteins, rownames(mtx.rna))
  no.gene <- setdiff(proteins, rownames(mtx.rna))
  length(no.gene)
  x <- assignGeneIDs(no.gene)

  map <- c("BCL11A_XL_L" = "BCL11A",
           "CBP" = "CREBBP",
           "coREST" = "RCOR1",
           "E12/E47" =  "TCF3",
           "GR" = "NR3C1",
           "HXB4" = "HOXB4",
           "MLL3" = "KMT2C",
           "MLL4 (KMT2D)" = "KMT2D",
           "NC2B" = "DR1",
           "PO2F1" = "POU2F1",
           "RPB1" = "POLR2A",
           "SET1B" = "SETD1B",
           "SETB1" = "SETDB1",
           "SIR6" = "SIRT6",
           "SMCA4" = "SMARCA4",
           "SMRC1" = "SMARCC1",
           "SNF5" = "SMARCB1",
           "STA5A" = "STAT5A",
           "T2FA" = "GTF2F1",
           "TF2B" = "GTF2F2",
           "TF3C2" = "GTF3C2",
           "UBF1" = "UBTF",
           "UTX"  = "KDM6A",
           "ZC11A" =  "ZC3H11A")

   all(names(map) %in% rownames(mtx.srm))
   all(as.character(map) %in% rownames(mtx.rna))
   indices <- match(names(map), rownames(mtx.srm))

   rownames(mtx.srm)[indices] <- as.character(map)
   dir <- "~/github/TrenaProjectErythropoiesis/inst/extdata/harmonized-rna-srm-14-timepoints"

   save(mtx.srm, file=file.path(dir, "mtx-srm-copyNumber-100x13.RData"))

} # find.discordant.genes
#----------------------------------------------------------------------------------------------------
gpkow <- function()
{
    subset(tbl, gene=="GPKOW" & spear.abs < 10)$target  # "RAD21"  "RCOR1"  "SAP130" "TAL1"
    gpkow <- mtx.rna["GPKOW",]
    gpkow <- gpkow/max(gpkow)
    plot(gpkow, type="b", col="black", ylim=c(0,1))

    rad21 <- mtx.rna["RAD21",]
    rad21 <- rad21/max(rad21)
    lines(rad21, type="b", col="blue")

    rcor1 <- mtx.rna["RCOR1",]
    rcor1 <- rcor1/max(rcor1)
    lines(rcor1, type="b", col="green")

    sap130 <- mtx.rna["SAP130",]
    sap130 <- sap130/max(sap130)
    lines(sap130, type="b", col="brown")

    tal1 <- mtx.rna["TAL1",]
    tal1 <- tal1/max(tal1)
    lines(tal1, type="b", col="red")

    tal1.p <- mtx.srm["TAL1",]
    tal1.p <- tal1.p/max(tal1.p)
    lines(tal1.p, type="b", col="blue")

    legend(1, 0.98, c("GPKOW mRNA", "TAL1 mRNA", "TAL1 protein"),
           c("black", "red",       "blue")
           )

} # gpkow
#----------------------------------------------------------------------------------------------------
PriceLab/TrenaProjectErythropoiesis documentation built on Jan. 10, 2022, 3:55 a.m.