inst/doc/RegEnrich.R

## ---- global_options, echo=FALSE, results="hide", eval=TRUE-------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, 
                      comment = "#>", 
                      fig.width = 5, 
                      fig.height = 4.5, 
                      fig.align = "center",
                      echo=TRUE, 
                      warning=FALSE, 
                      message=TRUE, 
                      tidy.opts=list(width.cutoff=80), 
                      tidy=FALSE)
rm(list = ls())
gc(reset = TRUE)
options(max.print = 200, width = 110)

## ----setup, warning=FALSE, message=FALSE, results="hide"----------------------------------------------------
library(RegEnrich)

## ----quickExample_Initializing, eval=FALSE, warning=FALSE, message=FALSE, results="hide"--------------------
#  object = RegenrichSet(expr = expressionMatrix, # expression data (matrix)
#                        colData = sampleInfo, # sample information (data frame)
#                        reg = regulators, # regulators
#                        method = "limma", # differentila expression analysis method
#                        design = designMatrix, # desing model matrix
#                        contrast = contrast, # contrast
#                        networkConstruction = "COEN", # network inference method
#                        enrichTest = "FET") # enrichment analysis method

## ----quickExample_4Steps, eval=FALSE, warning=FALSE, message=FALSE, results="hide"--------------------------
#  # Differential expression analysis
#  object = regenrich_diffExpr(object)
#  # Regulator-target network inference
#  object = regenrich_network(object)
#  # Enrichment analysis
#  object = regenrich_enrich(object)
#  # Regulator scoring and ranking
#  object = regenrich_rankScore(object)
#  
#  # Obtaining results
#  res = results_score(object)

## ----quickExample_simpler, eval=FALSE, warning=FALSE, message=FALSE, results="hide"-------------------------
#  # Perform 4 steps in RegEnrich analysis
#  object = regenrich_diffExpr(object) %>%
#    regenrich_network() %>%
#    regenrich_enrich() %>%
#    regenrich_rankScore()
#  
#  res = results_score(object)

## ----loadExpr-----------------------------------------------------------------------------------------------
data(Lyme_GSE63085)
FPKM = Lyme_GSE63085$FPKM

## ----transformFPKM------------------------------------------------------------------------------------------
log2FPKM = log2(FPKM + 1)
print(log2FPKM[1:6, 1:5])

## ----loadSampleInformation----------------------------------------------------------------------------------
sampleInfo = Lyme_GSE63085$sampleInfo
head(sampleInfo)

## ----loadTFs------------------------------------------------------------------------------------------------
data(TFs)
head(TFs)

## ----design-------------------------------------------------------------------------------------------------
method = "LRT_LM"
# design and reduced formulae
design = ~ patientID + week
reduced = ~ patientID

## ----networkConstruction------------------------------------------------------------------------------------
networkConstruction = "COEN"

## ----enrichTest---------------------------------------------------------------------------------------------
enrichTest = "FET"

## ----Initializing-------------------------------------------------------------------------------------------
object = RegenrichSet(expr = log2FPKM[1:2000, ],
                      colData = sampleInfo,
                      method = "LRT_LM", 
                      minMeanExpr = 0.01,
                      design = ~ patientID + week,
                      reduced = ~ patientID,
                      networkConstruction = "COEN", 
                      enrichTest = "FET")
print(object)

## ----regenrich_diffExpr-------------------------------------------------------------------------------------
object = regenrich_diffExpr(object)
print(object)
print(results_DEA(object))

## ----regenrich_diffExprLimma--------------------------------------------------------------------------------
object2 = regenrich_diffExpr(object, method = "limma", coef = "week3")
print(object2)
print(results_DEA(object2))

## ----network_COEN-------------------------------------------------------------------------------------------
set.seed(123)
object = regenrich_network(object)
print(object)

## ----network_COEN_topNet------------------------------------------------------------------------------------
# TopNetwork class
print(results_topNet(object))

## ----object_paramsIn_reg------------------------------------------------------------------------------------
# All parameters are stored in object@paramsIn slot
head(slot(object, "paramsIn")$reg)

## ----save_object, eval=FALSE--------------------------------------------------------------------------------
#  # Saving object to 'fileName.Rdata' file in '/folderName' directory
#  save(object, file = "/folderName/fileName.Rdata")

## ----regenrich_network_GRN, eval=FALSE----------------------------------------------------------------------
#  ### not run
#  library(BiocParallel)
#  # on non-Windows computers (use 2 workers)
#  bpparam = register(MulticoreParam(workers = 2, RNGseed = 1234))
#  # on Windows computers (use 2 workers)
#  bpparam = register(SnowParam(workers = 2, RNGseed = 1234))
#  
#  object3 = regenrich_network(object, networkConstruction = "GRN",
#                             BPPARAM = bpparam, minR = 0.3)
#  print(object3)
#  print(results_topNet(object3))
#  save(object3, file = "/folderName/fileName3.Rdata")

## ----user_defined_network1----------------------------------------------------------------------------------
network_user = results_topNet(object)
print(class(network_user))
regenrich_network(object2) = network_user
print(object2)

## ----user_defined_network_edges-----------------------------------------------------------------------------
network_user = slot(results_topNet(object), "elementset")
print(class(network_user))
regenrich_network(object2) = as.data.frame(network_user)
print(object2)

## ----regenrich_enrich_FET-----------------------------------------------------------------------------------
object = regenrich_enrich(object)
print(results_enrich(object))
# enrich_FET = results_enrich(object)@allResult
enrich_FET = slot(results_enrich(object), "allResult")
head(enrich_FET[, 1:6])

## ----regenrich_enrich_GSEA----------------------------------------------------------------------------------
set.seed(123)
object2 = regenrich_enrich(object, enrichTest = "GSEA", nperm = 5000)
print(results_enrich(object))
# enrich_GSEA = results_enrich(object2)@allResult
enrich_GSEA = slot(results_enrich(object2), "allResult")
head(enrich_GSEA[, 1:6])

## ----comparingFET_GSEA, fig.height=5, fig.width=5, eval=FALSE-----------------------------------------------
#  plotOrders(enrich_FET[[1]][1:20], enrich_GSEA[[1]][1:20])

## ----regenrich_rankScore------------------------------------------------------------------------------------
object = regenrich_rankScore(object)
results_score(object)

## ----plotExpressionRegulatorTarget, fig.height=4.5, fig.width=6, eval=FALSE---------------------------------
#  plotRegTarExpr(object, reg = "ARNTL2")

## ----case1ReadData1_1, eval=FALSE, include=FALSE------------------------------------------------------------
#  # Download all .cel files from GEO database (GSE31193 dataset) to current folder
#  # and then decompress it to GSE31193_RAW folder.
#  library(affy)
#  abatch = ReadAffy(celfile.path = "./GSE31193_RAW/")
#  
#  # Data normalization
#  eset <- rma(abatch)
#  
#  # Annotation
#  library(annotate)
#  library(hgu133plus2.db)
#  
#  ID <- featureNames(eset)
#  Symbol <- getSYMBOL(ID, "hgu133plus2.db")
#  fData(eset) <- data.frame(Symbol=Symbol)
#  

## ----case1ReadData1_2, warning=FALSE, message=FALSE---------------------------------------------------------
if (!requireNamespace("GEOquery"))
 BiocManager::install("GEOquery")

library(GEOquery)
eset <- getGEO(GEO = "GSE31193")[[1]]

## ----case1ReadData1_3---------------------------------------------------------------------------------------
# Samples information
pd0 = pData(eset)
pd = pd0[, c("title", "time:ch1", "agent:ch1")]
colnames(pd) = c("title", "time", "group")
pd$time = factor(c(`n/a` = "0", `6` = "6", `24` = "24")[pd$time],
                 levels = c("0", "6", "24"))
pd$group = factor(pd$group, levels = c("none", "IFN", "IL28B"))
pData(eset) = pd

# Only samples without or after 6 and 24 h IFN-α treatment
eset_IFN = eset[, pd$group %in% c("none", "IFN")]

# Order the samples based on time of treatment
pData(eset_IFN) = pData(eset_IFN)[order(pData(eset_IFN)$time),]

# Rename samples
colnames(eset_IFN) = pData(eset_IFN)$title

# Probes information
probeGene = fData(eset_IFN)[, "Gene Symbol", drop = FALSE]

## ----case1ReadData1_4---------------------------------------------------------------------------------------
probeGene$meanExpr = rowMeans(exprs(eset_IFN))
probeGene = probeGene[order(probeGene$meanExpr, decreasing = TRUE),]

# Keep a single probe for a gene, and remove the probe matching no gene.
probeGene_noDu = probeGene[!duplicated(probeGene$`Gene Symbol`), ][-1,]

data = eset_IFN[rownames(probeGene_noDu), ]
rownames(data) = probeGene_noDu$`Gene Symbol`

## ----case1ReadData1_5---------------------------------------------------------------------------------------
set.seed(1234)
data = data[sample(1:nrow(data), 5000), ]

## ----case1RegEnrichAnalyais, message=FALSE------------------------------------------------------------------
expressionMatrix = exprs(data) # expression data
# rownames(expressionMatrix) = probeGene_noDu$`Gene Symbol`
sampleInfo = pData(data) # sample information

design = ~time
contrast = c(0, 0, 1) # to extract the coefficient "time24"

data(TFs)
# Initializing a RegenrichSet object
object = RegenrichSet(expr = expressionMatrix, # expression data (matrix)
                      colData = sampleInfo, # sample information (data frame)
                      reg = unique(TFs$TF_name), # regulators
                      method = "limma", # differentila expression analysis method
                      design = design, # desing fomula
                      contrast = contrast, # contrast
                      networkConstruction = "COEN", # network inference method
                      enrichTest = "FET") # enrichment analysis method

# Perform RegEnrich analysis
set.seed(123)

# This procedure takes quite a bit of time.
object = regenrich_diffExpr(object) %>%
  regenrich_network() %>%
  regenrich_enrich() %>%
  regenrich_rankScore()

## ----case1RegEnrichResults, message=FALSE-------------------------------------------------------------------
res = results_score(object)
res

## ----case1Plot, , fig.height=4.5, fig.width=6, eval=FALSE---------------------------------------------------
#  plotRegTarExpr(object, reg = "STAT1")

## ----case2ReadData1, message = FALSE------------------------------------------------------------------------
library(GEOquery)
eset <- getGEO(GEO = "GSE130567")[[1]]
pdata = pData(eset)[, c("title", "geo_accession", "cultured in:ch1", "treatment:ch1")]
colnames(pdata) = c("title", "accession", "cultured", "treatment")
pData(eset) = pdata

# Only samples cultured with M-CSF in the presence or absence of IFN-γ
eset = eset[, pdata$treatment %in% c("NT", "IFNG-3h") & pdata$cultured == "M-CSF"]

# Sample information
sampleInfo = pData(eset)
rownames(sampleInfo) = paste0(rep(c("Resting", "IFNG"), each = 3), 1:3)
sampleInfo$treatment = factor(rep(c("Resting", "IFNG"), each = 3),
                              levels = c("Resting", "IFNG"))

## ----case2DownloadReads, message=FALSE, warning=FALSE-------------------------------------------------------
tmpFolder = tempdir()
tmpFile = tempfile(pattern = "GSE130567_", tmpdir = tmpFolder, fileext = ".tar")
download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE130567&format=file",
              destfile = tmpFile, mode = "wb")
untar(tmpFile, exdir = tmpFolder)
files = untar(tmpFile, list = TRUE)
filesFull = file.path(tmpFolder, files)

## ----case2ReadData2, message=FALSE, warning=FALSE-----------------------------------------------------------
dat = list()
for (file in filesFull){
  accID = gsub(".*(GSM\\d{7}).*", "\\1", file)
  if(accID %in% sampleInfo$accession){
    zz = gzfile(file, "rt")
    zzdata = read.csv(zz, header = FALSE, sep = "\t", skip = 4, row.names = 1)
    close(zz)
    zzdata = zzdata[,1, drop = FALSE] # Extract the first numeric column
    colnames(zzdata) = accID
    dat = c(dat, list(zzdata))
  }
}
edata = do.call(cbind, dat)

edata = edata[grep(".*[0-9]+$", rownames(edata)),] # remove PAR locus genes
rownames(edata) = substr(rownames(edata), 1, 15)
colnames(edata) = rownames(sampleInfo)

# Retain genes with average read counts higher than 1
edata = edata[rowMeans(edata) > 1,]

## ----case2ReadData1_3---------------------------------------------------------------------------------------
set.seed(1234)
edata = edata[sample(1:nrow(edata), 5000), ]

## ----case2RegEnrich-----------------------------------------------------------------------------------------
expressionMatrix = as.matrix(edata) # expression data

design = ~ treatment
reduced = ~ 1

data(TFs)
# Initializing a RegenrichSet object
object = RegenrichSet(expr = expressionMatrix, # expression data (matrix)
                      colData = sampleInfo, # sample information (data frame)
                      reg = unique(TFs$TF), # regulators
                      method = "LRT_DESeq2", # differentila expression analysis method
                      design = design, # desing fomula
                      reduced = reduced, # reduced
                      networkConstruction = "COEN", # network inference method
                      enrichTest = "FET") # enrichment analysis method

# Perform RegEnrich analysis
set.seed(123)

# This procedure takes quite a bit of time.
object = regenrich_diffExpr(object) %>%
  regenrich_network() %>%
  regenrich_enrich() %>%
  regenrich_rankScore()

## ----case2RegEnrichResults, message=FALSE-------------------------------------------------------------------
res = results_score(object)
res$name = TFs[res$reg, "TF_name"]
res

## ----case2Plot, , fig.height=4.5, fig.width=6, eval=FALSE---------------------------------------------------
#  plotRegTarExpr(object, reg = "ENSG00000028277")

## ----session------------------------------------------------------------------------------------------------
sessionInfo()

Try the RegEnrich package in your browser

Any scripts or data that you put into this service are public.

RegEnrich documentation built on March 7, 2021, 2 a.m.