inst/doc/MOSim.R

## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()
options(tidy.opts=list(blank=FALSE, width.cutoff=60))

## ----load, echo=FALSE, eval=TRUE, include=FALSE----------------------------
library(MOSim)

## ----code, echo=TRUE, eval=FALSE-------------------------------------------
#  library(MOSim)
#  
#  omic_list <- c("RNA-seq")
#  
#  rnaseq_simulation <- mosim(omics = omic_list)

## ----code2, echo=TRUE, eval=TRUE, include=FALSE----------------------------
rnaseq_simulation <- mosim(omics = c("RNA-seq"),
                           times = 0,
                           numberGroups = 2,
                           numberReps = 4)

## ----code3, echo=TRUE, eval=TRUE, include=TRUE, error=TRUE-----------------
rnaseq_simulation <- mosim(omics = c("RNA-seq"),
                           times = 0,
                           numberGroups = 1,
                           numberReps = 4)

## ----code4, echo=TRUE, eval=FALSE, include=FALSE---------------------------
#  multi_simulation <- mosim(omics = c("RNA-seq", "DNase-seq"),
#                            times = c(0, 5, 10),
#                            numberGroups = 2,
#                            numberReps = 4,
#                            diffGenes = .3)
#  
#  dnase_simulation <- mosim(omics = c("DNase-seq"),
#                            times = c(0, 5, 10),
#                            numberGroups = 2,
#                            numberReps = 4,
#                            diffGenes = .3)

## ----code5, echo=TRUE, eval=TRUE,cache=TRUE, tidy=FALSE, results='hide', message=FALSE----
# Take a subset of the included dataset for illustration
# purposes. We could also load it from a csv file or RData,
# as long as we transform it to have 1 column named "Counts"
# and the identifiers as row names.
data("sampleData")

custom_rnaseq <- head(sampleData$SimRNAseq$data, 100)

# In this case, 'custom_rnaseq' is a data frame with
# the structure:
head(custom_rnaseq)
##                    Counts
## ENSMUSG00000000001   6572
## ENSMUSG00000000003      0
## ENSMUSG00000000028   4644
## ENSMUSG00000000031      8
## ENSMUSG00000000037      0
## ENSMUSG00000000049      0


# The helper 'omicData' returns an object with our custom data.
rnaseq_customdata <- omicData("RNA-seq", data = custom_rnaseq)

# We use the associative list of 'omics' parameter to pass
# the RNA-seq object.
rnaseq_simulation <- mosim(omics = list("RNA-seq" = rnaseq_customdata))

## ----code6, echo=TRUE, eval=TRUE, tidy=FALSE, include=TRUE, results='hide', message=FALSE----
# Select a subset of the available data as a custom dataset
data("sampleData")

custom_dnaseseq <- head(sampleData$SimDNaseseq$data, 100)

# Retrieve a subset of the default association list.
dnase_genes <- sampleData$SimDNaseseq$idToGene
dnase_genes <- dnase_genes[dnase_genes$ID %in%
                               rownames(custom_dnaseseq), ]

# In this case, 'custom_dnaseseq' is a data frame with
# the structure:
head(custom_dnaseseq)
##                       Counts
## 1_63176480_63177113      513
## 1_125435495_125436168   1058
## 1_128319376_128319506     37
## 1_139067124_139067654    235
## 1_152305595_152305752    105
## 1_172490322_172490824    290

# The association list 'dnase_genes' is another data frame
# with the structure:
head(dnase_genes)
##                      ID               Gene
## 29195 1_3670777_3670902 ENSMUSG00000051951
## 29196 1_3873195_3873351 ENSMUSG00000089420
## 29197 1_4332428_4332928 ENSMUSG00000025900
## 29198 1_4346315_4346445 ENSMUSG00000025900
## 29199 1_4416827_4416973 ENSMUSG00000025900
## 29200 1_4516660_4516798 ENSMUSG00000096126

dnaseseq_customdata <- omicData("DNase-seq",
                                data = custom_dnaseseq,
                                associationList = dnase_genes)

multi_simulation <- mosim(omics = list(
    "RNA-seq" = rnaseq_customdata,
    "DNase-seq" = dnaseseq_customdata)
    )

## ----code6.1, echo=TRUE, eval=TRUE, tidy=FALSE, results='hide', message=FALSE----
# Select a subset of the available data as a custom dataset
data("sampleData")

custom_tf <- head(sampleData$SimRNAseq$TFtoGene, 100)
#      TF             TFgene         LinkedGene
# 1  Aebp2 ENSMUSG00000030232 ENSMUSG00000000711
# 2  Aebp2 ENSMUSG00000030232 ENSMUSG00000001157
# 3  Aebp2 ENSMUSG00000030232 ENSMUSG00000001211
# 4  Aebp2 ENSMUSG00000030232 ENSMUSG00000001227
# 5  Aebp2 ENSMUSG00000030232 ENSMUSG00000001305
# 6  Aebp2 ENSMUSG00000030232 ENSMUSG00000001794

multi_simulation <- mosim(omics = list(
    "RNA-seq" = rnaseq_customdata,
    "DNase-seq" = dnaseseq_customdata),
    # The option is passed directly to mosim function instead of
    # being an element inside "omics" parameter.
    TFtoGene = custom_tf
    )

## ----code6.2, echo=TRUE, eval=TRUE, tidy=FALSE, results='hide', message=FALSE----
# Select a subset of the available data as a custom dataset
data("sampleData")

custom_cpgs <- head(sampleData$SimMethylseq$idToGene, 100)

# The ID column will be splitted using the "_" char
# assuming <chr>_<start>_<end>.
#
# These positions will be considered as CpG sites
# and used to generate CpG islands and other elements.
#
# Please refer to MOSim paper for more information.
#
#                    ID               Gene
# 1  11_3101154_3101154 ENSMUSG00000082286
# 2  11_3101170_3101170 ENSMUSG00000082286
# 3  11_3101229_3101229 ENSMUSG00000082286
# 4  11_3101287_3101287 ENSMUSG00000082286
# 5  11_3101329_3101329 ENSMUSG00000082286
# 6  11_3101404_3101404 ENSMUSG00000082286

## ----code7, echo=TRUE, eval=TRUE, results='hide', message=FALSE------------
omic_list <- c("RNA-seq")

rnaseq_options <- omicSim("RNA-seq", totalFeatures = 2500)

# The return value is an associative list compatible with
# 'omicsOptions'
rnaseq_simulation <- mosim(omics = omic_list,
                           omicsOptions = rnaseq_options)

## ----code8, echo=TRUE, eval=TRUE, results='hide', message=FALSE------------
omics_list <- c("RNA-seq", "DNase-seq")

# In R concatenaning two lists creates another one merging
# its elements, we use that for 'omicsOptions' parameter.
omics_options <- c(omicSim("RNA-seq", totalFeatures = 2500),
                   omicSim("DNase-seq",
                           # Limit the number of features to simulate
                           totalFeatures = 1500,
                           # Modify the percentage of regulators with effects.
                           regulatorEffect = list(
                                'activator' = 0.68,
                                'repressor' = 0.3,
                                'NE' = 0.02
                            )))

set.seed(12345)

multi_simulation <- mosim(omics = omics_list,
                          omicsOptions = omics_options)

## ----code9, echo=TRUE, eval=TRUE, results='hide', message=FALSE------------
rnaseq_customdata <- omicData("RNA-seq", data = custom_rnaseq)
rnaseq_options <- omicSim("RNA-seq", totalFeatures = 100)

rnaseq_simulation <- mosim(omics = list("RNA-seq" = rnaseq_customdata),
                           omicsOptions = rnaseq_options)

## ----code10b, echo=TRUE, eval=TRUE, results='hide', message=FALSE----------
# This will be a data frame with RNA-seq settings (DE flag, profiles)
rnaseq_settings <- omicSettings(multi_simulation, "RNA-seq")

# This will be a list containing all the simulated omics (RNA-seq
# and DNase-seq in this case)
all_settings <- omicSettings(multi_simulation)

## ----code10c, echo=TRUE, eval=TRUE, tidy=TRUE, results='hide', message=FALSE----
# This will be a list with 3 keys: settings, association and regulators
dnase_settings <- omicSettings(multi_simulation, "DNase-seq",
                               association = TRUE)

## ----code10, echo=TRUE, eval=TRUE, results='hide', message=FALSE-----------
# multi_simulation is an object returned by mosim function.

# This will be a data frame with RNA-seq counts
rnaseq_simulated <- omicResults(multi_simulation, "RNA-seq")

#                    Group1.Time0.Rep1 Group1.Time0.Rep2 Group1.Time0.Rep3 ...
# ENSMUSG00000073155              4539              5374              5808 ...
# ENSMUSG00000026251                 0                 0                 0 ...
# ENSMUSG00000040472              2742              2714              2912 ...
# ENSMUSG00000021598              5256              4640              5130 ...
# ENSMUSG00000032348               421               348               492 ...
# ENSMUSG00000097226                16                14                 9 ...
# ENSMUSG00000027857                 0                 0                 0 ...
# ENSMUSG00000032081                 1                 0                 0 ...
# ENSMUSG00000097164               794               822               965 ...
# ENSMUSG00000097871                 0                 0                 0 ...

# This will be a list containing RNA-seq and DNase-seq counts
all_simulated <- omicResults(multi_simulation)

## ----code11, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE------------

# The methods returns a ggplot plot, if called directly
# it will be automatically plotted.
plotProfile(multi_simulation,
            omics = c("RNA-seq", "DNase-seq"),
            featureIDS = list(
                "RNA-seq" = "ENSMUSG00000024691",
                "DNase-seq" = "19_12574278_12574408"
            ))

## ----code12, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE------------
library(ggplot2)

# Store the plot in a variable
profile_plot <- plotProfile(multi_simulation,
            omics = c("RNA-seq", "DNase-seq"),
            featureIDS = list(
                "RNA-seq" = "ENSMUSG00000024691",
                "DNase-seq" = "19_12574278_12574408"
            ))

# Modify the title and print
profile_plot +
    ggtitle("My custom title") +
    theme_bw() +
    theme(legend.position="top")

## ----session, echo=FALSE, eval=TRUE, results='asis'------------------------
toLatex(sessionInfo())

Try the MOSim package in your browser

Any scripts or data that you put into this service are public.

MOSim documentation built on Nov. 8, 2020, 5:50 p.m.