Rnits-vignette.R
In Rnits: R Normalization and Inference of Time Series data

## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()

## ----setup, include=FALSE, cache=FALSE----------------------------------------
library(knitr)
# set global chunk options
opts_knit$set(base.dir = '.')
opts_chunk$set(fig.path='figure/minimal-', fig.align='center', fig.show='hold')
options(replace.assign=TRUE,width=80)

## ----loaddata, cache=TRUE, warning=FALSE, tidy=TRUE, dependson='data_import'----
# Download NCBI GEO data with GEOquery
library(knitr)
rm(list = ls())
library(GEOquery)
library(stringr)
library(Rnits)
gds <- getGEO('GSE4158', AnnotGPL = FALSE)[[1]]
class(gds)
gds

# Extract non-replicate samples
pdata <- pData(gds)
filt <- pdata$characteristics_ch2 %in% names(which(table(pdata$characteristics_ch2) == 2))
gds <- gds[, filt]
pdata <- pData(gds)
time <- as.numeric(str_extract(pdata$characteristics_ch2, "\\d+"))
sample <- rep("2g/l", length(time))
sample[grep("0.2g/l", gds[["title"]])] <- "0.2g/l"

# Format phenotype data with time and sample information
gds[["Time"]] <- time
gds[["Sample"]] <- sample
dat <- gds
fData(dat)["Gene Symbol"] <- fData(dat)$ORF

## ----buildrnitsobj, tidy=TRUE, cache=TRUE, dependson='loaddata'---------------
# Build rnits data object from formatted data (samples can be in any order)
# and between-array normalization.
rnitsobj = build.Rnits(dat[, sample(ncol(dat))], logscale = TRUE, normmethod = "Between")
rnitsobj


# Alternatively, we can also build the object from just a data matrix, by supplying the "probedata" and "phenodata" tables
datdf <-  exprs(dat)
rownames(datdf) <- fData(dat)$ID
probedata <- data.frame(ProbeID=fData(dat)$ID, GeneName=fData(dat)$ORF)
phenodata <- pData(dat)
rnitsobj0 = build.Rnits(datdf, probedata = probedata, phenodata = phenodata, logscale = TRUE, normmethod = 'Between')

# Extract normalized expression values
lr <- getLR(rnitsobj)
head(lr)

# Impute missing values using K-nn imputation
lr <- getLR(rnitsobj, impute = TRUE)
head(lr)


## ----fit_model, cache=TRUE, fig.keep='first', dependson='buildrnitsobj'-------

# Fit model using gene-level summarization
rnitsobj <- fit(rnitsobj0, gene.level = TRUE, clusterallsamples = TRUE)

## ----fit_model_noclustering, eval=FALSE, dependson='buildrnitsobj'------------
#  rnitsobj_nocl <- fit(rnitsobj0, gene.level = TRUE, clusterallsamples = FALSE)
#  
#  opt_model <- calculateGCV(rnitsobj0)
#  rnitsobj_optmodel <- fit(rnitsobj0, gene.level = TRUE, model = opt_model)

## ----modelsummary, cache=TRUE, dependson='fit_model',  tidy=TRUE--------------

# Get pvalues from fitted model
pval <- getPval(rnitsobj)
head(pval)

# Get ratio statistics from fitted model
stat <- getStat(rnitsobj)
head(stat)


# If clustering was used, check the cluster gene distribution
table(getCID(rnitsobj))

# P-values, ratio statistics and cluster ID's can be retrieved for all genes together
fitdata <- getFitModel(rnitsobj)
head(fitdata)

# View summary of top genes
summary(rnitsobj, top = 10)

# Extract data of top genes (5% FDR)
td <- topData(rnitsobj, fdr = 5)
head(td)

## ----plotresults, dependson='modelsummary', cache=TRUE, tidy=TRUE-------------
# Plot top genes trajectories
plotResults(rnitsobj, top = 16)



## ----sessionInfo--------------------------------------------------------------
sessionInfo()