# ORFikExperiment.R
# In ORFik: Open Reading Frames in Genomics

## ----eval = TRUE, echo = TRUE, message = FALSE--------------------------------
library(ORFik)

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Build an experiment from a create.experiment() template.
# 1. Pick the directory holding the NGS files (normally a folder with
#    bam / bed / wig files). Here it is the "extdata" folder of the installed
#    ORFik package, resolved with system.file().
dir <- system.file("extdata", "", package = "ORFik")
# Show what the chosen directory contains.
list.files(dir)

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# 2. Choose a name for the experiment
exper <- "ORFik"

# 3. Locate the annotation (.gff / .gtf) and the genome fasta shipped with ORFik
txdb <- system.file("extdata", "annotations.gtf", package = "ORFik")
fasta <- system.file("extdata", "genome.fasta", package = "ORFik")

# Build the experiment template. Arguments, in order:
#   dir          - directory with the NGS files
#   exper        - experiment name (passed positionally)
#   txdb         - gtf / gff / gff.db annotation
#   fa           - fasta genome
#   organism     - scientific naming
#   saveDir      - if not NULL, the experiment is saved directly
#   viewTemplate - set to FALSE here
template <- create.experiment(
  dir = dir,
  exper,
  txdb = txdb,
  fa = fasta,
  organism = "Homo sapiens",
  saveDir = NULL,
  viewTemplate = FALSE
)

# Display the template as a data.frame
data.frame(template)

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Fix the non-unique row: element 6 of template column X5 is set to
# "heart_valve" (the tissue fraction is heart valve).
template$X5[6] <- "heart_valve"
# Read the experiment from the corrected template; the result is kept in `df`
# and reused by the chunks below.
df <- read.experiment(template)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  save.experiment(df, file = "path/to/save/experiment.csv")

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Auto-print the experiment object.
df

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Call filepath() on the experiment with type = "default"
# (presumably lists the library file paths -- confirm in the ORFik docs).
filepath(df, type = "default")

## ----eval = TRUE, echo = TRUE, warning = FALSE--------------------------------
# Load the experiment first if it is not already present.
# Here we reuse the experiment that is already loaded: (df)

# Load transcript annotation.
# NOTE: this rebinds `txdb`, which earlier held the annotation file path,
# to whatever loadTxdb() returns for the experiment.
txdb <- loadTxdb(df) # transcript annotation
# And now the NGS data
outputLibs(df, chrStyle = seqlevelsStyle(txdb)) # Use txdb as seqlevelsStyle reference

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# These will be the names of the libraries:
bamVarName(df)

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Switch the expInVarName slot on and look at the names again:
slot(df, "expInVarName") <- TRUE
bamVarName(df)

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Switch the slot back off, call remove.experiments() on the experiment, then
# output the libraries again using the seqlevels style of txdb.
slot(df, "expInVarName") <- FALSE
remove.experiments(df)
outputLibs(df, chrStyle = seqlevelsStyle(txdb))

## ----eval = TRUE, echo = TRUE-------------------------------------------------
# Select transcripts using minimum thresholds of 30 for the 5' UTR, the CDS
# and the 3' UTR.
txNames <- filterTranscripts(
  txdb,
  minFiveUTR = 30,
  minCDS = 30,
  minThreeUTR = 30
)
# Load the leader, cds and trailer regions for the selected transcripts.
# The objects `leaders`, `cds` and `trailers` are used below without being
# assigned here, so loadRegions() presumably creates them -- confirm in docs.
loadRegions(
  txdb,
  parts = c("leaders", "cds", "trailers"),
  names.keep = txNames
)

## ----eval = TRUE, echo = TRUE, warning=FALSE----------------------------------
# Run transcriptWindow() over the three regions for row 3 of the experiment.
transcriptWindow(leaders, cds, trailers, df[3, ])

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  shiftFootprintsByExperiment(df[df$libtype == "RFP",])

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  df.baz <- read.experiment("zf_bazzini14_RFP")
#  shiftPlots(df.baz, title = "Ribo-seq, zebrafish, Bazzini et al. 2014")

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  shifts.load(df)

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  filepath(df[df$libtype == "RFP",], type = "pshifted")

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  outputLibs(df[df$libtype == "RFP",], type = "pshifted")

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  QCreport(df)

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  QCstats(df)

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  RiboQC.plot(df)

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  outputLibs(df, type = "pshifted") # Output all libraries, fastest way
#  libs <- bamVarName(df) # <- here are names of the libs that were output
#  cds <- loadRegion(df, "cds")
#  # parallel loop
#  bplapply(libs, FUN = function(lib, cds) {
#      return(entropy(cds, get(lib)))
#  }, cds = cds)
#  

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  files <- filepath(df, type = "pshifted")
#  cds <- loadRegion(df, "cds")
#  # parallel loop
#  res <- bplapply(files, FUN = function(file, cds) {
#      return(entropy(cds, fimport(file)))
#  }, cds = cds)
#  

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  files <- filepath(df, type = "pshifted")
#  cds <- loadRegion(df, "cds")
#  # Single thread loop
#  lapply(files, FUN = function(file, cds) {
#      return(entropy(cds, fimport(file)))
#  }, cds = cds)
#  

## ----eval = FALSE, echo = TRUE, warning=FALSE---------------------------------
#  library(data.table)
#  
#  outputLibs(df, type = "pshifted")
#  libs <- bamVarName(df) # <- here are names of the libs that were output
#  cds <- loadRegion(df, "cds")
#  # parallel loop
#  res <- bplapply(libs, FUN = function(lib, cds) {
#          return(entropy(cds, get(lib)))
#      }, cds = cds)
#  # Add some names and convert
#  names(res) <- libs
#  data.table::setDT(res) # Will give 1 column per library
#  res # Now by columns