get_UTR3eSet: prepare 3' UTR coverage data for usage test

View source: R/21.get_UTR3eSet.R

get_UTR3eSetR Documentation

prepare 3' UTR coverage data for usage test

Description

generate a UTR3eSet object with PDUI information for statistic tests

Usage

get_UTR3eSet(
  sqlite_db,
  normalize = c("none", "quantiles", "quantiles.robust", "mean", "median"),
  ...,
  singleSample = FALSE
)

Arguments

sqlite_db

A path to the SQLite database for InPAS, i.e. the output of setup_sqlitedb().

normalize

A character(1) vector, spcifying the normalization method. It can be "none", "quantiles", "quantiles.robust", "mean", or "median"

...

parameter can be passed into preprocessCore::normalize.quantiles.robust()

singleSample

A logical(1) vector, indicating whether data is prepared for analysis in a singleSample mode? Default, FALSE

Value

An object of UTR3eSet which contains following elements: usage: an GenomicRanges::GRanges object with CP sites info. PDUI: a matrix of PDUI PDUI.log2: log2 transformed PDUI matrix short: a matrix of usage of short form long: a matrix of usage of long form if singleSample is TRUE, one more element, signals, will be included.

Author(s)

Jianhong Ou, Haibo Liu

Examples

if (interactive()) {
  library(BSgenome.Mmusculus.UCSC.mm10)
  library(TxDb.Mmusculus.UCSC.mm10.knownGene)
  genome <- BSgenome.Mmusculus.UCSC.mm10
  TxDb <- TxDb.Mmusculus.UCSC.mm10.knownGene

  ## load UTR3 annotation and convert it into a GRangesList
  data(utr3.mm10)
  utr3 <- split(utr3.mm10, seqnames(utr3.mm10), drop = TRUE)

  bedgraphs <- system.file("extdata", c(
    "Baf3.extract.bedgraph",
    "UM15.extract.bedgraph"
  ),
  package = "InPAS"
  )
  tags <- c("Baf3", "UM15")
  metadata <- data.frame(
    tag = tags,
    condition = c("Baf3", "UM15"),
    bedgraph_file = bedgraphs
  )
  outdir <- tempdir()
  write.table(metadata,
    file = file.path(outdir, "metadata.txt"),
    sep = "\t", quote = FALSE, row.names = FALSE
  )

  sqlite_db <- setup_sqlitedb(metadata = file.path(
    outdir,
    "metadata.txt"
  ), outdir)
  addLockName(filename = tempfile())
  coverage <- list()
  for (i in seq_along(bedgraphs)) {
    coverage[[tags[i]]] <- get_ssRleCov(
      bedgraph = bedgraphs[i],
      tag = tags[i],
      genome = genome,
      sqlite_db = sqlite_db,
      outdir = outdir,
      chr2exclude = "chrM"
    )
  }

  data4CPsSearch <- setup_CPsSearch(sqlite_db,
    genome,
    chr.utr3 = utr3[["chr6"]],
    seqname = "chr6",
    background = "10K",
    TxDb = TxDb,
    hugeData = TRUE,
    outdir = outdir,
    minZ = 2,
    cutStart = 10,
    MINSIZE = 10,
    coverage_threshold = 5
  )
  ## polyA_PWM
  load(system.file("extdata", "polyA.rda", package = "InPAS"))

  ## load the Naive Bayes classifier model from the cleanUpdTSeq package
  library(cleanUpdTSeq)
  data(classifier)

  CPs <- search_CPs(
    seqname = "chr6",
    sqlite_db = sqlite_db,
    genome = genome,
    MINSIZE = 10,
    window_size = 100,
    search_point_START = 50,
    search_point_END = NA,
    cutEnd = 0,
    adjust_distal_polyA_end = TRUE,
    long_coverage_threshold = 2,
    PolyA_PWM = pwm,
    classifier = classifier,
    classifier_cutoff = 0.8,
    shift_range = 100,
    step = 5,
    outdir = outdir
  )
  utr3_cds_cov <- get_regionCov(
    chr.utr3 = utr3[["chr6"]],
    sqlite_db,
    outdir,
    phmm = FALSE
  )
  eSet <- get_UTR3eSet(sqlite_db,
    normalize = "none",
    singleSample = FALSE
  )
  test_out <- test_dPDUI(
    eset = eSet,
    method = "fisher.exact",
    normalize = "none",
    sqlite_db = sqlite_db
  )
}

jianhong/InPAS documentation built on Dec. 20, 2024, 3:11 p.m.