R/object.R

Defines functions CreateBobroObject FindBobroMotif PlotBobroMotif .AddMetaData

Documented in CreateBobroObject FindBobroMotif PlotBobroMotif

## usethis namespace: start
#' @importFrom Rcpp sourceCpp
## usethis namespace: end
NULL

#' @importFrom Rcpp evalCpp
#' @importFrom Matrix colSums rowSums colMeans rowMeans
#' @importFrom methods setClass setOldClass setClassUnion slot
#' slot<- setMethod new signature slotNames is
#' @importClassesFrom Matrix dgCMatrix
#'
#'
NULL

# The Bobro Object structure
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Class definitions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
setOldClass(Classes = "package_version")
setClassUnion(name = "AnyMatrix", c("matrix", "dgCMatrix"))

#' The Graph Class
#'
#' The Graph class simply inherits from dgCMatrix. We do this to enable future expandability of graphs.
#'
#' @name Graph-class
#' @rdname Graph-class
#' @exportClass Graph
#'
#' @seealso \code{\link[Matrix]{dgCMatrix-class}}
#'
setClass(
  Class = "Graph",
  contains = "dgCMatrix"
)


#' The BobroCommand Class
#'
#' The BobroCommand is used for logging commands that are run on a BobroObject. It stores parameters and timestamps
#'
#' @slot name Command name
#' @slot time_stamp Timestamp of when command was tun
#' @slot call_string String of the command call
#' @slot params List of parameters used in the command call
#'
#' @name BobroCommand-class
#' @rdname BobroCommand-class
#' @exportClass BobroCommand
#'
setClass(
  Class = "BobroCommand",
  slots = c(
    name = "character",
    time_stamp = "POSIXct",
    call_string = "character",
    params = "ANY"
  )
)


#' The Bobro Class
#'
#' The Bobro object is a representation of DNA sequences and ChIP assay data for R; The
#' object was designed to be as self-contained as possible, and easily extendible to new methods.
#'
#' @slot assays A list of assays for this project
#' @slot meta_data Contains meta-information about each cell, starting with number of genes detected (nGene)
#' and the original identity class (orig.ident); more information is added using \code{AddMetaData}
#' @slot graphs A list of \code{\link{Graph-class}} objects
#' @slot neighbors ...
#' @slot approximations A list of matrix approximations for this object
#' @slot project.name Name of the project
#' @slot misc A list of miscellaneous information
#' @slot version Version of Bobro this object was built under
#' @slot commands A list of logged commands run on this \code{Bobro} object
#' @slot tools A list of miscellaneous data generated by other tools, should be filled by developers only using \code{\link{Tool}<-}
#' @slot parameters A list of parameters run on this \code{Bobro} object
#' @name Bobro-class
#' @rdname Bobro-class
#' @exportClass Bobro
#'
setClass(
  Class = "Bobro",
  slots = c(
    sequence = "ANY",
    meta_data = "list",
    graphs = "list",
    neighbors = "list",
    approximations = "list",
    pval="list",
    pwm="list",
    instance="list",
    consensus="list",
    project_name = "character",
    misc = "list",
    version = "package_version",
    commands = "list",
    tools = "list",
    parameters = "list"
  )
)

setValidity(
  "Bobro",
  function(object) {
    if (class(object@sequence) != "DNAStringSet") {
      return("'genome' slot must have length 1")
    }
    TRUE
  }
)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Functions
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' Create a Bobro object
#'
#' Create a Bobro object from a DNA string object (usually read fasta format file from package Biostrings).
#'

#' @param project Sets the project name for the Bobro object.
#' @param meta_data Additional metadata to add to the Bobro object.
#' @importFrom utils packageVersion
#' @export
#'
#' @examples
#' DNAStringObject <- readDNAStringSet("data/test1.fa")
#' my.object <- CreateBobroObject(DNAStringObject)
#' my.object
CreateBobroObject <- function(
                              DNAStringObject,
                              project = "BobroProject",
                              meta_data = NULL) {
  if (class(DNAStringObject) != "DNAStringSet") {
    stop("x must be a DNAStringSet object from package Biostrings! ")
  }
  header <- names(DNAStringObject)
  frequency <- alphabetFrequency(DNAStringObject, baseOnly = T, as.prob = T)
  base_frequency <- alphabetFrequency(DNAStringObject, as.prob = T, collapse = T)[1:4]
  init.meta_data <- list(header = header, frequency = frequency, base_frequency = base_frequency)
  default_parameter <- list(
    ## Commonly used parameters
    length = 14, # motif length [5, ]
    is_fast = FALSE, # the flag of fast version of bobro which just enhance two ends of motif
    n_output = 10, # number of closures to report (used under a specific input length)
    both_strand = T, #search motif sites on both strands (reverse complement)
    ## Advanced parameters:
    n = 1, # top n closures under each length are used when searching multiple motif length
    w = 2, # the weight of the two motif ends
    k = 3, # the minimum size of the initial motif seeds
    c = 1, # consistency level of the motif seeds (0.5-1.0]
    s = 5, # the nunber of simulation times [5, ]
    u = 0.7, # the threshold of two closures' similarity socre (0,1]
    a = 9, # the upper limit of conservation level (N,10]
    N = 6, # the lower limit of conservation level (0,a)
    P = FALSE, # the flag of palindromic of TFBS
    M = FALSE, # the flag of mirror of TFBS
    G = FALSE, # the flag of global TF prediction
    C = FALSE, # the flag of local TF prediction
    E = FALSE, # the flag of expansion of closures base on the threshold 0.3-0.8
    A = FALSE, # the flag of approximation of pvalue calculation
    W = FALSE, # the flag of considering sequences weight
    R = 1, # the range when we use [L,U]
    e = 3, # the times of seed alignments enlargement [1,3]
    b = 0.95 # the conserve level when search in background genome
    ## Advanced parameters:
  )
  object <- new(
    Class = "Bobro",
    sequence = DNAStringObject,
    meta_data = init.meta_data,
    project_name = project,
    version = packageVersion(pkg = "Bobro"),
    parameters = default_parameter
  )
  return(object)
}

#' Find Bobro motif
#'
#'
#' @param project Sets the project name for the Bobro object.
#' @export
#'
#' @examples
#' test_sequence <- Biostrings::readDNAStringSet("../data/cra.fa")
#' object <- CreateBobroObject(test_sequence)
#' object <- FindBobroMotif(object)
#'
FindBobroMotif <- function(object) {
  quiet <- function(x) {
    sink(tempfile())
    on.exit(sink())
    invisible(force(x))
  }
  result <- quiet(rGADEM::GADEM(object@sequence,verbose=0,numGeneration = 1,nmotifs = 10, pValue = 1, eValue = 100))
  pwm <- lapply(result@motifList, function(x){
    return(x@pwm[,1:14])
  })

  pval <- lapply(result@motifList, function(x){
    return(x@alignList[[1]]@pval)
  })

  consensus <- lapply(result@motifList, function(x){
    return(substr(x@alignList[[2]]@seq,1,14))
  })
  names(pwm) <- names(pval) <- names(consensus) <- paste("Moitf",seq(1:length(pwm)),sep="-")

  object@pwm <- pwm[1:10]
  object@pval <- pval[1:10]
  object@consensus <- consensus[1:10]
  return(object)
}

#' Plot motif logo
#'
#'
#' @param project Sets the project name for the Bobro object.
#' @export
#'
#' @examples
#' test_sequence <- Biostrings::readDNAStringSet("../data/cra.fa")
#' object <- CreateBobroObject(test_sequence)
#' object <- FindBobroMotif(object)
#' PlotBobroMotif(object)
#'
PlotBobroMotif <- function(object) {
  p1 <- ggseqlogo::ggseqlogo(object@pwm)
  print(p1)
}

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Internal
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Internal AddMetaData defintion
#
# @param object An object
# @param metadata A vector, list, or data.frame with metadata to add
# @param col.name A name for meta data if not a named list or data.frame
#
# @return object with metadata added
#
.AddMetaData <- function(object, metadata, col.name = NULL) {
  if (is.null(x = col.name) && is.atomic(x = metadata)) {
    stop("'col.name' must be provided for atomic metadata types (eg. vectors)")
  }
  if (inherits(x = metadata, what = c("matrix", "Matrix"))) {
    metadata <- as.data.frame(x = metadata)
  }
  col.name <- col.name %||% names(x = metadata) %||% colnames(x = metadata)
  if (is.null(x = col.name)) {
    stop("No metadata name provided and could not infer it from metadata object")
  }
  object[[col.name]] <- metadata
  # if (class(x = metadata) == "data.frame") {
  #   for (ii in 1:ncol(x = metadata)) {
  #     object[[colnames(x = metadata)[ii]]] <- metadata[, ii, drop = FALSE]
  #   }
  # } else {
  #   object[[col.name]] <- metadata
  # }
  return(object)
}
Wang-Cankun/Bobro documentation built on Feb. 23, 2020, 5:40 a.m.