inst/Functions/motifStack.functions.R

#' Class \code{"pcm"}
#'
#' An object of class \code{"pcm"} represents the position count matrix of a
#' DNA/RNA/amino-acid sequence motif. The entry stores a matrix, which in row
#' i, column j gives the counts of observing nucleotide/or amino acid i in
#' position j of the motif.
#'
#'
#' @name pcm-class
#' @aliases pcm
#' @docType class
#' @section Objects from the Class: Objects can be created by calls of the form
#' \code{new("pcm", mat, name, alphabet, color, background)}.
#' @keywords classes
#' @export
#' @examples
#'
#' pcm <- read.table(file.path(find.package("motifStack"), "extdata", "bin_SOLEXA.pcm"))
#' pcm <- pcm[,3:ncol(pcm)]
#' rownames(pcm) <- c("A","C","G","T")
#' motif <- new("pcm", mat=as.matrix(pcm), name="bin_SOLEXA")
#' plot(motif)
#' pcm2pfm(pcm)
#' pcm2pssm(pcm)
setClass(
  "pcm",
  representation(mat="matrix", name="character", alphabet="character",
                 color="character", background="numeric", tags="list",
                 markers="list"),
  validity=function(object){
    re<-TRUE
    if (checkInteger(unlist(object@mat))) re<-"Columns of pcm must be integer"
    if (is.null(rownames(object@mat))) re<-"rownames of pcm is empty"
    if (is.null(names(object@color))) re<-"names of color is empty"
    if (length(setdif(rownames(object@mat), names(object@color)))>0) re<-"not every alphabet in pcm has a corresponding color"
    if (is.null(names(object@background))) re<-"names of background is empty"
    if (length(setdif(rownames(object@mat), names(object@background)))>0) re<-"not every alphabet in pcm has a background"
    if (!object@alphabet %in% c("DNA", "RNA", "AA", "others")) re<-"alphabet must be either DNA, RNA, AA or others"
    if (object@alphabet=="DNA" & !all(rownames(object@mat) %in% c("A","C","G","T"))) re<-"rownames of pcm for DNA motifs must be capital A, C, G and T"
    if (object@alphabet=="RNA" & !all(rownames(object@mat) %in% c("A","C","G","U"))) re<-"rownames of pcm for RNA motifs must be capital A, C, G and U"
    if (object@alphabet=="AA" & !all(rownames(object@mat) %in% LETTERS[1:26])) re<-"rownames of pcm for AA motifs must be capital letters"
    if (any(!is(object@markers, "marker"))) re<- "markers must be a list of marker object"
    re
  }
)


#' "pcm" methods
#'
#' methods for pcm objects.
#'
#'
#' @rdname pcm-class
#' @aliases $,pcm-method $<-,pcm-method
#' @docType methods
#' @param x An object of class \code{pcm}. For \code{getIC}, if parameter p is
#' followed, x should be an object of matrix.  For \code{pcm2pfm}, x also could
#' be an object of matrix.
#' @param y Not use.
#' @param p p is the background frequency.
#' @param n how many spaces should be added.
#' @param b logical value to indicate where the space should be added.
#' @param background a \code{"numeric"} vector. The background frequency.
#' @param t numeric value of information content threshold for trimming.
#' @param \dots Further potential arguments passed to \code{plotMotifLogo}.
#' @param row.names,optional see as.data.frame
#' @param name slot name of pcm object.
#' @section Methods: \describe{ \item{addBlank}{\code{signature(x="pcm",
#' n="numeric", b="logical")} add space into the position count matrix for
#' alignment. b is a bool value, if TRUE, add space to the 3' end, else add
#' space to the 5' end. n indicates how many spaces should be added.}
#'
#' \item{coerce}{\code{signature(from = "pcm", to = "matrix")}: convert object
#' pcm to matrix }
#'
#' \item{getIC}{\code{signature(x = "pcm",)} Calculate information content
#' profile for position frequency matrix. }
#'
#' \item{matrixReverseComplement}{\code{signature(x = "pcm")} get the reverse
#' complement of position frequency matrix.}
#'
#' \item{plot}{\code{signature(x = "pcm")} Plots the sequence logo of the
#' position count matrix. }
#'
#' \item{trimMotif}{\code{signature(x = "pcm", t= "numeric")} trim motif by
#' information content. }
#'
#' \item{$, $<-}{Get or set the slot of \code{\link{pcm-class}}}
#'
#' \item{as.data.frame}{convert \code{\link{pcm-class}} to a data.frame}
#' \item{format}{return the name_pcm of \code{\link{pcm-class}}} }
#' @keywords classes
#' @examples
#'
#' pcm <- read.table(file.path(find.package("motifStack"), "extdata", "bin_SOLEXA.pcm"))
#' pcm <- pcm[,3:ncol(pcm)]
#' rownames(pcm) <- c("A","C","G","T")
#' motif <- new("pcm", mat=as.matrix(pcm), name="bin_SOLEXA")
#' getIC(motif)
#' matrixReverseComplement(motif)
#' as(motif,"matrix")
#' pcm2pfm(motif)
#' as.data.frame(motif)
#' format(motif)
#'
#' @exportMethod `$` `$<-`
setMethod("$", "pcm", function(x, name) slot(x, name))
setReplaceMethod("$", "pcm",
                 function(x, name, value){
                   slot(x, name, check = TRUE) <- value
                   x
                 })

#' @importFrom grDevices rainbow
setMethod("initialize","pcm",function(.Object, mat, name, alphabet, color, background,
                                      tags, markers){
  if(mode(mat)!="numeric") stop("mat must be a numeric matrix")
  if(is.null(rownames(mat))) stop("rownames of pcm is empty")
  rownames(mat)<-toupper(rownames(mat))
  rname<-rownames(mat)
  if(missing(alphabet)){
    rname<-rname[order(rname)]
    if(all(rname %in% c("A","C","G","T"))) alphabet<-"DNA"
    else{
      if(all(rname %in% c("A","C","G","U"))) alphabet<-"RNA"
      else{
        if(all(rname %in% LETTERS[1:26]) & nrow(mat)==20) alphabet<-"AA"
        else alphabet<-"others"
      }
    }
  }
  if(missing(color)){
    if(alphabet!="others"){
      color<-colorset(alphabet, "auto")
    }else{
      color <- rainbow(nrow(mat))
      names(color) <- rname
    }
  }
  names(color) <- toupper(names(color))
  if(missing(background)){
    background<-rep(1/nrow(mat), nrow(mat))
    names(background)<-rname
  }
  if(length(background)<nrow(mat)){
    background <- rep(background, nrow(mat))[1:nrow(mat)]
    names(background) <- rname
  }
  .Object@mat            =    mat
  .Object@name        =    name
  .Object@alphabet    =    alphabet
  .Object@color        =    color
  .Object@background    =    background
  if(!missing(tags)) .Object@tags = tags
  if(!missing(markers)) .Object@markers = markers
  .Object
})

#' @name plot
#' @rdname pcm-class
#' @aliases plot plot,pcm,ANY-method
#' @importFrom stats4 plot
#' @exportMethod plot
setMethod("plot", signature(x="pcm"),
          function(x, y="missing", ...){
            plot(pcm2pfm(x), ...)
          }
)

#' @name as
#' @rdname pcm-class
#' @aliases coerce,pcm,matrix-method
#' @exportMethod coerce
setAs(from="pcm", to="matrix", function(from){
  from@mat
})



#' @rdname pcm-class
#' @exportMethod trimMotif
#' @aliases trimMotif trimMotif,pcm,numeric-method
setGeneric("trimMotif", function(x, t) standardGeneric("trimMotif"))
setMethod("trimMotif", signature(x="pcm", t="numeric"), function(x, t){
  .sw <- c(0, 0)
  ic <- getIC(x)
  for(i in 1:ncol(x@mat)){
    if(ic[i]>=t){
      .flag <- FALSE
      if(i<ncol(x@mat)){
        if(ic[i+1]>=t) .flag<-TRUE
        else if(ic[i]>=min(2*t, 0.5)) .flag<-TRUE
      }else{
        .flag<-TRUE
      }
      if(.flag){
        if(.sw[1]==0) .sw[1] <- i
        else .sw[2] <- i
      }
    }
  }
  if(.sw[1]<.sw[2]-1) x@mat <- x@mat[, .sw[1]:.sw[2]]
  else x <- NA
  x
})

#' @rdname pcm-class
#' @exportMethod matrixReverseComplement
#' @aliases matrixReverseComplement matrixReverseComplement,pcm-method
setGeneric("matrixReverseComplement",
           function(x) standardGeneric("matrixReverseComplement"))
setMethod("matrixReverseComplement", "pcm", function(x){
  if(x@alphabet!="DNA") stop("alphabet of pcm must be DNA")
  mat<-x@mat
  ## double check the rownames to A, C, G, T.
  if(!all(toupper(rownames(mat))==c("A", "C", "G", "T"))){
    mat <- mat[match(c("A", "C", "G", "T"), toupper(rownames(mat))), ]
  }
  rc<-matrix(nrow=nrow(mat),ncol=ncol(mat))
  complements<-c(4,3,2,1)
  for(i in 1:nrow(mat)){
    for(j in 1:ncol(mat)){
      rc[complements[i],(ncol(mat)+1-j)]<-mat[i,j]
    }
  }
  rownames(rc)<-rownames(mat)
  x@mat<-rc
  x
})

#' @rdname pcm-class
#' @exportMethod addBlank
#' @aliases addBlank addBlank,pcm,numeric,logical-method
setGeneric("addBlank", function(x, n, b) standardGeneric("addBlank"))
setMethod("addBlank", signature(x="pcm", n="numeric", b="logical"), function(x, n, b){
  #if(x@alphabet!="DNA") stop("alphabet of pfm must be DNA")
  N<-matrix(rep(rep(0,nrow(x@mat)), n), nrow=nrow(x@mat))
  rownames(N) <- rownames(x@mat)
  if(b){
    N<-cbind(x@mat,N)
  }else{
    N<-cbind(N,x@mat)
    if(length(x@markers)>0){
      x@markers <- lapply(x@markers, function(.ele){
        .ele@start <- .ele@start+n
        .ele@stop <- .ele@stop+n
        .ele
      })
    }
  }
  rownames(N)<-rownames(x@mat)
  x@mat<-N
  x
})

#' @rdname pcm-class
#' @exportMethod getIC
#' @aliases getIC getIC,pcm,ANY-method
setGeneric("getIC", function(x, p) standardGeneric("getIC"))
setMethod("getIC", signature(x="pcm"), function(x, p="missing"){
  mat<-pcm2pfm(x@mat)
  colSums(mat * (addPseudolog2(mat) - addPseudolog2(x@background)))
})

#' @rdname pcm-class
#' @exportMethod pcm2pfm
#' @aliases pcm2pfm pcm2pfm,pcm,ANY-method pcm2pfm,matrix,ANY-method
#' pcm2pfm,matrix,numeric-method pcm2pfm,data.frame,ANY-method
#' pcm2pfm,data.frame,numeric-method pcm2pfm,list,ANY-method
#' pcm2pfm,list,numeric-method
setGeneric("pcm2pfm", function(x, background) standardGeneric("pcm2pfm"))
setMethod("pcm2pfm", signature(x="matrix", background="numeric"), function(x, background){
  s <- max(colSums(x, na.rm = TRUE), na.rm = TRUE)
  bck <- background/sum(background)
  if(s==0){
    mat <- matrix(rep(bck, ncol(x)), nrow = nrow(x), ncol = ncol(x))
    rownames(mat) <- rownames(x)
    colnames(mat) <- colnames(x)
    return(mat)
  }
  p <- x/s
  cs <- colSums(p)
  cs[is.na(cs)] <- 0
  p[, cs < 1] <- p[, cs < 1] + rep((1-cs[cs < 1]), each=nrow(x))*bck
  p
})

setMethod("pcm2pfm", signature(x="matrix"), function(x, background="missing"){
  pcm2pfm(x, rep(1/nrow(x), nrow(x)))
})

setMethod("pcm2pfm", signature(x="data.frame", background="numeric"), function(x, background){
  pcm2pfm(as.matrix(x), background)
})

setMethod("pcm2pfm", signature(x="data.frame"), function(x, background="missing"){
  pcm2pfm(as.matrix(x), rep(1/nrow(x), nrow(x)))
})

## pcm to pfm from pcm object
setMethod("pcm2pfm", signature(x="pcm"), function(x, background="missing"){
  bck <- x@background[rownames(x@mat)]
  if(any(is.na(bck))) stop("some elements does not have background value", bck)
  .mat <- pcm2pfm(x@mat, bck)
  new("pfm", mat=.mat, name=x@name, alphabet=x@alphabet,
      color=x@color, background=x@background, tags=x@tags,
      markers=x@markers)
})

setMethod("pcm2pfm", signature(x="list"), function(x, background="missing"){
  lapply(x, pcm2pfm)
})
setMethod("pcm2pfm", signature(x="list", background="numeric"), function(x, background){
  lapply(x, pcm2pfm, background=background)
})

#' @rdname pcm-class
#' @exportMethod pcm2pssm
#' @aliases pcm2pssm pcm2pssm,pcm,ANY-method pcm2pssm,matrix,ANY-method
#' pcm2pssm,matrix,numeric-method pcm2pssm,data.frame,ANY-method
#' pcm2pssm,data.frame,numeric-method pcm2pssm,list,ANY-method
#' pcm2pssm,list,numeric-method
setGeneric("pcm2pssm", function(x, background) standardGeneric("pcm2pssm"))
setMethod("pcm2pssm", signature(x="matrix", background="numeric"), function(x, background){
  ## check the background order, first order, length=4; second order, length=16
  ord <- 0
  if(length(background)==nrow(x)){
    ord <- 1
  }
  if(length(background)==2^nrow(x)+nrow(x)){
    ord <- 2
  }
  if(ord==0){
    stop("The background dimention is incorrect. For example, if it is DNA motif,
         4 or 20 (16 (2nd order) + 4(1st order)) named backgrounds are expected.")
  }
  m <- pcm2pfm(x, background)
  stopifnot(all(rownames(m)==rownames(x)))
  if(length(names(background))!=length(background)){
    stop("background must be a named vector.")
  }
  if(ord==1){
    m1 <- addPseudolog2(m) - addPseudolog2(background[rownames(m)])
  }else{
    ## 2 order background
    ## 2*log2(sum(p_i*(p_ij/q_ij)))
    letters <- rownames(m)
    ## calculate 1 order of the motif
    total <- sum(x, na.rm = TRUE)
    P1 <- rowSums(x, na.rm = TRUE)
    names(P1) <- letters
    P1 <- addPseudolog2(P1)
    ## calculate 2 order of the motif
    P2 <- lapply(letters, function(.ele){
      mat <- t(t(cbind(background[rownames(m)], m[, -ncol(m)])) *
                 m[.ele, , drop=TRUE])
      rownames(mat) <- paste0(letters, .ele)
      mat
    })
    names(P2) <- letters
    ## 2 order matrix
    bg <- split(background[paste0(rep(letters, length(letters)),
                                  rep(letters, each=length(letters)))],
                rep(letters, each=length(letters)))[letters]
    Pq <- mapply(FUN=function(p, b){
      addPseudolog2(p) - addPseudolog2(b)
    }, P2[letters], bg[letters], SIMPLIFY = FALSE)
    m1 <- lapply(Pq, function(.ele){## .ele is a matrix, eg 4 X len
      colSums(.ele + P1)
    })
    m1 <- do.call(rbind, m1)
    rownames(m1) <- letters
  }
  return(m1)
})

setMethod("pcm2pssm", signature(x="matrix"), function(x, background="missing"){
  background <- rep(1/nrow(x), nrow(x))
  names(background) <- rownames(x)
  pcm2pssm(x, background)
})

setMethod("pcm2pssm", signature(x="data.frame", background="numeric"), function(x, background){
  pcm2pssm(as.matrix(x), background)
})

setMethod("pcm2pssm", signature(x="data.frame"), function(x, background="missing"){
  background <- rep(1/nrow(x), nrow(x))
  names(background) <- rownames(x)
  pcm2pssm(as.matrix(x), background)
})
## pcm to pssm from pcm object
setMethod("pcm2pssm", signature(x="pcm"), function(x, background="missing"){
  .mat <- pcm2pssm(x@mat, x@background)
  new("pssm", mat=.mat, name=x@name, alphabet=x@alphabet,
      color=x@color, background=x@background, tags=x@tags,
      markers=x@markers)
})

setMethod("pcm2pssm", signature(x="list"), function(x, background="missing"){
  lapply(x, pcm2pssm)
})
setMethod("pcm2pssm", signature(x="list", background="numeric"), function(x, background){
  lapply(x, pcm2pssm, background=background)
})


#' @rdname pcm-class
#' @exportMethod as.data.frame
#' @aliases as.data.frame,pcm-method
setMethod("as.data.frame", signature(x="pcm"), function(x, row.names = NULL, optional = FALSE, ...){
  as.data.frame(x@mat, ...)
})

#' @rdname pcm-class
#' @exportMethod format
#' @aliases format,pcm-method
setMethod("format", signature(x="pcm"), function(x, ...){
  paste0(x@name, "_pcm")
})

Try the ClusTCR2 package in your browser

Any scripts or data that you put into this service are public.

ClusTCR2 documentation built on May 29, 2024, 9:32 a.m.