R/create_db.R

##
#' Create a peptide collection
#'
#' Constructor to create peptide collection to be used in
#' \code{summarizePeptides}.
#'
#' @param position A \code{data.frame} or \code{GRanges} object. If a
#' \code{data.frame} is provided, it should contain 'start' and 'end' or 'width'
#' columns as well as a peptide column. If position is a \code{GRanges} object,
#' then it must either have peptide as names or contain a peptide metadata
#' column.
#'
#' @details
#'   \code{position} can have additional columns. These columns will be kept in
#'   the resulting peptide collection. This is especially useful to include
#'   clades and grouping parameters for the \code{makeCalls} function.
#'
#'   If the input contains all the z-scores (z1 to z5), then they will not be
#'   re-calculated. If some (but not all) z-scores are missing, a warning
#'   message will be sent and the z-scores are re-calculated.
#'
#'
#' @author Renan Sauteraud
#' @seealso \code{\link{GRanges}}
#'
#' @examples
#' #construct data.frame object
#'    AA <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P",
#'    "Q","R", "S", "T", "V", "W", "Y")
#'    starts <- seq(1, 30, 3)
#'    ends <- starts + 14
#'    peptides <- sapply(1:10, function(x) {
#'      paste0(AA[floor(runif(15, 1, 20))], collapse = "")
#'    })
#'    data <- data.frame(start = starts, end = ends, peptide = peptides)
#' #from data.frame
#'    new_pep <- create_db(data)
#' #from GRanges
#'    new_pep <- create_db(new_pep)
#'
#'
#' @export
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @importMethodsFrom GenomicRanges mcols mcols<- names
#' @importClassesFrom IRanges IRanges
#' @importClassesFrom GenomicRanges GRanges
#'
create_db <- function(position){
  if(!class(position) %in% c("data.frame", "GRanges")){
    stop(paste("`position' must be of class 'GRanges' or data.frame'. Given:",
               class(position)))
  }else if(class(position) == "data.frame"){
    if(!"peptide" %in% colnames(position)){
      stop("The given data.frame must contain a 'peptide' column.")
    }
    cn <- colnames(position)
    extracols <- cn[!cn %in% c("start", "end", "width")]
    ir <- IRanges(start = position$start, end = position$end, width = position$width,
                  names = position$peptide)
    gr <- GRanges(ranges = ir, seqnames = " ")
    names(gr) <- position$peptide
    mcols(gr) <- position[, extracols, drop = FALSE]
  } else{
    if(is.null(names(position))){
      if(!"peptide" %in% colnames(mcols(position))){
        stop("The given GRanges object must contain a 'peptide' column.")
      } else{
        names(position) <- position$peptide
      }
    } else{
      if(class(names(position)) != "character"){
        stop("The names of the GRanges object should be the peptide sequences")
      }
    }
    if(any(names(position) != position$peptide)){
      stop("The names of the GRanges object should match the 'peptide' column.")
    }
    gr <- position
  }
  # Calculating z-scores
  if(!all(paste0("z", 1:5) %in% colnames(mcols(gr)))){
    existing_zs <- colnames(mcols(gr))[colnames(mcols(gr))%in% paste0("z", 1:5)]
    if(length(existing_zs) > 0){
      warning("Removing existing z-scores and recalculating new ones.")
      mcols(gr) <- mcols(gr)[!colnames(mcols(gr))%in% existing_zs]
    }
    mcols(gr) <- data.frame(mcols(gr), .makeZpepMatrix(gr$peptide))
  }
  return(gr)
}
# Test
# gr <- create_db(data)
# gr1 <- create_db(gr)
# gr2 <- gr; mcols(gr2) <- mcols(gr2)[, !colnames(mcols(gr2)) %in% paste0("z", 1:5), drop = FALSE]
# gr2 <- create_db(gr2)
# gr3 <- gr; gr3$z1 <- NULL
# gr3 <- create_db(gr3)
# d.f, complete GR, GR w/o zs, GR w/ missing z
RGLab/pepStat documentation built on May 8, 2019, 5:56 a.m.