R/0.BAf-class.R

# -----------------------------------------------------------------------------#
#' An S4 class to handle the data from Binder Arrays
#' 
#' This class is designed to store the data generated by Binder Arrays
#' (including suspension bead arrays) technology and to facilitate the analysis
#' of the data. It is distributed and stored in 5 main slots, \code{.Data},
#' \code{sinfo}, \code{binder}, \code{assy_s}, and \code{assy_b}. In order to
#' assure the orders of samples and targets are kept same in all the variables,
#' \code{'id'}, \code{colnames}, and \code{rownames} are checked.
#' 
#' @name BAf-class
#' @docType class
#' 
#' @slot .Data \code{\link{matrix}} - row : samples, col : antibody\cr It 
#'   contains all measured values. Its column names are reserved for target IDs.
#'   The row names are the keys to link to \code{@sinfo} assuring the order of 
#'   sample is same in both.
#' @slot sinfo \code{\link{tbl_df}} - row : samples, col : variables, that
#'   contains sample information\cr Each row is for each sample. Each column has
#'   a type of information (e.g age) about the sample. Some description about
#'   the columns can be found in the \code{@codebook}. Two columns in this table
#'   are special.\cr 1) A column \code{'id'} is reserved for the unique
#'   identifiers of the samples stored in a BAf-class object. The \code{'id'} is
#'   used to confirm the order of the samples are same as other slots. 2) One
#'   more column is reserved to indicate the batch of the samples. The name of
#'   this column is given in \code{@batch_c}.
#' @slot binder \code{\link{data.frame}} - row : binder (or antibodies / 
#'   antigen), col : annotations for the binders\cr All information about beads 
#'   or bound antibodies are stored here. Rows are in the same order of columns 
#'   in \code{@.Data}.\cr 1) Like \code{@sinfo}, A column \code{'id'} is 
#'   reserved for the unique identifiers of the binders, which have to be 
#'   identical to column names of \code{@.Data}.\cr And one column should be 
#'   assigned for binder batches. The name of the column is given in 
#'   \code{@batch_c}.
#' @slot batch_c \code{\link{list}} - two elements, \code{sinfo} and 
#'   \code{binder}. Each has one text to indicate the column that contains batch
#'   IDs for sample or binder in \code{@sinfo} or \code{@binder}.
#' @slot assy_s \code{\link{list}} - each element is a \code{data.frame} of a 
#'   particular batch-wise property (e.g. assay_failure). The columns of the 
#'   \code{data.frame} are the binder batches, while rows are samples. In every 
#'   data frame, the column names should be matched to the binder batch IDs in 
#'   \code{@binder}.
#' @slot assy_b \code{\link{list}} - Like \code{@assy_s}, but for binders. Each 
#'   row is for each binder in \code{@binder}. The columns are for the sample 
#'   batches.
#' @slot codebook \code{\link{list}} - two elements, \code{sinfo} and
#'   \code{binder}. Each contains a data frame of the codebook for \code{@sinfo}
#'   or \code{@binder}.
#' @slot note any
#'   
#' @author Mun-Gwan Hong <\email{mun-gwan.hong@scilifelab.se}>
#' @exportClass BAf
# -----------------------------------------------------------------------------#
# created  : 2016-01-21 by Mun-Gwan
# modified : 2017-07-20 by Mun-Gwan : inherit matrix (replace @X with @.Data)
#            2017-07-28 by Mun-Gwan : change the type from data.frame to tbl_df
#            2017-08-17 by Mun-Gwan : change to BAf-class
# -----------------------------------------------------------------------------#

BAf <- setClass("BAf", slots= c(sinfo  = "tbl_df", 
                                binder = "tbl_df", 
                                batch_c = "list",
                                assy_s = "list",
                                assy_b = "list",
                                codebook = "list",
                                note  = "ANY"),
                contains= "matrix"
)



##  Initialize --------------------------------------------------------------

setMethod(
  "initialize", "BAf",
  function(.Object,
           ...,
           sinfo  = tibble(),
           binder = tibble(),
           sinfo_batch_i  = "batch",
           binder_batch_i = "batch",
           assay_sinfo  = list(),
           assay_binder = list(),
           codebook_sinfo  = tibble(),
           codebook_binder = tibble(),
           note  = character()) {
    
    codebook_sinfo  <- codebook_sinfo  %>% as_tibble
    codebook_binder <- codebook_binder %>% as_tibble
    
    #  main numerical data
    dots <- if(length(list(...))) {
      callNextMethod(...)
    } else {
      matrix(nrow= 0, ncol= 0)
    }
    

    ###  Check ---------------------------------------------

    
    ##  check dimensions
    stopifnot(ncol(dots) == nrow(binder),
              nrow(dots) == nrow(sinfo))
    for(j in seq_along(assay_sinfo)) {
      stopifnot(nrow(sinfo) == nrow(assay_sinfo[[j]]))
    }
    for(j in seq_along(assay_binder)) {
      stopifnot(nrow(binder) == nrow(assay_binder[[j]]))
    }

    ##  check batch ids
    stopifnot(is.character(sinfo_batch_i ),
              is.character(binder_batch_i))
    
    
    ###  Key variables such as 'id' ------------------------

    ##  default IDs
    def_ids <- function(x, def) {
      if(is.null(x) || anyDuplicated(x) != 0) def else x
    }
    ##  'id's of samples and binders from the main matrix
    set1 <- list(
      sid = def_ids(rownames(dots), 1:nrow(dots) %>% paste0("s", .)),
      bid = def_ids(colnames(dots), 1:ncol(dots) %>% paste0("b", .))
    )
    
    ##  Reserved key variables
    reserved_variable <- function(x, res_v, def) {
      x <- x %>% as_tibble
      x[[res_v]] <- if(res_v %in% names(x)) {
        #  If the res_v is given, return as.character
        x[[res_v]] %>% as.character
      } else {
        #  If not, use the one from the main matrix or the default
        if(nrow(x) > 0) def else character()
      } 
      #  move the res_v to the 1st pos.
      return(x %>% dplyr::select(one_of(res_v), everything()))  
    }
    sinfo <- sinfo %>% 
      #  batch ID column
      reserved_variable( sinfo_batch_i[1L], rep_len("1", nrow(.))) %>% 
      reserved_variable("id", set1$sid) %>%            # 'id' to first
      mutate_at(2, factor)               # batch to factor
    binder <- binder %>% 
      #  batch ID column
      reserved_variable(binder_batch_i[1L], rep_len("1", nrow(.))) %>% 
      # 'id' column
      reserved_variable("id", set1$bid) %>% 
      mutate_at(2, factor)               # batch to factor
    rm(set1)    # from the main matrix
    
    

    ###  Generate .Object ----------------------------------

    .Object@.Data  <- dots
    .Object@sinfo  <- sinfo
    .Object@binder <- binder
    .Object@batch_c <- list(sinfo  = sinfo_batch_i [1L], 
                            binder = binder_batch_i[1L])
    .Object@assy_s <- assay_sinfo 
    .Object@assy_b <- assay_binder
    .Object@codebook <- list(sinfo  = codebook_sinfo , 
                             binder = codebook_binder)
    .Object@note <- note
    
    .Object %>% 
      reset_key_ids()    # prioritize the id in @sinfo and id in @binder
  }
)


##  Validity ----------------------------------------------------------------

setValidity("BAf", function(object) {
  ##  'batch_c'
  eval(substitute(
    stopifnot(
      is.list(x),
      identical(names(x), c("sinfo", "binder")),
      is.character(x$sinfo),
      is.character(x$binder)
    ), 
    list(x = quote(object@batch_c))
  ))
  object@batch_c <- object@batch_c %>% lapply(. %>% .[1])

  ##  both 'id' and batch ID columns should exist in @sinfo and @binder.
  stopifnot(
    "id" %in% names(object@sinfo),
    object@batch_c$sinfo %in% names(object@sinfo),
    "id" %in% names(object@binder),
    object@batch_c$binder %in% names(object@binder)
  )

  ##  error message functions ------------------------------
  
  #  fn: Remove 'object' at the front of the text
  rm_obj <- . %>% gsub("object@", "@", .)
  #  fn: If not identical, then inform the mismatch.
  error_if_mismatch <- function(var1, var2) {
    if(length(var2) > 0 && ! identical(var1, var2)) {
      paste("Mismatch between", deparse(substitute(var1)) %>% rm_obj, 
            "and", deparse(substitute(var2)) %>% rm_obj)
    }
  }
  #  fn: Give an error if 'cond' is not true.
  error_ifnot <- function(cond) {
    if(!cond) {
      deparse(substitute(cond)) %>% rm_obj %>% paste("is not TRUE.") 
    } 
  }
  #  fn: Give an error if the number of rows of x1 and n_X is not equal or
  #      if x1$id is not same as names_X
  err_if_ne_mis <- function(x1, n_X, names_X) {
    eval.parent(substitute(
      error_ifnot(nrow(x1) == n_X) %>% {    # check count
        if(is.null(.)) {
          error_if_mismatch(x1$id, names_X)  # check order.
        } else .
      }
    ))
  }

  validate_a_side <- function(x1, batch_c_1, 
                              n_X, names_X, 
                              assay, x2_batch) {
    eval(substitute(                     # catch inputs as they are
      # a collection of error messages
      c(
        # check 'id' in @'x1' and @.Data are same
        err_if_ne_mis(x1, n_X, names_X),
        
        ##  'batch' column
        error_ifnot(is.factor(x1[[batch_c_1]])),
        error_ifnot(any(is.na(x1[[batch_c_1]])) == FALSE),
      
        ##  @assy_s or @assy_b
        if(length(assay) > 0) {			# when '@assy' has something
          ##  element names of the @assy list
          nm <- names(assay)
          if(is.null(nm) || any(nm %in% c(NA, ""))) {
            deparse(substitute(assay)) %>% rm_obj %>% 
              paste("The names of all elements of", ., 
                    "should be given and unique!")
          } else {
            lapply(seq_along(assay), function(j) {		# for each element in @assay
              c( 
                #  check 'id' of @binder and @.Date
                err_if_ne_mis(x1, nrow(assay[[j]]), rownames(assay[[j]])),
                error_ifnot(is.data.frame(assay[[j]])),
                error_if_mismatch(sort(colnames(assay[[j]])), 
                                  sort(levels(x2_batch)))
              ) %>% 
                # replace '[[j]]' with the real element name
                sub("\\[\\[j\\]\\]", paste("[[", nm[j], "]]"), .)
            }) %>% unlist()
          }
        }
      )
    ))
  }
  er_m <- c(
    validate_a_side(object@sinfo, object@batch_c$sinfo, 
                    nrow(object@.Data), rownames(object@.Data), 
                    object@assy_s, object@binder[[object@batch_c$binder]]),
    validate_a_side(object@binder, object@batch_c$binder, 
                    ncol(object@.Data), colnames(object@.Data), 
                    object@assy_b, object@sinfo[[object@batch_c$sinfo]])
  )

  ##  return
  if(is.null(er_m)) return(TRUE) else return(er_m)
})
Rundmus/BAf-R_package documentation built on May 18, 2020, 12:59 p.m.