R/check_blocks.r

Defines functions check_blocks_align check_blocks_rownames check_blocks_colnames check_blocks_names check_blocks_quantitative check_blocks_matrix check_blocks_is_list check_blocks

#' Check blocks
#'
#' check_blocks runs several checks on the blocks and transform them in
#' order to ensure that the blocks can be analysed properly.
#'
#' check_blocks performs the following checks and apply the following
#' transformations to the blocks:
#' \itemize{
#'   \item If a single block is given as a data frame or a matrix, \code{blocks}
#'   is transformed into a list with the block as its unique element. Otherwise,
#'   if \code{blocks} is not a list, an error is raised.
#'   \item Coerce each element of \code{blocks} to a matrix.
#'   \item Make sure that all the blocks apart from the response block are
#'   quantitative.
#'   \item Add missing names to \code{blocks}.
#'   \item Add missing column names to each block and prefix column names with
#'   block names if some column names are duplicated between blocks.
#'   \item Check blocks' row names. Raises an error if a block has duplicated
#'   row names. Several scenario are possible:
#'   \itemize{
#'     \item If all blocks are missing row names, row names are created if
#'     \code{allow_unnames} is TRUE, otherwise an error is raised.
#'     \item If a block is missing row names and all other blocks' row names
#'     match, missing row names are copied from the other blocks.
#'     \item If a block is missing row names but other blocks' have none
#'     matching row names, an error is raised.
#'   }
#'   \item If \code{add_NAlines} is FALSE and blocks have different number of
#'   rows, an error is raised. Otherwise, lines filled with NA values are added
#'   to the blocks with missing rows. Blocks' rows are permuted so that every
#'   block has the same row names in the same order.
#' }
#' @inheritParams rgcca
#' @param add_Nalines logical, if TRUE, lines filled with NA are added to blocks
#' with missing rows
#' @param allow_unnames logical, if FALSE, an error is raised if blocks do not
#' have row names
#' @importFrom stats setNames
#' @noRd
check_blocks <- function(blocks, add_NAlines = FALSE, allow_unnames = TRUE,
                         quiet = FALSE, response = NULL) {
  blocks <- check_blocks_is_list(blocks)
  blocks <- check_blocks_matrix(blocks)
  blocks <- check_blocks_quantitative(blocks, response)
  blocks <- check_blocks_names(blocks, quiet)
  blocks <- check_blocks_colnames(blocks, quiet)
  blocks <- check_blocks_rownames(blocks, allow_unnames, quiet)
  blocks <- check_blocks_align(blocks, add_NAlines, quiet)

  invisible(blocks)
}

check_blocks_is_list <- function(blocks) {
  # Check that there is either a single block or a list of blocks
  if (is.matrix(blocks) || is.data.frame(blocks)) blocks <- list(blocks)
  if (!is.list(blocks)) stop_rgcca(paste("blocks must be a list."))
  return(blocks)
}

check_blocks_matrix <- function(blocks) {
  blocks <- lapply(blocks, function(x) {
    if (is.matrix(x)) {
      return(x)
    }
    if (is.data.frame(x)) {
      return(data.matrix(x))
    }
    names_x <- names(x)
    x <- data.matrix(x)
    rownames(x) <- names_x
    return(x)
  })
  return(blocks)
}

check_blocks_quantitative <- function(blocks, response = NULL) {
  response <- ifelse(is.null(response), length(blocks) + 1, response)
  lapply(seq_along(blocks), function(j) {
    x <- blocks[[j]]
    qualitative <- is.character(x) || is.factor(x)
    if (j == response) {
      if (qualitative && (NCOL(x) > 1)) {
        stop_rgcca(
          "unsupported multivariate qualitative block. Block ", j,
          " is a multivariate qualitative block. The method ",
          "is not able to cope with it."
        )
      }
    } else {
      if (qualitative) {
        stop_rgcca(
          "unsupported qualitative block. Block ", j,
          " is a qualitative block but is not the response block. The method ",
          "is not able to cope with it."
        )
      }
    }
  })
  return(blocks)
}

check_blocks_names <- function(blocks, quiet = FALSE) {
  # Add block names if some are missing
  renamed <- FALSE
  if (is.null(names(blocks))) names(blocks) <- rep("", length(blocks))
  for (x in which(names(blocks) == "")) {
    names(blocks)[x] <- paste0("block", x)
    renamed <- TRUE
  }
  if (!quiet && renamed) {
    message("Missing block names are automatically labeled.")
  }
  return(blocks)
}

check_blocks_colnames <- function(blocks, quiet = FALSE) {
  # Check for empty colnames
  if (any(vapply(
    blocks, function(x) is.null(colnames(x)),
    FUN.VALUE = logical(1)
  ))) {
    if (!quiet) message("Missing colnames are automatically labeled.")
    blocks <- lapply(
      setNames(seq_along(blocks), names(blocks)),
      function(x) {
        block <- blocks[[x]]
        if (is.null(colnames(block))) {
          if (NCOL(block) == 1) {
            colnames(block) <- names(blocks)[x]
          } else {
            colnames(block) <- paste0("V", x, "_", seq_len(NCOL(block)))
          }
        }
        return(block)
      }
    )
  }

  # Check for duplicated colnames
  if (any(duplicated(unlist(lapply(blocks, colnames))))) {
    if (!quiet) message("Duplicated colnames are modified to avoid confusion.")

    blocks <- lapply(
      setNames(seq_along(blocks), names(blocks)),
      function(x) {
        block <- blocks[[x]]
        colnames(block) <- paste(names(blocks)[x],
          colnames(blocks[[x]]),
          sep = "_"
        )
        return(block)
      }
    )
  }
  return(blocks)
}

check_blocks_rownames <- function(blocks, allow_unnames = TRUE, quiet = FALSE) {
  # Raise error if duplicated rownames
  lapply(blocks, function(x) {
    if (!is.null(row.names(x)) && any(duplicated(row.names(x)))) {
      stop_rgcca(
        "blocks have duplicated rownames."
      )
    }
  })

  # Create rownames for all blocks if all missing
  if (all(vapply(
    blocks, function(x) is.null(row.names(x)),
    FUN.VALUE = logical(1)
  ))) {
    if (allow_unnames) {
      blocks <- lapply(
        blocks,
        function(x) {
          rownames(x) <- paste0("S", seq_len(NROW(x)))
          return(x)
        }
      )
      if (!quiet) message("Missing rownames are automatically labeled.")
    } else {
      stop_rgcca(paste("blocks must have rownames."))
    }
  }

  # If at least one block does not have rownames, 2 cases arise:
  #   - if all blocks with names have the same rownames, in the same order,
  #     we fill the missing rownames with the rownames of the other blocks;
  #   - otherwise we raise an error.
  if (any(
    vapply(blocks, function(x) is.null(row.names(x)), FUN.VALUE = logical(1))
  )) {
    matrix_of_rownames <- Reduce(cbind, lapply(blocks, row.names))
    is_valid <- all(
      apply(matrix_of_rownames, 2, identical, matrix_of_rownames[, 1])
    )
    if (is_valid) {
      blocks <- lapply(
        blocks,
        function(x) {
          row.names(x) <- matrix_of_rownames[, 1]
          return(x)
        }
      )
      if (!quiet) message("Missing rownames are automatically labeled.")
    } else {
      stop_rgcca(
        "some blocks are missing rownames, and the other blocks' ",
        "rownames are not consistent."
      )
    }
  }
  return(blocks)
}

check_blocks_align <- function(blocks, add_NAlines = FALSE, quiet = FALSE) {
  # Construct union of rownames
  all_names <- Reduce(union, lapply(blocks, row.names))

  # If add_NAlines is FALSE and one block doesn't have as many rows as there
  # are names in all_names, we stop. Otherwise we complete the blocks by
  # adding rows full of NA.

  if (any(vapply(blocks, nrow, FUN.VALUE = integer(1)) != length(all_names))) {
    if (add_NAlines) {
      blocks <- lapply(blocks, function(x) {
        missing_names <- setdiff(all_names, rownames(x))
        y <- matrix(NA, nrow = length(missing_names), ncol = ncol(x))
        rownames(y) <- missing_names
        return(rbind(x, y))
      })
    } else {
      stop_rgcca("blocks must have the same rownames.")
    }
  }

  # Align blocks using rownames
  blocks <- lapply(
    blocks, function(x) x[row.names(blocks[[1]]), , drop = FALSE]
  )
  return(blocks)
}
Tenenhaus/RGCCA documentation built on Feb. 12, 2024, 8:34 a.m.