# tmp-save/addInter.R

################################################################################

#' Augment a dataset with interactions
#'
#' Augment the number of columns of a `big.matrix` by adding new columns
#' which are interactions between pairs of columns. It may be a good idea to
#' use [big_standardize] to have scaled columns as input.
#'
#' @inheritParams bigstatsr-package
#' @param arr.ind Numeric matrix of two columns `row` and `col` specifying
#' pairs that need to be multiplied to create new columns. Every index must
#' be non-missing and lie between 1 and `ncol(X.)`.
#'
#' @return The newly created `big.matrix` (or its descriptor), which has
#' `ncol(X.) + nrow(arr.ind)` columns.
#' @export
#'
#' @examples
#' tmp <- tmpFBM(descriptor = FALSE)(10, 5, type = "double")
#' tmp[] <- rnorm(length(tmp))
#' big_standardize(tmp)
#' apply(tmp[,], 2, function(x) c(mean(x), sd(x)))
#'
#' test <- big_addInter(tmp, arr.ind = cbind(1:4, 5))
big_addInter <- function(X., arr.ind, fun.createBM = BM()) {

  assert_type(X., "double")

  # Validate the pairs up front: they are handed to a compiled routine, so a
  # malformed or out-of-range index should fail here with a clear message
  # rather than further down the call chain.
  if (!is.matrix(arr.ind) || !is.numeric(arr.ind) || ncol(arr.ind) != 2) {
    stop("'arr.ind' must be a numeric matrix with two columns.",
         call. = FALSE)
  }
  if (anyNA(arr.ind) || any(arr.ind < 1 | arr.ind > ncol(X.))) {
    stop("'arr.ind' must contain indices between 1 and ncol(X.).",
         call. = FALSE)
  }

  # One column per original column of `X.`, plus one per requested pair.
  res <- fun.createBM(nrow = nrow(X.),
                      ncol = ncol(X.) + nrow(arr.ind),
                      type = "double")

  # The compiled routine fills `res` through its address.
  addInter(attach.BM(res)@address,
           attach.BM(X.)@address,
           arr.ind)

  res
}

################################################################################

#' Standardize the columns of a double `big.matrix`
#'
#' Checks that `X.` is of type double, then hands its address to the
#' compiled `standardize` routine together with `thr.sd`.
#'
#' @inheritParams bigstatsr-package
#' @param thr.sd Threshold on the standard deviation; columns whose standard
#' deviation falls under it are ignored.
#'
#' @return A vector with the indices of the columns that have a low
#' standard deviation.
#' @export
#'
#' @examples
#' tmp <- tmpFBM(descriptor = FALSE)(10, 5, type = "double")
#' tmp[] <- rnorm(length(tmp))
#' apply(tmp[,], 2, function(x) c(mean(x), sd(x)))
#'
#' big_standardize(tmp)
#' apply(tmp[,], 2, function(x) c(mean(x), sd(x)))
big_standardize <- function(X., thr.sd = 1e-4) {

  assert_type(X., "double")

  # Resolve `X.` to an attached object exposing the address slot that the
  # compiled routine works on.
  bm <- attach.BM(X.)

  standardize(bm@address, thr.sd)
}

################################################################################
# privefl/bigstatsr documentation built on March 29, 2024, 3:31 a.m.