R/arr_3d.R

Defines functions apply_col apply_3d

Documented in apply_3d apply_col

#' apply function for 3d array
#'
#' NA values will be removed automatically
#'
#' @param array A 3d array
#' @param dim giving the subscripts to split up data by.
#' @param FUN function, should only be row applied function, e.g. `matrixStats::rowMeans2`,
#' `matrixStats::rowMins`, `matrixStats::rowRanges`.
#' Because 3d array will be convert to matrix first, with the aggregated dim in
#' the last dimension.
#' @param by
#' * If not provided (`NULL`), the aggregated dim will be disappear.
#'   For example, daily precipitation `[nrow, ncol, 31-days]` aggregate into
#'   monthly `[nrow, ncol]`.
#' * If provided, `by` should be equal to the aggregated `dim`.
#'   For example, daily precipitation `[nrow, ncol, 365-days]` aggregate into
#'   monthly `[nrow, ncol, 12-months]`. In that situation, `by` should be equal
#'   to 365, and be `format(date, '%Y%m')`.
#'
#' @inheritParams set_dim
#' @inheritParams apply_row
#'
#' @seealso [apply_row] [matrixStats::rowRanges]
#'
#' @keywords internal
#' @examples
#' set.seed(1)
#' size <- c(10, 8, 31)
#' arr <- array(rnorm(10 * 8 * 31), dim = size)
#'
#' by <- c(rep(1, 10), rep(2, 21))
#' r2 <- apply_3d(arr, 3, by = by, FUN = rowMeans)
#'
#' \dontrun{
#' arr_yearly <- apply_3d(arr, by = year(dates), scale = days_in_month(dates))
#' }
#' @importFrom matrixStats rowMeans2 rowMins rowMaxs
#' @export
apply_3d <- function(array, dim = 3, FUN = rowMeans2, by = NULL, scale = 1, na.rm = TRUE, ...) {
    # TODO: add by at here
    dims <- dim(array)
    ndim <- length(dims) # dimensions

    I_dims <- setdiff(1:ndim, dim) # dimensions order
    dims_head <- dims[I_dims] # header dimensions

    # move grouped dim to the last
    if (dim != ndim) {
        array %<>% aperm(c(I_dims, dim))
    }

    mat <- array_3dTo2d(array)

    if (is.null(by)) {
        ans <- FUN(mat, ..., na.rm = na.rm)
        dim_new <- dims_head
    } else {
        dim_new <- c(dims_head, length(unique(by)))
        ans <- apply_row(mat, by, FUN, scale = scale)
    }
    dim(ans) <- dim_new
    ans
}

#' apply_col
#'
#' * `apply_col`: aggregate by col, return a `[ngrp, ncol]` matrix
#' * `apply_row`: aggregate by row, return a `[nrow, ngrp]` matrix
#'
#' For example, setting the dimension of `mat` is `[ngrid, ntime]`,
#' if you want to aggregate by time, `apply_row` should be used here;
#' if you want to aggregate by region (grids), `apply_col` should be used.
#'
#' @param mat matrix, `[nrow, ncol]`
#' @param by integer vector, with the dim of `[ntime]`
#' @param scale in the same length of `by`, or a const value,
#' `value_returned` = `FUN(x)*scale`. This parameter is designed for converting
#' monthly to yearly, meanwhile multiply days in month.
#' Currently, same group should have the same scale factor. Otherwise, only the
#' first is used.
#' @note This function also suits for big.matrix object.
#'
#' @examples
#' mat <- matrix(rnorm(4 * 6), 4, 6)
#' mat_bycol <- apply_col(mat, c(1, 1, 2, 2), colMeans)
#' mat_byrow <- apply_row(mat, c(1, 1, 2, 2, 3, 3), rowMeans)
#'
#' @importFrom matrixStats colMeans2 rowMeans2 colMins colMaxs rowMins rowMaxs
#' @keywords internal
#' @export
apply_col <- function(mat, by, FUN = colMeans2, scale = 1, ...) {
    if (length(by) != nrow(mat)) {
        stop("Length of by is not equal to nrow of mat")
    }
    if (length(scale) == 1) scale <- rep(scale, length(by))
    grps <- unique(by) %>% sort()

    ans <- lapply(grps, function(grp) {
        I <- which(by == grp)
        factor <- scale[I][1]
        FUN(mat[I, , drop = FALSE] * factor, na.rm = TRUE, ...)
    }) %>% do.call(rbind, .)

    if (!is.matrix(ans)) ans <- as.matrix(ans)
    ans %>%
        set_rownames(grps) %>%
        set_colnames(colnames(mat))
}


#' @rdname apply_col
#' @export
apply_row <- function(mat, by, FUN = rowMeans2, scale = 1, ...) {
    if (length(by) != ncol(mat)) {
        stop("Length of by is not equal to ncol of mat")
    }
    if (length(scale) == 1) scale <- rep(scale, length(by))
    grps <- unique(by) %>% sort()

    ans <- lapply(grps, function(grp) {
        I <- which(by == grp)
        factor <- scale[I][1]
        FUN(mat[, I, drop = FALSE] * factor, na.rm = TRUE, ...)
    }) %>% do.call(cbind, .)

    if (!is.matrix(ans)) ans <- as.matrix(ans)
    ans %>%
        set_colnames(grps) %>%
        set_rownames(rownames(mat))
}

#' array_3dTo2d
#'
#' @param array array with the dimension of `[nlon, nlat, ntime]`
#' @param I_grid subindex of `[nrow, ncol]`
#'
#' @return
#' `[nlat*nlon, ntime]`
#'
#' @keywords internal
#' @export
array_3dTo2d <- function(array, I_grid = NULL) {
    dim <- dim(array)
    names_last <- dimnames(array) %>% last()

    if (length(dim) >= 3) {
        dim(array) <- c(prod(dim[1:2]), dim[3])
    }
    if (!is.null(I_grid)) {
        array <- array[I_grid, ]
    }
    if (!is.null(names_last)) array %<>% set_dimnames(list(NULL, names_last))
    return(array)
}

#' @param dim `[nrow, ncol]`
#'
#' @keywords internal
#' @rdname array_3dTo2d
#' @export
array_2dTo3d <- function(array, I_grid = NULL, dim) {
    ntime <- dim(array) %>% last()
    dim <- c(dim, ntime)

    temp <- array(NA * array[1], dim = dim) %>%
        array_3dTo2d()
    if (is.null(I_grid)) {
        temp <- array
    } else {
        temp[I_grid, ] <- array
    }
    ans <- set_dim(temp, dim)
    names_last <- dimnames(array) %>% last()
    if (!is.null(names_last)) ans %<>% set_dimnames(list(NULL, NULL, names_last))
    ans
}

Try the rtrend package in your browser

Any scripts or data that you put into this service are public.

rtrend documentation built on Nov. 10, 2022, 6:14 p.m.