# Set up the data for flash. A "flash.data" object always contains fields "Y"
# (the data, with NAs replaced by zeros), "Z" (a matrix or array that has
# zeros where data is missing and ones where data is nonmissing; if no data
# is missing, then Z = 1), and "est.tau.dim" (the variance type). If standard
# errors are provided, then (depending on the variance type) the object will
# also contain fields "given.S2" or "given.tau" and "given.tau.dim".
#
set.flash.data <- function(data, S = NULL, S.dim = NULL, var.type = NULL) {
flash.data <- list(t.init = Sys.time(), t.final = Sys.time())
data <- handle.data(data)
must.be.supported.S.type(S)
must.be.compatible.data.types(data, S)
data.dim <- get.data.dims(data)
must.be.integer(S.dim, lower = 0, upper = length(data.dim), allow.null = TRUE)
must.be.valid.var.type(var.type, length(data.dim))
# Set Y, Z, and est.tau.dim.
flash.data$Y <- data
if (anyNA(data)) {
flash.data$Y[is.na(data)] <- 0
flash.data$Z <- 1L * !is.na(data)
} else {
flash.data$Z <- 1
}
flash.data$est.tau.dim <- var.type
must.not.have.zero.slices(flash.data$Y, S, var.type)
# Set S.dim.
if (is.null(S.dim)) {
S.dim <- infer.S.dim(S, data.dim)
} else {
dims.must.match(data, S, S.dim)
}
if (is.S2.stored(S, S.dim, var.type)) {
# TODO: estimate.noisy.kron.tau cannot yet handle missing data.
if (anyNA(data) && (length(var.type) > 1)) {
stop("The noisy Kronecker variance structure has not yet been implemented ",
"for missing data.")
}
S2 <- S^2
if (is.vector(S)) {
# TODO: estimate.noisy.tau and estimate.noisy.kron.tau cannot yet handle
# vector-valued S.
each <- prod(c(1, data.dim[1:length(data.dim) < S.dim]))
S2 <- array(rep(S2, each = each), dim = data.dim)
}
S2[is.na(data)] <- Inf
flash.data$given.S2 <- S2
} else if (!is.null(S)) {
tau <- 1 / S^2
if (!is.vector(S)) {
tau[is.na(data)] <- 0
}
flash.data$given.tau <- tau
flash.data$given.tau.dim <- S.dim
}
class(flash.data) <- c("flash.data", "list")
return(flash.data)
}
handle.data <- function(X) {
# Low-rank and low-rank plus sparse representations as named lists:
if (is.list(X) &&
any(c("u", "U") %in% names(X)) &&
any(c("v", "V") %in% names(X))) {
X <- handle.udvs(X)
}
return(X)
}
# Low-rank or low-rank plus sparse representations, of form:
# X = u diag(d) v' + s.
#
handle.udvs <- function(X) {
names(X) <- tolower(names(X))
# Fields u and v (or U and V) are required. d and s (or D and S) are optional.
u <- X$u
v <- X$v
d <- X$d
s <- X$s
# Check u and v.
if (!(is.matrix(u) || inherits(u, "Matrix"))
|| !(is.matrix(v) || inherits(v, "Matrix"))) {
stop("Data fields u and v must be matrices (or sparse matrices of class ",
"\"Matrix\").")
}
if (anyNA(u) || anyNA(v)) {
stop("Data matrices u and v must not have missing data.")
}
if (!identical(ncol(u), ncol(v))) {
# Silently handle the case where v^T is provided (rather than v).
if (identical(ncol(u), nrow(v))) {
v <- t(v)
} else {
stop("Data matrices u and v are of incompatible dimensions.")
}
}
K <- ncol(u)
# Check d.
if (!is.null(d)) {
if (!is.numeric(d) || (length(d) < K)) {
stop("If data field d is provided, it must be a vector of length K.")
}
d <- d[1:K]
u <- t(t(u) * sqrt(d))
v <- t(t(v) * sqrt(d))
}
udv <- list(u, v)
class(udv) <- c("lowrank", "list")
# Check s.
if (is.null(s)) {
ret <- udv
} else {
if (!(is.matrix(s) || inherits(s, "Matrix"))) {
stop("If data field s is provided, it must be a matrix or object of ",
"class \"Matrix\".")
}
if (!identical(nrow(u), nrow(s)) || !identical(nrow(v), ncol(s))) {
stop("The dimensions of data matrix s do not match the dimensions of ",
"uv^T.")
}
ret <- list("lowrank" = udv, "sparse" = s)
class(ret) <- c("lrps", "list")
}
return(ret)
}
# If S is a vector, then attempt to infer its mode (e.g., row-wise, column-wise).
#
infer.S.dim <- function(S, data.dim) {
if (!is.vector(S)) {
S.dim <- NULL
} else if (length(S) == 1) {
S.dim <- 0
} else {
S.dim <- which(length(S) == data.dim)
if (length(S.dim) == 0) {
stop("S was interpreted as a vector, but couldn't be aligned ",
"with the data.")
} else if (length(S.dim) > 1) {
stop("S could not be unambiguously interpreted. Please specify S.dim.")
}
}
return(S.dim)
}
# When given and estimated variances vary along one and the same mode (for
# example, when row-wise standard errors are provided and additional
# row-wise variances are to be estimated), then providing standard errors
# is equivalent to putting lower limits on the estimated standard errors.
# Otherwise, estimation is a difficult optimization problem. In the former
# case, it is more convenient to store tau; in the latter, it is better to
# store S^2.
#
is.S2.stored <- function(S, S.dim, var.type) {
return(!is.null(S)
&& (length(var.type) > 0)
&& (length(var.type) > 1
|| is.null(S.dim)
|| (S.dim > 0 && !(S.dim == var.type))))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.