R/fit_ggm_grips.R In gRim: Graphical Interaction Models

Documented in fit_ggm_grips

```#' @title Fit Gaussian graphical models
#' @description Fit Gaussian graphical models using various algorithms.
#' @author Søren Højsgaard, \email{sorenh@@math.aau.dk}
#' @name fit_ggm_grips
#'
#' @param S Sample covariance matrix.
#' @param formula Generators of model; a list of integer vectors or a 2
#'     x p matrix of integers.
#' @param nobs Number of observations
#' @param K Initial value of concentration matrix.
#' @param maxit Maximum number of iterations.
#' @param eps Convergence criterion.
#' @param convcrit Convergence criterions. See section `details`.
#' @param aux A list of form name=value.
#' @param method Either `"ncd"` (default), `"covips"` or `"conips"`.
#' @param print Should output from fitting be printed?
#'
#' @details
#'
#' Convergence criterion:
#'
#' * 1: max absolute difference between S and Sigmahat on edges.
#'
#' * 2: difference in log likelihood divided by number of parameters in the
#' model (number of edges + number of nodes) between successive
#' iterations.
#'
#' * 3: computed duality gap may turn negative due to rounding error, so its absolute value is returned.
#' This still provides upper bound on error of likelihood function.
#'
#' Methods:
#' * "ncd": Neighbour coordinate descent.
#' * "covips": IPS based on working with the covariance matrix.
#' * "conips": IPS based on working with the concentration matrix.
#'
#' ncd is very fast but may fail to converge in rare cases. Both
#' covips and conips are guaranteed to converge provided the maximum
#' likelihood estimate exists, and covips are considerably faster than
#' conips.
#'
#' @examples
#' options("digits"=3)
#' data(math, package="gRbase")
#'
#' S <- cov(math)
#' nobs <- nrow(math)
#' gl <- list(1:3, 3:5)
#' em <- matrix(c(1,2, 2,3, 1,3, 3,4, 3,5, 4,5), nrow=2)
#'
#' EPS = 1e-2
#'
#' fit_cov = fit_ggm_grips(S, gl, nobs=nobs, eps=EPS, method="cov")
#' fit_con = fit_ggm_grips(S, gl, nobs=nobs, eps=EPS, method="con")
#' fit_ncd = fit_ggm_grips(S, gl, nobs=nobs, eps=EPS, method="ncd")
#'
#' K <- solve(S)
#' (fit_con\$K - K)  |> abs() |> max()
#' (fit_cov\$K - K)  |> abs() |> max()
#' (fit_ncd\$K - K)  |> abs() |> max()
#'
NULL

## Colouring number determined using coreness function from igraph

get_col_number <- function(emat, d, nobs){
ig <- as_emat2igraph(emat, d)
col_number <- max(coreness(ig))+1
if (col_number > (nobs - 1)) {
stop(glue("Colouring number ({col_number}) exceeds degrees of freedom ({nobs-1}); MLE may not exist.\n"))
}
col_number
}

#' @rdname fit_ggm_grips
#' @export

fit_ggm_grips <- function(S, formula=NULL, nobs, K=NULL, maxit=10000L, eps=1e-2, convcrit=1, aux=list(),
method="ncd", print=0) {
t0 <- .get.time()
method <- match.arg(tolower(method),
c("ncd", "covips", "conips"))
#c("covips", "conips", "ncd", "sncd"))

method_str <- method

formula <- parse_formula(formula, nrow(S))
elst  <- formula2glist(formula)
emat  <- as_elist2emat(elst)
amat  <- as_emat2amat(emat, d=nrow(S))
## str(list(formula=formula, elst=elst, emat=emat, amat=amat))

col_number <- get_col_number(emat, nrow(S), nobs)

## If NCD is used, we determine a small_first ordering to be used in the iteration

reo <- NULL
if (identical(method,"ncd")) {
reo<-reorder(S, amat)
amat<-reo\$amat2
emat<-as_amat2emat(amat)
S <- reo\$S2
}

## Rescale to use correlation matrix and remember rescaling

scale_it = FALSE
scaling=rep(1,nrow(S))
if (any(abs(diag(S)-1) > 1e-8)){
scaling = sqrt(diag(S))
S <- cov2cor(S)
scale_it=TRUE
}

switch(method,
## "sncd" = {ver=0; method="ncd"},
"ncd"  = {ver=1},
"covips"={ver=0},
"conips"={ver=0}
)

aux0 <- list(method  = method,
amat    = amat,
version = ver,
engine  = "cpp")

engine <- match.arg(tolower(aux0\$engine), c("cpp", "r"))
aux0[names(aux)] <- aux

if (is.null(K)) {
if (identical(method, "ncd")) {
K <- diag(1, nrow(S))
} else {
K <- diag(1/diag(S))
}
}
Ks <- .c_clone(K)

comb <- paste0(engine, "_", method)
switch(comb,
"cpp_covips"     = {
fitfun <- .c_covips_ggm_
},
"cpp_conips"     = {
fitfun <- .c_conips_ggm_
},
"cpp_ncd"        = {
fitfun <- .c_ncd_ggm_
},
"r_covips"       = {
fitfun <- r_covips_ggm_
},
"r_conips"       = {
fitfun <- r_conips_ggm_
},
"r_ncd"          = {
fitfun <- r_ncd_ggm_
}
)

out <- fitfun(S=S, elst=elst, emat=emat,
nobs=nobs, K=Ks, maxit=maxit, eps=eps, convcrit=convcrit, print=print, aux=aux0)

out <- c(out, list(edges=emat, nobs=nobs, eps=eps, col_number=col_number))
out <- .finalize_fit(out, S=S, t0=t0, method=method_str, engine=engine, scale_it=scale_it, scaling=scaling, reo=reo)
class(out) <- "gips_fit_class"
out
}

## 1. formula is right hand sided formula -> returns list
## 2. formula is list -> returns list
## 3. formula is matrix -> returns matrix

parse_formula <- function(formula, nvar) {

if (is.null(formula))
return(matrix(NA, nrow=2, ncol=0))

if (inherits(formula, "matrix"))
return(formula)

if (inherits(formula, "list")) {
if (length(formula) == 0)
return(matrix(NA, nrow=2, ncol=0))
else
return(formula)
}

if (inherits(formula, "formula")) {
st <- gRbase::rhsf2vec(formula)

if ((length(st) == 1) && (st %in% c(".^1", ".^."))) {
formula <-
switch (st,
".^1" = {
matrix(NA, nrow = 2, ncol = 0)
},
".^." = {
emat_saturated_model(1:nvar)
})
} else {
st <- gRbase::rhsf2list(formula)
formula <- lapply(st, as.numeric)
}
return(formula)
}
}

formula2glist <- function(glist) {

if (is.list(glist)) {
return(glist)
}

if (is.matrix(glist)) {
if (nrow(glist) == 2) {
return(colmat2list(glist))
}
else stop("Need 2 x p or p x 2 matrix")
}
else
stop("Need list or matrix")
}

.finalize_fit <- function(out, S=S, t0=NULL, method, engine, scale_it, scaling, reo, ...) {
## cat(".finalize_fit\n")

dots <- list(...)
dimnames(out\$K) <- dimnames(S)
if (inherits(out\$Sigma, "matrix"))
dimnames(out\$Sigma) <- dimnames(S)

nparm <- ncol(out\$edges) + nrow(out\$K)

### Rescaling output
Sigmascaling = scaling%*%t(scaling)
out\$Sigma=out\$Sigma*Sigmascaling
Kscaling=1/Sigmascaling
out\$K=out\$K*Kscaling
S =S*Sigmascaling

if (!is.null(t0))
out\$time <- round(.get.diff.time(t0, units="secs"), 2)

## Variable 'converged' is defined for NCD only
if (is.null(out\$converged))
out\$converged = TRUE

if (is.null(out\$mev))
out\$mev = NA

if (out\$converged) {
trKS = sum(out\$K * S)
logL = out\$logL-out\$nobs*sum(log(scaling))
conv = out\$conv_check
dgap = out\$gap
} else {
trKS <- logL <- conv <- dgap <- NA
}

if (!identical(method, "ncd")){
dgap <- NA
} else { ### reorganizing output to original ordering of variables
S <- S[reo\$sfo_inv, reo\$sfo_inv]
dd <- dim(out\$edges)
ee <- reo\$sfo_inv[out\$edges]
## print(dd); print(out\$edges); print(reo\$sfo); print(reo\$sfo_inv)
## dim(ee) <- dd
## print(ee)
out\$edges <- ee
out\$K <- out\$K[reo\$sfo_inv, reo\$sfo_inv]
out\$Sigma <- out\$Sigma[reo\$sfo_inv, reo\$sfo_inv]
}

out\$details <- list(
method = method,
## ver    = out\$ver,
eng    = engine,
time   = unname(out\$time),
nobs   = out\$nobs,
iter   = out\$iter,
eps    = out\$eps,
dim    = nparm,
idim   = nrow(out\$K),
trKS   = trKS,
logL   = logL,
conv   = conv,
dgap   = dgap
##  mev    = out\$mev
)
out\$time <- out\$iter <- out\$eps <- NULL
out\$dim  <- out\$diff <- NULL
out\$logL <- out\$gap <- out\$conv_check <- NULL

out <- c(out, dots)
class(out) <- "gips_fit_class"
out
}

get_init_parm <- function(S, K) {

.parmInit <- function(S) {
list(K=diag(1/diag(S)), Sigma=diag(diag(S)))
}

if (is.null(K))
parm <- .parmInit(S)
else
parm <- list(K=K, Sigma=solve_fun(K))
parm
}

## .initK <- function(S){
##     diag(1/diag(S))
## }
```

Try the gRim package in your browser

Any scripts or data that you put into this service are public.

gRim documentation built on June 22, 2024, 9:37 a.m.