R/MAF_wrappers.R
In Infusion: Inference Using Simulation

Documented in config_mafR str.MAF

.py_MAF_handle <- list2env(list(is_set=FALSE),parent = emptyenv())

.get_py_MAF_handle <- function(reset=FALSE, torch_device=.Infusion.data$options$torch_device,
                               GPU_mem=.Infusion.data$options$GPU_mem # , silent_ignore=FALSE
                               ) {
  if ( ! missing(torch_device)) .Infusion.data$options$torch_device <- torch_device
  # if (.py_MAF_handle$is_set && ! reset && ! silent_ignore) {
  #   warning("Python environment already exists. Use reset=TRUE to overwrite it\n (possibly losing previous results).")
  # }
  .py_MAF_handle <- mafR::get_py_MAF_handle(envir=.py_MAF_handle, 
                                            reset=reset, torch_device=torch_device,
                                            GPU_mem=GPU_mem)
  if ( (! .one_time_warnings$GPU_warned) && torch_device=="cpu" && ! reset) {
    message(paste("GPU will be ignored. Use config_mafR() to force its use."))
    .one_time_warnings$GPU_warned <- TRUE
  }
  .py_MAF_handle
}


config_mafR <- function(torch_device, 
                        #reset=FALSE, 
                        ...) {
  if (missing(torch_device)) torch_device <- .Infusion.data$options$torch_device
  if ( ( ! .one_time_warnings$GPU_warned ) && torch_device=="cpu" ) {
    warning("GPU will be ignored. See config_mafR(torch_device=.) to force its use.")
    .one_time_warnings$GPU_warned <- TRUE 
  }
  py_MAF_handle <- .get_py_MAF_handle(reset=TRUE, # reset, 
                                      torch_device=torch_device, ...)
  message("torch device set to '", py_MAF_handle$device$type,"'.")
  invisible(NULL)
}

# I should avoid using this fn ?
# .r_to_torch <- function(x, py_handle=.get_py_MAF_handle(), 
#                         device=py_handle$device) {
#   mafR::r_to_torch(x=x, py_handle=py_handle, device=device)
# }       

# A wild speculation. Allows much larger batch size than usually considered. 
.MAF_batchsize_mem_hack <- function(py_MAF_handle, trainsize) {
  if ( is.null(gpu_memory_B <- py_MAF_handle$gpu_memory)) {
    ram_kB <- as.numeric(system("awk '/MemFree/ {print $2}' /proc/meminfo", intern=TRUE))
    memory_B <- 1000 * ram_kB
  } else memory_B <- gpu_memory_B[2]
  batch_size <- 32L*min( max(1L, 
                             as.integer(memory_B/(trainsize*trainsize)),
                             as.integer(trainsize/ 320L) # => batch size ~trainsize/10 at most
                             ),
                         64L)
  batch_size # between 32L and min(2048L, O(trainsize/10))
} # MUST Be integer

# .MAF_validasize <- function(nr, ...) {as.integer(sqrt(nr))}

.design_MAF <- function(nr,nc, py_MAF_handle, transforms, 
                        design_fac, 
                        ... # to override elements of default return value
                        ) {
  hidden_units <- .Infusion.data$options$design_hidden_layers(nr=nr,nc=nc, transforms=transforms,
                                           design_fac=design_fac)
  patience <- .Infusion.data$options$MAF_patience
  valid_size <- .Infusion.data$options$MAF_validasize(nr)
  trainsize <- nr-valid_size
  batch_size <- .Infusion.data$options$MAF_batchsize(py_MAF_handle, trainsize)
  val_indices <- sample(nr, valid_size)
  train_indices <- seq_len(nr)
  train_indices[val_indices] <- NA_integer_
  train_indices <- na.omit(train_indices)
  design <- list(hidden_units=hidden_units, batch_size=batch_size,
       train_indices=train_indices, val_indices=val_indices,
       patience=patience)
  dotlist <- list(...)
  design[names(dotlist)] <- dotlist
  
  if (.is_devel_session()) {
    is_cpu <- py_MAF_handle$device$type=="cpu"
    if (is_cpu) {xpu <- "CPU"} else {xpu <- "GPU"}
    locmess <- with(design,
                    paste0("On ",xpu,": train size: ",length(train_indices),
                           "; valida.set size: ", length(val_indices),
                      "; batch size: ", batch_size,
                      "; hidden units: ",  paste0(hidden_units, collapse=","),
                      "; patience: ", patience, "\n")
    )
    cat(cli::col_green(locmess))
  }
  
  design
}

# the dots are passed to .design_MAF where they override elements in the returned list.
# so e.g passing patience through the dots overrides the default $options$MAF_patience 
.wrap_MAF <- function(data, inferredvars, statNames, verbose=TRUE,
                      py_MAF_handle=.get_py_MAF_handle(), 
                      transforms=.Infusion.data$options$MAF_auto_layers, 
                      design_fac=.Infusion.data$options$MAF_design_fac, 
                      Adam_learning_rate=.Infusion.data$options$Adam_learning_rate, 
                      ...) {
  allVars <- c(inferredvars, statNames)
  data <- as.matrix(data[,allVars, drop=FALSE])
  design <- .design_MAF(nr=nrow(data),nc=ncol(data), py_MAF_handle, transforms,
                        design_fac=design_fac,
                        ...)
  
  train <- data[design$train_indices, , drop=FALSE]
  train <- reticulate::r_to_py(train)
  valida_set <- data[design$val_indices, allVars, drop=FALSE] 
  valida_set <- reticulate::r_to_py(valida_set)
  
  captured <- switch(paste(as.integer(verbose)), "2"=c(), c("stdout"))
  time1 <- Sys.time()
  py_output <- reticulate::py_capture_output(
    MAF_object <- py_MAF_handle$MAF_density_estimation(y_train=train, 
                                                      y_test=valida_set, 
                                                      features=ncol(data), 
                                                      transforms=transforms,      
                                                      hidden_features=design$hidden_units, 
                                                      randperm=FALSE, 
                                                      max_epochs=500L, 
                                                      batch_size=design$batch_size,
                                                      device= .py_MAF_handle$device,
                                                      patience=design$patience,
                                                      learning_rate=Adam_learning_rate)
    , type = captured)
  attr(MAF_object, "train_time") <- round(as.numeric(difftime(Sys.time(), time1, units = "secs")), 1) ## spaMM:::.timerraw(time1)
  if (verbose==1L) {
    py_output <- strsplit(py_output, split="\n", fixed=TRUE)[[1]]
    cat(paste0("Output from MAF_density_estimation():\n",
                py_output[1],"\n  (...)\n", 
                paste(tail(py_output,3)[1:2],collapse="\n"),"\n") )
  }
  class(MAF_object) <- c("MAF", class(MAF_object))
  attr(MAF_object, "colTypes") <- list(allVars=allVars)
  MAF_object
}

# the dots are passed to .design_MAF where they override elements in the returned list.
# so e.g passing patience through the dots overridesthe default $options$MAF_patience 
.wrap_MAF_cond <- function(data, outVars, givenVars, verbose=TRUE,
                           py_MAF_handle=.get_py_MAF_handle(), 
                           transforms=.Infusion.data$options$MAF_auto_layers,
                           design_fac=.Infusion.data$options$MAF_design_fac, 
                           Adam_learning_rate=.Infusion.data$options$Adam_learning_rate, 
                           ...) {
  data <- as.matrix(data)
  design <- .design_MAF(nr=nrow(data),
                        nc=length(c(outVars,givenVars)), 
                        py_MAF_handle, transforms=transforms, 
                        design_fac=design_fac, ...)
  train_indices <- design$train_indices
  val_indices <- design$val_indices
  
  train_x <- as.matrix(data[train_indices, givenVars, drop=FALSE]) # conditions
  train_x <- reticulate::r_to_py(train_x)
  train_y <- as.matrix(data[train_indices, outVars, drop=FALSE])
  train_y <- reticulate::r_to_py(train_y)
  valida_set_x <- as.matrix(data[val_indices,givenVars, drop=FALSE])
  valida_set_x <- reticulate::r_to_py(valida_set_x)
  valida_set_y <- as.matrix(data[val_indices,outVars, drop=FALSE])
  valida_set_y <- reticulate::r_to_py(valida_set_y)
  
  captured <- switch(paste(as.integer(verbose)), "2"=c(), c("stdout"))
  time1 <- Sys.time()
  py_output <- reticulate::py_capture_output(
    MAF_object <- py_MAF_handle$MAF_conditional_density_estimation(train_y, train_x, valida_set_y, valida_set_x, 
                                                    features=length(outVars), 
                                                    context=length(givenVars), 
                                                    transforms=transforms, 
                                                    hidden_features=design$hidden_units, 
                                                    randperm=FALSE, 
                                                    max_epochs=500L, batch_size=design$batch_size,
                                                    device = py_MAF_handle$device,
                                                    patience=design$patience,
                                                    learning_rate=Adam_learning_rate)
    , type = captured)
  attr(MAF_object, "train_time") <- round(as.numeric(difftime(Sys.time(), time1, units = "secs")), 1) ## spaMM:::.timerraw(time1)
  
  if (verbose==1L) {
    py_output <- strsplit(py_output, split="\n", fixed=TRUE)[[1]]
    cat(paste0("Output from MAF_conditional_density_estimation():\n",
               py_output[1],"\n  (...)\n", 
               paste(tail(py_output,3)[1:2],collapse="\n"),"\n") )
  }
  
  class(MAF_object) <- c("MAF", class(MAF_object))
  attr(MAF_object, "colTypes") <- list(outVars=outVars, givenVars=givenVars)
  MAF_object
}

str.MAF <- function(object, ...) {
  # class(object) <- setdiff(class(object), "MAF") # to call other str() methods
  # str(object, ...)
  attrs <- attributes(object)
  extraAttrs <- setdiff(names(attrs),c("class","py_object"))
  cat("'MAF' object with attributes: ")
  str(attrs[c("py_object",extraAttrs,"class")])
}

.calc_all_MAFs <- function(data, statNames, latentVars, fittedPars, inferredVars, 
                           verbose, using="MAFmix") {
  locreftable <- na.omit(data)   
  if (nNAlines <- length(attr(locreftable,"na.action"))) {
    message(paste0(nNAlines," lines of reference table still contained NA's, and were removed. see ?handling_NAs for details."))
  }
  if (verbose$most) {
    if (length(grep("c.mafR",using))) {
      cat(paste0("Inferring densities [(S;P); (S,P); P instrumental; P|S posterior]: ",
                 nrow(data)," points\n"))
    } else cat(paste0("Inferring densities [joint(S,P); P instrumental; P|S posterior]: ",
                      nrow(data)," points\n"))
  }
  
  py_MAF_handle <- .get_py_MAF_handle()
  # reticulate::py_run_string("del complconddens") # no clear effect
  py_main <- py_MAF_handle$py_main  # Module(__main__)
  py_main$complconddens <- py_main$completedens <- py_main$pardens <- 
    py_main$postdens <- py_main$jointdens <- py_main$conddens <- NULL
  # py_MAF_handle$gc$collect()
  py_MAF_handle$torch$cuda$empty_cache() # _F I X M E_ ?
  
  if (length(grep("c.mafR",using))) {
    complconddens <- # pdf(S,L|P)
      .wrap_MAF_cond(locreftable, outVars=c(statNames,latentVars), 
                     givenVars=fittedPars, verbose=verbose$MAF,
                     py_MAF_handle=.get_py_MAF_handle()) 
  } else complconddens <- NULL
  py_main$complconddens <- complconddens
  
  completedens <- 
    py_main$completedens <- 
    .wrap_MAF(locreftable, inferredvars=inferredVars, statNames=statNames, verbose=verbose$MAF,
              py_MAF_handle=.get_py_MAF_handle())
  
  pardens <- 
    py_main$pardens <- 
    .wrap_MAF(locreftable, inferredvars=fittedPars, statNames=c(), verbose=verbose$MAF,
              py_MAF_handle=.get_py_MAF_handle())
  
  # postdens used by .inits_from_postdens() which is needed in maximizations... 
  # I really need a an object to simulate from, not some way to predict from the intrumental posterior,
  # so I cannot simply use a jointdens and a statdens.
  if ( ! length(grep("MAFmix",using))) {
    if (length(grep("u.mafR",using))) { # ___F I X M E___
      design_fac <- .Infusion.data$options$MAF_design_fac/2 # trying to minimize the conditional computation
      patience <- .Infusion.data$options$MAF_patience %/% 2L
    } else { # "c.mafR"
      design_fac <- .Infusion.data$options$MAF_design_fac # trying to minimize the conditional computation
      patience <- .Infusion.data$options$MAF_patience
    } 
    postdens <- 
      py_main$postdens <- # instrumental posterior  P|S
      .wrap_MAF_cond(locreftable, outVars=fittedPars, givenVars=statNames, verbose=verbose$MAF,
                     py_MAF_handle=.get_py_MAF_handle(),design_fac=design_fac,
                     # dots:
                     patience=patience)
    attr(postdens,"which") <- "I_postdens" 
  } else postdens <- NULL
  attr(pardens,"which") <- "pardens"
  if (length(latentVars)) { # then refit without the latent vars
    py_main$jointdens <- 
      .wrap_MAF(locreftable, inferredvars=fittedPars, statNames=statNames, verbose=verbose$MAF,
                py_MAF_handle=.get_py_MAF_handle())
    if (length(grep("c.mafR",using))) {
      conddens <- # pdf(S|P)
        .wrap_MAF_cond(locreftable, outVars=statNames, givenVars=fittedPars, verbose=verbose$MAF,
                       py_MAF_handle=.get_py_MAF_handle()) # !!! not tested with latentVars
      attr(complconddens,"which") <- "S,L|P"
      attr(conddens,"which") <- "S|P"
      attr(jointdens,"which") <- "jointdens"
      attr(completedens,"which") <- "completedens"
    } else conddens <- NULL
    py_main$conddens <- conddens
    resu <- list(jointdens=jointdens, conddens=conddens, completedens=completedens,
                 complconddens=complconddens, pardens=pardens, postdens=postdens)    
  } else {
    if (length(grep("c.mafR",using))) attr(complconddens,"which") <- "S|P"
    attr(completedens,"which") <- "jointdens"
    resu <- list(jointdens=completedens, conddens=complconddens,
                      pardens=pardens, postdens=postdens)  
  }
  py_MAF_handle$py_main <- py_main 
  resu
}


.predict_MAF <- function(X, # parameters only 
                         object, 
                         tstat.obs, # 1-row matrix as otherwise more cases should be considered for cbind'ing
                         # log, 
                         which, # "safe" version ignores, by correcting, spuriously high logL in area of low parameter density.
                         thr_info=.get_thr_info(object),
                         ...) {
  if (is.null(dim(X))) {
    # dim(X) <- c(1L, length(X)) drops dimnames (sigh)
    X <- t(X)
    if (which=="jointvaldens" ||
        length(grep("u.mafR|MAFmix",object$using))) newjointX <- cbind(X,tstat.obs) # cbind two 1-row matrices
  } else {
    if (length(intersect(colnames(X),colnames(tstat.obs)))) stop("'X' should contain only parameters, not summary statistics")
    if (which=="jointvaldens" ||
        length(grep("u.mafR|MAFmix",object$using))) newjointX <- cbind(X,tstat.obs[rep(1,nrow(X)),,drop=FALSE]) # cbind two nrow(X)-row matrices
  }
  nr <- nrow(X)
  
  # if (.py_MAF_handle$device$type=="cuda") {
  #   if (.py_MAF_handle$torch$cuda$memory_allocated() >
  #       100000000) {
  #     .py_MAF_handle$torch$cuda$empty_cache()
  #     .py_MAF_handle$gc$collect()
  #     }
  # }
  
  if (which %in% c("parvaldens","safe") ||
      length(grep("u.mafR|MAFmix",object$using))
      ) {
    X <- reticulate::r_to_py(as.matrix(X)) 
    parvaldens <- 
      .py_MAF_handle$MAF_predict_nocond(object$pardens, X, .py_MAF_handle$device)
    if (which=="parvaldens") return(parvaldens)
  }
  if (which=="jointvaldens" ||
      length(grep("u.mafR|MAFmix",object$using))) {
    newjointX <- reticulate::r_to_py(as.matrix(newjointX)) 
    jointvaldens <- 
      .py_MAF_handle$MAF_predict_nocond(object$jointdens, newjointX, .py_MAF_handle$device)
    return(jointvaldens)
  }
  # else "lik" or "safe"
  sobs <- reticulate::r_to_py(tstat.obs[rep(1,nr),, drop=FALSE]) 
  if (which=="lik") X <- reticulate::r_to_py(as.matrix(X)) 
  if (length(grep("u.mafR|MAFmix",object$using))) {
    condvaldens <- jointvaldens-parvaldens
  } else {
    condvaldens <- 
      .py_MAF_handle$MAF_predict_cond(object$conddens, 
                                      sobs, # 'Y', but I cannot name it
                                      X, # 'cond'... 
                                      .py_MAF_handle$device)
  }
  if (which=="safe") {
    ddens <- parvaldens-thr_info$thr_dpar
    negddens <- (ddens<0)
    if (any(negddens)) {
      dlogl <- condvaldens-thr_info$reft_maxlogl # perhaps by setting a sightly higher thr
      # high would allow a bit more extrapol and thus exploration? bc currently it may induce self-reinforcing 
      # high pardens in suboptimal regions.... but the effect seems opposite on exploration...
      posdlogl <- dlogl>0
      #lowextrapol <- negddens & ! posdlogl
      #condvaldens[lowextrapol] <- condvaldens[lowextrapol]+ ... # not clear what to do
      highextrapol <- negddens & posdlogl # ...................... potentially conservative intervals.............
      # make it continuous wrt to dlogl, but strongly compensating in most cases
      condvaldens[highextrapol] <- condvaldens[highextrapol]+ sqrt(dlogl[highextrapol])*ddens[highextrapol]
      # extrapol <-  posdlogl & ! negddens
      # condvaldens[extrapol] <- condvaldens[extrapol]- sqrt(dlogl[highextrapol])
      attr(condvaldens,"lowdens") <- negddens
    }
  }
  return(condvaldens)
  # if ( ! log) density <- exp(density)
  # return(density)
}

.simulate.MAF <- function(density, nsim, 
                          given, # =t(get_from(object,"stat.obs"))
                          object, # I need 'given' or 'object'
                          batchsize= 4000L
) {
  colTypes <- attr(density,"colTypes")
  which. <- attr(density,"which")
  # .py_MAF_handle$torch$cuda$empty_cache()
  if (which. %in% c("jointdens","pardens","completedens")) {
    trypoints <- 
      density()$sample(reticulate::tuple(as.integer(nsim)))$cpu()$numpy()
    colnames(trypoints) <- colTypes$allVars
  } else { # all conditional simulations
    givenVars <- colTypes$givenVars
    coln_given <- colnames(given) # expect 1-row *matrix*
    if (which.=="I_postdens") { # INSTRUMENTAL postdens
      if ( ! setequal(givenVars, coln_given)) {
        if (length(setdiff(coln_given,colTypes$outVars))) { 
          stop("Wrong 'given' value in .simulate.MAF()")
        } else {
          # patch for sampling instrumental postdens for given parameter values
          given <- t(get_from(object,"stat.obs"))
        }
      }
    } else if (which. %in% c("S|P","S,L|P")) { # then 'given' should be params, not stat.obs...
      if ( ! setequal(givenVars, coln_given)) {
        stop("Wrong 'given' value in .simulate.MAF()")
      }
    } else stop("'which.' value not handled in .simulate.MAF()" ) # might be feasible with MAFcond but...
    if (nrow(given)>1L) {
      stop(".simulate.MAF() handles only a single 'given' vector")
      # The Python batching code does not yet handle other cases. See further comments there.
    }
    given <- reticulate::r_to_py(given[,givenVars,drop=FALSE]) # controls order
    trypoints <- 
      .py_MAF_handle$MAF_simulate_cond(density, 
                                       reticulate::tuple(as.integer(nsim)),
                                       given, # cond
                                       .py_MAF_handle$device,
                                       batchsize
      )
    colnames(trypoints) <- colTypes$outVars
  }
  trypoints
}