R/np.distribution.R

Defines functions npudist.default npudist.dbandwidth npudist.call npudist.formula npudist

Documented in npudist npudist.dbandwidth npudist.default npudist.formula

npudist <-
  function(bws, ...){
    args <- list(...)

    if (!missing(bws)){
      if (is.recursive(bws)){
        if (!is.null(bws$formula) && is.null(args$tdat))
          UseMethod("npudist",bws$formula)
        else if (!is.null(bws$call) && is.null(args$tdat))
          UseMethod("npudist",bws$call)
        else if (!is.call(bws))
          UseMethod("npudist",bws)
        else
          UseMethod("npudist",NULL)
      } else {
        UseMethod("npudist", NULL)
      }
    } else {
      UseMethod("npudist", NULL)
    }
  }


npudist.formula <-
  function(bws, data = NULL, newdata = NULL, ...){

    tt <- terms(bws)
    m <- match(c("formula", "data", "subset", "na.action"),
               names(bws$call), nomatch = 0)
    tmf <- bws$call[c(1,m)]
    tmf[[1]] <- as.name("model.frame")
    tmf[["formula"]] <- tt
    mf.args <- as.list(tmf)[-1L]
    umf <- tmf <- do.call(stats::model.frame, mf.args, envir = environment(tt))

    tdat <- tmf[, attr(attr(tmf, "terms"),"term.labels"), drop = FALSE]

    has.eval <- !is.null(newdata)
    if (has.eval) {
      npValidateNewdataFormula(newdata, tt, include.response = TRUE)
      umf.args <- list(formula = tt, data = newdata)
      umf <- do.call(stats::model.frame, umf.args, envir = parent.frame())
      emf <- umf

      edat <- emf[, attr(attr(emf, "terms"),"term.labels"), drop = FALSE]
    }
    
    ud.args <- list(tdat = tdat)
    if (has.eval)
      ud.args$edat <- edat
    ud.args$bws <- bws
    ev <- do.call(npudist, c(ud.args, list(...)))

    ev$omit <- attr(umf,"na.action")
    ev$rows.omit <- as.vector(ev$omit)
    ev$nobs.omit <- length(ev$rows.omit)

    ev$dist <- napredict(ev$omit, ev$dist)
    ev$derr <- napredict(ev$omit, ev$derr)

    return(ev)
  }

npudist.call <-
  function(bws, ...) {
    npudist(tdat = .np_eval_bws_call_arg(bws, "dat"),
          bws = bws, ...)
  }

npudist.dbandwidth <-
  function(bws,
           tdat = stop("invoked without training data 'tdat'"),
           edat, ...){

    dots <- list(...)
    fit.start <- proc.time()[3]
    fit.progress.handoff <- isTRUE(dots$.np_fit_progress_handoff)

    no.e = missing(edat)

    tdat = toFrame(tdat)

    if (!no.e)
      edat = toFrame(edat)

    if (!(no.e || tdat %~% edat ))
      stop("tdat and edat are not similar data frames!")

    if (length(bws$bw) != length(tdat))
      stop("length of bandwidth vector does not match number of columns of 'tdat'")

    if ((any(bws$icon) &&
         !all(vapply(as.data.frame(tdat[, bws$icon]), inherits, logical(1), c("integer", "numeric")))) ||
        (any(bws$iord) &&
         !all(vapply(as.data.frame(tdat[, bws$iord]), inherits, logical(1), "ordered"))) ||
        (any(bws$iuno) &&
         !all(vapply(as.data.frame(tdat[, bws$iuno]), inherits, logical(1), "factor"))))
      stop("supplied bandwidths do not match 'tdat' in type")

    tdat = na.omit(tdat)
    rows.omit <- unclass(na.action(tdat))

    if (!no.e){
      edat = na.omit(edat)
      rows.omit <- unclass(na.action(edat))
    }

    tnrow = nrow(tdat)
    enrow = (if (no.e) tnrow else nrow(edat))

    ## re-assign levels in training and evaluation data to ensure correct
    ## conversion to numeric type.
    
    tdat <- adjustLevels(tdat, bws$xdati)
    
    if (!no.e)
      edat <- adjustLevels(edat, bws$xdati, allowNewCells = TRUE)

    if (!no.e)
      npKernelBoundsCheckEval(edat, bws$icon, bws$ckerlb, bws$ckerub, argprefix = "cker")

    ## grab the evaluation data before it is converted to numeric
    if(no.e)
      teval <- tdat
    else
      teval <- edat

    ## put the unordered, ordered, and continuous data in their own objects
    ## data that is not a factor is continuous.
    
    tdat = toMatrix(tdat)

    tuno = tdat[, bws$iuno, drop = FALSE]
    tcon = tdat[, bws$icon, drop = FALSE]
    tord = tdat[, bws$iord, drop = FALSE]

    if (!no.e){
      edat = toMatrix(edat)

      euno = edat[, bws$iuno, drop = FALSE]
      econ = edat[, bws$icon, drop = FALSE]
      eord = edat[, bws$iord, drop = FALSE]

    } else {
      euno = data.frame()
      eord = data.frame()
      econ = data.frame()
    }

    myopti = list(
      num_obs_train = tnrow,
      num_obs_eval = enrow,
      num_uno = bws$nuno,
      num_ord = bws$nord,
      num_con = bws$ncon,
      int_LARGE_SF = (if (bws$scaling) SF_NORMAL else SF_ARB),
      BANDWIDTH_den_extern = switch(bws$type,
        fixed = BW_FIXED,
        generalized_nn = BW_GEN_NN,
        adaptive_nn = BW_ADAP_NN),
      int_MINIMIZE_IO=if (isTRUE(getOption("np.messages"))) IO_MIN_FALSE else IO_MIN_TRUE,
      ckerneval = switch(bws$ckertype,
        gaussian = CKER_GAUSS + bws$ckerorder/2 - 1,
        epanechnikov = CKER_EPAN + bws$ckerorder/2 - 1,
        uniform = CKER_UNI,
        "truncated gaussian" = CKER_TGAUSS),
      ukerneval = switch(bws$ukertype,
        aitchisonaitken = UKER_AIT,
        liracine = UKER_LR),
      okerneval = switch(bws$okertype,
        wangvanryzin = OKER_WANG,
        liracine = OKER_NLR,
        "racineliyan" = OKER_RLY),
      no.e = no.e,
      mcv.numRow = attr(bws$xmcv, "num.row"),
      densOrDist = NP_DO_DIST,
      old.dist = FALSE,
      int_do_tree = if (isTRUE(getOption("np.tree"))) DO_TREE_YES else DO_TREE_NO)
    cker.bounds.c <- npKernelBoundsMarshal(bws$ckerlb[bws$icon], bws$ckerub[bws$icon])

    myout <- .np_with_compiled_fit_progress(
      label = "Fitting distribution",
      total = .np_densdist_fit_total(bws = bws, tnrow = tnrow, enrow = enrow),
      handoff = fit.progress.handoff,
      handoff.detail = if (fit.progress.handoff) "starting" else NULL,
      .Call("C_np_density",
            as.double(tuno), as.double(tord), as.double(tcon),
            as.double(euno), as.double(eord), as.double(econ),
            as.double(c(bws$bw[bws$icon], bws$bw[bws$iuno], bws$bw[bws$iord])),
            as.double(bws$xmcv), as.double(attr(bws$xmcv, "pad.num")),
            as.double(bws$nconfac), as.double(bws$ncatfac), as.double(bws$sdev),
            as.integer(myopti),
            as.integer(enrow),
            as.double(cker.bounds.c$lb),
            as.double(cker.bounds.c$ub),
            PACKAGE = "np")
    )
    names(myout)[1] <- "dist"

    fit.elapsed <- proc.time()[3] - fit.start
    optim.time <- if (!is.null(bws$total.time) && is.finite(bws$total.time)) as.double(bws$total.time) else NA_real_
    total.time <- fit.elapsed + (if (is.na(optim.time)) 0.0 else optim.time)

    ev <- npdistribution(bws=bws, eval=teval, dist = myout$dist,
                         derr = myout$derr, ntrain = tnrow, trainiseval = no.e,
                         rows.omit = rows.omit,
                         timing = bws$timing, total.time = total.time,
                         optim.time = optim.time, fit.time = fit.elapsed)
    return(ev)
  }

npudist.default <- function(bws, tdat, ...){
  sc <- sys.call()
  sc.names <- names(sc)

  ## here we check to see if the function was called with tdat =
  ## if it was, we need to catch that and map it to dat =
  ## otherwise the call is passed unadulterated to npudensbw

  bws.named <- any(sc.names == "bws")
  tdat.named <- any(sc.names == "tdat")

  no.bws <- missing(bws)
  no.tdat <- missing(tdat)
  has.explicit.bws <- (!no.bws) && isa(bws, "dbandwidth")
  bws.formula <- (!no.bws) && inherits(bws, "formula")

  if (bws.named && no.tdat && bws.formula) {
    sc$`bws` <- NULL
    sc$formula <- bws
    sc.bw <- sc
    sc.bw[[1]] <- quote(npudistbw)
    bws.named <- FALSE
  } else {
    sc.bw <- sc
    sc.bw[[1]] <- quote(npudistbw)
  }

  ## if bws was passed in explicitly, do not compute bandwidths
    
  if(tdat.named)
    tdat <- toFrame(tdat)

  if(bws.named){
    sc.bw$bandwidth.compute <- FALSE
  }

  ostxy <- c('tdat')
  nstxy <- c('dat')
  
  m.txy <- match(ostxy, names(sc.bw), nomatch = 0)

  if(any(m.txy > 0)) {
    names(sc.bw)[m.txy] <- nstxy[m.txy > 0]
  }
    
  tbw <- if (!has.explicit.bws) {
    .np_progress_select_bandwidth_enhanced(
      "Selecting distribution bandwidth",
      .np_eval_bw_call(sc.bw, caller_env = parent.frame())
    )
  } else {
    .np_eval_bw_call(sc.bw, caller_env = parent.frame())
  }

  ## convention: first argument is always dropped, second, if present, propagated
  call.args <- list(bws = tbw)
  if (!no.tdat) {
    if (tdat.named) {
      call.args$tdat <- tdat
    } else {
      call.args <- c(call.args, list(tdat))
    }
  }
  if (!has.explicit.bws)
    call.args$.np_fit_progress_handoff <- TRUE
  do.call(npudist, c(call.args, list(...)))
}

Try the np package in your browser

Any scripts or data that you put into this service are public.

np documentation built on May 16, 2026, 1:07 a.m.