R/util.R
In np: Nonparametric Kernel Smoothing Methods for Mixed Data Types

Documented in gradients npseed nptgauss se

## This function will compute the cumulative integral at each sample
## realization using the trapezoidal rule and the cumsum function as
## we need to compute this in a computationally efficient manner.

integrate.trapezoidal <- function(x,y) {
  n <- length(x)
  rank.x <- rank(x)
  order.x <- order(x)
  y <- y[order.x]
  x <- x[order.x]
  int.vec <- numeric(length(x))
  ## Use a correction term at the boundary: -cx^2/12*(f'(b)-f'(a)),
  ## check for NaN case
  cx  <- x[2]-x[1]
  ca <- (y[2]-y[1])/cx
  cb <- (y[n]-y[n-1])/cx
  cf <- cx^2/12*(cb-ca)
  if(!is.finite(cf)) cf <- 0
  int.vec[1] <- 0
  int.vec[2:n] <- cumsum((x[2:n]-x[2:n-1])*(y[2:n]+y[2:n-1])/2)
  return(int.vec[rank.x]-cf)
}

## No Zero Denominator, used in C code for kernel estimation...
  
NZD <- function(a) {
  ifelse(a<0,pmin(-.Machine$double.eps,a),pmax(.Machine$double.eps,a))
}

## Function to test for monotone increasing vector

is.monotone.increasing <- function(x) {
  ## Sorted and last value > first value
  !is.unsorted(x) && x[length(x)] > x[1]
}
  
## what is a badord? ... an ordered factor of numeric values to treat
## them properly one must preserve the numeric value, ie. scale not
## just their sorted order Actually, the ord/badord paradigm must go,
## in place of levels caching

matrix.sd <- function(x, na.rm=FALSE) {
  if(is.matrix(x)) apply(x, 2, sd, na.rm=na.rm)
  else if(is.vector(x)) sd(x, na.rm=na.rm)
  else if(is.data.frame(x)) sapply(x, sd, na.rm=na.rm)
  else sd(as.vector(x), na.rm=na.rm)
}

npseed <- function(seed){
  .C("np_set_seed",as.integer(abs(seed)), PACKAGE = "np")
  invisible()
}

erf <- function(z) { 2 * pnorm(z*sqrt(2)) - 1 }

nptgauss <- function(b){

  rel.tol <- sqrt(.Machine$double.eps)

  b.max <- sqrt(-2*log(.Machine$double.eps))

  if((b < 0) || (b > b.max))
    stop(paste("b must be between 0 and",b.max))

  alpha <- 1.0/(pnorm(b)-pnorm(-b)-2*b*dnorm(b))

  tgauss <- function(z)
    ifelse(abs(z) >= b, 0.0, alpha*(dnorm(z) - dnorm(b)))

  c0 <- alpha*dnorm(b)

  k <- integrate(f = function(z) { tgauss(z)^2 }, -b, b)$value
  k2 <- integrate(f = function(z) { z^2*tgauss(z) }, -b, b)$value
  k22 <- integrate(f = function(z) { (z*tgauss(z))^2 }, -b, b)$value
  km <- integrate(f = function(z) { tgauss(z-0.5)*tgauss(z+0.5) }, -b+0.5, b-0.5)$value

  a0 <- (0.5 + 2*b*c0)/integrate(f = function(z){ erf(z/2 + b)*exp(-0.25*z^2) }, -2*b, 0)$value
  a2 <- (c0 + k - a0*erf(b))/erf(b/sqrt(2))
  a1 <- -(a2*erf(b/sqrt(2)) + c0)/(2*b)

  int.kernels[CKER_TGAUSS + 1] <- k

  invisible(.C("np_set_tgauss2",as.double(c(b, alpha, c0, a0, a1, a2, k, k2, k22, km)), PACKAGE = "np"))

}

numNotIn <- function(x){
  while(is.element(num <- rnorm(1),x)){}
  num
}

dlev <- function(x){
  if(is.ordered(x))
    x.dlev <- suppressWarnings(as.numeric(levels(x)))
  if (!is.ordered(x) || any(is.na(x.dlev)))
    x.dlev <- as.numeric(1:nlevels(x))
  x.dlev
}

isNum <- function(x){
  return(!any(is.na(suppressWarnings(as.numeric(x)))))
}

untangle <- function(frame){
  if (is.null(frame))
    return(NULL)
  
  iord <- unlist(lapply(frame, is.ordered))
  iuno <- unlist(lapply(frame, is.factor)) & !iord
  icon <- unlist(lapply(frame, is.numeric))

  if(!all(iord | iuno | icon)) 
    stop("non-allowed data type in data frame")

  inumord <-
    suppressWarnings(unlist(lapply(frame,
    function (z) {
      is.ordered(z) && is.numeric(tryCatch(as.numeric(levels(z)), warning = function (y) {
        FALSE }))
    })))

  all.lev <- lapply(frame, function(y){
    t.ret <- NULL
    if (is.factor(y))
      t.ret <- levels(y)
    t.ret
  })

  all.ulev <- lapply(frame, function (y) {
    t.ret <- NULL
    if (is.factor(y))
      t.ret <- sort(unique(y))
    t.ret
  })

  all.dlev <- lapply(frame, function (y) {
    t.ret <- NULL
    if (is.factor(y))
      t.ret <- dlev(y)
    t.ret
  })

  all.nlev <- lapply(frame, function(y) {
    t.ret <- NULL
    if (is.factor(y))
      t.ret <- nlevels(y)
    t.ret
  })

  list(iord = iord,
       iuno = iuno,
       icon = icon,
       inumord = inumord,
       all.lev = all.lev,
       all.ulev = all.ulev,
       all.dlev = all.dlev,
       all.nlev = all.nlev)
}

validateBandwidth <- function(bws){
  vari <- names(bws$bandwidth)
  bchecker <- function(j){
    v <- vari[j]
    dati <- bws$dati[[v]]
    bwv <- bws$bandwidth[[j]]
    stopifnot(length(bwv) == length(dati$iord))

    cd <- function(a,b){
      (a-b)/(a+b+.Machine$double.eps) > 5.0*.Machine$double.eps
    }
    
    vb <- sapply(1:length(bwv), function(i){
      falg <- (bwv[i] < 0)

      if (dati$icon[i] && (falg || (!is.finite(bwv[i])))){
        stop(paste("Invalid bandwidth supplied for continuous",
                   "variable", bws$varnames[[v]][i], ":",bwv[i]))
      }
      
      if (dati$iord[i] &&
          (falg || cd(bwv[i],oMaxL(dati$all.nlev[[i]],
                         kertype = bws$klist[[v]]$okertype)))){
        stop(paste("Invalid bandwidth supplied for ordered",
                   "variable", bws$varnames[[v]][i], ":",bwv[i]))
      }
      
      if (dati$iuno[i] &&
          (falg || cd(bwv[i],uMaxL(dati$all.nlev[[i]],
                         kertype = bws$klist[[v]]$ukertype)))){
        stop(paste("Invalid bandwidth supplied for unordered",
                   "variable", bws$varnames[[v]][i], ":",bwv[i]))
      }
      return(TRUE)
    })
    
    return(vb)
  }
  vbl <- lapply(1:length(vari), bchecker)
  invisible(vbl)
}

validateBandwidthTF <- function(bws){
  vari <- names(bws$bandwidth)
  bchecker <- function(j){
    v <- vari[j]
    dati <- bws$dati[[v]]
    bwv <- bws$bandwidth[[j]]

    if(length(bwv) != length(dati$iord))
      return(FALSE)

    cd <- function(a,b){
      (a-b)/(a+b+.Machine$double.eps) > 5.0*.Machine$double.eps
    }
    
    vb <- sapply(1:length(bwv), function(i){
      falg <- (bwv[i] < 0)

      if (dati$icon[i]) {
        if(bws$type == "fixed") {
          if(falg || (!is.finite(bwv[i]))){
            return(FALSE)
          }
        } else if((bwv[i] < 1) || (!is.finite(bwv[i]))) {
          return(FALSE)
        }
      }
      
      if (dati$iord[i] &&
          (falg || cd(bwv[i],oMaxL(dati$all.nlev[[i]],
                         kertype = bws$klist[[v]]$okertype)))){
        return(FALSE)
      }
      
      if (dati$iuno[i] &&
          (falg || cd(bwv[i],uMaxL(dati$all.nlev[[i]],
                         kertype = bws$klist[[v]]$ukertype)))){
        return(FALSE)
      }
      return(TRUE)
    })
    
    return(all(vb))
  }
  vbl <- all(unlist(lapply(1:length(vari), bchecker)))
  return(vbl)
}


explodeFormula <- function(formula){
  res <- strsplit(strsplit(paste(deparse(formula), collapse=""),
                           " *[~] *")[[1]], " *[+] *")
  stopifnot(all(sapply(res,length) > 0))
  names(res) <- c("response","terms")
  res
}


explodePipe <- function(formula){
  tf <- as.character(formula)  
  tf <- tf[length(tf)]

  eval(parse(text=paste("c(",
               ifelse(length(as.character(formula)) == 3,
                      'strsplit(as.character(formula)[2]," *[+] *"),',""),
               'strsplit(strsplit(tf," *[|] *")[[1]]," *[+] *"))')))
}

"%~%" <- function(a,b) {
  all(class(a) == class(b)) && (length(a) == length(b)) &&
  all(unlist(lapply(a,coarseclass)) == unlist(lapply(b,coarseclass)))
}

coarseclass <- function(a) {
  ifelse(class(a) == "integer", "numeric", class(a))
}

toFrame <- function(frame) {
  if(!is.data.frame(frame)){
    t.names <- NULL

    if(!(is.vector(frame) || is.factor(frame) || is.matrix(frame)))
      stop(deparse(substitute(frame))," must be a data frame, matrix, vector, or factor")

    if(!is.matrix(frame))
      t.names <- deparse(eval(substitute(substitute(frame)), envir = parent.frame()))
    
    frame <- data.frame(frame, check.names=FALSE)
    
    if(!is.null(t.names))
      names(frame) <- t.names
  }
  return(frame)
}


cast <- function(a, b, same.levels = TRUE){
  if(is.ordered(b)){
    if(same.levels)
      ordered(a, levels = levels(b))
    else
      ordered(a)
  }   
  else if(is.factor(b)){
    if(same.levels)
      factor(a, levels = levels(b))
    else
      factor(a)
  }
  else if (coarseclass(b) == "numeric")
    as.double(a)
  else if (is.data.frame(b)) {
    if (dim(a)[2] == dim(b)[2]){
      r = data.frame(a)
      for (i in 1: length(b))
        r[,i] = cast(a[,i],b[,i], same.levels = same.levels)
      r
    } else { stop("a could not be cast as b") }
  }
}

subcol <- function(x, v, i){
  x[,i] = cast(v,x[,i])
  x
}

mcvConstruct <- function(dati){
  nuno <- sum(dati$iuno)
  nord <- sum(dati$iord)

  num.row <- max(sapply(dati$all.lev,length))

  pad.num <- numNotIn(unlist(dati$all.dlev))

  mcv <- matrix(data = pad.num, nrow = num.row, ncol = (nuno+nord))
  attr(mcv, "num.row") <- num.row
  attr(mcv, "pad.num") <- pad.num

  cnt <- 0
  if (nuno > 0)
    for (i in which(dati$iuno)) 
      mcv[1:length(dati$all.lev[[i]]), (cnt <- cnt+1)] <- dati$all.dlev[[i]]

  cnt <- 0
  if (nord > 0)
    for (i in which(dati$iord))
      mcv[1:length(dati$all.lev[[i]]), (cnt <- cnt+1)+nuno] <- dati$all.dlev[[i]]

  mcv
}

## when admitting new categories, adjustLevels will attempt to catch possible mistakes:
## if an unordered variable contains more than one new category, warn
## if an ordered, but scaleless variable contains a new category, error
## if an ordered, scale-possessing variable contains a new category lacking scale, error

adjustLevels <- function(data, dati, allowNewCells = FALSE){
  for (i in which(dati$iord | dati$iuno)){
    if (allowNewCells){
      newCats <- setdiff(levels(data[,i]),dati$all.lev[[i]])
      if (length(newCats) >= 1){
        if (dati$iuno[i]){
          if (length(newCats) > 1)
            warning(paste("more than one 'new' category is redundant when estimating on unordered data.\n",
                          "training data categories: ", paste(dati$all.lev[[i]], collapse=" "),"\n",
                          "redundant estimation data categories: ", paste(newCats, collapse=" "), "\n", sep=""))
          data[,i] <- factor(data[,i], levels = c(dati$all.lev[[i]], newCats))
        } else {
          if (dati$inumord[i]){
            if (!isNum(newCats))
              stop(paste("estimation data contains a new qualitative category, but training data is\n",
                         "ordered, and numeric.\n",
                         "training data categories: ", paste(dati$all.lev[[i]], collapse=" "),"\n",
                         "conflicting estimation data categories: ", paste(newCats, collapse=" "), "\n", sep=""))
          } else {
            stop(paste("estimation beyond the support of training data of an ordered,\n",
                       "categorical, qualitative variable is not supported.\n"))
          }

          data[,i] <- ordered(data[,i], levels = sort(as.numeric(c(dati$all.lev[[i]], newCats))))
        }
      } else {
        data[,i] <- factor(data[,i], levels = dati$all.lev[[i]])
      }
    } else {
      if (!all(is.element(levels(data[,i]), dati$all.lev[[i]])))
        stop("data contains unknown factors (wrong dataset provided?)")
      data[,i] <- factor(data[,i], levels = dati$all.lev[[i]])
    }
  }

  data
}

toMatrix <- function(data) {
  tq <- sapply(data, function(y) {
    if(is.factor(y))
      y <- dlev(y)[as.integer(y)]
    y})
  dim(tq) <- dim(data) ## cover the corner case of single element d.f.
  tq
}

## this doesn't just strictly check for the response, but does tell you
## that evaluating with response fails ... in principle the evaluation
## could fail without the response too, but then the calling routine is about
## to die a noisy death anyhow ...
succeedWithResponse <- function(tt, frame){
  !any(class(try(eval(expr = attr(tt, "variables"),
                      envir = frame, enclos = NULL), silent = TRUE)) == "try-error")
}

## determine whether a bandwidth
## matches a data set
bwMatch <- function(data, dati){
  if (length(dati$icon) != ncol(data))
    stop("bandwidth vector is of improper length")

  test.dati <- untangle(data)

  if (any(xor(dati$icon,test.dati$icon)) ||
      any(xor(dati$iord,test.dati$iord)) ||
      any(xor(dati$iuno,test.dati$iuno)))
    stop(paste("supplied bandwidths do not match","data", "in type"))
}

uMaxL <- function(c, kertype = c("aitchisonaitken","liracine")){
  switch(kertype,
         aitchisonaitken = (c-1.0)/c,
         liracine = 1.0)
}

oMaxL <- function(c, kertype = c("wangvanryzin", "liracine", "nliracine")){
  switch(kertype,
         wangvanryzin = 1.0,
         liracine = 1.0,
         nliracine = 1.0)
}

## tested with: rbandwidth
## right now all bandwidth objects have some crusty
## vestiges of their evolution, ie. non-list metadata
## such as xnames or ynames. The new metadata system is
## for the most part list based and facilitates generic
## operations

updateBwNameMetadata <- function(nameList, bws){
  ## names of 'old' metadata in bw object
  onames <- names(nameList)
  lapply(1:length(nameList), function(i) {
    bws[[onames[i]]] <<- nameList[[i]]
    bws$varnames[[substr(onames[i],1,1)]] <<- nameList[[i]]
  })
  return(bws)
}

## some string utility functions

pad <- function(s){
  ifelse(nchar(s) > 0, paste("",s,""), " ")
}

rpad <- function(s){
  ifelse(nchar(s) > 0, paste(s,""), "")
}

blank <- function(len){
  sapply(len, function(nb){
    paste(rep(' ', times = nb), collapse='')
  })
}

formatv <- function(v){
  sapply(1:length(v), function(j){ format(v[j]) })
}

## strings used in various report generating functions

genOmitStr <- function(x){
  t.str <- ''
  if(!is.null(x$rows.omit) & !identical(x$rows.omit, NA))
    t.str <- paste("\nNo. Complete Observations: ", x$nobs,
                   "\nNo. Incomplete (NA) Observations: ", x$nobs.omit,
                   "\nObservations omitted or excluded: ", paste(x$rows.omit, collapse=" "),
                   "\n")
  return(t.str)
}

## Estimation-related rgf's
genGofStr <- function(x){
  paste(ifelse(is.na(x$MSE),"",paste("\nResidual standard error:",
                                     format(sqrt(x$MSE)))),
        ifelse(is.na(x$R2),"",paste("\nR-squared:",
                                    format(x$R2))), sep="")
}

genTimingStr <- function(x){
  ifelse(is.na(x$total.time),"",
         paste("\nEstimation Time: ",format(x$total.time)," seconds",sep = ""))
}
  
pCatGofStr <- function(x){
  if(!identical(x$confusion.matrix, NA)){
    cat("\nConfusion Matrix\n")
    print(x$confusion.matrix)
  }

  if (!identical(x$CCR.overall,NA))
    cat("\nOverall Correct Classification Ratio: ", format(x$CCR.overall))

  if (!identical(x$CCR.byoutcome,NA)){
    cat("\nCorrect Classification Ratio By Outcome:\n")
    print(x$CCR.byoutcome)
  }

  if (!identical(x$fit.mcfadden,NA))
    cat("\nMcFadden-Puig-Kerschner performance measure: ", format(x$fit.mcfadden))

}

genDenEstStr <- function(x){
  paste("\nBandwidth Type: ",x$ptype,
        ifelse(is.null(x$log_likelihood) || identical(x$log_likelihood, NA),"",
               paste("\nLog Likelihood:",
                     format(x$log_likelihood))),
        sep="")
}

genRegEstStr <- function(x){
  paste(ifelse(is.null(x$pregtype),"",
               paste("\nKernel Regression Estimator:",x$pregtype)),
        ifelse(is.null(x$ptype), "",
               paste("\nBandwidth Type:",x$ptype)),
        ifelse(is.null(x$tau), "", paste("\nTau:", x$tau)),
        sep = "")
}


## bandwidth-related report generating functions
genBwSelStr <- function(x){
  paste(ifelse(is.null(x$pregtype),"",paste("\nRegression Type:", x$pregtype)),
        ifelse(is.null(x$pmethod),"",paste("\nBandwidth Selection Method:",
                                           x$pmethod)),
        if (!identical(x$formula,NULL)) paste("\nFormula:",
                                              paste(deparse(x$formula), collapse="\n")),
        ifelse(is.null(x$ptype), "",
               paste("\nBandwidth Type: ",x$ptype, sep="")),
        ifelse(identical(x$fval,NA),"",
               paste("\nObjective Function Value: ", format(x$fval),
               " (achieved on multistart ", x$ifval, ")", sep="")), sep="")
}

genBwScaleStrs <- function(x){
  ## approach is to take metadata and flatten it so it then can be
  ## processed into a single string

  vari <- names(x$sumNum)
  
  t.icon <- lapply(vari, function(v){
    x$dati[[v]]$icon })

  sumText <- lapply(1:length(vari), function(i) {
    ifelse(t.icon[[i]],
           ifelse(x$type == "fixed",
                  "Scale Factor:",""), "Lambda Max:")
    
  })

  maxNameLen <- max(nchar(unlist(sumText)))
  print.sumText <- lapply(sumText, '!=', "")

  sumText <- lapply(1:length(sumText), function(i){
    paste(blank(maxNameLen - nchar(sumText[[i]])), sumText[[i]], sep="")
  })

  t.nchar <- lapply(x$varnames[vari], nchar)
                    
  maxNameLen <- max(unlist(t.nchar))

  vatText <- lapply(1:length(t.nchar), function(j){
    paste("\n", rpad(x$vartitleabb[[vari[j]]]), "Var. Name: ",
          x$varnames[[vari[j]]],
          sapply(t.nchar[[j]], function(nc){
            paste(rep(' ', maxNameLen - nc), collapse='')
          }), sep='')
  })

  return(sapply(1:length(t.nchar), function(j){
    paste(vatText[[j]], " Bandwidth: ", npFormat(x$bandwidth[[j]]), " ",
          ifelse(print.sumText[[j]],
                 paste(sumText[[j]], " ", npFormat(x$sumNum[[j]]), sep=""), ""),
          sep="", collapse="")
  }))
}

npFormat <- function(x){
  format(sapply(x,format))
}

genBwKerStrs <- function(x){
  vari <- names(x$klist)

  ncon <- sapply(vari, function(v){
    sum(x$dati[[v]]$icon)
  })

  nuno <- sapply(vari, function(v){
    sum(x$dati[[v]]$iuno)
  })

  nord <- sapply(vari, function(v){
    sum(x$dati[[v]]$iord)
  })

  cktype <- sapply(vari, function(v){
    x$klist[[v]]$ckertype
  })

  uktype <- sapply(vari, function(v){
    x$klist[[v]]$ukertype
  })

  oktype <- sapply(vari, function(v){
    x$klist[[v]]$okertype
  })

  tt <- ''

  if(any(ncon > 0)){
    tt <- paste("\n",
                ifelse(length(unique(cktype)) == 1,
                       paste("\nContinuous Kernel Type:",
                             x$klist[[vari[1]]]$pckertype),
                       paste(sapply(1:length(vari), function(v){
                         ifelse(ncon[v] > 0,
                                paste("\nContinuous Kernel Type (",
                                      x$vartitleabb[[vari[v]]],
                                      " Var.): ", x$klist[[vari[v]]]$pckertype, sep=""),"")
                       }), collapse = "")),
                sep = "")
    tt <-
      paste(tt, paste(sapply(1:length(vari), function(i){
        ifelse(ncon[i] > 0,
               paste("\nNo. Continuous", pad(x$vartitle[[vari[i]]]), "Vars.: ",
                     ncon[i], sep=""), "")
      }), collapse = ""), sep="")
  }
                
    
  if(any(nuno > 0)) {
    tt <- paste(tt, "\n",
                ifelse(length(unique(uktype)) == 1,
                       paste("\nUnordered Categorical Kernel Type:",
                             x$klist[[vari[1]]]$pukertype),
                       paste(sapply(1:length(vari), function(i){
                         ifelse(nuno[i] > 0,
                                paste("\nUnordered Categorical Kernel Type (",
                                      x$vartitleabb[[vari[i]]],
                                      " Var.): ", x$klist[[vari[i]]]$pukertype, sep=""),"")
                       }), collapse = "")),
                sep = "")
    tt <-
      paste(tt, paste(sapply(1:length(vari), function(i){
        ifelse(nuno[i] > 0,
               paste("\nNo. Unordered Categorical", pad(x$vartitle[[vari[i]]]), "Vars.: ",
                     nuno[i], sep=""), "")
      }), collapse = ""), sep="")

  }

  if(any(nord > 0)) {
    tt <- paste(tt, "\n",
                ifelse(length(unique(oktype)) == 1,
                paste("\nOrdered Categorical Kernel Type:",
                      x$klist[[vari[1]]]$pokertype),
                paste(sapply(1:length(vari), function(i){
                  ifelse(nord[i] > 0,
                         paste("\nOrdered Categorical Kernel Type (",
                               x$vartitleabb[[vari[i]]],
                               " Var.): ", x$klist[[vari[i]]]$pokertype, sep=""),"")
                }), collapse = "")),
         sep = "")
    tt <-
      paste(tt, paste(sapply(1:length(vari), function(i){
        ifelse(nord[i] > 0,
               paste("\nNo. Ordered Categorical", pad(x$vartitle[[vari[i]]]), "Vars.: ",
                     nord[i], sep=""), "")
      }), collapse = ""), sep="")

  }

  return(tt)
}

genBwKerStrsXY <- function(x){
  t.str <- ''
  cnt <- 0
  
  if (x$xncon + x$yncon > 0){
    t.str[cnt <- cnt + 1] <-
      paste(ifelse(x$pcxkertype == x$pcykertype,
                   paste("\n\nContinuous Kernel Type:",x$pcxkertype),
                   paste("\n", ifelse(x$xncon > 0,
                                      paste("\nContinuous Kernel Type (Exp. Var.):",
                                            x$pcxkertype), ""),
                         ifelse(x$yncon > 0,
                                paste("\nContinuous Kernel Type (Dep. Var.):",
                                      x$pcykertype), ""))),
            ifelse(x$yncon > 0,paste("\nNo. Continuous Dependent Vars.:",x$yncon),""),
            ifelse(x$xncon > 0,paste("\nNo. Continuous Explanatory Vars.:",x$xncon),""))
  }

  if (x$xnuno + x$ynuno > 0){
    t.str[cnt <- cnt + 1] <-
      paste(ifelse(x$puxkertype == x$puykertype,
                   paste("\n\nUnordered Categorical Kernel Type:",x$puxkertype),
                   paste("\n", ifelse(x$xnuno > 0,
                                      paste("\nUnordered Categorical Kernel Type (Exp. Var.):",
                                            x$puxkertype), ""),
                         ifelse(x$ynuno > 0,
                                paste("\nUnordered Categorical Kernel Type (Dep. Var.):",
                                      x$puykertype), ""))),
            ifelse(x$ynuno > 0,paste("\nNo. Unordered Categorical Dependent Vars.:",x$ynuno),""),
            ifelse(x$xnuno > 0,paste("\nNo. Unordered Categorical Explanatory Vars.:",x$xnuno),""))
  }

  if (x$xnord + x$ynord > 0){
    t.str[cnt <- cnt + 1] <-
      paste(ifelse(x$poxkertype == x$poykertype,
                   paste("\n\nOrdered Categorical Kernel Type:",x$poxkertype),
                   paste("\n", ifelse(x$xnord > 0,
                                      paste("\nOrdered Categorical Kernel Type (Exp. Var.):",
                                            x$poxkertype), ""),
                         ifelse(x$ynord > 0,
                                paste("\nOrdered Categorical Kernel Type (Dep. Var.):",
                                      x$poykertype), ""))),
            ifelse(x$ynord > 0,paste("\nNo. Ordered Categorical Dependent Vars.:",x$ynord),""),
            ifelse(x$xnord > 0,paste("\nNo. Ordered Categorical Explanatory Vars.:",x$xnord),""))
  }
  return(t.str)
}

genBwGOFStrs <- function(x) {
  ###paste("Residual standard error:", sqrt
}
  
## statistical functions
RSQfunc <- function(y,y.pred) {
  y.mean <- mean(y)
  (sum((y-y.mean)*(y.pred-y.mean))^2)/(sum((y-y.mean)^2)*sum((y.pred-y.mean)^2))
}

MSEfunc <- function(y,y.fit) {
  mean((y-y.fit)^2)
}

MAEfunc <- function(y,y.fit) {
  mean(abs(y-y.fit))
}

MAPEfunc <- function(y,y.fit) {
  jj = which(y != 0)
  
  mean(c(abs((y[jj]-y.fit[jj])/y[jj]), as.numeric(replicate(length(y)-length(jj),2))))
}

CORRfunc <- function(y,y.fit) {
  abs(corr(cbind(y,y.fit)))
}

SIGNfunc <- function(y,y.fit) {
  sum(sign(y) == sign(y.fit))/length(y)
}

EssDee <- function(y){
  if(any(dim(as.matrix(y)) == 0))
      return(0)
  sd.vec <- apply(as.matrix(y),2,sd)
  IQR.vec <- apply(as.matrix(y),2,IQR)/QFAC
  mad.vec <- apply(as.matrix(y),2,mad)
  a <- apply(cbind(sd.vec,IQR.vec,mad.vec),1, function(x) max(x))
  if(any(a<=0)) warning(paste("variable ",which(a<=0)," appears to be constant",sep=""))
  a <- apply(cbind(sd.vec,IQR.vec,mad.vec),1, function(x) min(x[x>0]))  
  return(a)
}


##EssDee <- function(y){
##
##  sd.vec <- apply(as.matrix(y),2,sd)
##  IQR.vec <- apply(as.matrix(y),2,IQR)/QFAC
##  return(ifelse(sd.vec<IQR.vec|IQR.vec==0,sd.vec,IQR.vec))
##  
##}

## consolidating various bits of code related to converting internal settings
## to printable strings

bwmToPrint <- function(s){
  switch(s,
         manual = "Manual",
         cv.aic = "Expected Kullback-Leibler Cross-Validation",
         cv.ml = "Maximum Likelihood Cross-Validation",
         cv.cdf = "Least Squares Cross-Validation",
         cv.ls = "Least Squares Cross-Validation",
         cv.ls.np = "Least Squares Cross-Validation (block algorithm)",
         "normal-reference" = "Normal Reference")
}

bwtToPrint <- function(s){
  switch(s,
         fixed = "Fixed",
         generalized_nn = "Generalized Nearest Neighbour",
         adaptive_nn = "Adaptive Nearest Neighbour" )
}

cktToPrint <- function(s, order = ""){
  switch(s,
         gaussian = paste(order,"Gaussian"),
         epanechnikov =  paste(order,"Epanechnikov"),
         uniform = "Uniform",
         "truncated gaussian" = "Truncated Gaussian")
}

uktToPrint <- function(s){
  switch(s,
         aitchisonaitken = "Aitchison and Aitken",
         liracine = "Li and Racine (normalized)")
}

oktToPrint <- function(s, normalized = FALSE) {
  if(normalized){
    pok <- 
      switch(s,
             wangvanryzin = "Wang and Van Ryzin", 
             liracine = "Li and Racine (normalized)",
             nliracine = "Li and Racine (normalized)")
  } else {
    pok <- 
      switch(s,
             wangvanryzin = "Wang and Van Ryzin", 
             liracine = "Li and Racine",
             nliracine = "Li and Racine (normalized)")
  }
  return(pok)
}

### holding place for some generic methods

se <- function(x){
  UseMethod("se",x)
}

gradients <- function(x, ...){
  UseMethod("gradients",x)
}

## From crs to avoid crs:::W.glp

mypoly <- function(x,
                   ex=NULL,
                   degree,
                   gradient.compute = FALSE,
                   r=0,
                   Bernstein = TRUE) {

  if(missing(x)) stop(" Error: x required")
  if(missing(degree)) stop(" Error: degree required")
  if(degree < 1) stop(" Error: degree must be a positive integer")
  if(!is.logical(Bernstein)) stop(" Error: Bernstein must be logical")

  if(!Bernstein) {

    ## Raw polynomials and their derivatives

    if(!is.null(ex)) x <- ex

    if(gradient.compute) {
      Z <- NULL
      for(i in 1:degree) {
        if((i-r) >= 0) {
          tmp <- (factorial(i)/factorial(i-r))*x^max(0,i-r)
        } else {
          tmp <- rep(0,length(x))
        }
        Z <- cbind(Z,tmp)
      }
    } else {
      Z <- outer(x,1L:degree,"^")
    }

  } else {
    ## Bernstein polynomials and their derivatives (i.e. Bezier curves
    ## i.e. B-splines with no interior knots)
    if(is.null(ex)) {
      if(gradient.compute) {
        Z <- gsl.bs(x,degree=degree,deriv=r)
      } else {
        Z <- gsl.bs(x,degree=degree)
      }
    } else {
      if(gradient.compute) {
        Z <- predict(gsl.bs(x,degree=degree,deriv=r),newx=ex)
      } else {
        Z <- predict(gsl.bs(x,degree=degree),newx=ex)
      }
    }
  }

  return(as.matrix(Z))

}

## W.glp is a modified version of the polym() function (stats). The
## function accepts a vector of degrees and provides a generalized
## polynomial with varying polynomial order.

W.glp <- function(xdat = NULL,
                  exdat = NULL,
                  degree = NULL,
                  gradient.vec = NULL,
                  Bernstein = TRUE) {

  if(is.null(xdat)) stop(" Error: You must provide data")
  if(is.null(degree) || any(degree < 0)) stop(paste(" Error: degree vector must contain non-negative integers\ndegree is (", degree, ")\n",sep=""))

  xdat <- as.data.frame(xdat)

  xdat.col.numeric <- sapply(1:ncol(xdat),function(i){is.numeric(xdat[,i])})
  k <- ncol(as.data.frame(xdat[,xdat.col.numeric]))

  xdat.numeric <- NULL
  if(k > 0) {
    xdat.numeric <- as.data.frame(xdat[,xdat.col.numeric])
    if(!is.null(exdat)) {
      exdat.numeric <- as.data.frame(exdat[,xdat.col.numeric])
    } else {
      exdat.numeric <- NULL
    }
  }

  if(!is.null(gradient.vec) && (length(gradient.vec) != k)) stop(paste(" Error: gradient vector and number of numeric predictors must be conformable\n",sep=""))
  if(!is.null(gradient.vec) && any(gradient.vec < 0)) stop(paste(" Error: gradient vector must contain non-negative integers\n",sep=""))

  if(!is.null(gradient.vec)) {
    gradient.compute <- TRUE
  } else {
    gradient.compute <- FALSE
    gradient.vec <- rep(NA,k)
  }

  if(length(degree) != k) stop(" Error: degree vector and number of numeric predictors incompatible")

  if(all(degree == 0) || (k == 0)) {

    ## Local constant OR no continuous variables

    if(is.null(exdat)) {
      return(matrix(1,nrow=nrow(xdat.numeric),ncol=1))
    } else {
      return(matrix(1,nrow=nrow(exdat.numeric),ncol=1))
    }

  } else {

    degree.list <- list()
    for(i in 1:k) degree.list[[i]] <- 0:degree[i]
    z <- do.call("expand.grid", degree.list, k)
    s <- rowSums(z)
    ind <- (s > 0) & (s <= max(degree))
    z <- z[ind, ,drop=FALSE]
    if(!all(degree==max(degree))) {
      for(j in 1:length(degree)) {
        d <- degree[j]
        if((d < max(degree)) & (d > 0)) {
          s <- rowSums(z)
          d <- (s > d) & (z[,j,drop=FALSE]==matrix(d,nrow(z),1,byrow=TRUE))
          z <- z[!d, ]
        }
      }
    }
    if(is.null(exdat)) {
      res <- rep.int(1,nrow(xdat.numeric))
    } else {
      res <- rep.int(1,nrow(exdat.numeric))
    }
    res.deriv <- 1
    if(degree[1] > 0) {
      res <- cbind(1, mypoly(x=xdat.numeric[,1],
                             ex=exdat.numeric[,1],
                             degree=degree[1],
                             gradient.compute=gradient.compute,
                             r=gradient.vec[1],
                             Bernstein=Bernstein))[, 1 + z[, 1]]

      if(gradient.compute && gradient.vec[1] != 0) res.deriv <- cbind(1,matrix(NA,1,degree[1]))[, 1 + z[, 1],drop=FALSE]
      if(gradient.compute && gradient.vec[1] == 0) res.deriv <- cbind(1,matrix(0,1,degree[1]))[, 1 + z[, 1],drop=FALSE]
    }
    if(k > 1) for (i in 2:k) if(degree[i] > 0) {
      res <- res * cbind(1, mypoly(x=xdat.numeric[,i],
                                   ex=exdat.numeric[,i],
                                   degree=degree[i],
                                   gradient.compute=gradient.compute,
                                   r=gradient.vec[i],
                                   Bernstein=Bernstein))[, 1 + z[, i]]
      if(gradient.compute && gradient.vec[i] != 0) res.deriv <- res.deriv * cbind(1,matrix(NA,1,degree[i]))[, 1 + z[, i],drop=FALSE]
      if(gradient.compute && gradient.vec[i] == 0) res.deriv <- res.deriv *cbind(1,matrix(0,1,degree[i]))[, 1 + z[, i],drop=FALSE]
    }

    if(is.null(exdat)) {
      res <- matrix(res,nrow=NROW(xdat))
    } else {
      res <- matrix(res,nrow=NROW(exdat))
    }
    if(gradient.compute) res.deriv <- matrix(res.deriv,nrow=1)
    colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
    if(gradient.compute) colnames(res.deriv) <- apply(z, 1L, function(x) paste(x, collapse = "."))

    if(gradient.compute) {
      res[,!is.na(as.numeric(res.deriv))] <- 0
      return(cbind(0,res))
    } else {
      return(cbind(1,res))
    }

  }

}

### internal constants used in the c backend

SF_NORMAL = 0
SF_ARB = 1

BW_FIXED = 0
BW_GEN_NN = 1
BW_ADAP_NN = 2

IMULTI_TRUE = 1
IMULTI_FALSE = 0

RE_MIN_TRUE = 0
RE_MIN_FALSE = 1

IO_MIN_TRUE = 1
IO_MIN_FALSE = 0

USE_START_NO = 0
USE_START_YES = 1

NP_DO_DENS = 1
NP_DO_DIST = 0

## initially making an np-wide option via the 'options' mechanism
DO_TREE_NO = 0
DO_TREE_YES = 1

##kernel defs
CKER_GAUSS = 0
CKER_EPAN  = 4
CKER_UNI   = 8
CKER_TGAUSS = 9

UKER_AIT = 0
UKER_LR = 1

OKER_WANG = 0
OKER_LR = 1
OKER_NLR = 2

##density 
BWM_CVML = 0
BWM_CVLS = 1
BWM_CVML_NP= 2

##distribution
DBWM_CVLS = 0

##regression
BWM_CVAIC = 0

REGTYPE_LC = 0
REGTYPE_LL = 1

##conditional density/distribution
CBWM_CVML = 0
CBWM_CVLS = 1
CBWM_NPLS = 2
CBWM_CCDF = 3 # Added 7/2/2010 jracine

##conditional distribution
CDBWM_CVLS = 0

##integral operators on kernels
OP_NOOP        = -1
OP_NORMAL      = 0
OP_CONVOLUTION = 1
OP_DERIVATIVE  = 2
OP_INTEGRAL    = 3

ALL_OPERATORS = c(OP_NORMAL, OP_CONVOLUTION, OP_DERIVATIVE, OP_INTEGRAL)
names(ALL_OPERATORS) <- c("normal","convolution", "derivative", "integral")

PERMUTATION_OPERATORS <- c(OP_NOOP, OP_NORMAL, OP_DERIVATIVE, OP_INTEGRAL)
names(PERMUTATION_OPERATORS) <- c("none", "normal", "derivative", "integral")

## useful numerical constants of kernel integrals
int.kernels <- c(0.28209479177387814348, 0.47603496111841936711, 0.62396943688265038571, 0.74785078617543927990,
                 0.26832815729997476357, 0.55901699437494742410, 0.84658823667359826246, 1.1329342579014329689,
                 0.5, 2.90113075268188e-01)

QFAC <- qnorm(.25,lower.tail=F)*2