R/monotonic.R
In demography: Forecasting Mortality, Fertility, Migration and Population Data

Documented in cm.splinefun

#' Monotonic interpolating splines
#' 
#' Perform cubic spline monotonic interpolation of given data points, returning either a list of points obtained by the interpolation or a function performing the interpolation. The splines are constrained to be monotonically increasing (i.e., the slope is never negative).
#' 
#' These are simply wrappers to the \code{\link[stats]{splinefun}} function family from the stats package.
#' 
#' @param x,y vectors giving the coordinates of the points to be interpolated. Alternatively a single plotting structure can be specified: see \code{\link[grDevices]{xy.coords}}.
#' @param n interpolation takes place at n equally spaced points spanning the interval [\code{xmin}, \code{xmax}].
#' @param xmin left-hand endpoint of the interpolation interval.
#' @param xmax right-hand endpoint of the interpolation interval.
#' @param ... Other arguments are ignored.
#' 
#' @return \item{cm.spline}{returns a list containing components \code{x} and \code{y} which give the ordinates where interpolation took place and the interpolated values.}
#' \item{cm.splinefun}{returns a function which will perform cubic spline interpolation of the given data points. This is often more useful than \code{spline}.}
#' 
#' @references Forsythe, G. E., Malcolm, M. A. and Moler, C. B. (1977) \emph{Computer Methods for Mathematical Computations}.
#' Hyman (1983) \emph{SIAM J. Sci. Stat. Comput.} \bold{4}(4):645-654.
#' Dougherty, Edelman and Hyman 1989 \emph{Mathematics of Computation}, \bold{52}: 471-494. 
#' 
#' @author Rob J Hyndman
#' 
#' @examples 
#' x <- seq(0,4,l=20)
#' y <- sort(rnorm(20))
#' plot(x,y)
#' lines(spline(x, y, n = 201), col = 2) # Not necessarily monotonic
#' lines(cm.spline(x, y, n = 201), col = 3) # Monotonic
#' @keywords smooth
#' @aliases monotonic
#' @export
cm.spline <- function (x, y = NULL, n = 3 * length(x), xmin = min(x), xmax = max(x), ...) 
# wrapper for spline()
# Function retained for backwards compatibility
{
    stats::spline(x, y, n=n, xmin=xmin, xmax=xmax, method="hyman")
}


#' @rdname cm.spline
#' @export
cm.splinefun <- function(x, y = NULL, ...) 
  # wrapper for splinefun()
  # Function retained for backwards compatibility
{ 
  stats::splinefun(x, y, method="hyman")
}


# Function to do cubic smoothing spline fit to y ~ x
# with constraint of monotonic increasing for x>b.
# Based on code provided by Simon Wood
# Last updated: 1 February 2014

smooth.monotonic <- function(x,y,b,k=-1,w=NULL,newx=x)
{
    weight <- !is.null(w)
    if(k<3 & k!= -1)
        stop("Inappropriate value of k")
    # Unconstrained smooth.
    miss <- is.na(y)
    if(weight)
        miss <- miss | w < 1e-9
    yy <- y[!miss]
    xx <- x[!miss]
    if (weight)
    {
        w <- w[!miss]
        w <- w/sum(w)*length(w)
        f.ug <- mgcv::gam(yy~s(xx,k=k),weights=w)
#        assign("w",w,pos=1)
    }
    else
        f.ug <- mgcv::gam(yy~s(xx,k=k))

    if(max(xx) <= b)
        return(mgcv::predict.gam(f.ug,newdata=data.frame(xx=newx),se.fit=TRUE))

    # Create Design matrix, constraints etc. for monotonic spline....
    mgcv::gam(yy~s(xx,k=k),data=data.frame(xx=xx,yy=yy),fit=FALSE) -> G
    if(weight)
        G$w <- w
    nc <- 200                  # number of constraints
    xc <- seq(b,max(xx),l=nc+1)# points at which to impose constraints
    A0 <- mgcv::predict.gam(f.ug,data.frame(xx=xc),type="lpmatrix")
                               # A0%*%p will evaluate spline at the xc points
    A1 <- mgcv::predict.gam(f.ug,data.frame(xx=xc+1e-6),type="lpmatrix")
    A <- (A1-A0)/1e-6          # approximate constraint matrix
                               #(A%%p is  -ve gradient of spline at points xc)

    G$Ain <- A                 # constraint matrix
    G$bin <- rep(0,nc+1)       # constraint vector
    G$sp <- f.ug$sp            # use smoothing parameters from un-constrained fit
    k <- G$smooth[[1]]$df+1
    G$p <- rep(0,k)
    G$p[k] <- 0.1              # get monotonic starting parameters, by 
                               # setting coefficiants of polynomial part of term
    G$p[k-1] <- -mean(0.1*xx)  # must ensure that gam side conditions are
                               # met so that sum of smooth over x's is zero
#    G$p <- rep(0,k+1)
#    G$p[k+1] <- 0.1
#    G$p[k] <- -mean(0.1*xx)
    G$y <- yy
    G$off <- G$off -1          # indexing inconsistency between pcls and internal gam
    G$C <- matrix(0,0,0)       # fixed constraint matrix (there are none)
    p <- mgcv::pcls(G)         # fit spline (using s.p. from unconstrained fit)

    # now modify the gam object from unconstrained fit a little, to use it
    # for predicting and plotting constrained fit.
    f.ug$coefficients <- p
    return(mgcv::predict.gam(f.ug,newdata=data.frame(xx=newx),se.fit=TRUE))
}

smooth.monotonic.cobs <- function(x,y,b,lambda=0,w=NULL,newx=x,nknots=50)
{
    oldwarn <- options(warn=-1)

    weight <- !is.null(w)
    
    miss <- is.na(y)
    if(weight)
        miss <- miss | w < 1e-9
    yy <- y[!miss]
    xx <- x[!miss]
    if (weight)
    {
        w <- w[!miss]
        w <- w/sum(w)*length(w)
        f.ug <- cobs::cobs(xx,yy,w=w,print.warn=FALSE,print.mesg=FALSE,lambda=lambda,nknots=nknots)
    }
    else
        f.ug <- cobs::cobs(xx,yy,print.warn=FALSE,print.mesg=FALSE,lambda=lambda,nknots=nknots)

    fred <- stats::predict(f.ug,interval="conf",nz=200)
    fit <- stats::approx(fred[,1],fred[,2],xout=newx)$y
    se <- stats::approx(fred[,1],(fred[,4]-fred[,3])/2/1.96, xout=newx)$y

    if(max(xx) > b)
    {
        delta <- (max(xx)-min(xx))/10
        xxx <- xx[xx>(b-delta)]
        yyy <- yy[xx>(b-delta)]
        if(weight)
            f.mono <- cobs::cobs(xxx,yyy,constraint="increase",w=w[xx>(b-delta)],print.warn=FALSE,print.mesg=FALSE,lambda=lambda,nknots=nknots)
        else
            f.mono <- cobs::cobs(xxx,yyy,constraint="increase",print.warn=FALSE,print.mesg=FALSE,lambda=lambda,nknots=nknots)
        fred <- stats::predict(f.mono,interval="conf",nz=200)
        newfit <- stats::approx(fred[,1],fred[,2],xout=newx[newx>(b-delta)])$y
        newse <- stats::approx(fred[,1],(fred[,4]-fred[,3])/2/1.96,xout=newx[newx>(b-delta)])$y
        preb <- sum(newx <= (b-delta))
        newfit <- c(rep(0,preb),newfit)
        newse <- c(rep(0,preb),newse)
        postb <- sum(newx > b)
        n <- length(newx)
        cc <- c(rep(0,preb),seq(0,1,length=n-preb-postb),rep(1,postb))
        fit <- (1-cc)*fit + cc*newfit
        se <- (1-cc)*se + cc*newse
    }
    options(warn=oldwarn$warn)
    return(list(fit=fit,se=se))
}