R/gsp.R

Defines functions PolyShift smsp approx.Z smspline.v smspline sc gspf.1 Proj.test Proj Proj.1 gsp spline.E spline.T basis Emat Cmat Xf Xmat

Documented in approx.Z basis Cmat gsp gspf.1 PolyShift Proj Proj.1 Proj.test sc smsp smspline smspline.v spline.E spline.T Xf Xmat

##
##
##  General polynomial splines: August 8, 2008
##  Revised June 16, 2010 to incorporate degree 0 polynomials
##    and constraining evaluation of spline to 0 with 'intercept'
##   
##  Modified: June 15, 2013 - GM
##  Added specified linear contraints to gsp and 
##  to sc expressed
##  in terms of full polynomial parametrization
##  
##  Added 'PolyShift' function to specify constraints for
##  a periodic spline. See example in the manual page.
##
##  Added 'periodic' argument to gsp
##
##


#' @export
Xmat <- function( x, degree, D = 0, signif = 3) {
  
  # Return design matrix for d-th derivative
  
  if ( length(D) < length( x ) ) D = rep(D, length.out = length(x))
  if ( length(x) < length( D ) ) x = rep(x, length.out = length(D))
  xmat = matrix( x, nrow = length(x), ncol = degree + 1)
  # disp( d)
  # disp( x)
  expvec <- 0:degree
  coeffvec <- rep(1, degree+1)
  expmat <- NULL
  coeffmat <- NULL
  
  for (i in 0:max(D) ) {
    expmat <- rbind(expmat, expvec)
    coeffmat <- rbind(coeffmat, coeffvec)
    coeffvec <- coeffvec * expvec
    expvec <- ifelse(expvec > 0, expvec - 1, 0)
  }
  G = coeffmat[ D + 1, ] * xmat ^ expmat[ D + 1, ]
  
  xlab = signif( x, signif)
  rownames(G) = ifelse( D == 0, paste('f(', xlab ,')', sep = ''),
                        paste( "D",D,"(",xlab,")", sep = ""))
  colnames(G) = paste( "X", 0:(ncol(G)-1), sep = "")
  G
}

#  Xmat( c(1:10,pi), 5,0:4,3)

#' @export
Xf <-  function(   x, knots, degree = 3, D = 0, right = TRUE , signif = 3) {
  
  # With the default, right ,== TRUE, if x is at a knot then it is included in
  # in the lower interval. With right = FALSE, it is included in the higher
  # interval. This is needed when building derivative constraints at the
  # knot
  
  xmat = Xmat ( x, degree, D , signif )
  k = sort(knots)
  g = cut( x, c(-Inf, k, Inf), right = right)
  do.call( 'cbind',
           lapply( seq_along(levels(g)) , function( i ) (g ==levels(g)[i]) *  xmat))
}

# Xf( c(1:10,pi), c(3,6))

# Xf( 3, c(3,6),3, 0, )
# Xf( 3, c(3,6),right = F)

#' @export
Cmat <- function( knots, degree, smooth, lin = NULL, intercept = 0, signif = 3) {
  # add lin: contraints, GM 2013-06-13
  # generates constraint matrix
  dm = max(degree)
  
  # intercept
  
  cmat = NULL
  if( !is.null(intercept))  cmat = rbind( cmat, "Intercept" =
                                            Xf( intercept, knots, dm, D=0 ))
  
  # continuity constraints
  
  for ( i in seq_along(knots) ) {
    k = knots[i]
    sm = smooth[i]
    if ( sm > -1 ) {  # sm = -1 corresponds to discontinuity
      dmat  =Xf( k , knots, dm, D = seq(0,sm) , F ) -   Xf( k ,
                                                            knots, dm, D = seq(0,sm) , T )
      rownames( dmat ) = paste( "C(",signif(k, signif),").",
                                seq(0,sm), sep = '')
      cmat = rbind( cmat,  dmat)
    }
  }
  
  # reduced degree constraints
  
  degree = rep( degree, length.out = length(knots) + 1)
  # disp ( degree )
  for ( i in seq_along( degree)) {
    di = degree[i]
    
    if ( dm > di ) {
      dmat = diag( (length(knots) + 1) * (dm +1)) [  (i - 1)*(dm + 1) +
                                                       1 + seq( di+1,dm), , drop = F]
      rownames( dmat ) = paste( "I.", i,".",seq(di+1,dm),sep = '')
      #disp( cmat)
      #disp( dmat)
      cmat = rbind( cmat, dmat)
    }
  }
  
  # add additional linear constraints
  
  if( ! is.null(lin)) cmat <- rbind(cmat,lin) # GM:2013-06-13
  rk = qr(cmat)$rank
  spline.rank = ncol(cmat) - rk
  attr(cmat,"ranks") = c(npar.full = ncol(cmat), C.n = nrow(cmat),
                         C.rank = rk , spline.rank = spline.rank )
  attr(cmat,"d") = svd(cmat) $ d
  cmat
  
}

# Cmat( c(-.2,.4), c(2,3,2), c(2,2))


#' @export
Emat <- function(  knots, degree, smooth , intercept = FALSE, signif = 3) {
  # estimation matrix:
  # polynomial of min
  # note that my intention of using a Type II estimate is not good
  #    precisely because that means the parametrization would depend
  #    on the data which I would like to avoid
  #    BUT perhaps we can implement later
  if ( length( degree) < length(knots) + 1) degree = rep( degree,
                                                          length.out =  length(knots) + 1)
  dmin = min(degree)
  # if ( dmin < 1) stop("minimum degree must be at least 1")
  dmax = max(degree)
  smin = min(smooth)
  smax = max(smooth)
  imax = length(degree)
  
  zeroi = as.numeric(cut( 0, c(-Inf, sort( knots), Inf)))
  dzero = degree[zeroi]
  
  cmat = Xf( 0, knots, degree = dmax , D = seq( 1, dzero))
  
  if ( imax > zeroi ){
    for ( i in (zeroi+1): imax) {
      d.right = degree[ i ]
      d.left  = degree[ i-1 ]
      k = knots[ i - 1 ]
      sm = smooth[ i - 1 ]
      if (  d.right > sm ) {
        dmat  =Xf( k , knots, dmax, D = seq(sm+1,d.right) , F ) -
          Xf( k , knots, dmax, D = seq(sm+1,d.right) , T )
        rownames( dmat ) = paste( "C(",signif(k, signif),").",
                                  seq(sm+1,d.right), sep = '')
        cmat = rbind( cmat,  dmat)
        
      }
    }
  }
  
  if ( zeroi > 1 ){
    for ( i in zeroi: 2) {
      d.right = degree[ i ]
      d.left  = degree[ i-1 ]
      k = knots[ i - 1 ]
      sm = smooth[ i - 1 ]
      if (  d.left > sm ) {
        dmat = Xf( k , knots, dmax, D = seq(sm+1,d.left) , F ) -
          Xf( k , knots, dmax, D = seq(sm+1,d.left) , T )
        rownames( dmat ) = paste( "C(",signif(k, signif),").",
                                  seq(sm+1,d.left), sep = '')
        cmat = rbind( cmat,  dmat)
        
      }
    }
  }
  cmat
}






#' Orthogonal basis for column space of a matrix
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param X %% ~~Describe \code{X} here~~
#' @param coef %% ~~Describe \code{coef} here~~
basis  <- function( X , coef = FALSE ) {
  # returns linear independent columns
  #
  #   X = fr(X) %*% attr(fr(X),"R")
  #
  q <- qr(X)
  sel <- q$pivot[ seq_len( q$rank)]
  ret <- X[ , sel ]
  attr(ret,"cols") <- sel
  if ( coef )attr(ret,"R") <- qr.coef( qr(ret) , mat)
  colnames(ret) <- colnames(X)[ sel ]
  ret
}

#    tt( c(-1,1,2), c(3,3,3,2), c(3,3,2))

#    tmat = rbind( c(1,1,1,1,1), c(1,0,0,0,1), c(0,1,1,1,0), c(1,2,0,0,1))
#    t(tmat)
#    basis(tmat)
#    basis( t(tmat))



#' Spline tranformation matrix
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param knots %% ~~Describe \code{knots} here~~
#' @param degree %% ~~Describe \code{degree} here~~
#' @param smooth %% ~~Describe \code{smooth} here~~
#' @param intercept %% ~~Describe \code{intercept} here~~
#' @param signif %% ~~Describe \code{signif} here~~
#' 
#' @export
spline.T <-
  function( knots, degree, smooth, lin = NULL, intercept = 0, signif = 3 ) {
    cmat = Cmat( knots, degree, smooth, lin, intercept, signif  )  # constraint matrix
    emat = Emat( knots, degree, smooth, !is.null(intercept), signif  )  # estimation matrix
    #disp( list(C= cmat, E=emat ) )
    nc = nrow( cmat )
    ne = nrow( emat )
    basisT = t( basis( cbind( t(cmat), t(emat) ) ))
    cols = attr(basisT,"cols")
    ncc = sum( cols <= nc )
    Tmat = solve( basisT )
    attr( Tmat, "nC") =  ncc
    attr( Tmat, "CE") = rbind( cmat, emat)
    attr( Tmat, "CEbasis" ) = t(basisT)
    Tmat
  }





#' Spline estimation function
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param knots %% ~~Describe \code{knots} here~~
#' @param degree %% ~~Describe \code{degree} here~~
#' @param smooth %% ~~Describe \code{smooth} here~~
#' @param intercept %% ~~Describe \code{intercept} here~~
#' @param signif %% ~~Describe \code{signif} here~~
#' 
#' @export
spline.E <- function( knots, degree, smooth, lin = NULL, intercept = 0, signif = 3 ) {
  cmat = Cmat( knots, degree, smooth, lin, intercept, signif  )  # constraint matrix
  emat = Emat( knots, degree, smooth, !is.null(intercept), signif  )  # estimation matrix
  # disp( list(C= cmat, E=emat ) )
  nc = nrow( cmat )
  ne = nrow( emat )
  basisT = t( basis( cbind( t(cmat), t(emat) ) ))
  cols = attr(basisT,"cols")
  ncc = sum( cols <= nc )
  Tmat = solve( basisT )
  Tmat[, (ncc+1): ncol(Tmat)]
}





#' General regression splines with variable degrees and smoothnes, smoothing
#' splines
#' 
#' These functions implement a general spline with possibly different degrees
#' in each interval and and different orders of smoothness at each knot,
#' including the possibility of allowing a discontinuity at a knot. The
#' function \code{sc} helps in the construction of linear hypothesis matrices
#' to estimate and test levels and derivatives of splines at arbitrary points
#' and the saltus of derivatives that have discontinuities at knots.
#' 
#' Many polynomial regression splines can be generated by 'plus' functions
#' although the resulting basis for the spline may be ill conditioned. For
#' example a 'quadratic spline' (a spline that is quadratic in each interval
#' with and smooth with a first derivative at each knot) with knots at 1 and 3
#' can be fitted with: \preformatted{ plus <- function(x,y) ifelse(x>0,y,0) lm(
#' y ~ x + I(x^2) + plus(x-1,(x-1)^2) + plus(x-3,(x-3)^2)) } All 'standard'
#' polynomial splines with the same degree in each interval and continuity of
#' order one less than the degree at each knot can be constructed in this
#' fashion. A convenient aspect of this parametrization of the spline is that
#' the estimated coefficients have a simple interpretation. The coefficients
#' for 'plus' terms represent the 'saltus' (jump) in the value of a coefficient
#' at the knot. Testing whether the true value of a coefficient is 0 is
#' equivalent to a test for the need to retain the corresponding knot. \cr\cr
#' This approach does not work for some more complex splines with different
#' degrees or different orders of continuity at the knots. An example is the
#' commonly used natural quadratic spline. A natural quadratic spline with
#' knots at -1,0 and 1 (where -1 and 1 are termed 'boundary knots') is linear
#' in the intervals (-Inf,-1) and (1,+Inf), and quadratic in the intervals
#' (-1,0) and (0,1). The spline is smooth of order 1 at each knot.  \cr\cr Many
#' techniques for fitting splines generate a basis for the spline (columns of
#' the design matrix) that has good numerical properties but whose coefficients
#' do not have a simple interpretation. \cr\cr The \code{gsp} function makes it
#' easy to specify a spline with arbitrary degree in each interval and
#' arbitrary smoothness at each knot. The parametrization produces coefficients
#' that have a simple interpretation. For a spline of degree p at x = 0,
#' coefficients correspond to the 1st, 2nd, ... pth derivative at 0. Additional
#' coefficients correspond to each free saltus at each knot. \cr\cr The
#' \code{sc} function generates a matrix to estimate features of a fitted
#' spline that can be expressed as linear combinations of the spline
#' coefficients.  Examples are various derivatives of the spline at any point,
#' left or right derivatives of different orders and the saltus in derivatives
#' at a knot.  \cr\cr A disadvantage of \code{gsp} is that the spline basis may
#' be poorly conditioned.  The impact of this problem can be mitigated by
#' rescaling the x variable so that it has an appropriate range. For example,
#' with a spline whose highest degree is cubic, ensuring that x has a range
#' between -10 and 10 should avert numerical problems. \cr\cr \code{gsp}
#' generates a matrix of regressors for a spline with knots, degree of
#' polynomials in each interval and the degree of smoothness at each knot.
#' Typically, \code{gsp} is used to define a function that is then used in a
#' model equation. See the examples below.
#' 
#' A function to fit a cubic spline with knots at 5 and 10 is generated with:
#' 
#' \preformatted{ sp <- function( x ) gsp( x, c(5,10), c(3,3,3), c(2,2)) }
#' 
#' indicating that a cubic polynomial is used in each of the three intervals
#' and that the second derivative is continuous at each knot.
#' 
#' A 'natural cubic spline' with linear components in each unbounded interval
#' would have the form:
#' 
#' \preformatted{ sp <- function( x ) gsp( x, c(0,5,10,15), c(1,3,3,3,1),
#' c(2,2,2,2)) }
#' 
#' Quadratic and linear splines, respectively: \preformatted{ sp.quad <-
#' function( x ) gsp( x, c(5,10), c(2,2,2), c(1,1)) sp.lin <- function( x )
#' gsp( x, c(5,10), c(1,1,1), c(0,0)) } Where the same degree is used for all
#' intervals and knots, it suffices to give it once: \preformatted{ sp.quad <-
#' function( x ) gsp( x, c(5,10), 2, 1) sp.lin <- function( x ) gsp( x,
#' c(5,10), 1, 0) } An easy way to specify a model in which a knot is dropped
#' is to force a degree of continuity equal to the degree of adjoining
#' polynomials, e.g.  to drop the knot at 10, use: \preformatted{ sp.1 <-
#' function( x ) gsp( x, c(5,10), c(3,3,3), c(2,3)) } This is sometimes easier
#' than laboriously rewriting the spline function for each null hypothesis.
#' 
#' Depending on the maximal degree of the spline, the range of x should not be
#' excessive to avoid numerical problems. The spline matrix generated is 'raw'
#' and values of max(abs(x))^max(degree) may appear in the matrix.  For
#' example, for a cubic spline, it might be desirable to rescale x and/or
#' recenter x so abs(x) < 100 if that is not already the case. Note that the
#' knots need to be correspondingly transformed.
#' 
#' The naming of coefficients should allow them to be easily interpreted.  For
#' example: \preformatted{ > zapsmall( gsp ( 0:10, c(3, 7) , c(2,3,2), c(1,1))
#' )
#' 
#' D1(0) D2(0) C(3).2 C(3).3 C(7).2 f(0) 0 0.0 0.0 0.00000 0.0 f(1) 1 0.5 0.0
#' 0.00000 0.0 f(2) 2 2.0 0.0 0.00000 0.0 f(3) 3 4.5 0.0 0.00000 0.0 f(4) 4 8.0
#' 0.5 0.16667 0.0 f(5) 5 12.5 2.0 1.33333 0.0 f(6) 6 18.0 4.5 4.50000 0.0 f(7)
#' 7 24.5 8.0 10.66667 0.0 f(8) 8 32.0 12.5 20.66667 0.5 f(9) 9 40.5 18.0
#' 34.66667 2.0 f(10) 10 50.0 24.5 52.66667 4.5 }
#' 
#' The coefficient for the first regressor is the first derivative at x = 0;
#' for the second regressor, the second derivative at 0; the third, the saltus
#' (change) in the second derivative at x = 3, the fourth, the saltus in the
#' third derivative at x = 3 and, finally, the saltus in the second derivative
#' at x = 7.
#' 
#' Example: \preformatted{ > sp <- function(x) gsp ( x, c(3, 7) , c(2,3,2),
#' c(1,1)) > zd <- data.frame( x = seq(0,10, .5), y = seq(0,10,.5)^2 + rnorm(
#' 21)) > fit <- lm( y ~ sp( x ), zd) > summary(fit) > Ls <-cbind( 0, sc( sp,
#' c(1,2,3,3,3,5,7,7,7,8), D=3, + type = c(0,0,0,1,2,0,0,1,2,0))) > zapsmall(
#' Ls )
#' 
#' D1(0) D2(0) C(3).2 C(3).3 C(7).2 D3(1) 0 0 0 0 0 0 D3(2) 0 0 0 0 0 0 D3(3-)
#' 0 0 0 0 0 0 D3(3+) 0 0 0 0 1 0 D3(3+)-D3(3-) 0 0 0 0 1 0 D3(5) 0 0 0 0 1 0
#' D3(7-) 0 0 0 0 1 0 D3(7+) 0 0 0 0 0 0 D3(7+)-D3(7-) 0 0 0 0 -1 0 D3(8) 0 0 0
#' 0 0 0
#' 
#' > wald( fit, list( 'third derivatives' = Ls))
#' 
#' numDF denDF F.value p.value third derivatives 1 15 2.013582 0.17634
#' 
#' Estimate Std.Error DF t-value p-value Lower 0.95 Upper 0.95 D3(1) 0.000000
#' 0.000000 15 -1.123777 0.27877 0.000000 0.000000 D3(2) 0.000000 0.000000 15
#' -1.123777 0.27877 0.000000 0.000000 D3(3-) 0.000000 0.000000 15 -1.123777
#' 0.27877 0.000000 0.000000 D3(3+) 0.927625 0.653714 15 1.419008 0.17634
#' -0.465734 2.320984 D3(3+)-D3(3-) 0.927625 0.653714 15 1.419008 0.17634
#' -0.465734 2.320984 D3(5) 0.927625 0.653714 15 1.419008 0.17634 -0.465734
#' 2.320984 D3(7-) 0.927625 0.653714 15 1.419008 0.17634 -0.465734 2.320984
#' D3(7+) 0.000000 0.000000 Inf NaN NaN 0.000000 0.000000 D3(7+)-D3(7-)
#' -0.927625 0.653714 15 -1.419008 0.17634 -2.320984 0.465734 D3(8) 0.000000
#' 0.000000 Inf NaN NaN 0.000000 0.000000
#' 
#' Warning messages: 1: In min(dfs[x != 0]) : no non-missing arguments to min;
#' returning Inf 2: In min(dfs[x != 0]) : no non-missing arguments to min;
#' returning Inf
#' 
#' } Note that some coefficients that are 0 by design may lead to invalid DRs
#' and t-values.
#' 
#' \code{sc} generates a portion of a hypothesis matrix for the coefficients of
#' a general spline constructed with \code{gsp} With: \preformatted{ sc( sp, x,
#' D, type ):
#' 
#' sp is the spline function for which coefficients are required x values at
#' which spline is evaluated D order of derivative: 0 = value of spline, 1 =
#' first derivative, etc.  type at knots: 0 limit from the left, 1 from the
#' right, 2 is saltus (i.e. jump from left to right) }
#' 
#' Warning: \code{sc} will not work correctly if the function defining the
#' spline transforms the variable, e.g. sp <- function(x) gsp( x/100, knot=2 )
#' 
#' Example:
#' 
#' \preformatted{ simd <- data.frame( age = rep(1:50, 2), y =
#' sin(2*pi*(1:100)/5)+ rnorm(100), G = rep( c("male","female"), c(50,50)))
#' 
#' sp <- function(x) gsp( x, knots = c(10,25,40), degree = c(1,2,2,1), smooth =
#' c(1,1,1))
#' 
#' fit <- lm( y ~ sp(age)*G, simd) xyplot( predict(fit) ~ age , simd, groups =
#' G,type = "l") summary(fit) # convenient display
#' 
#' # output:
#' 
#' Call: lm(formula = y ~ sp(age) * G, data = simd)
#' 
#' Residuals: Min 1Q Median 3Q Max -2.5249 -0.7765 -0.0760 0.7882 2.6265
#' 
#' Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.733267
#' 0.605086 1.212 0.229 sp(age)D1(0) -0.084219 0.055163 -1.527 0.130
#' sp(age)C(10).2 0.010984 0.006910 1.590 0.115 sp(age)C(25).2 -0.023034
#' 0.012881 -1.788 0.077 . Gmale -0.307665 0.855721 -0.360 0.720
#' sp(age)D1(0):Gmale 0.058384 0.078012 0.748 0.456 sp(age)C(10).2:Gmale
#' -0.010556 0.009773 -1.080 0.283 sp(age)C(25).2:Gmale 0.026410 0.018216 1.450
#' 0.150 ---
#' 
#' Residual standard error: 1.224 on 92 degrees of freedom Multiple R-squared:
#' 0.0814, Adjusted R-squared: 0.0115 F-statistic: 1.165 on 7 and 92 DF,
#' p-value: 0.3308 # end of output
#' 
#' L0 <- list( "hat" = rbind( "females at age=20" = c( 1, sc(sp,20), 0, 0*
#' sc(sp,20)), "males at age=20" = c( 1, sc(sp,20), 1, 1* sc(sp,20))),
#' "male-female" = rbind( "at 20" = c( 0 , 0*sc(sp,20), 1, 1*sc(sp,20)))) wald(
#' fit, L0 )
#' 
#' ...
#' 
#' L1 <- list("D(yhat)/D(age)"= rbind( "female at age = 25" = c(0, sc(sp,25,1),
#' 0, 0*sc(sp,25,1)), "male at x = 25" = c(0, sc(sp,25,1), 0, 1*sc(sp,25,1))))
#' wald( fit, L1) # output: numDF denDF F.value p.value D(yhat)/D(age) 2 92
#' 1.057307 0.35157
#' 
#' Estimate Std.Error DF t-value p-value Lower 0.95 Upper 0.95 female at age =
#' 25 0.080544 0.056974 92 1.413694 0.16083 -0.032612 0.193700 male at x = 25
#' -0.019412 0.056974 92 -0.340712 0.73410 -0.132568 0.093744 }
#' 
#' A periodic spline function can be generated by forcing the coefficients
#' beyond a periodic knot to repeat the pattern in a previous inteval. For
#' example a periodic spline of period 1 can be created as follows:
#' 
#' \preformatted{ x <- seq(-3,3,.01) y <- rnorm(x)
#' 
#' per.sp <- function(x) gsp( x %% 1, knots = 1, degree = c(3, 3), smooth = 1,
#' lin = cbind( diag(4), - PolyShift(1,4))) fit <- lm( y ~ per.sp(x))
#' summary(fit) xyplot( y + predict(fit) ~ x , panel = panel.superpose.2,type =
#' c("p","l")) xyplot( predict(fit) ~ x , type = 'l') }
#' 
#' A periodic spline with additional knots can be created as shown below. Note
#' that constraint matrix 'lin' expresses constraints for the 'full' polynomial
#' parametrization, i.e. polynomials of degree 3 (thus of order 4 when
#' including the constant term) in each of the 4 intervals.  The constraint
#' given in 'lin' forces the coefficients beyond the periodic knot at x = 1, to
#' repeat the polynomial in the interval just to the right of x = 0.
#' 
#' \preformatted{ x <- seq(-3,3,.01) y <- rnorm(x)
#' 
#' per.sp <- function(x) gsp( x %% 1, knots = c(.333,.666,1), degree = 3,
#' smooth = 1, lin = cbind( diag(4),0*diag(4),0*diag(4), - PolyShift(1,4))) fit
#' <- lm( y ~ per.sp(x)) summary(fit) xyplot( y + predict(fit) ~ x , panel =
#' panel.superpose.2,type = c("p","l")) xyplot( predict(fit) ~ x , type = 'l')
#' }
#' 
#' Overview of utility functions: \preformatted{ Xmat = function( x, degree, D
#' = 0, signif = 3) design/estimation matrix for f[D](x) where f(x) is
#' polynomial of degree degree.
#' 
#' Xf = function( x, knots, degree = 3, D = 0, right = TRUE , signif = 3) uses
#' Xmat to form 'full' matrix with blocks determined by knots intervals
#' 
#' Cmat = function( knots, degree, smooth, intercept = 0, signif = 3) linear
#' constraints
#' 
#' Emat = function( knots, degree, smooth , intercept = FALSE, signif = 3)
#' estimates - not necessarily a basis
#' 
#' basis = function( X , coef = FALSE ) selects linear independent subset of
#' columns of X
#' 
#' spline.T = function( knots, degree, smooth, intercept = 0, signif = 3 ) full
#' transformation of Xf to spline basis and constraints
#' 
#' spline.E = function( knots, degree, smooth, intercept = 0, signif = 3 )
#' transformation for spline basis (first r columns of spline.T)
#' 
#' gsp = function( x , knots, degree = 3 , smooth = pmax(pmin( degree[-1],
#' degree[ - length(degree)]) - 1,0 ), intercept = 0, signif = 3)
#' 
#' }
#' 
#' @aliases gsp sc smsp Xf Xmat qs lsp Cmat PolyShift
#' @param x value(s) where spline is evaluated
#' @param knots vector of knots
#' @param degree vector giving the degree of the spline in each interval. Note
#' the number of intervals is equal to the number of knots + 1. A value of 0
#' corresponds to a constant in the interval. If the spline should evaluate to
#' 0 in the interval, use the \code{intercept} argument to specify some value
#' in the interval at which the spline must evaluate to 0.
#' @param smooth vector with the degree of smoothness at each knot (0 =
#' continuity, 1 = smooth with continuous first derivative, 2 = continuous
#' second derivative, etc. The value -1 allows a discontinuity at the knot.
#' @param intercept value(s) of x at which the spline has value 0, i.e. the
#' value(s) of x for which yhat is estimated by the intercept term in the
#' model. The default is 0. If NULL, the spline is not constrained to evaluate
#' to 0 for any x.
#' @param periodic if TRUE generates a period spline on the base interval
#' [0,max(knots)]. A constraint is generated so that the coefficients generate
#' the same values to the right of max(knots) as they do to the right of 0.
#' Note that all knots should be strictly positive.
#' @param lin provides a matrix specifying additional linear contraints on the
#' 'full' parametrization consisting of blocks of polynomials of degree equal
#' to max(degree) in each of the length(knots)+1 intervals of the spline. See
#' below for examples of a spline that is 0 outside of its boundary knots.
#' @param signif number of significant digits used to label coefficients
#' @param exclude number of leading columns to drop from spline matrix: 0:
#' excludes the intercept column, 1: excludes the linear term as well.  Terms
#' that are excluded from the spline matrix can be modeled explicitly.
#' @param sp a spline function defined by \code{gsp}. See the examples below.
#' @param D the degree of a derivative: 0: value of the function, 1: first
#' derivative, 2: second derivative, etc.
#' @param type how a derivative or value of a function is measured at a
#' possible discontinuity at a knot: 0: limit from the left, 1: limit from the
#' right, 2: saltus (limit from the right minus the limit from the left)
#' @param a a 'periodic' knot at which a spline repeats itself
#' @param n the maximal 'order', i.e. maximal degree + 1, of a periodic spline
#' @return \code{gsp} returns a matrix generating a spline. \code{cs},
#' \code{qs} and \code{lsp} return matrices generating cubic, quadratic and
#' linear splines respectively.
#' 
#' \code{smsp}, whose code is adapted from function in the package
#' \code{lmeSplines}, generates a smoothing spline to be used in the random
#' effects portion of a call to \code{lme}.
#' @note %% ~~further notes~~
#' @section Warning: The variables generated by \code{gsp} are designed so the
#' coefficients are interpretable as changes in derivatives at knots. The
#' resulting matrix is not designed to have optimal numerical properties.
#' 
#' The intermediate matrices generated by \code{gsp} will contain \code{x}
#' raised to the power equal to the highest degree in \code{degree}. The values
#' of \code{x} should be scaled to avoid excessively large values in the spline
#' matrix and ensuing numerical problems.
#' @author Monette, G. \email{[email protected]@yorku.ca}
#' @seealso \code{\link{wald}}
#' @references %% ~put references to the literature/web site here ~
#' @keywords ~kwd1 ~kwd2
#' @examples
#' 
#' ## Fitting a quadratic spline
#' simd <- data.frame( age = rep(1:50, 2), y = sin(2*pi*(1:100)/5) + rnorm(100),
#'           G = rep( c('male','female'), c(50,50)))
#' # define a function generating the spline
#' sp <- function(x) gsp( x, knots = c(10,25,40), degree = c(1,2,2,1),
#'           smooth = c(1,1,1))
#' 
#' fit <- lm( y ~ sp(age)*G, simd)
#' 
#' require(lattice)
#' xyplot( predict(fit) ~ age , simd, groups = G,type = 'l')
#' summary(fit)
#' 
#' ## Linear hypotheses
#' L <- list( "Overall test of slopes at 20" = rbind(
#'       "Female slope at age 20" =  c( F20 <- cbind( 0 , sc( sp, 20, D = 1), 0 , 0 * sc( sp, 20, D = 1))),
#'       "Male slope at age 20" =  c( M20 <- cbind( 0 , sc( sp, 20, D = 1), 0 , 1 * sc( sp, 20, D = 1))),
#'       "Difference" = c(M20 - F20))
#'       )
#' wald( fit, L)
#' 
#' ## Right and left second derivatives at knots and saltus
#' L <- list( "Second derivatives and saltus for female curve at knot at 25" =
#'           cbind( 0, sc( sp, c(25,25,25), D = 2, type =c(0,1,2)), 0,0,0,0))
#' L
#' wald( fit, L )
#' 
#' ## Smoothing splines
#'       library(nlme)
#'       data(Spruce)
#'       Spruce$all <- 1
#'       range( Spruce$days)
#'       sp <- function(x) smsp ( x, seq( 150, 675, 5))
#'       spruce.fit1 <- lme(logSize ~ days, data=Spruce,
#'           random=list(all= pdIdent(~sp(days) -1),
#'           plot=~1, Tree=~1))
#'       summary(spruce.fit1)
#'       pred <- expand.grid( days = seq( 160, 670, 10), all = 1)
#'       pred$logSize <- predict( spruce.fit1, newdata = pred, level = 1)
#'       require( lattice )
#'       xyplot( logSize ~ days, pred, type = 'l')
#' 
#' @export
gsp <- function( x , knots, degree = 3 , smooth = pmax(pmin( degree[-1],
                                                             degree[ - length(degree)]) - 1,0 ), lin = NULL, periodic = FALSE, intercept = 0, signif = 3) {
  
  if ( periodic  ) {
    maxd <- max(degree)
    #disp(maxd)
    maxk <- max(knots)
    #disp(maxk)
    mid <- matrix(0,maxd+1,(maxd+1)*(length(knots)-1))
    const.per <- do.call(cbind,list( diag(maxd+1),
                                     mid, -PolyShift(maxk,maxd+1)))
    #disp(const.per)
    lin <- rbind(lin,const.per)
    #disp(lin)
  }
  degree = rep( degree, length = length(knots) + 1)
  smooth = rep( smooth, length = length(knots))
  spline.attr <-   list( knots = knots, degree = degree,
                         smooth = smooth, lin = lin, intercept = intercept, signif = signif)
  if (is.null(x)) return ( spline.attr)
  if( periodic ) x <- x %% maxk
  ret = Xf ( x, knots, max(degree), signif = signif) %*%
    spline.E( knots, degree, smooth, lin = lin, intercept = intercept, signif = signif)
  attr(ret, "spline.attr") <- spline.attr
  ret
}




#' Projection matrix %% ~~function to do ... ~~
#' 
#' Projection matrix into column space of a matrix. Works even if the matrix
#' does not have full column rank.
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param x %% ~~Describe \code{x} here~~
#' @export
Proj.1 <- function( x)  x %*% ginv(x)



#' Projection matrix
#' 
#' Matrix of orthogonal projection into column space of matrix.
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param x %% ~~Describe \code{x} here~~
#' @export
Proj <- function(x) {
  # projection matrix onto span(x)
  u <- svd(x,nv=0)$u
  u %*% t(u)
  
}



#' Test accuracy of projection method.
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param x %% ~~Describe \code{x} here~~
#' @param fun %% ~~Describe \code{fun} here~~
#' @export
Proj.test <- function( x, fun = Proj) {
  help = "
  # sample test:
  zz <- matrix( rnorm(120), ncol = 3) %*% diag(c(10000,1,.000001))
  Proj.test( zz, fun = Proj)
  Proj.test( zz, fun = Proj.1)
  "
  # limited testing suggests Proj is generally
  # roughly as good as Proj.1  with errors 1e-17
  # but Proj.1 occasionally has errors ~ 1e-11
  pp <- fun(x)
  ret <- pp%*%pp - pp
  attr(ret,"maxabs") <- max(abs(ret))
  attr(ret,"maxabs")
}




#  round(basis( Proj(tmat)),3)
#  round(basis( Proj(t(tmat))),3)



#' Marked for deletion
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param x %% ~~Describe \code{x} here~~
#' @param knots %% ~~Describe \code{knots} here~~
#' @param degree %% ~~Describe \code{degree} here~~
#' @param smooth %% ~~Describe \code{smooth} here~~
#' @param intercept %% ~~Describe \code{intercept} here~~
#' @param signif %% ~~Describe \code{signif} here~~
#' @export
gspf.1 <- function( x, knots, degree= 3, smooth = pmax(pmin( degree[-1],
                                                            degree[ - length(degree)]) - 1,0 ), intercept = 0, signif = 3) {
  # generates a 'full' spline matrix
  tmat = spline.T(knots, degree, smooth, intercept = intercept, signif = signif)
  # put E first then C  and intercept last
  nC = attr( tmat, "nC")
  nE = ncol(tmat) - nC
  tmat = tmat[, c(seq(nC+1, ncol(tmat)), 2:nC, 1)]  # first nE columns are E matrix
  # tmat = tmat[ , - ncol(tmat)]   # drop intercept columns
  # full matrix
  X = Xf ( x, knots, max(degree), signif = signif) %*% tmat
  qqr = qr(X)
  Q = qr.Q( qqr )
  R = qr.R( qqr )
  # we need to form the linear combination of
  
  gsp( seq(-2,10), 0, c(2,2), 0)
  ret
}

#' @export
sc <-
  function( sp, x , D = 0, type = 1) {
    
    a = sp(NULL)
    D = rep(D, length.out = length(x))
    type = rep ( type, length.out = length(x))
    left = Xf( x, knots = a$knots, degree = max( a$degree), D = D, right = T)
    right = Xf( x, knots = a$knots, degree = max( a$degree), D = D, right = F)
    cleft = c(1,0,-1) [ match( type, c(0,1,2)) ]
    cright = c(0,1,1) [ match( type, c(0,1,2)) ]
    raw = left * cleft + right * cright
    nam = rownames( raw )
    nam = sub("^f","g", nam)
    nam0 = sub( "\\)","-)", nam)
    nam1 = sub( "\\)","+)", nam)
    nam2 = paste( nam1 ,"-",nam0,sep = '')
    rownames( raw ) =
      ifelse( match( x , a$knots, 0) > 0,
              cbind( nam0,nam1,nam2) [ cbind( seq_along(type), type+1)],
              ifelse( type != 2, nam, '0'))
    mod = raw %*% spline.E(
      a$knots, a$degree, a$smooth,
      lin = a$lin,
      intercept = a$intercept,
      signif = a$signif)
    mod
  }




#' Generate a smoothing spline
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param formula %% ~~Describe \code{formula} here~~
#' @param data %% ~~Describe \code{data} here~~
#' 
#' @export
smspline <-
  function (formula, data)
  {
    if (is.vector(formula)) {
      x <- formula
    }
    else {
      if (missing(data)) {
        mf <- model.frame(formula)
      }
      else {
        mf <- model.frame(formula, data)
      }
      if (ncol(mf) != 1)
        stop("formula can have only one variable")
      x <- mf[, 1]
    }
    x.u <- sort(unique(x))
    Zx <- smspline.v(x.u)$Zs
    Zx[match(x, x.u), ]
  }



#' smspline.v from library splines with better ginverse
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param time %% ~~Describe \code{time} here~~
#' 
#' @export
smspline.v <-
  function (time)
  {
    t1 <- sort(unique(time))
    p <- length(t1)
    h <- diff(t1)
    h1 <- h[1:(p - 2)]
    h2 <- h[2:(p - 1)]
    Q <- matrix(0, nr = p, nc = p - 2)
    Q[cbind(1:(p - 2), 1:(p - 2))] <- 1/h1
    Q[cbind(1 + 1:(p - 2), 1:(p - 2))] <- -1/h1 - 1/h2
    Q[cbind(2 + 1:(p - 2), 1:(p - 2))] <- 1/h2
    Gs <- matrix(0, nr = p - 2, nc = p - 2)
    Gs[cbind(1:(p - 2), 1:(p - 2))] <- 1/3 * (h1 + h2)
    Gs[cbind(1 + 1:(p - 3), 1:(p - 3))] <- 1/6 * h2[1:(p - 3)]
    Gs[cbind(1:(p - 3), 1 + 1:(p - 3))] <- 1/6 * h2[1:(p - 3)]
    Gs
    Zus <- t(ginv(Q))  # orig: t(solve(t(Q) %*% Q, t(Q)))
    R <- chol(Gs, pivot = FALSE)
    tol <- max(1e-12, 1e-08 * mean(diag(R)))
    if (sum(abs(diag(R))) < tol)
      stop("singular G matrix")
    Zvs <- Zus %*% t(R)
    list(Xs = cbind(rep(1, p), t1), Zs = Zvs, Q = Q, Gs = Gs,
         R = R)
  }





#' Approximation for semi-parametric splines
#' 
#' %% ~~ A concise (1-5 lines) description of what the function does. ~~
#' 
#' %% ~~ If necessary, more details than the description above ~~
#' 
#' @param Z %% ~~Describe \code{Z} here~~
#' @param oldtimes %% ~~Describe \code{oldtimes} here~~
#' @param newtimes %% ~~Describe \code{newtimes} here~~
#' 
#' @export
approx.Z <-
  function (Z, oldtimes, newtimes)
  {
    oldt.u <- sort(unique(oldtimes))
    if (length(oldt.u) != length(oldtimes) || any(oldt.u != oldtimes)) {
      Z <- Z[match(oldt.u, oldtimes), ]
      oldtimes <- oldt.u
    }
    apply(Z, 2, function(u, oldt, newt) {
      approx(oldt, u, xout = newt)$y
    }, oldt = oldtimes, newt = newtimes)
  }


#' @export
smsp <- function( x, knots ) {
  # generates matrix for smoothing spline over x with knots at knots
  #  Usage:   e.g. from example in smspline
  # lme ( ....., random=list(all= pdIdent(~smsp( x, 0:100) - 1),
  #              plot= pdBlocked(list(~ days,pdIdent(~smsp( x, 0:100) - 1))),
  #              Tree = ~1))
  
  if( max(knots) < max(x) || min(x) < min(knots)) warning( "x beyond range of knots")
  if( length( knots ) < 4 ) warning( "knots should have length at least 4")
  sp = smspline( knots)
  approx.Z( sp, knots, x)
}



#' @export
PolyShift <- function( x, n) {
  # transformation of polynomial coefficients to change origin 
  # sum( coefs * x ^ 0:n) == sum( Pow(a,n)%*%coefs * (x -a)^0:n
  # Useful to equate polynomial coefficients for a periodic spline
  ret <- matrix(0,n,n)
  pow <- x^(col(ret) - row(ret))
  ret[col(ret)>=row(ret)] <- unlist( sapply(0:(n-1),function(x) choose(x,0:x)))
  ret*pow
}
gmonette/spida15 documentation built on May 14, 2017, 1:02 p.m.