R/smooth.pos.R In fda: Functional Data Analysis

Documented in smooth.pos

smooth.pos <- function(argvals, y, WfdParobj, wtvec=rep(1,n), conv=1e-4,
iterlim=50, dbglev=1) {
#  Smooths the relationship of Y to ARGVALS using weights in WTVEC by fitting a
#     positive function of the form
#                      f(x) = exp W(x)
#     where  W  is a function defined over the same range as ARGVALS,
#                         W = log Df.
#  The fitting criterion is penalized mean squared error:
#    PENSSE(lambda) <- \sum w_i[y_i - f(x_i)]^2 +
#                     \lambda * \int [L W(x)]^2 dx
#  where L is a linear differential operator defined in argument Lfdobj,
#  and w_i is a positive weight applied to the observation.
#  The function W(x) is expanded by the basis in functional data object
#    Wfdobj.

#  Arguments:
#  ARGVALS ...  Argument value array of length N, where N is the number of
#               observed curve values for each curve.  It is assumed that
#               that these argument values are common to all observed
#               curves.  If this is not the case, you will need to
#               run this function inside one or more loops, smoothing
#               each curve separately.
#  Y       ...  Function value array (the values to be fit).
#               If the functional data are univariate, this array will be
#               an N by NCURVE matrix, where N is the number of observed
#               curve values for each curve and NCURVE is the number of
#               curves observed.
#               If the functional data are multivariate, this array will be
#               an N by NCURVE by NVAR matrix, where NVAR the number of
#               functions observed per case.  For example, for the gait
#               data, NVAR = 2, since we observe knee and hip angles.
#  WFDPAROBJ... An fd or an fdPar object.  This object
#               contains the specifications for the functional data
#               object to be estimated by smoothing the data.  See
#               comment lines in function fdPar for details.
#               The functional data object WFD in WFDPAROBJ is used
#               to initialize the optimization process.
#               Its coefficient array contains the starting values for
#               the iterative minimization of mean squared error, and
#               this coefficient array must be either a K by NCURVE
#               matrix or a K by NUCRVE by NVAR array,  where K
#               is the number of basis functions.
#               If WFDPAROBJ is NULL, it will be initialized to
#               a matrix or array of zeros.
#  WTVEC   ...  a vector of weights, a vector of N one's by default.
#  CONV    ...  convergence criterion, 0.0001 by default
#  ITERLIM ...  maximum number of iterations, 50 by default.
#  DBGLEV  ...  Controls the level of output on each iteration.  If 0,
#               no output, if 1, output at each iteration, if higher,
#               output at each line search iteration. 1 by default.
#               enabling this option.

#  Returns are:
#  WFD     ...  Functional data object for W.
#               Its coefficient matrix an N by NCURVE (by NVAR) matrix
#               (or array), depending on whether the functional
#               observations are univariate or multivariate.
#  FLIST ... A list object or a vector of list objects, one for
#            each curve (and each variable if functions are multivariate).
#            Each list object has slots:
#                 f    ... The sum of squared errors
#                 norm ... The norm of the gradient
#  When multiple curves and variables are analyzed, the lists containing
#  FLIST objects are indexed linear with curves varying inside
#  variables.

#  check ARGVALS

if (!is.numeric(argvals)) stop("ARGVALS is not numeric.")
argvals <- as.vector(argvals)
if (length(argvals) < 2) stop("ARGVALS does not contain at least two values.")
n       <- length(argvals)
onesobs <- matrix(1,n,1)

#  at least two points are necessary for monotone smoothing

if (n < 2) stop('ARGVALS does not contain at least two values.')

#  check Y

y      <- as.matrix(y)
ychk   <- ycheck(y, n)
y      <- ychk$y ncurve <- ychk$ncurve
nvar   <- ychk$nvar ndim <- ychk$ndim

#  check WfdParobj and get LAMBDA

if (inherits(WfdParobj, "fdPar") || inherits(WfdParobj, "fd")) {
if (inherits(WfdParobj, "fd")) WfdParobj <- fdPar(WfdParobj)
} else {
stop(paste("Argument WFDPAROBJ is neither an fdPar object",
"or an fd object."))
}
Wfdobj   <- WfdParobj$fd Lfdobj <- WfdParobj$Lfd
basisobj <- Wfdobj$basis nbasis <- basisobj$nbasis
coef0    <- Wfdobj$coefs lambda <- WfdParobj$lambda

#  check WTVEC

wtvec <- wtcheck(n, wtvec)$wtvec # set up some arrays climit <- c(rep(-400,nbasis),rep(400,nbasis)) coef0 <- Wfdobj$coefs
active  <- 1:nbasis

#  initialize matrix Kmat defining penalty term

if (lambda > 0) Kmat <- lambda*eval.penalty(basisobj, Lfdobj)
else            Kmat <- matrix(0,nbasis,nbasis)

#  --------------------------------------------------------------------
#              loop through variables and curves
#  --------------------------------------------------------------------

#  set up arrays and lists to contain returned information

if (ndim == 2) {
coef <- matrix(0,nbasis,ncurve)
} else {
coef <- array(0,c(nbasis,ncurve,nvar))
}

if (ncurve > 1 || nvar > 1 ) Flist <- vector("list",ncurve*nvar)
else                         Flist <- NULL

for (ivar in 1:nvar) {
if (ndim == 2) {
sclfac <- mean(c(y)^2)
} else {
sclfac <- mean(c(y[,,ivar])^2)
}
for (icurve in 1:ncurve) {
if (ndim == 2) {
yi    <- y[,icurve]
cveci <- coef0[,icurve]
} else {
yi    <- y[,icurve,ivar]
cveci <- coef0[,icurve,ivar]
}

#  evaluate log likelihood
#    and its derivatives with respect to these coefficients

result <- PENSSEfun(argvals, yi, basisobj, cveci, Kmat, wtvec)
PENSSE   <- result[[1]]
DPENSSE  <- result[[2]]

#  compute initial badness of fit measures

f0    <- PENSSE
gvec0 <- DPENSSE
Foldlist <- list(f = f0, grad = gvec0, norm = sqrt(mean(gvec0^2)))

#  compute the initial expected Hessian

hmat0 <- PENSSEhess(argvals, yi, basisobj, cveci, Kmat, wtvec)

#  evaluate the initial update vector for correcting the initial bmat

deltac   <- -solve(hmat0,gvec0)
cosangle <- -sum(gvec0*deltac)/sqrt(sum(gvec0^2)*sum(deltac^2))

#  initialize iteration status arrays

iternum <- 0
status <- c(iternum, Foldlist$f, -PENSSE, Foldlist$norm)
if (dbglev >= 1) {
if (ncurve > 1 || nvar > 1) {
if (ncurve > 1 && nvar > 1) {
cat("\n")
curvetitle <- paste('Results for curve',icurve,'and variable',ivar)
}
if (ncurve > 1 && nvar == 1) {
cat("\n")
curvetitle <- paste('Results for curve',icurve)
}
if (ncurve == 1 && nvar > 1) {
cat("\n")
curvetitle <- paste('Results for variable',ivar)
}
cat("\n")
cat(curvetitle)
}
cat("\n")
cat("\n")
cat(iternum)
cat("        ")
cat(round(status[2],4))
cat("      ")
cat(round(status[3],4))
}
#  -------  Begin iterations  -----------

MAXSTEPITER <- 10
MAXSTEP     <- 400
trial       <- 1
linemat     <- matrix(0,3,5)
Flisti      <- Foldlist
gvec        <- gvec0
dbgwrd      <- dbglev > 1

if (iterlim == 0) {
cat("\n")
} else {
for (iter in 1:iterlim) {
iternum <- iternum + 1
#  take optimal stepsize
dblwrd <- rep(FALSE,2)
limwrd <- rep(FALSE,2)
stpwrd <- FALSE
ind    <- 0
ips    <- 0
#  compute slope at 0 for line search
linemat[2,1] <- sum(deltac*Flisti$grad) # normalize search direction vector sdg <- sqrt(sum(deltac^2)) deltac <- deltac/sdg dgsum <- sum(deltac) linemat[2,1] <- linemat[2,1]/sdg # initialize line search vectors linemat[,1:4] <- outer(c(0, linemat[2,1], Flisti$f),rep(1,4))
stepiter <- 0
if (dbglev >= 2) {
cat("\n")
cat(paste("                 ", stepiter, "  "))
cat(format(round(t(linemat[,1]),6)))
}
#  break with error condition if (initial slope is nonnegative
if (linemat[2,1] >= 0) {
print("Initial slope nonnegative.")
ind <- 3
break
}
#  return successfully if (initial slope is very small
if (linemat[2,1] >= -1e-5) {
if (dbglev > 1) print("Initial slope too small")
break
}
#  first step set to trial
linemat[1,5]  <- trial
#  Main iteration loop for linesrch
for (stepiter in 1:MAXSTEPITER) {
#  ensure that step does not go beyond limits on parameters
limflg  <- 0
#  check the step size
result <- stepchk(linemat[1,5], cveci, deltac, limwrd, ind,
climit, active, dbgwrd)
linemat[1,5] <- result[[1]]
ind          <- result[[2]]
limwrd       <- result[[3]]
if (linemat[1,5] <= 1e-7) {
#  Current step size too small  terminate
Flisti  <- Foldlist
cvecnew <- cveci
gvecnew <- gvec
if (dbglev >= 2) {
print("Stepsize too small")
print(linemat[1,5])
}
if (limflg) ind <- 1 else ind <- 4
break
}
#  compute new function value and gradient
cvecnew  <- cveci + linemat[1,5]*deltac
result   <- PENSSEfun(argvals, yi, basisobj, cvecnew, Kmat, wtvec)
PENSSE   <- result[[1]]
DPENSSE  <- result[[2]]
Flisti$f <- PENSSE gvecnew <- DPENSSE Flisti$grad <- gvecnew
Flisti$norm <- sqrt(mean(gvecnew^2)) linemat[3,5] <- Flisti$f
#  compute new directional derivative
linemat[2,5] <- sum(deltac*gvecnew)
if (dbglev >= 2) {
cat("\n")
cat(paste("                 ", stepiter, "  "))
cat(format(round(t(linemat[,5]),6)))
}
#  compute next step
result  <- stepit(linemat, ips, dblwrd, MAXSTEP)
linemat <- result[[1]]
ips     <- result[[2]]
ind     <- result[[3]]
dblwrd  <- result[[4]]
trial   <- linemat[1,5]
#  ind == 0 implies convergence
if (ind == 0 | ind == 5) break
#  end of line search loop
}
cveci <- cvecnew
gvec  <- gvecnew
#  test for convergence
if (abs(Flisti$f - Foldlist$f) < sclfac*conv) {
cat("\n")
break
}
if (Flisti$f >= Foldlist$f) break
#  compute the Hessian
hmat <- PENSSEhess(argvals, yi, basisobj, cveci, Kmat, wtvec)
#  evaluate the update vector
deltac <- -solve(hmat,gvec)
cosangle  <- -sum(gvec*deltac)/sqrt(sum(gvec^2)*sum(deltac^2))
if (cosangle < 0) {
if (dbglev > 1) print("cos(angle) negative")
deltac <- -gvec
}
Foldlist <- Flisti
#  display iteration status
status <- c(iternum, Flisti$f, Flisti$norm)
if (dbglev >= 1) {
cat("\n")
cat(iternum)
cat("        ")
cat(round(status[2],4))
cat("      ")
cat(round(status[3],4))
}
#  end of iteration loop
}
}

#  save coefficients in arrays COEF and BETA

if (ndim == 2) {
coef[,icurve] <- cveci
} else {
coef[,icurve,ivar] <- cveci
}

#  save Flisti

if (ncurve == 1 && nvar == 1) {
Flist <- Flisti
} else {
Flist[[(ivar-1)*ncurve+icurve]] <- Flisti
}

}
}

Wfdobj <- fd(coef, basisobj)

posFd <- list("Wfdobj"=Wfdobj, "Flist"=Flist,
"argvals"=argvals, "y"=y)
class(posFd) <- 'posfd'

return(posFd)
}

#  ---------------------------------------------------------------

PENSSEfun <- function(argvals, yi, basisobj, cveci, Kmat, wtvec) {
#  Computes the log likelihood and its derivative with
#    respect to the coefficients in CVEC
n       <- length(argvals)
nbasis  <- basisobj$nbasis phimat <- getbasismatrix(argvals, basisobj, 0) Wvec <- phimat %*% cveci EWvec <- exp(Wvec) res <- yi - EWvec PENSSE <- mean(wtvec*res^2) + t(cveci) %*% Kmat %*% cveci DPENSSE <- -2*crossprod(phimat,wtvec*res*EWvec)/n + 2*Kmat %*% cveci return( list(PENSSE, DPENSSE) ) } # --------------------------------------------------------------- PENSSEhess <- function(argvals, yi, basisobj, cveci, Kmat, wtvec) { # Computes the expected Hessian n <- length(argvals) nbasis <- basisobj$nbasis
phimat   <- getbasismatrix(argvals, basisobj, 0)
Wvec     <- phimat %*% cveci
EWvec    <- exp(Wvec)
D2PENSSE <- 2*t(phimat) %*% diag(as.numeric(wtvec*EWvec^2)) %*% phimat/n + 2*Kmat
return(D2PENSSE)
}


Try the fda package in your browser

Any scripts or data that you put into this service are public.

fda documentation built on May 29, 2024, 11:26 a.m.