###
### Ramsay, Hooker & Graves (2009)
### Functional Data Analysis with R and Matlab (Springer)
###
# Remarks and disclaimers
# These R commands are either those in this book, or designed to
# otherwise illustrate how R can be used in the analysis of functional
# data.
# We do not claim to reproduce the results in the book exactly by these
# commands for various reasons, including:
# -- the analyses used to produce the book may not have been
# entirely correct, possibly due to coding and accuracy issues
# in the functions themselves
# -- we may have changed our minds about how these analyses should be
# done since, and we want to suggest better ways
# -- the R language changes with each release of the base system, and
# certainly the functional data analysis functions change as well
# -- we might choose to offer new analyses from time to time by
# augmenting those in the book
# -- many illustrations in the book were produced using Matlab, which
# inevitably can imply slightly different results and graphical
# displays
# -- we may have changed our minds about variable names. For example,
# we now prefer "yearRng" for the range of the weather data to the
# name used in the book.
# -- three of us wrote the book, and the person preparing these scripts
# might not be the person who wrote the text
# Moreover, we expect to augment and modify these command scripts from time
# to time as we get new data illustrating new things, add functionality
# to the package, or just for fun.
###
### ch. 5. Smoothing: Computing Curves from Noisy Data
###
# load the fda package
library(fda)
# display the data files associated with the fda package
data(package='fda')
# start the HTML help system, which opens the R documentation index page
# where information about R and the fda package can be found
# (an Internet connection may be needed to follow some links)
help.start()
##
## Section 5.1. Regression Splines: Smoothing by Regression Analysis
##
# ------------------- Smoothing the growth data ------------------------
# define the range of the ages and set up a fine mesh of ages
ageRng = c(1,18)
age = growth$age
agefine = seq(1,18,len=501)
# set up order 6 spline basis with 12 basis functions for
# fitting the growth data so as to estimate acceleration
nbasis = 12
norder = 6
heightbasis12 = create.bspline.basis(ageRng, nbasis, norder)
# fit the data by least squares
basismat = eval.basis(age, heightbasis12)
heightmat = growth$hgtf
heightcoef = lsfit(basismat, heightmat, intercept=FALSE)$coef
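# Sketch (an addition to the book's script): evaluate the regression
# spline fit on the fine age mesh and overlay it on the raw data for the
# first girl, just to visualize what the least squares fit produces.
basismatfine  = eval.basis(agefine, heightbasis12)
heightfitfine = basismatfine %*% heightcoef
plot(age, heightmat[, 1], xlab="Age (years)", ylab="Height (cm)")
lines(agefine, heightfitfine[, 1], lwd=2)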
# fit the data using function smooth.basis, which does the same thing.
heightList = smooth.basis(age, heightmat, heightbasis12)
heightfd = heightList$fd
height.df = heightList$df
height.gcv = heightList$gcv
heightbasismat = eval.basis(age, heightbasis12)
y2cMap = solve(crossprod(heightbasismat),
t(heightbasismat))
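# Quick check (an addition): applying y2cMap to the data matrix should
# reproduce the least squares coefficients computed above, up to
# rounding error.
max(abs(y2cMap %*% heightmat - heightcoef))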
##
## Section 5.2. Data Smoothing with Roughness Penalties
##
# section 5.2.2 The Roughness Penalty Matrix R
# --------------- Smoothing the Canadian weather data ------------------
# define a Fourier basis for daily temperature data
yearRng = c(0,365)
nbasis = 65
tempbasis = create.fourier.basis(yearRng,nbasis)
# define the harmonic acceleration operator
# When the coefficients of the linear differential operator
# are constant, the Lfd object can be set up more simply
# by using function vec2Lfd as follows:
# The first argument is a vector of coefficients for the
# operator, and the second argument is the range over which
# the operator is defined.
harmaccelLfd = vec2Lfd(c(0,(2*pi/365)^2,0), yearRng)
# compute the penalty matrix R
Rmat = eval.penalty(tempbasis, harmaccelLfd)
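# Sketch (an addition): the penalized least squares coefficients that
# smooth.basis computes satisfy c = (Phi'Phi + lambda R)^{-1} Phi'y.
# Montreal's temperature record and lambda = 100 are used here only as
# an illustration.
tempPhi    = eval.basis(day.5, tempbasis)
tempy      = CanadianWeather$dailyAv[, 'Montreal', 'Temperature.C']
templambda = 100
tempcoef   = solve(crossprod(tempPhi) + templambda*Rmat,
                   crossprod(tempPhi, tempy))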
# section 5.2.4 Defining Smoothing by Functional Parameter Objects
# ------- Smoothing the growth data with a roughness penalty -----------
# set up a basis for the growth data
# with knots at ages of height measurement
norder = 6
nbasis = length(age) + norder - 2
heightbasis = create.bspline.basis(ageRng, nbasis, norder, age)
# define a functional parameter object for smoothing
heightLfd = 4
heightlambda = 0.01
heightfdPar = fdPar(heightbasis, heightLfd, heightlambda)
# smooth the data
heightfdSmooth = smooth.basis(age, heightmat, heightfdPar)
heightfd = heightfdSmooth$fd
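# Sketch (an addition): evaluate and plot the acceleration curves implied
# by this penalized fit; estimating acceleration was the reason for
# choosing order 6 splines.
accelmat = eval.fd(agefine, heightfd, 2)
matplot(agefine, accelmat, type='l', lty=1,
        xlab="Age (years)", ylab="Acceleration (cm/yr^2)")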
# section 5.2.5 Choosing Smoothing Parameter lambda
loglam = seq(-6, 0, 0.25)
Gcvsave = rep(NA, length(loglam))
names(Gcvsave) = loglam
Dfsave = Gcvsave
for(i in 1:length(loglam)){
hgtfdPari = fdPar(heightbasis, Lfdobj=4, 10^loglam[i])
hgtSm.i = smooth.basis(age, heightmat, hgtfdPari)
Gcvsave[i] = sum(hgtSm.i$gcv)
Dfsave[i] = hgtSm.i$df
}
# Figure 5.1.
plot(loglam, Gcvsave, type='o', las=1, xlab=expression(log[10](lambda)),
ylab=expression(GCV(lambda)), lwd=2 )
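# Sketch (an addition): the GCV minimizer on this grid can also be picked
# out programmatically and used to refit the data.
lambdaBest   = 10^loglam[which.min(Gcvsave)]
hgtfdParBest = fdPar(heightbasis, Lfdobj=4, lambdaBest)
hgtSmBest    = smooth.basis(age, heightmat, hgtfdParBest)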
##
## 5.3. Case Study: The Log Precipitation Data
##
# organize data to have winter in the center of the plot
dayOfYearShifted = c(182:365, 1:181)
logprecav = CanadianWeather$dailyAv[
dayOfYearShifted, , 'log10precip']
# set up a saturated basis: as many basis functions as observations
nbasis = 365
daybasis = create.fourier.basis(yearRng, nbasis)
# set up the harmonic acceleration operator
Lcoef = c(0,(2*pi/diff(yearRng))^2,0)
harmaccelLfd = vec2Lfd(Lcoef, yearRng)
# step through values of log(lambda)
loglam = seq(4,9,0.25)
nlam = length(loglam)
dfsave = rep(NA,nlam)
names(dfsave) = loglam
gcvsave = dfsave
for (ilam in 1:nlam) {
cat(paste('log10 lambda =',loglam[ilam],'\n'))
lambda = 10^loglam[ilam]
fdParobj = fdPar(daybasis, harmaccelLfd, lambda)
smoothlist = smooth.basis(day.5, logprecav,
fdParobj)
dfsave[ilam] = smoothlist$df
gcvsave[ilam] = sum(smoothlist$gcv)
}
# Figure 5.2.
plot(loglam, gcvsave, type='b', lwd=2, ylab='GCV Criterion',
xlab=expression(log[10](lambda)) )
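# Sketch (an addition): the grid value minimizing GCV, for comparison
# with the rounded value lambda = 1e6 used below.
10^loglam[which.min(gcvsave)]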
# smooth data with minimizing value of lambda
lambda = 1e6
fdParobj = fdPar(daybasis, harmaccelLfd, lambda)
logprec.fit = smooth.basis(day.5, logprecav, fdParobj)
logprec.fd = logprec.fit$fd
fdnames = list("Day (July 1 to June 30)",
"Weather Station" = CanadianWeather$place,
"Log 10 Precipitation (mm)")
logprec.fd$fdnames = fdnames
# plot the functional data object
plot(logprec.fd, lwd=2)
# plotfit.fd: Pauses between plots
# *** --->>> input required (e.g., click on the plot)
# to advance to the next plot
plotfit.fd(logprecav, day.5, logprec.fd, lwd=2)
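# Sketch (an addition): overlay the smooth on the raw values for a single
# station selected by name; 'Pr. Rupert' is used here only as an example.
station     = 'Pr. Rupert'
istation    = which(CanadianWeather$place == station)
logprecmat0 = eval.fd(day.5, logprec.fd)
plot(day.5, logprecav[, istation], xlab="Day (July 1 to June 30)",
     ylab="Log 10 Precipitation (mm)", main=station)
lines(day.5, logprecmat0[, istation], lwd=2)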
##
## Section 5.4 Positive, Monotone, Density
## and Other Constrained Functions
##
# ---------------- Positive smoothing of precipitation ----------------
lambda = 1e3
WfdParobj = fdPar(daybasis, harmaccelLfd, lambda)
VanPrec = CanadianWeather$dailyAv[
dayOfYearShifted, 'Vancouver', 'Precipitation.mm']
VanPrecPos = smooth.pos(day.5, VanPrec, WfdParobj)
Wfd = VanPrecPos$Wfdobj
Wfd$fdnames = list("Day (July 1 to June 30)",
"Weather Station" = CanadianWeather$place,
"Log 10 Precipitation (mm)")
precfit = exp(eval.fd(day.5, Wfd))
plot(day.5, VanPrec, type="p", cex=1.2,
xlab="Day (July 1 to June 30)",
ylab="Millimeters",
main="Vancouver's Precipitation")
lines(day.5, precfit,lwd=2)
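# Sketch (an addition, not a figure in the book): an ordinary
# unconstrained smooth of the same record, shown dashed, can dip below
# zero, which the positive smooth cannot do.
VanPrecUncon = smooth.basis(day.5, VanPrec, WfdParobj)$fd
lines(day.5, eval.fd(day.5, VanPrecUncon), lty=2)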
# ------------ 5.4.2.1 Monotone smoothing of the tibia data -----------
# set up the data for analysis
day = infantGrowth[, 'day']
tib = infantGrowth[, 'tibiaLength']
n = length(tib)
# a basis for monotone smoothing
nbasis = 42
Wbasis = create.bspline.basis(c(1,n), nbasis)
# the fdPar object for smoothing
Wfd0 = fd(matrix(0,nbasis,1), Wbasis)
WfdPar = fdPar(Wfd0, 2, 1e-4)
# smooth the data
result = smooth.monotone(day, tib, WfdPar)
Wfd = result$Wfd
beta = result$beta
# compute fit and derivatives of fit
dayfine = seq(1,n,len=151)
tibhat = beta[1] + beta[2]*eval.monfd(dayfine, Wfd)
Dtibhat = beta[2]*eval.monfd(dayfine, Wfd, 1)
D2tibhat = beta[2]*eval.monfd(dayfine, Wfd, 2)
# plot height
op = par(mfrow=c(3,1), mar=c(5,5,3,2), lwd=2)
plot(day, tib, type = "p", cex=1.2, las=1,
xlab="Day", ylab='', main="Tibia Length (mm)")
lines(dayfine, tibhat, lwd=2)
# plot velocity
plot(dayfine, Dtibhat, type = "l", cex=1.2, las=1,
xlab="Day", ylab='', main="Tibia Velocity (mm/day)")
# plot acceleration
plot(dayfine, D2tibhat, type = "l", cex=1.2, las=1,
xlab="Day", ylab='', main="Tibia Acceleration (mm/day/day)")
lines(c(1,n),c(0,0),lty=2)
par(op)
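# Sketch (an addition): the monotone fit at the observed days and its
# root-mean-squared residual, to quantify how closely it tracks the data.
tibhatobs = beta[1] + beta[2]*eval.monfd(day, Wfd)
sqrt(mean((tib - tibhatobs)^2))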
# --------- 5.4.2.2 Monotone smoothing the Berkeley female data --------
##
## Compute the monotone smoothing of the Berkeley female growth data.
##
# set up ages of measurement and an age mesh
age = growth$age
nage = length(age)
ageRng = range(age)
nfine = 101
agefine = seq(ageRng[1], ageRng[2], length=nfine)
# the data
hgtf = growth$hgtf
ncasef = dim(hgtf)[2]
# an order 6 bspline basis with knots at ages of measurement
norder = 6
nbasis = nage + norder - 2
wbasis = create.bspline.basis(ageRng, nbasis, norder, age)
# define the roughness penalty for function W
Lfdobj = 3 # penalize curvature of acceleration
lambda = 10^(-0.5) # smoothing parameter
cvecf = matrix(0, nbasis, ncasef)
Wfd0 = fd(cvecf, wbasis)
growfdPar = fdPar(Wfd0, Lfdobj, lambda)
# monotone smoothing
growthMon = smooth.monotone(age, hgtf, growfdPar)
# (wait for an iterative fit to each of 54 girls)
Wfd = growthMon$Wfd
betaf = growthMon$beta
hgtfhatfd = growthMon$yhatfd
# Set up functional data objects for the acceleration curves
# and their mean. Suffix UN means "unregistered".
accelfdUN = deriv.fd(hgtfhatfd, 2)
accelmeanfdUN = mean(accelfdUN)
# plot unregistered curves
par(ask=FALSE)
plot(accelfdUN, xlim=ageRng, ylim=c(-4,3), lty=1, lwd=2,
cex=2, xlab="Age", ylab="Acceleration (cm/yr/yr)")
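# Sketch (an addition): overlay the mean acceleration curve for reference,
# using the lines method that the fda package provides for fd objects.
lines(accelmeanfdUN, lwd=3, lty=2)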
# section 5.4.3 Probability Density Functions
# ----------- Density function for Regina Precipitation ----------------
# plot the empirical quantile function
NR = length(ReginaPrecip)
plot(1:NR, sort(ReginaPrecip), xlab='Rank of rainfall',
ylab='Ordered daily rainfall (mm)' )
# select the precipitation values between 2 and 45 mm and sort them
sel2.45 = ((2 <= ReginaPrecip) & (ReginaPrecip <= 45))
RegPrec = sort(ReginaPrecip[sel2.45])
N = length(RegPrec)
# set up a spline basis for the log precipitation density
# with knots placed at equally spaced quantiles of the data
Wknots  = RegPrec[round(N*seq(1/N,1,len=11),0)]
Wnbasis = length(Wknots) + 2
Wnorder = 4
Wbasis  = create.bspline.basis(range(RegPrec), Wnbasis, Wnorder, Wknots)
# set up the functional parameter object
Wlambda = 1e-1
WfdPar = fdPar(Wbasis, 2, Wlambda)
# estimate the density
densityList = density.fd(RegPrec, WfdPar)
Wfd = densityList$Wfdobj
C. = densityList$C
# plot the density with the knot locations
Zfine = seq(RegPrec[1],RegPrec[N],len=201)
Wfine = eval.fd(Zfine, Wfd)
Pfine = exp(Wfine)/C.
plot(Zfine, Pfine, type='l', lwd=2, xlab='Precipitation (mm)',
ylab='Probability Density')
abline(v=Wknots, lty='dashed', lwd=2)
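# Quick check (an addition): the estimated density should integrate to
# approximately one over the range of the selected data.
sum(Pfine)*diff(Zfine[1:2])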
##
## Section 5.5 Assessing the Fit to the Log Precipitation Data
##
logprecmat = eval.fd(day.5, logprec.fd)
logprecres = logprecav - logprecmat
# variance across stations
logprecvar1 = apply(logprecres^2, 1, sum)/35
# variance across time
logprecvar2 = apply(logprecres^2, 2, sum)/(365-12)
# Figure 5.7
plot(sqrt(logprecvar2), xlab='Station Number',
     ylab='Standard deviation across days')
rt = which(CanadianWeather$place %in%
c("Winnipeg", 'Regina', 'Churchill', 'Montreal', 'St. Johns'))
lft = which(CanadianWeather$place %in%
c('Yellowknife', 'Resolute', 'Vancouver', 'Iqaluit',
'Pr. George', 'Pr. Rupert') )
below = which(CanadianWeather$place %in% 'Edmonton')
top = which(CanadianWeather$place %in% 'Halifax')
text(rt, sqrt(logprecvar2[rt]), labels=CanadianWeather$place[rt],
pos=4)
text(lft, sqrt(logprecvar2[lft]), labels=CanadianWeather$place[lft],
pos=2)
text(below, sqrt(logprecvar2[below]), labels=CanadianWeather$place[below],
pos=1)
text(top, sqrt(logprecvar2[top]), labels=CanadianWeather$place[top],
pos=3)
# Figure 5.8
logstddev.fit = smooth.basis(day.5, log(logprecvar1)/2, fdParobj)
logstddev.fd = logstddev.fit$fd
logprecvar1fit = exp(eval.fd(day.5, logstddev.fd))
plot(day.5, sqrt(logprecvar1), xlab='Day',
     ylab='Standard deviation across stations')
lines(day.5, logprecvar1fit, lwd=2)
##
## Section 5.6 Details for the fdPar Class and smooth.basis Function
##
help(fdPar)
help(smooth.basis)
##
## Section 5.7 Some Things to Try
##
# (exercises for the reader)
##
## Section 5.8 More to Read
##