demo/weathersmooth.R

#  This file is intended to be used after the weather data have been set up
#  by the commands in weathersetup.R

#  The weather data are smoothed using the harmonic acceleration penaltym
#  and the smoothed data are saved as a named list weatherfd.

#  Many other interesting ways of smoothing the data and plotting results
#  can be found in the file canadian-weather.R, set up in 2008 by 
#  Spencer Graves.

#  Last modified 17 November 2008

#  load the data

#load("weatherdata")

#  ------------------------  set up the data  -----------------------

tempav  <- daily$tempav
precav  <- daily$precav
station <- daily$place

#  set up the times of observation at noon

daytime   <- (1:365)-0.5

daytime <- weatherdata$daytime
station <- weatherdata$station

#  JJindex re-orders days so that they run from July 1 to June 30.
#  This can be useful for understanding weather data because variation 
#  from curve to curve is greater and more interesting in mid-winter.  
#  July 1 is the 182nd day.
#  If this reordering is desired, use commands
#  daytime = daytime[JJindex]
#  tempav = tempav[JJindex,]
#  precav = precav[JJindex,]

JJindex = c(182:365, 1:181)

#  -------------  set up fourier basis  ---------------------------

#  Here it was decided that 65 basis functions captured enough of
#  the detail in the temperature data: about one basis function
#  per week.  However, see below for smoothing with a saturated
#  basis (365 basis functions) where smoothing is defined by the
#  GCV criterion.

rangeval <- c(0,365)
period   <- 365
nbasis   <- 365
daybasis <- create.fourier.basis(rangeval, nbasis, period)

#  -----------  set up the harmonic acceleration operator  ----------

harmaccelLfd <- vec2Lfd(c(0,(2*pi/365)^2,0), rangeval)

#  ---------------------------------------------------------------------
#                        Smooth temperature  
#  ---------------------------------------------------------------------

tempav <- weatherdata$tempav

#                 Choose level of smoothing using
#          the generalized cross-validation criterion
#              with smoothing function smooth.basis.

#  set up range of smoothing parameters in log_10 units

loglam <- -3:3
nlam   <- length(loglam)

dfsave  <- rep(0,nlam)
gcvsave <- rep(0,nlam)

#  loop through smoothing parameters

for (ilam in 1:nlam) {
	lambda     <- 10^loglam[ilam]
	cat(paste("lambda =",lambda,"\n"))
	fdParobj   <- fdPar(daybasis, harmaccelLfd, lambda)
	smoothlist <- smooth.basis(daytime, tempav, fdParobj)
	fdobj      <- smoothlist[[1]]
	df         <- smoothlist[[2]]
	gcv        <- smoothlist[[3]]
	dfsave[ilam]  <- df
	gcvsave[ilam] <- sum(gcv)
}

#  display and plot degrees of freedom and GCV criterion

cbind(loglam, dfsave, gcvsave)

par(mfrow=c(1,2), pty="m")
plot(loglam, gcvsave, type="b", cex=1,
     xlab="Log_10 lambda", ylab="GCV Criterion",
     main="Temperature Smoothing")

plot(loglam, dfsave, type="b",  cex=1,
     xlab="Log_10 lambda", ylab="Degrees of freedom",
     main="Temperature Smoothing")

#  Do final smooth with lambda = 100 because the minimum GCV value
#  is very rough, and also equivalent to 255 degrees of freedom,
#  whereas the smooth with this smoothing parameter value is able 
#  to nicely track high frequency events such as the January thaws
#  clearly visible in the data.  Moreover, the lambda = 100 smooth
#  is equivalent to about 56 degrees of freedom, which is enough to
#  track events down to a resolution about one week.

lambda   <- 100  #  minimum GCV estimate, corresponding to 255 df
fdParobj <- fdPar(daybasis, harmaccelLfd, lambda)

smoothlist <- smooth.basis(daytime, tempav, fdParobj)

daytempfd  <- smoothlist$fd
df         <- smoothlist$df
SSE        <- smoothlist$SSE
tempy2cMap <- smoothlist$y2cMap

print(paste("Degrees of freedom =", round(df,0)))

daytempfd$fdnames <- list(Day = NULL, Station = station, DegC = NULL)

#  estimate standard error of fit

stderr <- sqrt(SSE/(35*(365-df)))  

print(paste("Standard error =", round(stderr,2), "deg. C"))

#  plot data and fit

par(mfrow=c(1,1), pty="m", ask=T)
plotfit.fd(tempav, daytime, daytempfd, titles=station)

#  Compute the standard error of estimate for the fitted curve

Phimat <- eval.basis(daytime, daybasis)

yhatMap <- Phimat %*% tempy2cMap

SigmaErrMat <- crossprod(t(yhatMap))*stderr^2

StdErrVec <- sqrt(diag(SigmaErrMat))

#  Set up the data for the winter days, from Dec. 15 to Feb. 28

WinterIndex <- c(340:365,1:66)
WinterTime  <- (1:length(WinterIndex))-0.5

#  Plot the winter data, the fit, and 
#  the pointwise 95% confidence intervals for the fit

tempavhat <- eval.fd(daytime, daytempfd)
par(ask=T)
for (i in 1:35) {
  plot(WinterTime, tempavhat[WinterIndex,i], type="l", cex = 1.2,
       xlim=c(0,length(WinterIndex)), ylim=c(-35,5),
       xlab="Day from Dec. 15", ylab="Deg. C", main=station[i])
  lines(WinterTime, tempavhat[WinterIndex,i]+2*StdErrVec[WinterIndex], lty=3)
  lines(WinterTime, tempavhat[WinterIndex,i]-2*StdErrVec[WinterIndex], lty=3)
  points(WinterTime, tempav[WinterIndex,i])
}
      
#  ---------------------------------------------------------------------
#                        Smooth precipitation  
#  ---------------------------------------------------------------------

#                 Choose level of smoothing using
#          the generalized cross-validation criterion
#              with smoothing function smooth.basis.

#  set up range of smoothing parameters in log.10 units

loglam <- 4:9
nlam <- length(loglam)

dfsave  <- rep(0,nlam)
gcvsave <- rep(0,nlam)

#  loop through smoothing parameters

for (ilam in 1:nlam) {
	lambda <- 10^loglam[ilam]
	cat(paste("lambda =",lambda,"\n"))
	fdParobj <- fdPar(daybasis, harmaccelLfd, lambda)
	smoothlist <- smooth.basis(daytime, precav, fdParobj)
	fdobj  <- smoothlist[[1]]
	df     <- smoothlist[[2]]
	gcv    <- smoothlist[[3]]
	dfsave[ilam]  <- df
	gcvsave[ilam] <- sum(gcv)
}

#  display and plot degrees of freedom and GCV criterion

cbind(loglam, dfsave, gcvsave)

par(mfrow=c(1,2), pty="m")
plot(loglam, gcvsave, type="b", cex=1,
     xlab="Log_10 lambda", ylab="GCV Criterion",
     main="Precipitation Smoothing")

plot(loglam, dfsave, type="b", cex=1,
     xlab="Log_10 lambda", ylab="Degrees of freedom",
     main="Precipitation Smoothing")

#  Do final smooth with lambda = 1e5 rather than minimum GCV value
#  of 1e7 because the latter seems to over-smooth several sets of data

lambda   <- 1e5  
fdParobj <- fdPar(daybasis, harmaccelLfd, lambda)

smoothlist <- smooth.basis(daytime, precav, fdParobj)

dayprecfd  <- smoothlist$fd
df         <- smoothlist$df
SSE        <- smoothlist$SSE
precy2cMap <- smoothlist$y2cMap

print(paste("Degrees of freedom =", round(df,0)))

dayprecfd$fdnames <- list(Day = NULL, Station = station, Millimetres = NULL)

#  estimate standard error of fit

stderr <- sqrt(SSE/(35*(365-df))) 

print(paste("Standard error =", round(stderr,2), "mm"))

#  plot data and fit

par(mfrow=c(1,1), pty="m", ask=T)
plotfit.fd(precav, daytime, dayprecfd, titles=station)

#  ---------------------------------------------------------------------
#    Plot precipitation against temperature for each station in turn
#  ---------------------------------------------------------------------

daytime <- weatherdata$daytime
station <- weatherdata$station
tempmat <- eval.fd(daytime, daytempfd)
precmat <- eval.fd(daytime, dayprecfd)

#  define 1-character names for months

monthletter <- c("J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D")
monthtime   <- c(1,32,50,81,111,142,172,203,234,264,295,315)

par(ask=T)
for (i in 1:35) {
  plot(tempmat[,i], precmat[,i], type="l", cex=1.2, 
       xlim=c(-35,25), ylim=c(0,13), 
       xlab="Temperature", ylab="Precipitation",
       main=station[i])
  text(tempmat[monthtime,i], precmat[monthtime,i], monthletter, cex=1.2)
}

#  -------------------------------------------------------------------
#         save the results as two fdPar objects plus y2cMaps
#  -------------------------------------------------------------------

tempfdPar <- fdPar(daytempfd, harmaccelLfd, 1e2)
precfdPar <- fdPar(dayprecfd, harmaccelLfd, 1e5)

weatherfd <- list(tempfdPar  = tempfdPar,  precfdPar  = precfdPar,
                  tempy2cMap = tempy2cMap, precy2cMap = precy2cMap)

save(weatherfd, file="weatherfd")

#  -------------------------------------------------------------------
#     Smooth Vancouver's precipitation with a positive function.
#  -------------------------------------------------------------------

#  select Vancouver's precipitation

index <- (1:35)[station == "Vancouver  "] 
VanPrec  <- precav[,index]

#  We use a more compact basis here to avoid storage allocation
#  problems that arise when 365 basis functions are used.

dayrange   <- c(0,365)
daybasis65 <- create.fourier.basis(dayrange, 65)

#  smooth the data using 65 basis functions

lambda    <- 1e4
fdParobj  <- fdPar(daybasis65, harmaccelLfd, lambda)
VanPrecfd <- smooth.basis(daytime, VanPrec, fdParobj)$fd

#  Plot temperature curves and values

plotfit.fd(VanPrec, daytime, VanPrecfd, titles=station[index])

#  smooth the data with a positive function

Wfd1 <- smooth.pos(daytime, VanPrec, fdParobj)$Wfdobj

#  plot both the original smooth and positive smooth

VanPrecvec    <- eval.fd(daytime, VanPrecfd)
VanPrecposvec <- eval.posfd(daytime, Wfd1)

par(ask=F)
plot(daytime,  VanPrec, type="p",
     xlab="Day", ylab="Precipitation (mm)")
lines(daytime, VanPrecposvec, lwd=2, col=4)
lines(daytime, VanPrecvec, lwd=2, col=2)
legend(100, 8, c("Positive smooth", "Unrestricted smooth"), 
       lty=c(1,1), col=c(4,2))

#  plot the squared residuals

VanPrecres <- VanPrec - VanPrecposvec
plot(daytime, VanPrecres^2, type="p",
     xlab="Day", ylab="Squared Residuals")
title("Squared residuals from positive fit")

#  compute a postive smooth of the squared residuals

lambda <- 1e3
fdParobj <- fdPar(daybasis65, harmaccelLfd, lambda)

Wfd <- smooth.pos(daytime, VanPrecres^2, fdParobj)$Wfdobj

#  plot the square root of this smooth along with the residuals

VanPrecvarhat <- eval.posfd(daytime, Wfd)
VanPrecstdhat <- sqrt(VanPrecvarhat)

plot(daytime, VanPrecres^2, type="p",
     xlab="Day", ylab="Squared Residuals")
lines(daytime, VanPrecvarhat, lwd=2)

#  set up a weight function for (revised smoothing

wtvec <- as.vector(1/VanPrecvarhat)

lambda   <- 1e3
fdParobj <- fdPar(daybasis65, harmaccelLfd, lambda)

Wfd2 <- smooth.pos(daytime, VanPrec, fdParobj, wtvec)$Wfdobj

#  plot the two smooths, one with weighting, one without

VanPrecposvec2 <- eval.posfd(daytime, Wfd2)

plot(daytime,  VanPrec, type="p")
lines(daytime, VanPrecposvec2, lwd=2, col=4)
lines(daytime, VanPrecposvec, lwd=2, col=2)
legend(100, 8, c("Weighted", "Unweighted"), 
       lty=c(1,1), col=c(4,2))

Try the fda package in your browser

Any scripts or data that you put into this service are public.

fda documentation built on May 2, 2019, 5:12 p.m.