# Nothing
# Purpose : Fits a mass-preserving (pycnophylactic) spline to soil profile data;
# Maintainer : Brendan Malone (brendan.malone@sydney.edu.au);
# Contributions : Tomislav Hengl (tom.hengl@wur.nl); Tom Bishop (t.bishop@usyd.edu.au); David Rossiter (david.rossiter@wur.nl)
# Status : experimental
# Note : Mass-preserving spline explained in detail in [http://dx.doi.org/10.1016/S0016-7061(99)00003-8];
# Note 2 : This code needs to be cleaned up;
# Spline fitting for horizon data (created by Brendan Malone; adjusted by T. Hengl)
## mpspline method for "SoilProfileCollection" objects.
##
## Fits a mass-preserving spline to one horizon attribute of every profile and
## evaluates it on a grid of 1-unit depth steps from 1 to max(d).
##
## Arguments:
##   obj           SoilProfileCollection; the slots @idcol, @depthcols and
##                 @horizons are read.
##   var.name      name of the column in obj@horizons to be fitted.
##   lam           smoothing parameter; enters the system as 6 * n * lam.
##                 NOTE(review): the algebra below only makes sense for a single
##                 value -- confirm that callers never pass a vector.
##   d             boundaries of the standard depth intervals over which the
##                 fitted curve is averaged; max(d) sets the length of the grid.
##   vlow, vhigh   fitted values of multi-horizon profiles are clamped to
##                 [vlow, vhigh]; single-horizon profiles are not clamped.
##   show.progress if TRUE a text progress bar is shown.
##
## Value: a list with
##   idcol       first column of the reshaped (one row per profile) table,
##   var.fitted  fitted layer means at the observed horizons (profile x horizon),
##   var.std     data frame of averages per standard depth interval plus a
##               final "soil depth" column holding the deepest observed boundary,
##   var.1cm     fitted values on the 1-unit grid (grid step x profile).
##
## Profiles that cannot be fitted (no complete horizon, or a singular system)
## keep NA in all outputs and are reported with message().
setMethod('mpspline', signature(obj = "SoilProfileCollection"),
  function(obj, var.name, lam = 0.1, d = t(c(0,5,15,30,60,100,200)), vlow = 0, vhigh = 1000, show.progress=TRUE){

    depthcols <- obj@depthcols
    idcol <- obj@idcol

    ## keep only the id, depth and target columns of the horizons table:
    obj@horizons <- obj@horizons[, c(idcol, depthcols, var.name)]
    ## TH: remove all horizons with negative depth!
    obj@horizons <- obj@horizons[!obj@horizons[, depthcols[1]] < 0 & !obj@horizons[, depthcols[2]] < 0, ]
    ## convert to a data frame with one row per profile:
    objd <- .as.data.frame.SoilProfileCollection(x = obj)

    ## organize the data:
    ndata <- nrow(objd)
    mxd <- max(d)
    ## fitted values on the 1-unit depth grid, one row per profile:
    m_fyfit <- matrix(NA, ncol = length(c(1:mxd)), nrow = ndata)
    ## averages of the fitted curve over the depth intervals given in (d),
    ## plus the maximum observed depth in the last column:
    yave <- matrix(NA, ncol = length(d), nrow = ndata)
    ## estimate of the true mean squared error per profile
    ## (kept for diagnostics; it is not part of the returned list):
    sset <- matrix(NA, ncol = length(lam), nrow = ndata)

    svar.lst <- grep(names(objd), pattern = glob2rx(paste(var.name, "_*", sep = "")))
    ## 5% of the standard deviation of the target attribute, and its square:
    s <- 0.05 * sd(unlist(unclass(objd[, svar.lst])), na.rm = TRUE)
    s2 <- s * s

    ## reformat table (profile no, upper boundary, lower boundary, vars):
    upperb.lst <- grep(names(objd), pattern = glob2rx(paste(depthcols[1], "_*", sep = "")))
    lowerb.lst <- grep(names(objd), pattern = glob2rx(paste(depthcols[2], "_*", sep = "")))
    objd_m <- objd[, c(grep(names(objd), pattern = glob2rx(paste0("^", idcol))), upperb.lst, lowerb.lst, svar.lst)]
    np <- length(svar.lst) # max number of horizons
    ## fitted layer means at the observed horizons:
    dave <- matrix(NA, ncol = np, nrow = ndata)
    if (is.na(np) || np < 2) {
      message("Submitted soil profiles all have 1 horizon")
    }
    ## column positions inside the reduced table:
    svar.lst <- grep(names(objd_m), pattern = glob2rx(paste(var.name, "_*", sep = "")))
    upperb.lst <- grep(names(objd_m), pattern = glob2rx(paste(depthcols[1], "_*", sep = "")))
    lowerb.lst <- grep(names(objd_m), pattern = glob2rx(paste(depthcols[2], "_*", sep = "")))

    ## if missing, fill in the upper depth of the first horizon as "0":
    first.upper <- which(names(objd_m) == paste(depthcols[1], "_A", sep = ""))
    missing.A <- is.na(objd_m[, first.upper])
    objd_m[missing.A, first.upper] <- 0

    ## select profiles that have a value in horizon A or B and at least one
    ## value, one upper and one lower boundary:
    sel <- !(is.na(objd_m[, which(names(objd_m) == paste(var.name, "_A", sep = ""))]) & is.na(objd_m[, which(names(objd_m) == paste(var.name, "_B", sep = ""))])) &
      rowSums(!is.na(objd_m[, svar.lst])) > 0 &
      rowSums(!is.na(objd_m[, upperb.lst])) > 0 &
      rowSums(!is.na(objd_m[, lowerb.lst])) > 0
    if (sum(sel) == 0) {
      stop("Submitted soil profiles do not contain enough horizons (>2) for spline fitting")
    }

    ## detect lowest horizon no:
    uw.hor <- rowSums(!is.na(objd_m[, upperb.lst]))
    ## profiles with un-even number of lower/upper depths:
    lw.hor <- as.vector(which(rowSums(!is.na(objd_m[, lowerb.lst])) < rowSums(!is.na(objd_m[, upperb.lst])) & rowSums(!is.na(objd_m[, upperb.lst])) > 1))
    ## add missing lower depth where necessary (150 if the last upper boundary
    ## is shallower than 150, otherwise 200):
    for (lw in lw.hor) {
      uwx <- objd_m[lw, upperb.lst[uw.hor[lw]]]
      if (!is.na(uwx) && sel[lw] == TRUE) {
        message("Adding missing lower depths...")
        if (uwx < 150) {
          objd_m[lw, lowerb.lst[uw.hor[lw]]] <- 150
        } else {
          objd_m[lw, lowerb.lst[uw.hor[lw]]] <- 200
        }
      }
    }

    ## Helper: mean of the fitted curve over each standard depth interval,
    ## followed by the maximum observed depth of the profile.
    std_depth_means <- function(yfit, nj, max.depth) {
      nd <- length(d) - 1 # number of depth intervals
      dl <- d + 1         # interval boundaries as 1-based grid columns
      out <- rep(NA_real_, length(d))
      for (cj in seq_len(nd)) {
        xd1 <- dl[cj]
        xd2 <- dl[cj + 1] - 1
        ## when the profile ends inside this interval, average only down to nj-1
        ## NOTE(review): for nj == xd1 this yields the descending range
        ## xd1:(xd1-1); kept as in the original -- confirm the intent.
        if (nj >= xd1 && nj <= xd2) {
          xd2 <- nj - 1
        }
        out[cj] <- mean(yfit[, xd1:xd2])
      }
      out[nd + 1] <- max.depth
      out
    }

    ## Fit splines profile by profile:
    message("Fitting mass preserving splines per profile...")
    if (show.progress) {
      pb <- txtProgressBar(min = 0, max = length(sel), style = 3)
      ## close the bar even when a profile raises an error
      on.exit(close(pb), add = TRUE)
    }

    for (st in as.vector(which(sel))) {
      ## columns: horizon no, upper boundary, lower boundary, value
      subs <- matrix(unlist(c(1:np, as.vector(objd_m[st, upperb.lst]), as.vector(objd_m[st, lowerb.lst]), as.vector(objd_m[st, svar.lst]))), ncol = 4)
      ## mask out incomplete horizons (drop = FALSE keeps a 1-row matrix)
      complete <- !is.na(subs[, 2]) & !is.na(subs[, 3]) & !is.na(subs[, 4])
      subs <- subs[complete, , drop = FALSE]
      ## the algebra below expects 1 x n row matrices
      u <- t(subs[, 2]) # upper
      v <- t(subs[, 3]) # lower
      y <- t(subs[, 4]) # concentration
      n <- length(y)    # number of observations in the profile

      if (n == 0) {
        ## nothing to fit; outputs for this profile stay NA
        message(paste("Spline not fitted to profile:", objd_m[st, 1], "(no complete horizons)", sep = " "))
      } else if (n == 1) {
        ## routine for handling profiles with one observation:
        ## the single value is spread over the sampled depth range
        message(paste("Spline not fitted to profile:", objd_m[st, 1], sep = " "))
        xfit <- as.matrix(t(c(1:mxd))) # depths of the 1-unit grid
        nj <- max(v)
        if (nj > mxd) {
          nj <- mxd
        }
        yfit <- xfit
        yfit[, 1:nj] <- y
        if (nj < mxd) {
          yfit[, (nj + 1):mxd] <- NA
        }
        m_fyfit[st, ] <- yfit
        yave[st, ] <- std_depth_means(yfit, nj, max(v))
      } else {
        ## ESTIMATION OF SPLINE PARAMETERS
        nm1 <- n - 1
        delta <- v - u                 # depths of each layer
        del <- c(u[2:n], u[n]) - v     # del is (u1-v0, u2-v1, ...)

        ## (n-1)x(n-1) matrix r: start with 1's on the diagonal and the
        ## upper diagonal, 0's elsewhere
        r <- diag(1, nrow = nm1, ncol = nm1)
        for (k in seq_len(nm1 - 1)) {
          r[k, k + 1] <- 1
        }
        ## premultiply by the diagonal matrix of layer depths and add the
        ## transpose; this gives half of r
        d2 <- matrix(0, ncol = nm1, nrow = nm1)
        diag(d2) <- delta[2:n]
        r <- d2 %*% r
        r <- r + t(r)
        ## then add the remaining diagonal terms
        d1 <- matrix(0, ncol = nm1, nrow = nm1)
        diag(d1) <- delta[1:nm1]
        d3 <- matrix(0, ncol = nm1, nrow = nm1)
        diag(d3) <- del[1:nm1]
        r <- r + 2 * d1 + 6 * d3

        ## (n-1)xn matrix q: -1 on the diagonal, 1 on the upper diagonal
        q <- matrix(0, nrow = nm1, ncol = n)
        for (k in seq_len(nm1)) {
          q[k, k] <- -1
          q[k, k + 1] <- 1
        }

        ## inverse of r; this can fail for singular matrices
        rinv <- tryCatch(solve(r), error = function(e) NULL)
        if (is.null(rinv)) {
          message(paste("Spline not fitted to profile:", objd_m[st, 1], "(singular matrix)", sep = " "))
        } else {
          ## create the matrix coefficent z
          pr.mat <- matrix(0, ncol = nm1, nrow = n)
          pr.mat[] <- 6 * n * lam
          fdub <- pr.mat * (t(q) %*% rinv)
          z <- fdub %*% q + diag(n)
          ## solve for the fitted layer means
          sbar <- solve(z, t(y))
          ## calculate the fitted value at the knots
          b <- 6 * rinv %*% q %*% sbar
          b0 <- rbind(0, b) # add a row to top = 0
          b1 <- rbind(b, 0) # add a row to bottom = 0
          gamma <- (b1 - b0) / t(2 * delta)
          alfa <- sbar - b0 * t(delta) / 2 - gamma * t(delta)^2 / 3
          ## END ESTIMATION OF SPLINE PARAMETERS

          ## fit the spline on the 1-unit grid
          xfit <- as.matrix(t(c(1:mxd)))
          nj <- max(v)
          if (nj > mxd) {
            nj <- mxd
          }
          yfit <- xfit
          for (k in 1:nj) {
            xd <- xfit[k]
            if (xd < u[1]) {
              ## above the first horizon
              p <- alfa[1]
            } else {
              for (its in 1:n) {
                ## tf2: is xd in the gap between layer its and layer its+1?
                if (its < n) {
                  tf2 <- as.numeric(xd > v[its] & xd < u[its + 1])
                } else {
                  tf2 <- 0
                }
                if (xd >= u[its] && xd <= v[its]) {
                  ## inside layer its: quadratic piece
                  p <- alfa[its] + b0[its] * (xd - u[its]) + gamma[its] * (xd - u[its])^2
                } else if (tf2) {
                  ## inside a gap: linear piece
                  phi <- alfa[its + 1] - b1[its] * (u[its + 1] - v[its])
                  p <- phi + b1[its] * (xd - v[its])
                }
              }
            }
            yfit[k] <- p
          }
          if (nj < mxd) {
            yfit[, (nj + 1):mxd] <- NA
          }
          ## clamp to the admissible range
          yfit[which(yfit > vhigh)] <- vhigh
          yfit[which(yfit < vlow)] <- vlow
          m_fyfit[st, ] <- yfit

          ## averages of the spline at specified depths
          yave[st, ] <- std_depth_means(yfit, nj, max(v))
          ## spline estimates at observed depths
          dave[st, 1:n] <- sbar

          ## estimate of the true mean squared error between observed and
          ## fitted layer values
          ssq <- sum((t(y) - sbar)^2)
          ei <- eigen(solve(z))$values
          df <- n - sum(ei)
          tmse <- ssq / n - 2 * s2 * df / n + s2
          sset[st] <- tmse
        }
      }
      if (show.progress) {
        setTxtProgressBar(pb, st)
      }
    }

    ## aesthetics for output: label the columns of yave
    yave <- as.data.frame(yave)
    jmat <- rep(NA_character_, length(d))
    for (i in seq_len(length(d) - 1)) {
      jmat[i] <- paste(paste(d[i], d[i + 1], sep = "-"), "cm", sep = " ")
    }
    jmat[length(d)] <- "soil depth"
    names(yave) <- jmat

    retval <- list(idcol = objd_m[, 1], var.fitted = dave, var.std = yave, var.1cm = t(m_fyfit))
    return(retval)
})
# end of script;
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.