R/smooth.construct.msg.smooth.spec.R

#' Multidimensional Scaling for Generalized additive models - msg
#'
#' Provides smoothing methods for multidimensional scaling-based projections 
#' of the data.
#'
#' Usage is split into two cases: (1) For geographical smooths of two 
#' coordinates, within-area distances with respect to the boundary are used to 
#' create a distance matrix that is then projected into as many dimensions as 
#' required. (2) General distance smoothing where distances are calculated 
#' between observations based on all covariates supplied using Euclidean or 
#' Mahalanobis distances.
#'
#' In both cases smoothing is performed using Duchon splines (see 
#' \code{\link{Duchon.spline}} for more information).
#'
#' @aliases msg smooth.construct.msg.smooth.spec
#' @method smooth.construct msg.smooth.spec
#' @useDynLib msg wood_path
#' @export
#' @import mgcv
#'
#' @param object a smooth specification object, usually generated by a term 
#'    \code{s(...,bs="ds",...)}. Note that \code{xt} object is needed, see 
#'    Details.
#' @param data a list containing just the data (including any \code{by} 
#'    variable) required by this term, with names corresponding to 
#'    \code{object$term} (and \code{object$by}). The \code{by} variable is the 
#'    last element.
#' @param knots IGNORED!
#'
#' @return An object of class \code{msg.smooth}. In addition to the usual 
#'  elements of a smooth class documented under \code{\link{smooth.construct}}, 
#'    this object will contain an element named \code{msg}:
#' \tabular{ll}{\code{metric} \tab which metric was used to calculate the 
#'                  distance matrix. For geographical smoothing only "WAD" 
#'                  (within-area distance) is permitted. Otherwise, "euclidean"
#'                  or "mahalanobis" are allowed\cr
#'              \code{mds.obj} \tab result of running \code{\link{cmdscale}} on
#'                  the starting grid (for geographical smoothing) or the data 
#'                  (general distance smoothing)\cr
#'              \code{dim} \tab dimension of the MDS projection.\cr
#'              \code{term} \tab auto-generated names of the variables in the 
#'                  MDS space (of the form "mds-i" where i indexes the data)\cr
#'              \code{data} \tab the data projected into MDS space\cr
#'              \tab Plus those extra elements as documented in 
#'                  \code{\link{Duchon.spline}}}
#'
#' @section Details: The constructor is not normally called directly, but is 
#' rather used internally by \code{\link{gam}}. To use for basis setup it is 
#' recommended to use \code{\link{smooth.construct2}}.
#'
#' The specification \code{object} will contain information on how to handle 
#' the boundary and the dimension of the MDS projection. In particular, the 
#' boundary is specified as a \code{list} or \code{data.frame} with named 
#' \code{x} and \code{y} components. The MDS dimension projection is specified 
#' as an integer. See the example below for how these are specified to the user.
#'
#' Note that for geographical smoothing the coordinates must be named \code{x} 
#' and \code{y}.
#'
#' MDS dimension selection may be performed by finding the projection with the 
#' lowest GCV score. BEWARE: the GCV score is not necessarily monotonic in the 
#' number of dimensions. Automated dimension selection will appear in a later 
#' version of the package.
#'
#' @references
#' Duchon, J. (1977) Splines minimizing rotation-invariant semi-norms in Solobev spaces. in W. Shemp and K. Zeller (eds) Construction theory of functions of several variables, 85-100, Springer, Berlin.
#' Miller, DL and Wood, SN. (2014) Finite area smoothing with generalized distance splines. Environmental and Ecological Statistics 4, 715-731
#'
#' @author David L. Miller 
#'
#' @examples
#' ### Not run
#' # load some data
#' data(wt2)
#'
#' # create the sample
#' samp.ind<-sample(1:length(wt2$data$x),250)
#' wt2.samp<- list(x=wt2$data$x[samp.ind],
#'                 y=wt2$data$y[samp.ind],
#'                 z=wt2$data$z[samp.ind]+rnorm(250)*0.9)
#'
#' # fit the model
#' b<-gam(z~s(x,y,bs="msg",k=200,xt=list(bnd=wt2$bnd,mds.dim=5)),data=wt2.samp)
#'
#' # predict
#' pred.grid<-data.frame(expand.grid(x=seq(min(wt2$data$x),max(wt2$data$x),len=50),
#'                                   y=seq(min(wt2$data$y),max(wt2$data$y),len=50)))
#' x <- pred.grid$x; y <- pred.grid$y
#' ind<-inSide(wt2$bnd,x,y)
#' pred.grid<-pred.grid[ind,]
#' pred.mat<-matrix(NA,50,50)
#' pred.mat[ind]<-predict(b,pred.grid)
#'
#' # plot what happened
#' par(mfrow=c(2,2))
#' true.mat<-wt2$true.matrix
#'
#' image(true.mat,main="Truth",xlab="x",ylab="y",col=heat.colors(1000))
#' contour(true.mat,add=TRUE,labcex=0.3,lwd=0.5)
#' image(pred.mat,main="Fitted model",xlab="x",ylab="y",col=heat.colors(1000))
#' contour(pred.mat,add=TRUE,labcex=0.3,lwd=0.5)
#'
#' plot(b$smooth[[1]]$msg$grid,pch=19,cex=0.3,
#'      main="Starting grid",xlab="x",ylab="y")
#'
#' plot(b$smooth[[1]]$msg$mds.obj$points,pch=19,cex=0.3,
#'      main="Starting grid\n(MDS space)",xlab="x",ylab="y")
#'
#' @keywords models smoothing spatial
smooth.construct.msg.smooth.spec<-function(object,data,knots){

   # this is the msg smooth.spec file
   # this does most of the work

   # for now TODO:
   #  just get spatial smoothing working
   #     do the clever stuff with previous objects as in gam.mds.R
   # THEN:
   #  if no bnd supplied do the general thing
   #  also do the s(.) thing
   #  select grid resolution sensibly

   # extract the boundary
   bnd<-object$xt$bnd

   # for the WAD case 
   if(!is.null(bnd)){
      if(length(names(data))!=2){
         stop("msg can only be used with 2D smooths!\n")
      }
      if(any(names(data)!=c("x","y"))){
         stop("Names of data are not \"x\" and \"y\"\n")
      }
   }

   if(is.null(object$xt$mds.dim)){
      stop("No MDS projection dimension supplied!\n")
   }

   # extract the MDS dimension
   mds.dim<-object$xt$mds.dim

   grid.res<-object$xt$mds.grid.res
   if(is.null(grid.res)){
      # pick a grid size...

      # just pick something for now
      #grid.res<-c(40,40)
      grid.res<-120
   }

   # if there was an old object in the extra stuff, use it
   #old.obj<-object$xt$old.obj
   old.obj<-NULL

   if(!is.null(old.obj)){
      # object to store all the results for later
      new.obj<-old.obj

      # pull out the grid D matrix
      D.grid<-old.obj$D
      my.grid<-old.obj$grid

      # also the pred and sample D matrices if
      # they are there
      if(!is.null(old.obj$D.samp)){
         D.samp<-old.obj$D.samp
      }else{
         D.samp<-NULL
      }
      if(!is.null(old.obj$D.pred)){
         D.pred<-old.obj$D.pred
      }else{
         D.pred<-NULL
      }

      if(!is.null(old.obj$m)){
         m<-old.obj$m
      }
      if(!is.null(old.obj$bs)){
         bs<-old.obj$bs
      }
      if(!is.null(old.obj$k)){
         k<-old.obj$k
      }

      if(!is.null(old.obj$mds.dim)){
         mds.dim<-old.obj$mds.dim
      }

   }else{
      # object to store all the results for later
      new.obj<-list()

      # in the WAD case, need to create a grid but not in the 
      # GDS case...
      if(!is.null(bnd)){
         # create the grid
         grid.obj<-create_refgrid(bnd,grid.res)
         D.grid<-create_distance_matrix(grid.obj$x,grid.obj$y,bnd,faster=0)
         grid.obj<-list(D=D.grid,grid=list(x=grid.obj$x,y=grid.obj$y))

         D.grid<-grid.obj$D
         my.grid<-grid.obj$grid
         # store!
         new.obj$D<-D.grid
         new.obj$grid<-my.grid

         object$msg<-new.obj
      }
   }

   # for the WAD case, insert the points into the grid
   if(!is.null(bnd)){
      # now we have the grid object, insert the data into that
      # and store it as the data
      grid.mds<-cmdscale(D.grid,eig=TRUE,k=mds.dim,x.ret=TRUE)
      object$msg$mds.obj<-grid.mds
      mds.data<-as.data.frame(insert.mds.generic(grid.mds,data,my.grid,bnd=bnd))

      object$msg$metric<-"WAD"

   # in the GDS case
   }else{

      new.obj<-list()

      # choose the distance metric
      if(is.null(object$xt$metric)){
         object$xt$metric<-"euclidean"
      }

      # the data in matrix form
      #data.names<-names(data)
      mdata<-as.matrix(as.data.frame(data))

      ## separate the predictors from response
      #ind<-rep(FALSE,ncol(mdata))
      #ind[match(object$term,data.names)]<-TRUE

      ## save the response
      #response.var<-mdata[,!ind]
      #mdata<-mdata[,ind]

      new.obj$metric<-object$xt$metric
      dist.metric<-object$xt$metric

      if(dist.metric=="mahalanobis"){
         # find the inverse covariance matrix
         cov.mat<-try(solve(cov(mdata)))

         # if something bad happened use the pseudo-inverse
         if(class(cov.mat)=="try-error"){
            cov.mat<-ginv(cov(mdata))
         }

         D<-apply(mdata,1,mahalanobis,x=mdata,cov=cov.mat,inverted=TRUE)
      }else{
      # Euclidean or anything else allowed by dist()
         D<-as.matrix(dist(mdata,method=dist.metric,diag=TRUE,upper=TRUE))
      }
      new.obj$D<-D

      mds.obj<-cmdscale(D,eig=TRUE,k=mds.dim,x.ret=TRUE)
      new.obj$mds.obj<-mds.obj
      mds.data<-as.data.frame(mds.obj$points)

      object$msg<-new.obj
   }

   # make some variable names up
   mds.names<-paste("mds-",1:dim(mds.data)[2],sep="")
   # remove any already in the data
   names(mds.data)<-mds.names

   # make sure there are the right stuff is in the object before passing
   # to Duchon, but save beforehand!
   save.dim<-object$dim
   save.term<-object$term
   save.data<-data

   object$term<-mds.names
   object$dim<-mds.dim
   data<-mds.data

   object$msg$term<-mds.names
   object$msg$dim<-mds.dim
   object$msg$data<-mds.data

   # if knots were supplied, they're going to be ignored, warn about that!
   if(length(knots)!=0){
      warning("Knots were supplied but will be ignored!\n")
      knots<-list()
   }

   # set the penalty order
   object$p.order<-c(2,mds.dim/2-1)

   # make the duchon splines object as usual
   object<-smooth.construct.ds.smooth.spec(object,data,knots)

   if(!is.null(object$xt$extra.penalty)){
      object<-extra.penalty(object)
   }

   # recover the stuff we want in the object
   object$term<-save.term
   object$dim<-save.dim
   data<-save.data

   class(object)<-"msg.smooth"
   object
}


#' Multidimensional Scaling for Generalized additive models - msg
#'
#' Provides smoothing methods for multidimensional scaling-based projections 
#' of the data.
#'
#' Usage is split into two cases: (1) For geographical smooths of two 
#' coordinates, within-area distances with respect to the boundary are used to 
#' create a distance matrix that is then projected into as many dimensions as 
#' required. (2) General distance smoothing where distances are calculated 
#' between observations based on all covariates supplied using Euclidean or 
#' Mahalanobis distances.
#'
#' In both cases smoothing is performed using Duchon splines (see 
#' \code{\link{Duchon.spline}} for more information).
#'
#' @aliases Predict.matrix.msg.smooth
#' @method Predict.matrix msg.smooth
#' @export
#' @import mgcv
#' @useDynLib msg wood_path
#'
#' @param object a smooth specification object, usually generated by a term 
#'    \code{s(...,bs="ds",...)}. Note that \code{xt} object is needed, see 
#'    Details.
#' @param data a list containing just the data (including any \code{by} 
#'    variable) required by this term, with names corresponding to 
#'    \code{object$term} (and \code{object$by}). The \code{by} variable is the 
#'    last element.
Predict.matrix.msg.smooth<-function(object,data){

   save.dim<-object$dim
   save.term<-object$term

   object$term<-object$msg$term
   object$dim<-object$msg$dim

   #### MAGIC HAPPENS HERE!!!!

   if(!is.null(object$xt$bnd)){
      bnd<-object$xt$bnd
   }else{
      bnd<-NULL
   }

   mds.obj<-object$msg$mds.obj
   my.grid<-object$msg$grid
   mds.data<-as.data.frame(insert.mds(data,my.grid,mds.obj,bnd))


   # make some variable names up
   mds.names<-paste("mds-",1:dim(mds.data)[2],sep="")
   # remove any already in the data
   names(mds.data)<-mds.names

   Predict.matrix.duchon.spline(object,mds.data)
}
dill/msg documentation built on May 15, 2019, 8:30 a.m.