rsf.fit <- function(data,UD,R=list(),formula=NULL,integrated=TRUE,level.UD=0.99,reference="auto",debias=TRUE,smooth=TRUE,standardize=TRUE,integrator="MonteCarlo",error=0.01,max.mem="1 Gb",interpolate=TRUE,trace=TRUE,...)
{
STATIONARY <- TRUE
CTMM <- UD@CTMM
isotropic <- CTMM$isotropic
axes <- CTMM$axes
GEO <- c('longitude','latitude')
max.mem <- ustring(max.mem)
# control list for optimizer
control <- list()
# pass trace argument (demoted)
if(trace) { control$trace <- trace-1 }
CALC <- integrated || length(R) # anything to calculate?
if(!integrated) # prepare available region polygon
{
if(class(level.UD)[1]=="numeric")
{
# level.UD <- as.sf(UD,level.UD=level.UD)
level.UD <- SpatialPolygonsDataFrame.UD(UD,level.UD=level.UD)
# subset to point estimate contour
level.UD <- level.UD@polygons[[2]]
}
else
{
# project polygon to the data's projection (avoiding sp::spTransform)
if(!any(grepl('sf',class(level.UD)))) { level.UD <- sf::st_as_sf(level.UD) }
level.UD <- sf::st_transform(level.UD,sf::st_crs(projection(data)))
# sf sampling below is too slow, so now have to convert back to sp!
level.UD <- sf::as_Spatial(level.UD)@polygons[[1]]
}
# AREA <- as.numeric(sf::st_area(level.UD))
# level.UD should now be class Polygons in projection of data
AREA <- level.UD@area
}
if(!CTMM$isotropic)
{
if("ISO" %in% names(CTMM))
{ ISO <- CTMM$ISO }
else
{
ISO <- simplify.ctmm(CTMM,'minor')
if(trace) { message("Fitting isotropic autocorrelation model.") }
ISO <- ctmm.fit(data,ISO,trace=max(trace-1,0))
}
CTMM <- ISO
UD@CTMM <- ISO
UD$DOF.area <- DOF.area(ISO)
}
# smooth the data, but don't drop
if(smooth && any(CTMM$error>0))
{ data[,c(axes,GEO)] <- predict(data,CTMM=CTMM,t=data$t,complete=TRUE)[,c(axes,GEO)] }
n <- nrow(data)
# for simulation - uncorrelated, error-less
IID <- CTMM
IID$tau <- NULL
IID$omega <- FALSE
IID$error <- FALSE
# extract weights
W <- mean(UD$DOF.area) # effective sample size (+1 for the non-detrended mean is already accounted for)
w <- UD$weights * W
R <- listify(R)
TEST <- sapply(R,raster::inMemory)
TEST <- any(!TEST)
if(TEST) { message('Some rasters are not loaded in RAM and may be slow to process. See help("readAll",package="raster").') }
if(length(R) && is.null(names(R)))
{
warning("R is not a named list of rasters.")
names(R) <- paste("R",1:length(R))
}
# expand any categorical variables into indicator variables
if(length(R))
{
if(!is.null(formula))
{
RVARS <- names(R) # this will be updated
VARS <- all.vars(formula) # this will be updated
DVARS <- VARS[ VARS %nin% RVARS ]
}
else
{ DVARS <- NULL }
STUFF <- expand.factors(R=R,formula=formula,reference=reference,data=data,DVARS=DVARS)
data <- STUFF$data
R <- STUFF$R
formula <- STUFF$formula
}
# how to sample rasters
interpolate <- rep(interpolate,length(R))
interpolate <- ifelse(interpolate,"bilinear","simple")
names(interpolate) <- names(R)
## The basic setup is:
# RVARS - raster variables
# DVARS - data annotation variables - overridden by RVARS
# TERMS - all predictors - some may be RVARS
# CVARS - composite variables in TERMS that are single RVARS || DVARS
# OFFSET - offset T/F variables
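# e.g. (illustrative) with rasters forest, slope, lake and formula ~ forest*slope + offset(lake):
# RVARS = {forest,slope,lake}; CVARS = {forest:slope}; OFFSET = {lake}; DVARS = {}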
RVARS <- names(R)
if(is.null(formula))
{
TERMS <- RVARS
DVARS <- NULL
CVARS <- NULL
OFFSET <- NULL
if(length(R))
{
formula <- paste(TERMS,collapse="+")
formula <- paste("~",formula)
formula <- formula(formula)
}
else
{ formula <- formula(~0) }
}
else
{
VARS <- all.vars(formula)
DVARS <- VARS[ VARS %nin% RVARS ]
for(D in DVARS) { data[[D]] <- as.numeric(data[[D]]) } # model.matrix will rename otherwise
# this doesn't work with poly(), etc.
# TERMS <- attr(stats::terms(formula),"term.labels")
# dummy data for model parameter names
DATA <- data.frame(data)
DATA[RVARS] <- as.list(rep(0,length(RVARS)))
# DATA[['rr']] <- 0 # why was this here?
TERMS <- colnames(stats::model.matrix(formula,data=DATA))
TERMS <- TERMS[TERMS!="(Intercept)"]
CVARS <- TERMS[ TERMS %nin% VARS ] # terms that are not simple variables
OFFSET <- get.offset(formula)
if(attr(stats::terms(formula),"response")[1]>0) { stop("Response variable not yet supported.") }
}
environment(formula) <- globalenv()
if(length(DVARS)+length(CVARS)>0 && standardize)
{
message("Users are responsible for standardizing rasters when interactions are supplied.")
standardize <- FALSE
}
else
{
RSCALE <- rep(1,length(R))
RSHIFT <- rep(0,length(R))
names(RSCALE) <- names(RSHIFT) <- names(R)
}
VARS <- c(RVARS,CVARS) # vars that need to be recorded per location
if(length(DVARS)) { STATIONARY <- FALSE }
INTERCEPT <- DVARS %in% TERMS
if(any(INTERCEPT))
{
TERMS <- TERMS[ TERMS %nin% DVARS ]
warning(sum(INTERCEPT),"+",attr(stats::terms(formula),"intercept")," intercept terms ignored.")
}
# evaluate term on data.frame
# term can be a vector of multiple terms
# offset does not work with model.matrix code
evaluate <- function(term,envir,offset=FALSE)
{
if(length(dim(envir))==2) # (term,DATA[time,var])
{
NAS <- which(apply(envir,1,function(r){any(is.na(r[VARS]))}))
envir[NAS,VARS] <- 0
envir <- data.frame(envir) # matrices can't be environments, but data.frames can't matrix multiply...
if(!STATIONARY) { envir <- cbind(envir,data) }
if(offset)
{
RET <- stats::model.frame(formula,envir)
RET <- stats::model.offset(RET)
RET[NAS] <- 0
}
else
{
# NAs will be dropped silently here
RET <- stats::model.matrix(formula,envir)[,term,drop=FALSE]
RET[NAS,] <- NA
}
}
else # [space,time,var] loop over runs
{
if(STATIONARY) # time doesn't matter
{
DIM <- dim(envir)
dim(envir) <- c(DIM[1]*DIM[2],DIM[3])
colnames(envir) <- VARS
RET <- evaluate(term,envir,offset=offset)
dim(RET) <- c(DIM[1:2],length(term))
}
else # !STATIONARY
{
RET <- vapply(1:dim(envir)[1],function(i)
{
ENVIR <- envir[i,,]; # [time,var]
dim(ENVIR) <- dim(envir)[-1];
colnames(ENVIR) <- VARS
ENVIR <- cbind(ENVIR,data); # [time,vars]
evaluate(term,ENVIR,offset=offset)
},array(0,c(nrow(data),length(term)))) # [time,terms,space]
dim(RET) <- c(nrow(data),length(term),dim(envir)[1])
RET <- aperm(RET,c(3,1,2)) # [space,time,terms]
}
}
return(RET)
}
## prepare raster data ##
# will write over the assigned values
PROJ <- projection(data)
X <- list(UD$r$x)
Y <- list(UD$r$y)
Z <- Z.ind <- list()
dX <- UD$dr['x']
dY <- UD$dr['y']
dZ <- Xo <- Yo <- Zo <- rep(NA,length(R))
for(i in 1 %:% length(R))
{
PROJ[i] <- raster::projection(R[[i]])
RANGE <- raster::cellStats(R[[i]],'range',na.rm=TRUE)
# don't standardize logical variables
if(standardize && abs(RANGE[[1]]-0)>.Machine$double.eps && abs(RANGE[[2]]-1)>.Machine$double.eps)
{
# # raster::median is not defined correctly
# RSHIFT[i] <- raster::median(R[[i]],na.rm=TRUE)
# RSCALE[i] <- raster::median(abs(R[[i]]),na.rm=TRUE)
RSHIFT[i] <- raster::cellStats(R[[i]],'mean',na.rm=TRUE)
R[[i]] <- R[[i]] - RSHIFT[i]
RSCALE[i] <- raster::cellStats(R[[i]],'sd',na.rm=TRUE)
R[[i]] <- R[[i]]/RSCALE[i]
}
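# e.g. an elevation raster with mean 300 and SD 50 becomes (R-300)/50 here;
# the fitted beta is converted back to the raster's original units at the end
# (see the "unstandardize" block, where beta is divided by RSCALE)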
DIM <- dim(R[[i]])
if(integrator!="MonteCarlo" || DIM[3]>1) # RasterLayer (static)
{
X[[i]] <- raster::xFromCol(R[[i]],1:DIM[2])
Y[[i]] <- raster::yFromRow(R[[i]],1:DIM[1])
# resolution
dX[i] <- abs( stats::median( diff(X[[i]]) ) )
dY[i] <- abs( stats::median( diff(Y[[i]]) ) )
# origin
Xo[i] <- X[[i]][1] %% dX[i]
Yo[i] <- Y[[i]][1] %% dY[i]
}
# eventually the 3D code needs to use extract + 1D interpolation
if(DIM[3]>1) # RasterStack or RasterBrick
{
Z[[i]] <- raster::getZ(R[[i]])
dZ[i] <- stats::median(diff(Z[[i]]))
R[[i]] <- raster::as.array(R[[i]])[,,]
STATIONARY <- FALSE
R[[i]] <- aperm(R[[i]],c(2,1,3)) # [x,y,z]
if(class(Z[[i]])[1]=="Date") { Z[[i]] <- as.POSIXct(Z[[i]]) } # numeric is in days
if(class(Z[[i]])[1]=="POSIXct") { Z[[i]] <- as.numeric(Z[[i]]) }
# index for data times in raster stack
Z.ind[[i]] <- (data$t - Z[[i]][1])/dZ[i] + 1
} # end XYZ
} # end R loop
if(length(R)) { names(X) <- names(Y) <- names(dX) <- names(dY) <- names(Xo) <- names(Yo) <- RVARS[1:length(X)] }
if(length(Z)) { names(Z) <- names(dZ) <- names(Z.ind) <- RVARS[1:length(Z)] }
# check for compatible raster grids
if(integrator!="MonteCarlo" && length(R))
{
CONSISTENT <- TRUE
if(length(R)>1)
{
TEST <- c(diff(dX)/mid(dX),diff(dY)/mid(dY),diff(Xo)/mid(dX),diff(Yo)/mid(dY))
# TEST <- nant(TEST,0)
if(max(abs(TEST))>.Machine$double.eps) # check for consistent resolution, origin
{ CONSISTENT <- FALSE }
}
if(CONSISTENT) # extract pixel areas for integration
{
dA <- raster::area(R[[1]])
# raster::area() returns km^2 for unprojected (long-lat) rasters, but m^2 for projected ones
if(grepl("longlat",projection(R[[1]]))) { dA <- dA * 1000^2 }
TEST <- raster::cellStats(dA,'min') / sqrt(det.covm(CTMM$sigma))
if(TEST>0.1) # HR^2
{ warning("Raster resolution is ",round(TEST,digits=1),"\u00D7 coarse compared to home-range size.") }
} # TODO generalize this for projected covariates
else # choose minimum resolution for integration grid
{
dx <- min(UD$dr['x'],dX)
dy <- min(UD$dr['y'],dY)
}
}
else if(!length(R))
{ CONSISTENT <- TRUE }
# setup integrated spatial covariates
if(!integrated)
{
SVARS <- NULL
NSPA <- 0
}
else
{
mu <- c(CTMM$mu)
std <- sqrt(CTMM$sigma@par[c('major','minor')])
names(std) <- names(mu) <- axes
SVARS <- axes
NSPA <- 2
SCALE <- std
if(isotropic)
{
SVARS <- c(SVARS,'rr')
NSPA <- NSPA + 1
SCALE <- c(SCALE,prod(std))
}
else
{
SVARS <- c(SVARS,'xx','yy','xy')
NSPA <- NSPA + 3
SCALE <- c(SCALE,std[1]^2,std[2]^2,prod(std))
theta <- CTMM$sigma@par['angle']
ROT <- rotate(theta) # rotate theta from left and -theta from right
}
names(SCALE) <- SVARS
}
VARS <- c(VARS,SVARS)
TERMS <- c(TERMS,SVARS)
# data and simulation sampled covariates
DATA <- matrix(0,nrow(data),length(VARS))
colnames(DATA) <- VARS
# natural scales for differentiation/optimization
parscale <- rep(1,length(TERMS))
lower <- rep(-Inf,length(TERMS))
upper <- rep(Inf,length(TERMS))
names(parscale) <- names(lower) <- names(upper) <- TERMS
if(integrated)
{
LOV <- ifelse(integrator=="MonteCarlo",-1,0)
if(isotropic)
{ lower['rr'] <- LOV }
else
{ lower[c('xx','yy')] <- LOV }
}
# might not use all rasters
if(standardize) { RSCALE <- RSCALE[names(RSCALE) %in% TERMS] }
# minimal assignment
beta.null <- numeric(length(TERMS))
names(beta.null) <- TERMS
if(isotropic)
{ beta.null['rr'] <- 1 }
else
{ beta.null[c('xx','yy')] <- c(1,1) }
# initial estimates
beta <- beta.null
if(length(CTMM$beta))
{
COPY <- TERMS[TERMS %in% names(CTMM$beta)]
beta[COPY] <- CTMM$beta[COPY]
if(standardize) { beta[names(RSCALE)] <- beta[names(RSCALE)] * RSCALE }
}
# store raster covariates
for(i in 1%:%length(RVARS))
{
r <- RVARS[i]
# store data-sampled covariates
if(i==1 || PROJ[i]!=PROJ[i-1])
{
xy <- get.telemetry(data,GEO)
xy <- project(xy,to=PROJ[i])
}
DIM <- dim(R[[i]])
if(DIM[3]==1) # 2D
{
DATA[,r] <- raster::extract(R[[i]],xy,method=interpolate[i])
# DATA[,RI] <- bint(R[[i]],t(xy))
}
else # 3D space-time stack
{
# convert to indices
XY <- xy
XY[,1] <- (XY[,1] - X[[i]][1])/dX[i] + 1
XY[,2] <- (XY[,2] - Y[[i]][1])/dY[i] + 1
XY <- cbind(XY,Z.ind[[i]])
DATA[,r] <- tint(R[[i]],t(XY))
}
}
# calculate and store composite covariates
DATA[,CVARS] <- evaluate(CVARS,DATA)
### store sampled integrated terms
if(integrated)
{
# de-trend mean
DATA[,'x'] <- data$x - mu['x']
DATA[,'y'] <- data$y - mu['y']
# rotate -theta to major-minor axes
if(!isotropic) { DATA[,axes] <- DATA[,axes] %*% ROT }
# standardize
DATA[,'x'] <- DATA[,'x']/std['x']
DATA[,'y'] <- DATA[,'y']/std['y']
# variance/covariance terms (relative to pilot estimate)
if(isotropic) # beta is correction to standardized 1/sigma
{ DATA[,'rr'] <- -( DATA[,'x']^2 + DATA[,'y']^2 )/2 }
else # beta is correction to standardized solve(sigma)
{
DATA[,'xx'] <- -DATA[,'x']^2 /2
DATA[,'yy'] <- -DATA[,'y']^2 /2
DATA[,'xy'] <- -DATA[,'x']*DATA[,'y']
}
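# in these standardized (and rotated) coordinates the pilot Gaussian log-density
# contributes -(x^2+y^2)/2 in the isotropic case, so beta.null['rr']=1 below
# exactly recovers the pilot home-range model, and the fitted betas act as
# corrections to the standardized precision (inverse covariance) matrix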
} # end if(integrated)
WDAVE <- c(w %*% DATA[,TERMS])
DAVE <- WDAVE / W
DCOV <- vapply(1:length(w),function(i){w[i]*outer(DATA[i,TERMS]-DAVE)},outer(DAVE)) # [TERMS,TERMS,TIMES]
DCOV <- apply(DCOV,1:2,sum) / sum(w)
dimnames(DCOV) <- list(TERMS,TERMS)
names(DAVE) <- names(WDAVE) <- TERMS
NAS <- is.na(WDAVE)
if(any(NAS))
{
VARS <- paste0(VARS[NAS],collapse=",")
IND <- rowSums(is.na(DATA))
IND <- which(IND>0)
HEAD <- head(IND)
if(length(HEAD)<length(IND))
{ HEAD <- paste0(c(HEAD,"..."),collapse=",") }
else
{ HEAD <- paste0(HEAD,collapse=",") }
STOP <- paste0("NA values in sampled variables ",VARS," at points ",HEAD)
stop(STOP)
}
SATA <- array(0,c(0,dim(DATA))) # simulated data [track,time,vars]
dimnames(SATA)[[3]] <- VARS
nloglike <- function(beta,zero=0,verbose=FALSE)
{
SAMP <- SATA[,,TERMS,drop=FALSE] %.% beta # [track,time]
# SHIFT <- stats::median(SAMP,na.rm=TRUE)
# SHIFT <- nant(SHIFT,0)
SHIFT <- max(SAMP,na.rm=TRUE)
if(abs(SHIFT)==Inf) { SHIFT <- 0 }
SAMP <- SAMP - SHIFT
SAMP <- exp(SAMP) # + exp(SHIFT)
SAMP[] <- nant(SAMP,0) # treat NA as inaccessible region
if(length(OFFSET))
{
# ONE <- rep(TRUE,nrow(SATA))
ONE <- evaluate(OFFSET,SATA,offset=TRUE)
dim(ONE) <- dim(ONE)[1:2]
SAMP <- ONE * SAMP
}
nll <- -c(WDAVE %*% beta)
if(STATIONARY)
{
SAMP <- c(SAMP)
if(integrator=="MonteCarlo")
{ MEAN <- mean(SAMP) }
else # Newton integration
{ MEAN <- sum(SAMP*dA) }
log.MEAN <- log(MEAN) + SHIFT
if(integrator=="MonteCarlo")
{
if(debias || verbose) # numerical error variance (per W^2)
{
VAR.log <- stats::var(SAMP)/length(SAMP) /MEAN^2
# MEAN-log != log-MEAN bias for small N (per W)
if(debias) { log.MEAN <- log.MEAN + W/2*VAR.log } # +1/2 from Taylor series
}
}
else
{ VAR.log <- 0 }
nll <- W *(nll/W + log.MEAN - zero/W)
if(verbose) { VAR.log <- W^2 * VAR.log }
}
else # !STATIONARY
{
MEAN <- apply(SAMP,2,mean) # [time]
log.MEAN <- log(MEAN) + SHIFT
if(debias || verbose) # numerical error variance (per w^2)
{ VAR.log <- apply(SAMP,2,stats::var)/dim(SAMP)[1] /MEAN^2 }
if(debias) # MEAN-log != log-MEAN bias for small N (per w)
{ log.MEAN <- log.MEAN + w/2*VAR.log } # +1/2 from Taylor series
nll <- sum(nll/n + w*log.MEAN - zero/n) # can't divide by w
if(verbose) { VAR.log <- sum(w^2 * VAR.log) }
}
nll <- c(nll)
nll <- nant(nll,Inf)
if(verbose)
{
RET <- list(loglike=-nll,Z=log.MEAN,VAR.loglike=VAR.log)
return(RET)
}
else
{ return(nll) }
}
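# note on the debias terms above: for the sample mean m of positive draws,
# a second-order Taylor expansion gives E[log(m)] ~ log(E[m]) - Var[m]/(2*E[m]^2),
# so adding half the relative numerical-error variance (VAR.log) to log.MEAN
# removes the leading-order Monte-Carlo bias in the log-likelihood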
N <- ifelse(STATIONARY,1,8) # starting value, will increase iteratively
N.OLD <- 0
loglike <- -Inf
STDloglike <- Inf
beta.init <- beta
if(CALC)
{ ERROR.BIG <- TRUE }
else # nothing to calculate below
{
ERROR.BIG <- FALSE
loglike <- 0 # log(1/AREA) later
VAR.loglike <- 0
}
while(ERROR.BIG)
{
# double the random sample
if(integrator=="MonteCarlo")
{
if(integrated)
{
SIM <- simulate(IID,t=1:(N*nrow(data)),complete=TRUE)
SIM <- data.frame(SIM)
}
else
{
## sf::st_sample is far too slow here
# SIM <- sf::st_sample(level.UD,size=N*nrow(data),type="random",exact=TRUE)
# SIM <- sf::st_transform(SIM,sf::st_crs(DATUM))
# SIM <- sf::st_coordinates(SIM)
SIM <- sp::spsample(level.UD,n=N*nrow(data),type="random")
SIM <- SIM@coords
SIM <- project(SIM,from=projection(data),to=DATUM)
colnames(SIM) <- GEO
}
SATA <- fbind(SATA,array(0,c(N,nrow(data),length(VARS))))
}
else # deterministic quadrature grid
{
if(CONSISTENT) #
{
if(integrated) # sample as far out as necessary
{
z <- qmvnorm(1-error,dim=2)
z2 <- z^2 * chisq.ci(1,DOF=DOF.area(CTMM),level=1-error)[3]
# could do this slightly better with math
AVAIL <- ellipsograph(mu=CTMM$mu,sigma=z2*methods::getDataPart(CTMM$sigma),PLOT=FALSE)
}
else # only sample within available area
{ AVAIL <- level.UD@Polygons[[1]]@coords }
colnames(AVAIL) <- c('x','y')
# switch to raster projection
AVAIL <- project(AVAIL,from=projection(CTMM),to=PROJ[1])
# raster grid locations
XG <- X[[1]]
XG <- XG[XG>=min(AVAIL[,'x'])]
XG <- XG[XG<=max(AVAIL[,'x'])]
YG <- Y[[1]]
YG <- YG[YG>=min(AVAIL[,'y'])]
YG <- YG[YG<=max(AVAIL[,'y'])]
DIM <- c(length(XG),length(YG))
xy <- array(0,c(DIM,2))
xy[,,1] <- XG
xy <- aperm(xy,c(2,1,3))
xy[,,2] <- YG
xy <- aperm(xy,c(2,1,3))
dim(xy) <- c(prod(DIM),2)
colnames(xy) <- c('x','y')
SIM <- project(xy,from=PROJ[1],to=projection(CTMM))
# subset of working points in area
if(integrated)
{
SUB <- t(SIM) - c(CTMM$mu) # [2,n]
SUB <- mpow.covm(CTMM$sigma,-1/2) %*% SUB # [2,n]
SUB <- SUB['x',]^2 + SUB['y',]^2 # [n]
SUB <- (SUB <= z2)
}
else # arbitrary contour
{
SUB <- sp::point.in.polygon(xy[,'x'],xy[,'y'],AVAIL[,'x'],AVAIL[,'y'])
SUB <- as.logical(SUB)
}
SIM <- SIM[SUB,] # in CTMM projection
xy <- xy[SUB,] # in raster projection
if(length(R))
{ dA <- raster::extract(dA,xy,method="bilinear") }
else
{ dA <- array(prod(UD$dr),nrow(xy)) }
N <- nrow(xy)
}
else # elliptical grid
{ stop('Inconsistent grids not yet supported when method!="MonteCarlo".') }
if(STATIONARY)
{ SATA <- array(0,c(N,1,length(VARS))) }
else
{ SATA <- array(0,c(N,nrow(data),length(VARS))) }
} # end quadrature points
dimnames(SATA)[[3]] <- VARS
SUB <- N.OLD+1:N # new indices
# store simulation-sampled raster covariates
for(i in 1%:%length(RVARS))
{
r <- RVARS[i]
# store data-sampled covariates
if(integrator=="MonteCarlo")
{
if(i==1 || PROJ[i]!=PROJ[i-1])
{
if(integrated)
{ xy <- get.telemetry(SIM,GEO) }
else
{ xy <- SIM }
xy <- project(xy,to=PROJ[i])
}
}
else if(!CONSISTENT) # CONSISTENT case handled above
{ stop("Inconsistent raster grids are not yet supported here.") }
# TODO: the !CONSISTENT quadrature case is unfinished
DIM <- dim(R[[i]])
if(DIM[3]==1)
{ SATA[SUB,,r] <- raster::extract(R[[i]],xy,method=interpolate[i]) }
else
{
XY <- xy
XY[,'x'] <- (XY[,'x'] - X[[i]][1])/dX[i] + 1
XY[,'y'] <- (XY[,'y'] - Y[[i]][1])/dY[i] + 1
XY <- cbind(XY,rep(Z.ind[[i]],N)) # convert times to stack indices, as in the data-sampling loop
SATA[SUB,,r] <- tint(R[[i]],t(XY))
}
}
SATA[SUB,,CVARS] <- evaluate(CVARS,SATA[SUB,,,drop=FALSE])
if(integrated)
{
# de-trend
SATA[SUB,,'x'] <- SIM[,'x'] - mu['x']
SATA[SUB,,'y'] <- SIM[,'y'] - mu['y']
# rotate -theta
if(!isotropic) { SATA[SUB,,axes] <- SATA[SUB,,axes] %.% ROT }
# standardize
SATA[SUB,,'x'] <- SATA[SUB,,'x']/std['x']
SATA[SUB,,'y'] <- SATA[SUB,,'y']/std['y']
# variance/covariance terms
if(isotropic)
{ SATA[SUB,,'rr'] <- -( SATA[SUB,,'x']^2 + SATA[SUB,,'y']^2 )/2 }
else
{
SATA[SUB,,'xx'] <- -SATA[SUB,,'x']^2/2
SATA[SUB,,'yy'] <- -SATA[SUB,,'y']^2/2
SATA[SUB,,'xy'] <- -SATA[SUB,,'x']*SATA[SUB,,'y']
}
}
# update SIM count
N <- nrow(SATA)
# adaptive precision - error target is for parameters or sqrt(STDloglike)
if(integrator=="MonteCarlo")
{
if(trace) { message("Maximizing likelihood @ n=",N,"\u00D7",nrow(data),'=',N*nrow(data)) }
precision <- clamp(STDloglike,0,1)/sqrt(2) # expected STD[loglike] this round
precision <- max(precision,error) # no point in exceeding threshold error
precision <- precision^2 # improve over Monte-Carlo error
precision <- log(precision)/log(.Machine$double.eps) # relative to machine error
precision <- clamp(precision,1/8,1/2)
control$precision <- precision
}
else # quadrature is stationary-only for now
{ if(trace) { message("Maximizing likelihood @ n=",N) } }
NLL <- nloglike(beta)
# fix bad prior model
if(any(beta!=beta.null))
{
if(nloglike(beta.null)<=NLL)
{ beta <- beta.null }
}
# fix bad early runs
if(any(beta!=beta.init))
{
if(nloglike(beta.init)<=NLL)
{ beta <- beta.init }
}
RESULT <- optimizer(beta,nloglike,parscale=parscale,lower=lower,upper=upper,control=control)
beta.OLD <- beta
beta <- RESULT$par
loglike.OLD <- loglike
loglike <- -RESULT$value
COV <- RESULT$covariance
if(integrator!="MonteCarlo")
{
VAR.loglike <- 0
break
}
STUFF <- nloglike(beta,zero=-loglike,verbose=TRUE)
VAR.loglike <- STUFF$VAR.loglike
STDloglike <- sqrt(VAR.loglike)
STDbeta <- (beta-beta.OLD)/sqrt(diag(COV))
if(trace)
{
message(" SD[log(\u2113)] = ",STDloglike)
message(" \u0394log(\u2113) = ",loglike-loglike.OLD)
message(" \u0394\u03B2/SD[\u03B2] = ",paste(STDbeta,collapse=' '))
}
# numbers for next iteration
N.OLD <- N
# estimated errors for this round
ERROR.BIG <- c(STDloglike,abs(c(loglike-loglike.OLD,STDbeta))/sqrt(2)) > error
ERROR.BIG <- any(ERROR.BIG)
SIZE <- 2*utils::object.size(SATA)
if(SIZE[1]>max.mem)
{
SIZE <- format(SIZE,units="auto")
warning("Calculation stopped before ",SIZE," allocation.")
break
}
} # while(ERROR.BIG)
# compute hessian
if(CALC)
{
if(trace) { message("Calculating Hessian") }
DIFF <- genD(par=beta,fn=nloglike,zero=-loglike,parscale=parscale,lower=lower,upper=upper,Richardson=2,mc.cores=1)
hess <- DIFF$hessian
grad <- DIFF$gradient
# more robust covariance calculation than straight inverse
COV <- cov.loglike(hess,grad)
dimnames(COV) <- list(TERMS,TERMS)
}
else # no parameters
{
COV <- matrix(0,0,0)
}
if(integrated)
{
if(integrator=="MonteCarlo")
{
# fixed log-normal terms from importance sampling
SUMNORM <- apply(DATA[,axes,drop=FALSE]^2,1,sum) # [time]
SUMNORM <- - 1/2*c(w %*% SUMNORM) # + log(1) terms
loglike <- loglike + SUMNORM
# un-shift precision matrices # mu was zero centered
if(isotropic)
{ beta['rr'] <- 1 + beta['rr'] }
else
{
beta['xx'] <- 1 + beta['xx']
beta['yy'] <- 1 + beta['yy']
beta['xy'] <- 0 + beta['xy']
}
}
# log-likelihood adjustment (unscale)
loglike <- loglike - W*log(prod(std)) # 1/2 from sqrt
# un-scale mu, sigma, COV[...]
beta[SVARS] <- beta[SVARS] / SCALE
COV[SVARS,] <- COV[SVARS,] / SCALE
COV[,SVARS] <- t( t(COV[,SVARS]) / SCALE )
# convert spatial parameters to adjusted mean and covariance estimates
fn <- function(beta)
{
if(isotropic)
{
# beta['rr'] is precision
beta['rr'] <- 1/beta['rr']
# beta[c('x','y')] is correction to precision*mu
beta[axes] <- beta['rr']*beta[axes] + c(CTMM$mu)
}
else
{
sigma <- matrix(beta[c('xx','xy','xy','yy')],2,2)
sigma <- PDsolve(sigma)
beta[c('xx','yy','xy')] <- covm(sigma)@par # (major,minor,angle)
# rotate +theta
beta[axes] <- c(ROT %*% sigma %*% beta[axes]) + c(CTMM$mu)
}
return(beta)
}
# fix parscale for Jacobian
parscale[names(SCALE)] <- 1/SCALE
EX <- names(parscale)
EX <- EX[EX %nin% names(SCALE)]
if(length(EX)) { parscale[EX] <- sqrt(diag(COV)[EX]) }
J <- genD(par=beta,fn=fn,order=1,drop=FALSE,parscale=parscale,lower=lower,upper=upper)$gradient
beta <- fn(beta)
COV <- J %*% COV %*% t(J)
if(!isotropic) { beta['xy'] <- beta['xy'] + CTMM$sigma@par['angle'] }
TERMS <- TERMS[ TERMS %nin% SVARS ]
SVARS <- axes
if(isotropic)
{ SVARS <- c(SVARS,"major") }
else
{ SVARS <- c(SVARS,"major","minor","angle") }
TERMS <- c(TERMS,SVARS)
} # end if(integrated)
else
{
if(integrator=="MonteCarlo")
{ loglike <- loglike - W*log(AREA) }
}
names(beta) <- TERMS
dimnames(COV) <- list(TERMS,TERMS)
# debias code (variance only)
if(integrated && debias)
{
if(!is.null(CTMM$MLE))
{ DEBIAS <- sqrt(det.covm(CTMM$sigma)/det.covm(CTMM$MLE$sigma)) }
else
{ DEBIAS <- max(W,2)/max(W-1,1) }
DSCALE <- rep(1,length(TERMS))
names(DSCALE) <- TERMS
DSCALE['major'] <- DEBIAS
if(!isotropic) { DSCALE['minor'] <- DEBIAS }
beta <- beta * DSCALE
COV <- COV * outer(DSCALE)
# impact on RSF variables missing
}
## convert to ctmm() object format with extra beta slot & features
if(integrated)
{
mu <- beta[axes]
COV.mu <- COV[axes,axes]
beta <- rm.name(beta,axes)
COV <- rm.name(COV,axes)
if(isotropic)
{ PAR <- 'major' }
else
{ PAR <- c('major','minor','angle') }
sigma <- beta[PAR]
sigma <- covm(sigma,isotropic=isotropic,axes=axes)
beta <- rm.name(beta,PAR)
DAVE <- rm.name(DAVE,c(axes,'rr','xx','xy','yy'))
DCOV <- rm.name(DCOV,c(axes,'rr','xx','xy','yy'))
}
else
{
mu <- CTMM$mu
sigma <- covm(Inf,isotropic=isotropic,axes=axes)
COV.mu <- diag(Inf,length(axes))
dimnames(COV.mu) <- list(axes,axes)
}
# unstandardize
if(standardize && length(RSCALE))
{
beta <- beta/RSCALE
RVARS <- names(RSCALE)
COV[RVARS,] <- COV[RVARS,,drop=FALSE] / RSCALE
COV[,RVARS] <- t( t(COV[,RVARS,drop=FALSE]) / RSCALE )
DAVE <- DAVE * RSCALE + RSHIFT[RVARS]
DCOV <- DCOV * outer(RSCALE)
}
# package results and return
# turn this into ctmm object
RSF <- ctmm(axes=axes,mu=mu,COV.mu=COV.mu,beta=beta,sigma=sigma,isotropic=isotropic,COV=COV)
RSF@info <- CTMM@info
RSF$loglike <- loglike
RSF$VAR.loglike <- VAR.loglike
RSF$integrated <- integrated
RSF$integrator <- integrator
RSF$formula <- formula
if(!integrated) { RSF$level.UD <- level.UD }
RSF$used.mean <- DAVE
RSF$used.cov <- DCOV
# copy over autocorrelation information in a reasonable way
if(integrated)
{
RSF$tau <- CTMM$tau
RSF$omega <- CTMM$omega
RSF$error <- CTMM$error
VAR <- diag(CTMM$COV)
COR <- stats::cov2cor(CTMM$COV)
NEW <- names(VAR)[names(VAR) %nin% TERMS]
OLD <- names(VAR)[names(VAR) %in% TERMS]
ALL <- c(colnames(COV),NEW)
DIM <- length(ALL)
BLANK <- matrix(0,DIM,DIM)
dimnames(BLANK) <- list(ALL,ALL)
# copy over RSF stuff
BLANK[rownames(COV),colnames(COV)] <- COV
# adjust old parameters to equal new parameters, while preserving correlations
VAR[OLD] <- diag(COV)[OLD]
STD <- sqrt(VAR)
COV.OLD <- COR * outer(STD)
BLANK[NEW,NEW] <- COV.OLD[NEW,NEW]
BLANK[NEW,OLD] <- COV.OLD[NEW,OLD]
BLANK[OLD,NEW] <- COV.OLD[OLD,NEW]
RSF$COV <- BLANK
RSF$features <- rownames(RSF$COV)
}
else # !integrated
{
if(length(beta)) { dimnames(RSF$COV) <- list(TERMS,TERMS) }
RSF$features <- TERMS
RSF$range <- FALSE
}
# check if some data fell outside of polygon
if(!integrated)
{
# if any data isn't inside polygon then loglike = -Inf
AVAIL <- level.UD@Polygons[[1]]@coords
TEST <- sp::point.in.polygon(data$x,data$y,AVAIL[,1],AVAIL[,2])
TEST <- sum(TEST==0) # number of exterior points
if(TEST>0)
{
warning(TEST," data points outside of available area.")
RSF$loglike <- -Inf # P(outside)=0 -> log(P)=-Inf
}
}
# AIC,AICc,BIC
RSF$method <- CTMM$method # not sure if this is the best idea
RSF <- ic.ctmm(RSF,W) # don't include mu, sigma
RSF$range <- TRUE
return(RSF)
}
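## Example usage (illustrative sketch; the 'buffalo' dataset ships with ctmm,
## but the 'forest.raster' covariate here is hypothetical):
# data(buffalo)
# DATA <- buffalo[[1]]
# GUESS <- ctmm.guess(DATA,interactive=FALSE)
# FIT <- ctmm.select(DATA,GUESS)
# UD <- akde(DATA,FIT,weights=TRUE) # weighted AKDE supplies the UD$weights used above
# RSF <- rsf.fit(DATA,UD,R=list(forest=forest.raster),formula=~forest)
# summary(RSF)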
get.offset <- function(formula,variable=TRUE)
{
OFFSET <- stats::terms(formula)
OFF <- attr(OFFSET,"offset")
if(is.null(OFF)) { return(OFF) }
OFFSET <- rownames(attr(OFFSET,"factors"))[OFF]
if(variable)
{
OFFSET <- sub("offset(","",OFFSET,fixed=TRUE)
OFFSET <- substr(OFFSET,1,nchar(OFFSET)-1)
}
return(OFFSET)
}
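# e.g. get.offset(~ forest + offset(lake)) returns "lake",
# while get.offset(~ forest + offset(lake),variable=FALSE) returns "offset(lake)"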
# expand raster factors into indicator variables
expand.factors <- function(R,formula,reference="auto",data=NULL,DVARS=NULL,fixed=FALSE)
{
FACT <- sapply(R,raster::is.factor)
reference <- array(reference,sum(FACT))
names(reference) <- names(R)[FACT]
for(NAME in names(R))
{
if(raster::is.factor(R[[NAME]]))
{
FACT <- R[[NAME]]
R <- R[names(R)!=NAME]
REF <- reference[NAME]
LEVELS <- raster::levels(FACT)[[1]]$ID # assuming one layer
if(fixed)
{
TERMS <- attr(stats::terms(formula),"term.labels")
# parse the reference level from the term label via fixed string splitting (no regexp)
# HEAD <- paste0(NAME,"[")
HEAD <- paste0(NAME,".")
REF <- TERMS[grepl(HEAD,TERMS,fixed=TRUE)][1] # *NAME[#/ref]*
REF <- strsplit(REF,HEAD,fixed=TRUE)[[1]][2] # #/ref]*
# REF <- strsplit(REF,"/",fixed=TRUE)[[1]][2] # ref]*
REF <- strsplit(REF,"_",fixed=TRUE)[[1]][2] # ref]*
# REF <- strsplit(REF,"]",fixed=TRUE)[[1]][1] # ref
# REF <- which(LEVELS==REF)
}
else if(reference[NAME]=="auto") # fix base layer
{
PROJ <- raster::projection(FACT)
XY <- get.telemetry(data,c("longitude","latitude"))
colnames(XY) <- c('x','y')
XY <- project(XY,to=PROJ)
XY <- raster::extract(FACT,XY) # don't interpolate
UNIQUE <- unique(XY) # may be missing some levels
XY <- tabulate(match(XY,UNIQUE)) # tallies
REF <- UNIQUE[which.max(XY)]
message(NAME," reference category set to ",REF,".")
}
# else the reference category is specified by `reference`
REF <- REF[REF %in% LEVELS]
DIFF <- LEVELS %nin% REF
FACT <- lapply(LEVELS[DIFF],function(l){FACT==l})
# I cannot figure out how to use deratify...
# FACT <- raster::deratify(FACT) # creates a single layer for each level?
LEVELS <- paste0(NAME,".",LEVELS,"_",REF)[DIFF]
names(FACT) <- LEVELS
reference[NAME] <- REF
R <- c(R,FACT)
rm(FACT)
# expand terms
if(!fixed && !is.null(formula))
{
formula <- as.character(formula)[2]
formula <- sapply(LEVELS,function(l){gsub(NAME,l,formula,fixed=TRUE)})
formula <- paste(formula,collapse="+")
formula <- paste("~",formula)
formula <- eval(parse(text=formula))
formula <- simplify.formula(formula)
}
} # end if factor
} # end raster expansion
for(NAME in DVARS)
{
if(is.factor(data[[NAME]]))
{
FACT <- data[[NAME]]
data <- data[names(data)!=NAME]
LEVELS <- levels(FACT) # assuming one layer
if(fixed)
{
TERMS <- attr(stats::terms(formula),"term.labels")
# parse the reference level from the term label via fixed string splitting (no regexp)
# HEAD <- paste0(NAME,"[")
HEAD <- paste0(NAME,".")
reference <- TERMS[grepl(HEAD,TERMS,fixed=TRUE)][1] # *NAME[#/ref]*
reference <- strsplit(reference,HEAD,fixed=TRUE)[[1]][2] # #/ref]*
# reference <- strsplit(reference,"/",fixed=TRUE)[[1]][2] # ref]*
reference <- strsplit(reference,"_",fixed=TRUE)[[1]][2] # ref]*
# reference <- strsplit(reference,"]",fixed=TRUE)[[1]][1] # ref
# reference <- which(LEVELS==reference) # index conversion breaks the level-label convention below
}
else if(reference=="auto") # fix base layer
{
UNIQUE <- unique(FACT) # may be missing some levels
XY <- tabulate(match(FACT,UNIQUE)) # tallies
reference <- UNIQUE[which.max(XY)]
message(NAME," reference category set to ",reference,".")
}
# else the reference category is specified by `reference`
REF <- reference[reference %in% LEVELS]
DIFF <- LEVELS %nin% reference
FACT <- lapply(LEVELS[DIFF],function(l){FACT==l})
# LEVELS <- paste0(NAME,"[",LEVELS,"/",LEVELS[reference],"]")[DIFF]
LEVELS <- paste0(NAME,".",LEVELS,"_",REF)[DIFF]
names(FACT) <- LEVELS
data[LEVELS] <- FACT
# expand terms
if(!fixed && !is.null(formula))
{
formula <- as.character(formula)[2]
formula <- sapply(LEVELS,function(l){gsub(NAME,l,formula,fixed=TRUE)})
formula <- paste(formula,collapse="+")
formula <- paste("~",formula)
formula <- eval(parse(text=formula))
formula <- simplify.formula(formula)
}
}
} # end telemetry expansion
if(!is.null(formula)){ environment(formula) <- globalenv() }
RETURN <- list(data=data,R=R,formula=formula)
return(RETURN)
}
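# e.g. a factor raster "habitat" with levels {1,2,3} and reference level 1 is
# replaced by indicator rasters "habitat.2_1" and "habitat.3_1", and every
# "habitat" term in the formula is expanded over those indicator variables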
# prepare raster data for processing
R.prepare <- function(R)
{
PROJ <- raster::projection(R)
DIM <- dim(R)
X <- raster::xFromCol(R,1:DIM[2])
Y <- raster::yFromRow(R,1:DIM[1])
Z <- raster::getZ(R)
dX <- stats::median(diff(X))
dY <- stats::median(diff(Y))
R <- raster::as.array(R)[,,] # last dim will be dropped if length-1
if(length(dim(R))==2) # RasterLayer (static)
{
R <- aperm(R,2:1)
dZ <- NULL
} # [x,y]
else if (length(dim(R))==3) # RasterStack or RasterBrick
{
STATIONARY <- FALSE
R <- aperm(R,c(2,1,3)) # [x,y,z]
if(class(Z)[1]=="Date") { Z <- as.POSIXct(Z) } # numeric is in days
if(class(Z)[1]=="POSIXct") { Z <- as.numeric(Z) }
dZ <- stats::median(diff(Z))
}
R <- list(R=R,PROJ=PROJ,X=X,Y=Y,Z=Z,dX=dX,dY=dY,dZ=dZ)
return(R)
}
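# e.g. P <- R.prepare(R) for a single-layer raster gives a list containing the
# [x,y] matrix P$R, projection P$PROJ, grid coordinates P$X,P$Y (P$Z=NULL),
# and resolutions P$dX,P$dY (P$dZ=NULL)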
# sample location xy (in projection proj) from raster R with grid coordinates X,Y in (in projection PROJ)
R.extract <- function(xy,proj,R,X,Y,Z=NULL,PROJ,dX,dY,dZ=NULL)
{
DIM <- dim(R)
xy <- project(xy,from=proj,to=PROJ)
# continuous index
xy[,1] <- (xy[,1] - X[1])/dX + 1
xy[,2] <- (xy[,2] - Y[1])/dY + 1
if(length(DIM)==2)
{
E <- bint(R,t(xy),ext=NA)
}
else # xyt
{
# missing t axis
# this is not fully coded yet, but it is not used either
E <- tint(R,t(xy),ext=NA)
}
return(E)
}
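# e.g. with P <- R.prepare(R), covariates at locations xy (in projection proj) would be
# R.extract(xy,proj=proj,R=P$R,X=P$X,Y=P$Y,Z=P$Z,PROJ=P$PROJ,dX=P$dX,dY=P$dY,dZ=P$dZ)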
# Evaluate raster on new spatial grid
R.grid <- function(r,proj,R,interpolate='bilinear')
{
DIM <- c(length(r$x),length(r$y))
xy <- array(0,c(DIM,2))
xy[,,1] <- r$x
xy <- aperm(xy,c(2,1,3))
xy[,,2] <- r$y
xy <- aperm(xy,c(2,1,3))
dim(xy) <- c(prod(DIM),2)
colnames(xy) <- c('x','y')
PROJ <- projection(R)
xy <- project(xy,from=proj,to=PROJ)
if(dim(R)[3]==1)
{
G <- raster::extract(R,xy,method=interpolate)
dim(G) <- DIM
# G <- R.extract(xy,proj=proj,R=R,X=X,Y=Y,PROJ=PROJ,dX=dX,dY=dY)
# G <- array(G,DIM)
}
else
{
G <- array(NA,c(nrow(xy),dim(R)[3])) # [location,layer]
for(i in 1:dim(R)[3])
{
G[,i] <- raster::extract(R[[i]],xy,method=interpolate)
# G[,,i] <- R.extract(xy,proj,R=R[,,i],X=X,Y=Y,PROJ=PROJ,dX=dX,dY=dY)
}
dim(G) <- c(DIM,dim(R)[3])
}
return(G)
}
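# e.g. G <- R.grid(UD$r,projection(UD),R) evaluates raster R on the UD grid,
# returning an [x,y] array (or [x,y,layer] array for a stack) in the grid's ordering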
# is RSF model stationary or non-stationary
is.stationary <- function(R,CTMM)
{
STATIONARY <- TRUE
DIM <- sapply(R,function(r){length(dim(r))})
DIM <- max(DIM)
if(DIM==3) { STATIONARY <- FALSE }
formula <- CTMM$formula
if(!is.null(formula))
{
VARS <- all.vars(formula)
DVARS <- VARS[ VARS %nin% names(R) ]
if(length(DVARS)) { STATIONARY <- FALSE }
}
return(STATIONARY)
}
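# e.g. a model whose formula references only static (2D) rasters is stationary,
# while any raster time-stack or telemetry-annotation covariate makes it non-stationary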
# UNFINISHED
# this is for cross validation only
rsf.loglike <- function(data,CTMM,R=list(),smooth=TRUE,...)
{
isotropic <- CTMM$isotropic
beta <- CTMM$beta
Z <- CTMM$Z
integrated <- CTMM$integrated
level.UD <- CTMM$level.UD
formula <- CTMM$formula
n <- nrow(data)
UD <- list(CTMM=CTMM,weights=rep(1,n),DOF.area=n)
# rsf.mcint(data,UD,R=R,formula=formula,integrated=integrated,level.UD=level.UD,isotropic=isotropic,beta=beta,smooth=smooth,NORM=Z,...)
}
# model selection on anisotropy
# model selection on phenomenological spatial parameters
# model selection on covariates