#' Run MCDS.exe as a backend for mrds
#'
#' Rather than use the R code provided in `mrds`, one can also use the binary
#' of `MCDS.exe`, to reproduce the results given by Distance for Windows. There
#' is no guarantee that one approach is "better" than the other, but `mrds`
#' will select the model with the better likelihood and provide answers to
#' this. By default (once `MCDS.exe` is available) both `MCDS.exe` and R will
#' be used to obtain detection function parameter estimates. To select only
#' to use the `MCDS.exe` optimizer set \code{control=list(optimizer='MCDS')}
#' or only use the R optimizer set \code{control=list(optimizer='R')}.
#'
#' Please see our examples pages for further information: https://examples.distancesampling.org/
#'
#' If you are running a non-Windows operating system, you can follow the
#' instructions below to have `MCDS.exe` run using `wine`.
#'
#' @aliases MCDS mcds_dot_exe
#' @name MCDS.exe
#' @rdname mcds_dot_exe
#'
#' @section Obtaining MCDS.exe:
#' The following code can be used to download `MCDS.exe` from the distance
#' sampling website:
#' \code{download.file("http://distancesampling.org/R/MCDS.exe", paste0(system.file(package="mrds"),"/MCDS.exe"), mode = "wb")}
#' The MCDS binary will be installed to the main directory of your your local R
#' mrds library. Alternatively, you can copy the `MCDS.exe` from your local
#' Distance for Windows installation if you prefer. The location of your local
#' mrds library main directory can be found by running the following in R:
#' \code{system.file("MCDS.exe", package="mrds")}.
#'
#' @section Running MCDS.exe on non-Windows platforms:
#' This has been tentatively tested on a mac but should currently be
#' considered largely experimental.
#'
#' One can still use MCDS.exe even if you are running a mac computer. To
#' do this one will need to install `wine` a Windows emulator. It is important
#' to use a version of `wine` which can run 32-bit programs.
#'
#' The package will attempt to work out which `wine` binary to use (and detect
#' if it is installed), but this doesn't always work. In this case, the
#' location of the `wine` binary can be specified in the `control` `list`
#' provided to `ddf` using the `winebin` element or supply the `winebin`
#' argument to the `ds` function. For example, if `wine` is installed at
#' `/usr/bin/local/wine` you can set `control$winebin` to that
#' location to use that binary.
#'
#' On macOS, this can be achieved using the `homebrew` package management
#' system and installing the `wine-crossover` package. You may need to change
#' the \code{control$winebin} to be `wine`, `wine64` or `wine32on64`,
#' depending on your system's setup. This package tries to work out what to
#' do, but likely doesn't handle all corner cases. Currently this is untested
#' on Mac M1 systems.
#'
#' @section Stopping using MCDS.exe:
#' Once this feature is enabled, using `ddf` will by default run both
#' its built-in R optimizer and `MCDS.exe`. To disable this behaviour, specify which
#' you wish to use with via the \code{optimizer=} option described above. Alternatively,
#' if you wish to permanently stop using MCDS.exe, remove
#' the `MCDS.exe` binary file. You can find which folder it is in by running the following in R:
#' \code{system.file("MCDS.exe", package="mrds")}.
#'
#' @author David L Miller and Jonah McArthur
NULL
#' @title create.command.file
# a function to take the input as required for the ddf function in mrds and
# create a command file for the mcds engine, which will perform an equivalent
# analysis.
#' @inheritParams ddf
#' @rdname create.command.file
#' @author Jonah McArthur
#' @importFrom utils write.table
create.command.file <- function(dsmodel=call(), data,
method, meta.data, control) {
# create a temporary directory
directory <- tempdir()
# create command file
command.file.name <- tempfile(pattern="cmdtmp",
tmpdir=directory,
fileext=".txt")
file.create(command.file.name)
out.file <- tempfile(pattern="out",
tmpdir=directory,
fileext=".txt")
log.file <- tempfile(pattern="log",
tmpdir=directory,
fileext=".txt")
stat.file <- tempfile(pattern="stat",
tmpdir=directory,
fileext=".txt")
plot.file <- tempfile(pattern="plot",
tmpdir=directory,
fileext=".txt")
data.file <- tempfile(pattern="data",
tmpdir=directory,
fileext=".txt")
# Create Data File
data.info <- create.data.file(data, dsmodel, data.file)
# extract reformatted data
data <- data.info$data
# extract information about covariates
covar.fields <- data.info$covar.fields
cluster <- data.info$cluster
# HEADER section
# ~~~~~~~~~~~~~~
# specify the location of output files
cat(out.file, file=command.file.name, "\n", append=TRUE)
cat(log.file, file=command.file.name, "\n", append=TRUE)
cat(stat.file, file=command.file.name, "\n", append=TRUE)
cat(plot.file, file=command.file.name, "\n", append=TRUE)
cat("None", file=command.file.name, "\n", append=TRUE)
cat("None", file=command.file.name, "\n", append=TRUE)
# OPTION section
# ~~~~~~~~~~~~~~
cat("OPTIONS;", file=command.file.name, "\n", append=TRUE)
# specify the type of transect and its width and units
if(meta.data$point == TRUE){
cat(paste("DISTANCE=RADIAL /UNITS='Meters' /WIDTH=",
meta.data$width, ";", sep=""), file=command.file.name, "\n",
append=TRUE)
cat("TYPE=POINT;", file=command.file.name, "\n", append=TRUE)
}else{
cat(paste("DISTANCE=PERP /UNITS='Meters' /WIDTH=",
meta.data$width, ";", sep=""), file=command.file.name, "\n",
append=TRUE)
cat("TYPE=LINE;", file=command.file.name, "\n", append=TRUE)
}
# define whether there are clusters
if(cluster == TRUE){
cat("OBJECT=CLUSTER;", file=command.file.name, "\n", append=TRUE)
}else{
cat("OBJECT=SINGLE;", file=command.file.name, "\n", append=TRUE)
}
# managing the output options
if(is.null(control$debug) == FALSE){
if(control$debug == TRUE){
cat("DEBUG=ON;", file=command.file.name, "\n", append=TRUE)
}
}
if(is.null(control$showit) == FALSE){
output_info_levels <- c("SUMMARY","RESULTS","SELECTION","ALL")
specified_output_level <- output_info_levels[control$showit+1]
cat(paste("PRINT=", specified_output_level, ";", sep=""),
file=command.file.name, "\n", append=TRUE)
}
# the user will specify the adjustment term selection
cat("SELECTION=SPECIFY;", file=command.file.name, "\n",
append=TRUE)
cat("END;", file=command.file.name, "\n", append=TRUE)
# DATA section
# ~~~~~~~~~~~~~~
cat("DATA /STRUCTURE=FLAT;", file=command.file.name, "\n",
append=TRUE)
# combine data field names into single character value
fields.comb <- paste(names(data), collapse = ",")
cat(paste("FIELDS=", fields.comb, ";", sep=""),
file=command.file.name, "\n", append=TRUE)
# Don't tell MCDS about clusters
# If clusters and cluster is a cov in the model
# if(cluster && "SIZE" %in% covar.fields){
# cat(paste("SizeC=SIZE;", sep=""),
# file=command.file.name, "\n", append=TRUE)
# }
# Access the model parameters
mod_paste <- paste(dsmodel)
mod.vals <- try(eval(parse(text=mod_paste[2:length(mod_paste)])))
# Get a list of the factor variables in the model formula
factor.vars <- rownames(attr(terms(mod.vals$formula),"factors"))
# Process to retain only the variable names
for(i in seq(along = factor.vars)){
# Find first bracket
start <- gregexpr("\\(", factor.vars[i])[[1]][1] + 1
# Find second bracket
end <- gregexpr("\\)", factor.vars[i])[[1]][1] -1
# Retain part within brackets
factor.vars[i] <- substr(factor.vars[i], start, end)
}
# Add factor information
factor.fields <- NULL
for(i in seq(along = covar.fields)){
# If either the data format is factor OR it is specified in formula as factor OR it is a character
if(is.factor(data[[covar.fields[i]]]) || covar.fields[i] %in% factor.vars || is.character(data[[covar.fields[i]]])){
factor.fields <- c(factor.fields, covar.fields[i])
# Column in data may not be a factor field may only have been specified in formula
labels <- levels(as.factor(data[[covar.fields[i]]]))
# Reorder the labels - in R the baseline comes first but in MCDS the baseline comes last
if(length(labels) > 1){
labels <- c(labels[2:length(labels)], labels[1])
}
# Write to command file
cat(paste("FACTOR /NAME=", toupper(covar.fields[i]),
" /LEVELS=", length(labels), " /LABELS=",
paste(labels, collapse = ", "), ";", sep=""), file=command.file.name, "\n",
append=TRUE)
}
}
# Data file may need adapted depending on platform
if(Sys.info()[['sysname']]=="Darwin"){
plat.specific.data.file <- gsub("/", "\\\\", data.file)
}else{
plat.specific.data.file <- data.file
}
# input the absolute path to the data file
cat(paste("INFILE=", plat.specific.data.file, " /NOECHO;", sep=""),
file=command.file.name, "\n", append=TRUE)
cat("END;", file=command.file.name, "\n", append=TRUE)
# ESTIMATE section
# ~~~~~~~~~~~~~~~~
cat("ESTIMATE;", file=command.file.name, "\n", append=TRUE)
# we are only interested in the estimates for detection probability
cat("DETECTION=ALL;", file=command.file.name, "\n", append=TRUE)
# specify the key function
cat("ESTIMATOR /KEY=", file=command.file.name, append=TRUE)
if(mod.vals$key == "hn"){
cat("HNORMAL", file=command.file.name, append=TRUE)
}else if(mod.vals$key == "hr"){
cat("HAZARD", file=command.file.name, append=TRUE)
}else if(mod.vals$key == "unif"){
cat("UNIFORM", file=command.file.name, append=TRUE)
}else{
cat("NEXPON", file=command.file.name, append=TRUE)
}
adj.pres <- FALSE
# check if adjustment terms are used
if(is.null(mod.vals$adj.series) == FALSE){
adj.pres <- TRUE
# specify the type of adjustment term
if(mod.vals$adj.series == "cos"){
cat(" /ADJUST=COSINE", file=command.file.name, append=TRUE)
}else if(mod.vals$adj.series == "herm"){
cat(" /ADJUST=HERMITE", file=command.file.name, append=TRUE)
}else if(mod.vals$adj.series == "poly"){
cat(" /ADJUST=POLY", file=command.file.name, append=TRUE)
}
# specify the order of adjustment terms
if(!is.null(mod.vals$adj.order)){
cat(paste(" /ORDER=", paste(mod.vals$adj.order,collapse=","),sep=""),
file=command.file.name, append=TRUE)
}
# specify the scaling of adjustment parameters
if(mod.vals$adj.scale == "width"){
cat(" /ADJSTD=W", file=command.file.name, append=TRUE)
}else{
cat(" /ADJSTD=SIGMA", file=command.file.name, append=TRUE)
}
# specify the number of adjustment parameters
cat(paste(" /NAP=", length(mod.vals$adj.order), sep=""),
file=command.file.name, append=TRUE)
}
# specify which fields are covariates
if(length(covar.fields) > 0){
cat(paste(" /COVARIATES=", paste(covar.fields,collapse=","), sep=""),
file=command.file.name, append=TRUE)
}
# allowing for initial values for the parameters
if(!(length(control$initial) == 1 && is.na(control$initial))){
inits <- numeric()
if(!mod.vals$key == "unif"){
# first start value is the exp intercept scale param
inits[1] <- exp(control$initial$scale[1])
}
if(mod.vals$key == "hr"){
# second start value is the exp shape param
inits[2] <- exp(control$initial$shape[1])
}
# Next are any covariates
if(length(covar.fields) > 0){
cov.inits <- control$initial$scale[-1]
# Check if any are factors
if(!is.null(factor.fields)){
#Sort out ordering and may need to change scale intercept!
}
inits <- c(inits, cov.inits)
}
# If there are any adjustments
if(!is.null(control$initial$adjustment)){
inits <- c(inits, control$initial$adjustment)
}
# paste all the initial values together
cat(paste(" /START=", paste(inits,collapse=","), sep=""),
file=command.file.name, append=TRUE)
}
# defining upper and lower bounds for parameters
#if(!is.null(control$lowerbounds)){
# cat(paste(" /LOWER=", paste(control$lowerbounds,collapse=","), sep=""),
# file=command.file.name, append=TRUE)
#}
#if(!is.null(control$upperbounds)){
# cat(paste(" /UPPER=", paste(control$upperbounds,collapse=","), sep=""),
# file=command.file.name, append=TRUE)
#}
# ending the ESTIMATOR line
cat(";", file=command.file.name, "\n", append=TRUE)
# specifying monotonicity constraint
if(is.null(meta.data$mono.strict)){
meta.data$mono.strict <- FALSE
}
if(is.null(meta.data$mono)){
meta.data$mono <- FALSE
}
if(meta.data$mono.strict == TRUE){
cat("MONOTONE=STRICT;", file=command.file.name, "\n", append=TRUE)
}else if(meta.data$mono == TRUE){
cat("MONOTONE=WEAK;", file=command.file.name, "\n", append=TRUE)
}else{
cat("MONOTONE=NONE;", file=command.file.name, "\n", append=TRUE)
}
# specifying the truncation distance
cat(paste("DISTANCE /WIDTH=",meta.data$width,sep=""),
file=command.file.name, append=TRUE)
# dealing with grouped data
if(is.null(meta.data$binned) == FALSE){
if(meta.data$binned == TRUE){
cat(paste(" /INTERVALS=", paste(meta.data$breaks, collapse=","),
sep=""), file=command.file.name, append=TRUE)
}
}
# specifying left trunction distance, if included in the input
if(is.null(meta.data$left) == FALSE){
cat(paste(" /LEFT=", meta.data$left, sep=""),
file=command.file.name, append=TRUE)
}
cat(";", file=command.file.name, "\n", append=TRUE)
cat("END;", file=command.file.name, append=TRUE)
cat("\n", file=command.file.name, append=TRUE)
ret <- list(command.file.name = command.file.name,
stats.file.name = stat.file,
log.file.name = log.file,
out.file.name = out.file,
data.file.name = data.file,
plot.file.name = plot.file)
return(ret)
}
#' @importFrom utils read.table write.table
mcds.results.and.refit <- function(statsfile, model.list, debug=FALSE){
stats <- read.table(statsfile, row.names=NULL)
# Name columns
colnames(stats) <- c("Stratum", "Sample", "Estimator", "Module", "Statistic",
"Value", "CV", "Lcl", "Ucl", "Df")
# Extract only parameters relating to the detection function (Module 2)
stats <- stats[stats$Module == 2,]
# Extract the log likelihood value (statistic 9)
ll <- stats[stats$Statistic == 9,]$Value
# Extract the estimated values of each parameter (statistics 100+)
starting.values <- stats$Value[stats$Statistic > 100]
# Put estimates into model as initial values
model.list$control$initial <- list()
mod.paste <- paste(model.list$dsmodel)
mod.vals <- try(eval(parse(text=mod.paste[2:length(mod.paste)])))
# As long as it's not a uniform there will be a scale parameter
if(!mod.vals$key == "unif"){
# Log scale value as mrds works on log scale for and MCDS works on natural scale for the scale parameter
model.list$control$initial$scale <- log(starting.values[1])
starting.values <- starting.values[-1]
}
# If it's a hazard rate extract the shape parameter (now the 1st element)
if(mod.vals$key == "hr"){
model.list$control$initial$shape <- log(starting.values[1])
starting.values <- starting.values[-1]
}
# Adjustment parameters come at the end
if(!is.null(mod.vals$adj.order)){
ind <- (length(starting.values)-length(mod.vals$adj.order)+1):
length(starting.values)
model.list$control$initial$adjustment <- starting.values[ind]
starting.values <- starting.values[-ind]
}
# Any thing else must be covariate parameters associated with the scale parameter
if(length(starting.values) >0){
model.list$control$initial$scale <- c(model.list$control$initial$scale, starting.values)
}
# We are not refitting just using mrds to make the model with the parameter estimates from MCDS
model.list$control$nofit <- TRUE
refit <- ddf(dsmodel = model.list$dsmodel, mrmodel=NULL,
data = model.list$data,
method = model.list$method,
meta.data = model.list$meta.data,
control = model.list$control)
# For debugging
if(debug){
message(paste0("MCDS.exe log likehood: ", round(ll,7)))
message(paste0("MCDS.exe pars: ", paste(round(stats$Value[stats$Statistic > 100],7), collapse=", ")))
message(paste0("mrds refitted log likehood: ", round(refit$lnl,7)))
message(paste0("mrds refitted pars: ", paste(round(refit$par, 7), collapse=", ")))
}
return(refit)
}
run.MCDS <- function(dsmodel, data, method, meta.data, control){
if(control$showit>0){
message("Running MCDS.exe...")
}
# create the test file
test.file <- create.command.file(dsmodel, data, method,
meta.data, control)
# When exiting the function tidy up any files that may have been created
on.exit(tidy.tmp.files(unlist(test.file)))
if(control$showit>0){
message(paste("Command file written to", test.file$command.file.name))
message(paste("Stats file written to", test.file$stats.file.name))
}
# find MCDS.exe
path.to.MCDS.dot.exe <- system.file("MCDS.exe", package="mrds")
# actually run MCDS.exe (use wine if necessary)
if(Sys.info()[['sysname']]!="Windows"){
if(!is.null(control$winebin)){
winebin <- control$winebin
}else{
winebin <- "wine"
}
# macOS specific
if(Sys.info()[['sysname']]=="Darwin"){
osx.version <- as.numeric_version(system("sw_vers -productVersion",
intern = TRUE))
if(!is.null(control$winebin)){
winebin <- control$winebin
}else if(osx.version>as.numeric_version(10.15)){
# Catalina or after removed the 32-bit support, so use wine32on64
winebin <- "wine32on64"
}
}
if(Sys.which(winebin)==""){
stop("MCDS.exe could not be run. wine (needed to run MCDS.exe on non-Windows platforms) could not automatically be detected. See documentation for details.", call. = FALSE)
}
wine.call <- paste(winebin, path.to.MCDS.dot.exe, "0,", test.file$command.file.name)
# only provide the output from MCDS.exe when we have showit>0
w <- suppressWarnings(try(system(wine.call, intern=TRUE,
ignore.stdout=control$showit<1, ignore.stderr=control$showit<1)
, silent = TRUE))
}else{
# on Windows just execute the MCDS binary
w <- suppressWarnings(try(system(paste0(path.to.MCDS.dot.exe,
" 0, ", test.file$command.file.name), intern=TRUE,
ignore.stdout=control$showit<1, ignore.stderr=control$showit<1, show.output.on.console = FALSE)
, silent = TRUE))
}
# Check if MCDS.exe was successful
finish.status <- attr(w, "status")
# Read in the log file
log.file <- readLines(test.file$log.file.name)
# Check for warnings or errors
warn.index <- grep("[Ww]arning", log.file)
err.index <- grep("[Ee]rror", log.file)
# If the finish.status is not 1 and there are no warnings or errors
if(finish.status != 1 && length(warn.index) == 0 && length(err.index) ==0){
warning(paste("MCDS finish status indicates warnings / error but none were found in the log file. Finish status = ", finish.status, sep = ""), call. = FALSE, immediate. = TRUE)
}
# Display warnings from log file
for(i in seq(along = warn.index)){
ignore.warning <- any(grepl("Cannot estimate encounter rate variance empirically as there is only one sample.", log.file[warn.index[i]]),
grepl("When cluster size is a covariate, variance of the cluster size, density of individuals, and abundance estimates can only be obtained via the bootstrap.", log.file[warn.index[i]]))
if(!ignore.warning) message(log.file[warn.index[i]], appendLF = TRUE)
}
# Display errors from log file
for(i in seq(along = err.index)){
message(log.file[err.index[i]], appendLF = TRUE)
}
# If there are any errors return a value of try-error
if(length(err.index) > 0){
ret.val <- list()
class(ret.val) <- "try-error"
return(ret.val)
}
# little extra parameter here to avoid infinite recursion when
# we run ddf in mcds.results.and.refit
control$optimizer <- "R"
model.list <- list(dsmodel=dsmodel, data=data,
method=method, meta.data=meta.data, control=control)
# try to refit the model
mm <- try(mcds.results.and.refit(test.file$stats.file.name, model.list,
debug=control$showit>0), silent = TRUE)
return(mm)
}
# Tidy up temporary files
tidy.tmp.files <- function(files = character()){
# Try to remove each of the files
for(i in seq(along = files)){
# Quietly try and remove files that were created
tmp <- suppressWarnings(try(file.remove(files[i]), silent = TRUE))
}
}
#' @importFrom utils write.table
create.data.file <- function(data, dsmodel, data.file){
# Rename columns to those expected by MCDS.exe
data.cols <- c("SMP_LABEL", "SMP_EFFORT", "DISTANCE")
# SMP_LABEL (sample label)
if("Sample.Label" %in% names(data)){
index <- which(names(data) == "Sample.Label")
names(data)[index] <- "SMP_LABEL"
}else{
data$SMP_LABEL <- rep(1, nrow(data))
}
# SMP_EFFORT (sample effort)
if("Effort" %in% names(data)){
index <- which(names(data) == "Effort")
names(data)[index] <- "SMP_EFFORT"
}else{
data$SMP_EFFORT <- rep(1, nrow(data))
}
# DISTANCE
index <- which(names(data) == "distance")
names(data)[index] <- "DISTANCE"
# STR_LABEL (stratum label)
if("Region.Label" %in% names(data)){
# Replicate rather than rename in case it is included as a covariate
data$STR_LABEL <- data$Region.Label
data.cols <- c(data.cols, "STR_LABEL")
}
# STR_AREA (stratum area)
if("Area" %in% names(data)){
index <- which(names(data) == "Area")
names(data)[index] <- "STR_AREA"
data.cols <- c(data.cols, "STR_AREA")
}
# Check if there are cluster sizes
# Let's not tell MCDS about clusters - not needed for detection function fitting
cluster <- FALSE
# if("size" %in% names(data)){
# index <- which(names(data) == "size")
# names(data)[index] <- "SIZE"
# data.cols <- c(data.cols, "SIZE")
# cluster <- TRUE
# }else{
# cluster <- FALSE
# }
# Check if there are any covariates in the model
if(length(all.vars(dsmodel)) > 0){
# Get model covariates
covar.fields <- all.vars(dsmodel)
data.cols <- c(data.cols, covar.fields)
}else{
covar.fields <- NULL
}
# remove all non-essential columns from the dataset
data <- data[,data.cols]
# Cluster Size must be named "SIZE" - but we do not want it to know about clusters so name "CS" instead
if("size" %in% names(data)){
index <- which(names(data) == "size")
names(data)[index] <- "CS"
}
if("size" %in% covar.fields){
index <- which(covar.fields == "size")
covar.fields[index] <- "CS"
}
# create data file to pass to mcds
file.create(data.file)
write.table(data, file=data.file, col.names=FALSE,
row.names=FALSE, sep="\t", quote = FALSE)
# return reformatted dataframe and the information about covariates
output <- list(data = data,
covar.fields = covar.fields,
cluster = cluster)
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.