#' Frequentist inference for the one thermal mass model (1TM)
#'
#' Brief description
#'
#' @param data A dataset in the format of either \code{\link{cwall_east}}
#' or \code{\link{cwall_north}}.
#' @param flux A character scalar. Should we use: only internal heat
#' fluxes (\code{flux = "in"}); only external heat fluxes
#' (\code{flux = "in"}); both internal and external heat fluxes
#' (\code{flux = "both"}).
#' @param air A logical scalar. Should we use air (\code{TRUE}) or
#' surface (\code{FALSE}) temperatures?
#' @param init A numeric vector of length 3. Optional initial estimates of
#' the parameters R1, R2 and C1 of the 1TM. These will only be used if
#' \code{flux = "both"}, because when \code{flux = "in"} and
#' \code{flux = "out"} non-iterative estimates of these parameters can be
#' calculated.
#' @param tau A numeric scalar. Time interval between successive measurements,
#' in hours.
#' @param hetero A logical scalar. Only relevant when \code{flux = "both"}.
#' Should we allow the error variance to be different for the Qin and Qout
#' parts of the model (\code{hetero = TRUE}) or assume a constant error
#' variance (\code{hetero = FALSE})? The default is \code{hetero = TRUE}
#' because Qout data tend to be much more variable than the Qin data.
#' @param correlation \strong{Not working yet!}
#' A logical scalar. Should we estimate the correlation
#' between the contemporaneous errors for Qin and Qout
#' (\code{correlation = TRUE}) or assume that this correlation is
#' equal to zero (\code{correlation = FALSE})? If \code{hetero = FALSE}
#' then \code{correlation = FALSE} is used by default.
#' @param sum_R A logical scalar. Should we output results for the parameters
#' (R1, R2, C1) [\code{sum_R = FALSE}] or for (R1 + R2, ratio of the Rs, C1)
#' [\code{sum_R = TRUE}]. The ratio of the Rs is R1/R2 if the estimate of
#' R1 is greater than the estimate of R2 and R2/R1 otherwise.
#' @param ep A numeric scalar. Lower bound imposed on the (positive)
#' parameters R1, R2 and C1. Passed to the \code{lower} argument of
#' \code{\link[stats]{nls}}.
#' @param nls_control An optional list of control settings to be passed to
#' \code{\link[stats]{nls}}.
#' @param gnls_control An optional list of control settings to be passed to
#' \code{\link[nlme]{gnls}}.
#' @param use_gnls A logical scalar. In the (\code{flux = "both"},
#' \code{hetero = FALSE}) case should we use \code{\link[nlme]{gnls}} to
#' fit the model (\code{use_gnls == TRUE}) or \code{\link[stats]{nls}}
#' (\code{use_gnls == FALSE})? This is provided as a means to check that
#' these two functions give equivalent results.
#' @param use_logs A logical scalar. Should the model fitting use the
#' parameterisation (log R1, log R2, log C1) [\code{use_logs = TRUE}]
#' or (R1, R2, C1) [\code{use_logs = FALSE}].
#' \strong{Not yet implemented fully or properly!}
#' @param sim_data Not implemented yet.
#' @param ... Further arguments to be pased to \code{\link[stats]{nls}} or
#' \code{\link[nlme]{gnls}}.
#' @details
#' The default value of \code{nlsTol} (100) is rather larger than the
#' default in \code{\link[nlme]{gnlsControl}} because experimentation with
#' the example data in \code{\link[walls]{walls}} suggests that smaller
#' values tend to trigger an error in \code{\link[nlme]{gnls}}, that is,
#' "step halving factor reduced below minimum in NLS step".
#' Increasing \code{nlsTol} substantially seems to avoid early termination
#' of the fitting algorithm and produce sensible estimates.
#' As a sanity check the ...
#' @return An object of class \code{"nls"} returned from
#' \code{\link[stats]{nls}}.
#' @references Gori, V. (2017) A novel method for the estimation of
#' thermophysical properties of walls from short and seasonally
#' independent in-situ surveys. Doctoral thesis. UCL (University
#' College London).
#' \url{http://discovery.ucl.ac.uk/1568418/}
#' @examples
#' ### cwall_east --------
#'
#' ## Qin only
#' in_air <- one_tm(data = cwall_east)
#' in_air
#'
#' # Output from some methods available for nls objects
#' summary(in_air)
#' coef(in_air)
#' sigma(in_air)
#' vcov(in_air)
#' cov2cor(vcov(in_air))
#' confint(in_air)
#' logLik(in_air)
#' plot(in_air)
#' plot(in_air, form = resid(., type = "p") ~ x1)
#' qqnorm(in_air) # `wrong' way round!
#' qqnorm(residuals(in_air, type = "pearson"))
#'
#' in_surface <- one_tm(data = cwall_east, air = FALSE)
#' in_surface
#'
#' ## Qout only
#' out_air <- one_tm(data = cwall_east, flux = "out")
#' out_air
#' out_surface <- one_tm(data = cwall_east, flux = "out", air = FALSE)
#' out_surface
#'
#' ## Both Qin and Qout
#'
#' # Common error variance for Qin and Qout
#' both_air_homo <- one_tm(data = cwall_east, flux = "both", hetero = FALSE)
#' both_air_homo
#' plot(both_air_homo, form = resid(., type = "p") ~ fitted(.) | in_out)
#'
#' # Different error variances for Qin and Qout
#' both_air_hetero <- one_tm(data = cwall_east, flux = "both")
#' both_air_hetero
#' plot(both_air_hetero, form = resid(., type = "p") ~ fitted(.) | in_out)
#' plot(both_air_hetero, form = resid(., type = "p") ~ x1 | in_out)
#' plot(both_air_hetero, form = in_out ~ resid(., type = "p"))
#' plot(both_air_hetero, Qsp ~ fitted(.) | in_out, abline = c(0, 1))
#'
#' # Plot contemporaneous residuals against each other
#' resids <- residuals(both_air_hetero)
#' res1 <- resids[both_air_hetero$data$in_out == "inside"]
#' res2 <- resids[both_air_hetero$data$in_out == "outside"]
#' plot(res1, res2)
#'
#' ### cwall_north --------
#'
#' ## Qin only
#' in_surface <- one_tm(data = cwall_north)
#' in_surface
#'
#' ## Qout only
#' out_surface <- one_tm(data = cwall_north, flux = "out")
#' out_surface
#'
#' both_air_hetero <- one_tm(data = cwall_north, flux = "both")
#' # Plot contemporaneous residuals against each other
#' resids <- residuals(both_air_hetero)
#' res1 <- resids[both_air_hetero$data$in_out == "inside"]
#' res2 <- resids[both_air_hetero$data$in_out == "outside"]
#' plot(res1, res2)
#' cor(res1, res2)
#'
#' # Inference for R1 + R2 [sum_R] and R1 / (R1 + R2) [frac_R]
#' both_air_hetero <- one_tm(data = cwall_north, flux = "both", sum_R = TRUE)
#' @export
one_tm <- function(data, flux = c("in", "out", "both"), air = TRUE,
init = NULL, tau = 1 / 12, hetero = TRUE,
correlation = FALSE, sum_R = FALSE, ep = 1e-6,
nls_control = NULL, gnls_control = NULL, use_gnls = FALSE,
use_logs = FALSE, sim_data = NULL, ...) {
# Check that flux has a suitable value
flux <- match.arg(flux)
# Check that ep is positive
if (ep <= 0) {
stop("ep must be positive")
}
# Create separate data frames for Qin and Qout
# These will probably be needed for the setting of initial estimates
in_data_df <- create_data_df(data, flux = "in", air, tau)
out_data_df <- create_data_df(data, flux = "out", air, tau)
# If we are using both Qin and Qout then create a data frame for these
if (flux == "both") {
both_data_df <- create_data_df(data, flux, air, tau)
}
if (!is.null(sim_data)) {
in_data_df <- create_data_df(sim_data, flux = "in", air, tau)
out_data_df <- create_data_df(sim_data, flux = "out", air, tau)
if (flux == "both") {
both_data_df <- create_data_df(sim_data, flux = "both", air, tau)
}
}
# Calculate estimates of R1, R2 and C1 based on the Q_in data only and the
# Q_out data only. These provide the exact estimates to send to stats::nls()
# to obtain an "nls" fitted object and initial estimates to send to
# nlme::gnls() when flux = "both", i.e. we use both Q_in and Q_out data
# in_init are based on the Q_in data only
# out_init are based on the Q_out data only
in_init <- init_ests(flux = "in", data_df = in_data_df)
out_init <- init_ests(flux = "out", data_df = out_data_df)
# Also extract the estimates of the error standard deviation
in_sigma <- attr(in_init, "sigma_hat")
out_sigma <- attr(out_init, "sigma_hat")
if (flux == "both" & !is.null(init)) {
# init will only be used if flux == "both"
# Check that init is suitable
if (!is.vector(init, mode = "numeric")) {
stop("init must be a numeric vector")
}
if (length(init) != 3) {
stop("init must be a numeric vector of length 3")
}
init <- c(R1 = init[1], R2 = init[2], C1 = init[3])
}
# Initial estimates for (log R1, log R2, log C1) in case we need them later
log_in_init <- log(in_init)
names(log_in_init) <- c("logR1", "logR2", "logC1")
log_out_init <- log(out_init)
names(log_out_init) <- c("logR1", "logR2", "logC1")
# Extract any arguments supplied in ..., particularly algorithm and lower
user_args <- list(...)
algorithm <- user_args$algorithm
lower <- user_args$lower
if (flux == "in" || flux == "out") {
if (use_logs) {
if (flux == "in") {
f <- Qinp ~ log_in_reg_fn(x1, x2, Qinp1, logR1, logR2, logC1, tau)
my_data <- in_data_df
my_init <- log_in_init
} else {
f <- Qoutp ~ log_out_reg_fn(x1, x3, Qoutp1, logR1, logR2, logC1, tau)
my_data <- out_data_df
my_init <- log_out_init
}
fit <- stats::nls(f, data = my_data, start = my_init, ...)
} else {
if (flux == "in") {
f <- Qinp ~ in_reg_fn(x1, x2, Qinp1, R1, R2, C1, tau)
my_data <- in_data_df
my_init <- in_init
} else {
f <- Qoutp ~ out_reg_fn(x1, x3, Qoutp1, R1, R2, C1, tau)
my_data <- out_data_df
my_init <- out_init
}
if (is.null(algorithm) & is.null(lower)) {
fit <- stats::nls(f, data = my_data, start = my_init,
algorithm = "port", lower = 0, ...)
} else if (!is.null(algorithm) & is.null(lower)) {
fit <- stats::nls(f, data = my_data, start = my_init,
lower = if (algorithm == "port") 0 else -Inf, ...)
} else if (is.null(algorithm) & !is.null(lower)) {
fit <- stats::nls(f, data = my_data, start = my_init,
algorithm = "port", ...)
} else {
fit <- stats::nls(f, data = my_data, start = my_init, ...)
}
}
# If sum_R = TRUE then output results for R1 + R2 and R1 / (R1 + R2)
if (sum_R) {
ests <- coef(fit)
sum_init <- ests["R1"] + ests["R2"]
frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
my_init <- c(sum_init, frac_init, ests["C1"])
names(my_init) <- c("sum_R", "frac_R", "C1")
if (flux == "in") {
f <- Qinp ~ sum_in_reg_fn(x1, x2, Qinp1, sum_R, frac_R, C1, tau)
} else {
f <- Qoutp ~ sum_out_reg_fn(x1, x3, Qoutp1, sum_R, frac_R, C1, tau)
}
fit <- stats::nls(f, data = my_data, start = my_init, algorithm = "port",
lower = pmax(my_init - ep, ep), upper = my_init + ep)
}
} else {
my_data <- both_data_df
# If hetero = TRUE then use nlme::ngls() to allow the error variances to be
# different for the Qin and Qout parts of the model.
#
# Otherwise, use stats::nls() to estimate a common error variance.
# In this case we start from the parameter estimates from the fit (out_fit)
# to the Qout data alone. This is because the Qout data tend to be much
# more variable than the Qin data and therefore, if we assume a constant
# error variance, the Qout part of the model dominates.
#
# This, i.e. stats::nls() assuming a constant error variance is also
# used to provide decent initial values for stats::gnls() in the
# hetero = TRUE case
if (hetero) {
# Get some initial estimates using stats::nls()
fit <- stats::nls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
in_out, tau),
data = my_data, start = out_init, lower = ep,
algorithm = "port", control = nls_control, ...)
# The ratio between the respective estimated error standard deviations
# for the individual fits to the Qin data and the Qout data
sigma_ratio <- in_sigma / out_sigma
nls_init <- coef(fit)
# If nlsTol has not been set then set it too a large value
if (is.null(gnls_control$nlsTol)) {
gnls_control <- c(gnls_control, list(nlsTol = 100))
}
# Fit the model with different error variances but error correlation = 0
fit <- nlme::gnls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
in_out, tau),
data = my_data, start = nls_init,
weights = nlme::varIdent(c(inside = sigma_ratio ^ 2),
form = ~ 1 | in_out),
control = gnls_control, ...)
# If sum_R = TRUE then output results for R1 + R2 and R1 / (R1 + R2)
if (sum_R) {
ests <- coef(fit)
sum_init <- ests["R1"] + ests["R2"]
frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
my_init <- c(sum_init, frac_init, ests["C1"])
names(my_init) <- c("sum_R", "frac_R", "C1")
fit <- nlme::gnls(Qsp ~ sum_both_reg_fn(x1, x23, Qsp1, sum_R, frac_R,
C1, in_out, tau),
data = my_data, start = my_init,
weights = nlme::varIdent(c(inside = sigma_ratio ^ 2),
form = ~ 1 | in_out),
control = gnls_control, ...)
# Use logs
# my_init <- log(my_init)
# names(my_init) <- c("log_sum_R", "log_frac_R", "log_C1")
# fit <- nlme::gnls(Qsp ~ log_sum_both_reg_fn(x1, x23, Qsp1, log_sum_R,
# log_frac_R, log_C1,
# in_out, tau),
# data = my_data, start = my_init,
# weights = nlme::varIdent(c(inside = sigma_ratio),
# form = ~ 1 | in_out),
# control = gnls_control, verbose = TRUE, ...)
# Use logs / logit
# my_init <- c(log(my_init[1]), log(my_init[2] / (1 - my_init[2])),
# log(my_init[3]))
# names(my_init) <- c("log_sum_R", "logit_frac_R", "log_C1")
# print(my_init)
# fit <- nlme::gnls(Qsp ~ trans_sum_both_reg_fn(x1, x23, Qsp1, log_sum_R,
# logit_frac_R, log_C1,
# in_out, tau),
# data = my_data, start = my_init,
# weights = nlme::varIdent(c(inside = sigma_ratio),
# form = ~ 1 | in_out),
# control = gnls_control, verbose = TRUE, ...)
}
if (correlation) {
# pjn <- corSymm(form = ~ 1 | the_time)
# pjn <- Initialize(pjn, data = my_data)
# print(pjn)
# print("fm4 in")
# print(fit)
# print(coef(fit))
# fm4 <- update(fit, correlation = nlme::corCompSymm(value = 0,
# form = ~ 1 | the_time,
# fixed = FALSE))
# form = ~ in_out | the_time,
# form = ~ the_time | the_time,
# fixed = FALSE),
# start = coef(fit),
# evaluate = TRUE)
# print("fm4 out")
# print(fm4)
# return(fm4)
# Get the estimates of sigma for inside and outside
in_out_sigma <- coef(fit$modelStruct$varStruct, uncons = FALSE,
allCoef = TRUE)
out_sigma <- coef(fit$modelStruct$varStruct, uncons = FALSE,
allCoef = FALSE)
# print("out_sigma")
# print(out_sigma)
in_sig <- in_out_sigma[1]
out_sig <- in_out_sigma[2]
sigma_ratio <- in_out_sigma[1] / in_out_sigma[2]
# print("sigmas")
# print(in_out_sigma)
# print(sigma_ratio)
if (sum_R) {
# print("HERE 2")
# ests <- coef(fit)
# sum_init <- ests["R1"] + ests["R2"]
# frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
# my_init <- c(sum_init, frac_init, ests["C1"])
# Coefs from sum_R = TRUE
my_init <- coef(fit)
names(my_init) <- c("log_sum_R", "log_frac_R", "log_C1")
# print(my_init)
fit <- nlme::gnls(Qsp ~ log_sum_both_reg_fn(x1, x23, Qsp1, log_sum_R,
log_frac_R, log_C1,
in_out, tau),
data = my_data, start = my_init,
weights = nlme::varIdent(
c(outside = out_sigma ^ 2),
form = ~ 1 | in_out),
correlation = nlme::corCompSymm(value = -0.1,
form = ~ 1 | the_time,
fixed = FALSE),
control = gnls_control, verbose = TRUE, ...)
}
else if (use_logs) {
log_nls_init <- log(nls_init)
names(log_nls_init) <- c("logR1", "logR2", "logC1")
fit <- nlme::gnls(Qsp ~ log_both_reg_fn(x1, x23, Qsp1, logR1,
logR2, logC1, in_out, tau),
data = my_data, start = log_nls_init,
weights = nlme::varIdent(c(inside = sigma_ratio),
form = ~ 1 | in_out),
correlation = nlme::corCompSymm(value = 0,
form = ~ 1 | the_time,
fixed = TRUE),
control = gnls_control, ...)
} else {
gnls_init <- coef(fit)
# print(gnls_init)
# print(gnls_control)
# print("TRY")
# print(out_sigma ^ 2)
fit <- nlme::gnls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
in_out, tau),
data = my_data, start = gnls_init,
weights = nlme::varIdent(
c(outside = out_sigma ^ 2),
form = ~ 1 | in_out),
correlation = nlme::corCompSymm(value = 0,
form = ~ 1 | the_time,
fixed = TRUE),
control = gnls_control, ...)
}
}
} else {
# If hetero = FALSE then the Q_out data will tend to dominate the
# fit, because the error variance tends to be much greater for the
# Q_out data than the Q_in data. Therefore, we use out_init as
# initial estimates for R1, R2 and C1
#
#
#
#
fit <- stats::nls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
in_out, tau),
data = both_data_df, start = out_init,
lower = ep, algorithm = "port", ...)
nls_init <- coef(fit)
if (use_gnls) {
fit <- nlme::gnls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
in_out, tau), data = my_data,
start = nls_init,
control = gnls_control, ...)
}
# If sum_R = TRUE then output results for R1 + R2 and R1 / (R1 + R2)
if (sum_R) {
ests <- coef(fit)
sum_init <- ests["R1"] + ests["R2"]
frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
my_init <- c(sum_init, frac_init, ests["C1"])
names(my_init) <- c("sum_R", "frac_R", "C1")
f <- Qsp ~ sum_both_reg_fn(x1, x23, Qsp1, sum_R, frac_R, C1, in_out, tau)
fit <- stats::nls(f, data = my_data, start = my_init, algorithm = "port",
lower = pmax(my_init - ep, ep), upper = my_init + ep)
}
}
}
# Make "walls" the first class
class(fit) <- c("walls", class(fit))
# Manually add the data to the object returned
fit$data <- my_data
if (flux == "in") {
which_data <- "Qin only"
} else if (flux == "out") {
which_data <- "Qin only"
} else {
which_data <- "Qin and Qout"
}
attr(fit$data, "which_data") <- "Qin and Qout"
# If we used nlme::gnls() the reverse the order of the classes of the
# object returned
if (class(fit)[2] == "gnls") {
class(fit) <- c("walls", "gls", "gnls")
}
# I have found that unless I do these things plot(object) doesn't work
# in cases where it is necessary to access the original data frame used
# in the call to nlme::gnls()
# Save the name of the function, for use in summary.walls().
fit$walls_fn <- "one_tm"
return(fit)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.