R/1TM_fit.R

#' Frequentist inference for the one thermal mass model (1TM)
#'
#' Brief description
#'
#' @param data A dataset in the format of either \code{\link{cwall_east}}
#'   or \code{\link{cwall_north}}.
#' @param flux A character scalar.  Should we use: only internal heat
#'   fluxes (\code{flux = "in"}); only external heat fluxes
#'   (\code{flux = "in"}); both internal and external heat fluxes
#'   (\code{flux = "both"}).
#' @param air A logical scalar.  Should we use air (\code{TRUE}) or
#'   surface (\code{FALSE}) temperatures?
#' @param init A numeric vector of length 3.  Optional initial estimates of
#'   the parameters R1, R2 and C1 of the 1TM.  These will only be used if
#'   \code{flux = "both"}, because when \code{flux = "in"} and
#'   \code{flux = "out"} non-iterative estimates of these parameters can be
#'   calculated.
#' @param tau A numeric scalar. Time interval between successive measurements,
#'   in hours.
#' @param hetero A logical scalar.  Only relevant when \code{flux = "both"}.
#'   Should we allow the error variance to be different for the Qin and Qout
#'   parts of the model (\code{hetero = TRUE}) or assume a constant error
#'   variance (\code{hetero = FALSE})?  The default is \code{hetero = TRUE}
#'   because Qout data tend to be much more variable than the Qin data.
#' @param correlation \strong{Not working yet!}
#'   A logical scalar.  Should we estimate the correlation
#'   between the contemporaneous errors for Qin and Qout
#'   (\code{correlation = TRUE}) or assume that this correlation is
#'   equal to zero (\code{correlation = FALSE})?  If \code{hetero = FALSE}
#'   then \code{correlation = FALSE} is used by default.
#' @param sum_R A logical scalar.  Should we output results for the parameters
#'   (R1, R2, C1) [\code{sum_R = FALSE}] or for (R1 + R2, ratio of the Rs, C1)
#'   [\code{sum_R = TRUE}].  The ratio of the Rs is R1/R2 if the estimate of
#'   R1 is greater than the estimate of R2 and R2/R1 otherwise.
#' @param ep A numeric scalar.  Lower bound imposed on the (positive)
#'   parameters R1, R2 and C1.  Passed to the \code{lower} argument of
#'   \code{\link[stats]{nls}}.
#' @param nls_control An optional list of control settings to be passed to
#'   \code{\link[stats]{nls}}.
#' @param gnls_control An optional list of control settings to be passed to
#'   \code{\link[nlme]{gnls}}.
#' @param use_gnls A logical scalar.  In the (\code{flux = "both"},
#'   \code{hetero = FALSE}) case should we use \code{\link[nlme]{gnls}} to
#'   fit the model (\code{use_gnls == TRUE}) or \code{\link[stats]{nls}}
#'   (\code{use_gnls == FALSE})?  This is provided as a means to check that
#'   these two functions give equivalent results.
#' @param use_logs A logical scalar.  Should the model fitting use the
#'   parameterisation (log R1, log R2, log C1) [\code{use_logs = TRUE}]
#'   or (R1, R2, C1) [\code{use_logs = FALSE}].
#'   \strong{Not yet implemented fully or properly!}
#' @param sim_data Not implemented yet.
#' @param ... Further arguments to be pased to \code{\link[stats]{nls}} or
#'   \code{\link[nlme]{gnls}}.
#' @details
#'  The default value of \code{nlsTol} (100) is rather larger than the
#'  default in \code{\link[nlme]{gnlsControl}} because experimentation with
#'  the example data in \code{\link[walls]{walls}} suggests that smaller
#'  values tend to trigger an error in \code{\link[nlme]{gnls}}, that is,
#'  "step halving factor reduced below minimum in NLS step".
#'  Increasing \code{nlsTol} substantially seems to avoid early termination
#'  of the fitting algorithm and produce sensible estimates.
#'  As a sanity check the ...
#' @return An object of class \code{"nls"} returned from
#'   \code{\link[stats]{nls}}.
#' @references Gori, V. (2017) A novel method for the estimation of
#'   thermophysical properties of walls from short and seasonally
#'   independent in-situ surveys. Doctoral thesis. UCL (University
#'   College London).
#'   \url{http://discovery.ucl.ac.uk/1568418/}
#' @examples
#' ### cwall_east --------
#'
#' ## Qin only
#' in_air <- one_tm(data = cwall_east)
#' in_air
#'
#' # Output from some methods available for nls objects
#' summary(in_air)
#' coef(in_air)
#' sigma(in_air)
#' vcov(in_air)
#' cov2cor(vcov(in_air))
#' confint(in_air)
#' logLik(in_air)
#' plot(in_air)
#' plot(in_air, form = resid(., type = "p") ~ x1)
#' qqnorm(in_air) # `wrong' way round!
#' qqnorm(residuals(in_air, type = "pearson"))
#'
#' in_surface <- one_tm(data = cwall_east, air = FALSE)
#' in_surface
#'
#' ## Qout only
#' out_air <- one_tm(data = cwall_east, flux = "out")
#' out_air
#' out_surface <- one_tm(data = cwall_east, flux = "out", air = FALSE)
#' out_surface
#'
#' ## Both Qin and Qout
#'
#' # Common error variance for Qin and Qout
#' both_air_homo <- one_tm(data = cwall_east, flux = "both", hetero = FALSE)
#' both_air_homo
#' plot(both_air_homo, form = resid(., type = "p") ~ fitted(.) | in_out)
#'
#' # Different error variances for Qin and Qout
#' both_air_hetero <- one_tm(data = cwall_east, flux = "both")
#' both_air_hetero
#' plot(both_air_hetero, form = resid(., type = "p") ~ fitted(.) | in_out)
#' plot(both_air_hetero, form = resid(., type = "p") ~ x1 | in_out)
#' plot(both_air_hetero, form = in_out ~ resid(., type = "p"))
#' plot(both_air_hetero, Qsp ~ fitted(.) | in_out, abline = c(0, 1))
#'
#' # Plot contemporaneous residuals against each other
#' resids <- residuals(both_air_hetero)
#' res1 <- resids[both_air_hetero$data$in_out == "inside"]
#' res2 <- resids[both_air_hetero$data$in_out == "outside"]
#' plot(res1, res2)
#'
#' ### cwall_north --------
#'
#' ## Qin only
#' in_surface <- one_tm(data = cwall_north)
#' in_surface
#'
#' ## Qout only
#' out_surface <- one_tm(data = cwall_north, flux = "out")
#' out_surface
#'
#' both_air_hetero <- one_tm(data = cwall_north, flux = "both")
#' # Plot contemporaneous residuals against each other
#' resids <- residuals(both_air_hetero)
#' res1 <- resids[both_air_hetero$data$in_out == "inside"]
#' res2 <- resids[both_air_hetero$data$in_out == "outside"]
#' plot(res1, res2)
#' cor(res1, res2)
#'
#' # Inference for R1 + R2 [sum_R] and R1 / (R1 + R2) [frac_R]
#' both_air_hetero <- one_tm(data = cwall_north, flux = "both", sum_R = TRUE)
#' @export
one_tm <- function(data, flux = c("in", "out", "both"), air = TRUE,
                   init = NULL, tau = 1 / 12, hetero = TRUE,
                   correlation = FALSE, sum_R = FALSE, ep = 1e-6,
                   nls_control = NULL, gnls_control = NULL, use_gnls = FALSE,
                   use_logs = FALSE, sim_data = NULL, ...) {
  # Check that flux has a suitable value
  flux <- match.arg(flux)
  # Check that ep is positive
  if (ep <= 0) {
    stop("ep must be positive")
  }
  # Create separate data frames for Qin and Qout
  # These will probably be needed for the setting of initial estimates
  in_data_df <- create_data_df(data, flux = "in", air, tau)
  out_data_df <- create_data_df(data, flux = "out", air, tau)
  # If we are using both Qin and Qout then create a data frame for these
  if (flux == "both") {
    both_data_df <- create_data_df(data, flux, air, tau)
  }
  if (!is.null(sim_data)) {
    in_data_df <- create_data_df(sim_data, flux = "in", air, tau)
    out_data_df <- create_data_df(sim_data, flux = "out", air, tau)
    if (flux == "both") {
      both_data_df <- create_data_df(sim_data, flux = "both", air, tau)
    }
  }
  # Calculate estimates of R1, R2 and C1 based on the Q_in data only and the
  # Q_out data only.  These provide the exact estimates to send to stats::nls()
  # to obtain an "nls" fitted object and initial estimates to send to
  # nlme::gnls() when flux = "both", i.e. we use both Q_in and Q_out data
  # in_init are based on the Q_in data only
  # out_init are based on the Q_out data only
  in_init <- init_ests(flux = "in", data_df = in_data_df)
  out_init <- init_ests(flux = "out", data_df = out_data_df)
  # Also extract the estimates of the error standard deviation
  in_sigma <- attr(in_init, "sigma_hat")
  out_sigma <- attr(out_init, "sigma_hat")
  if (flux == "both" & !is.null(init)) {
    # init will only be used if flux == "both"
    # Check that init is suitable
    if (!is.vector(init, mode = "numeric")) {
      stop("init must be a numeric vector")
    }
    if (length(init) != 3) {
      stop("init must be a numeric vector of length 3")
    }
    init <- c(R1 = init[1], R2 = init[2], C1 = init[3])
  }
  # Initial estimates for (log R1, log R2, log C1) in case we need them later
  log_in_init <- log(in_init)
  names(log_in_init) <- c("logR1", "logR2", "logC1")
  log_out_init <- log(out_init)
  names(log_out_init) <- c("logR1", "logR2", "logC1")
  # Extract any arguments supplied in ..., particularly algorithm and lower
  user_args <- list(...)
  algorithm <- user_args$algorithm
  lower <- user_args$lower
  if (flux == "in" || flux == "out") {
    if (use_logs) {
      if (flux == "in") {
        f <- Qinp ~ log_in_reg_fn(x1, x2, Qinp1, logR1, logR2, logC1, tau)
        my_data <- in_data_df
        my_init <- log_in_init
      } else {
        f <- Qoutp ~ log_out_reg_fn(x1, x3, Qoutp1, logR1, logR2, logC1, tau)
        my_data <- out_data_df
        my_init <- log_out_init
      }
      fit <- stats::nls(f, data = my_data, start = my_init, ...)
    } else {
      if (flux == "in") {
        f <- Qinp ~ in_reg_fn(x1, x2, Qinp1, R1, R2, C1, tau)
        my_data <- in_data_df
        my_init <- in_init
      } else {
        f <- Qoutp ~ out_reg_fn(x1, x3, Qoutp1, R1, R2, C1, tau)
        my_data <- out_data_df
        my_init <- out_init
      }
      if (is.null(algorithm) & is.null(lower)) {
        fit <- stats::nls(f, data = my_data, start = my_init,
                          algorithm = "port", lower = 0, ...)
      } else if (!is.null(algorithm) & is.null(lower)) {
        fit <- stats::nls(f, data = my_data, start = my_init,
                          lower = if (algorithm == "port") 0 else -Inf, ...)
      } else if (is.null(algorithm) & !is.null(lower)) {
        fit <- stats::nls(f, data = my_data, start = my_init,
                          algorithm = "port", ...)
      } else {
        fit <- stats::nls(f, data = my_data, start = my_init, ...)
      }
    }
    # If sum_R = TRUE then output results for R1 + R2 and R1 / (R1 + R2)
    if (sum_R) {
      ests <- coef(fit)
      sum_init <- ests["R1"] + ests["R2"]
      frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
      my_init <- c(sum_init, frac_init, ests["C1"])
      names(my_init) <- c("sum_R", "frac_R", "C1")
      if (flux == "in") {
        f <- Qinp ~ sum_in_reg_fn(x1, x2, Qinp1, sum_R, frac_R, C1, tau)
      } else {
        f <- Qoutp ~ sum_out_reg_fn(x1, x3, Qoutp1, sum_R, frac_R, C1, tau)
      }
      fit <- stats::nls(f, data = my_data, start = my_init, algorithm = "port",
                        lower = pmax(my_init - ep, ep), upper = my_init + ep)
    }
  } else {
    my_data <- both_data_df
    # If hetero = TRUE then use nlme::ngls() to allow the error variances to be
    # different for the Qin and Qout parts of the model.
    #
    # Otherwise, use stats::nls() to estimate a common error variance.
    # In this case we start from the parameter estimates from the fit (out_fit)
    # to the Qout data alone.  This is because the Qout data tend to be much
    # more variable than the Qin data and therefore, if we assume a constant
    # error variance, the Qout part of the model dominates.
    #
    # This, i.e. stats::nls() assuming a constant error variance is also
    # used to provide decent initial values for stats::gnls() in the
    # hetero = TRUE case
    if (hetero) {
      # Get some initial estimates using stats::nls()
      fit <- stats::nls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
                                           in_out, tau),
                        data = my_data, start = out_init, lower = ep,
                        algorithm = "port", control = nls_control, ...)
      # The ratio between the respective estimated error standard deviations
      # for the individual fits to the Qin data and the Qout data
      sigma_ratio <- in_sigma / out_sigma
      nls_init <- coef(fit)
      # If nlsTol has not been set then set it too a large value
      if (is.null(gnls_control$nlsTol)) {
        gnls_control <- c(gnls_control, list(nlsTol = 100))
      }
      # Fit the model with different error variances but error correlation = 0
      fit <- nlme::gnls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
                                           in_out, tau),
                        data = my_data, start = nls_init,
                        weights = nlme::varIdent(c(inside = sigma_ratio ^ 2),
                                                 form = ~ 1 | in_out),
                        control = gnls_control, ...)
      # If sum_R = TRUE then output results for R1 + R2 and R1 / (R1 + R2)
      if (sum_R) {
        ests <- coef(fit)
        sum_init <- ests["R1"] + ests["R2"]
        frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
        my_init <- c(sum_init, frac_init, ests["C1"])
        names(my_init) <- c("sum_R", "frac_R", "C1")
        fit <- nlme::gnls(Qsp ~ sum_both_reg_fn(x1, x23, Qsp1, sum_R, frac_R,
                                                C1, in_out, tau),
                          data = my_data, start = my_init,
                          weights = nlme::varIdent(c(inside = sigma_ratio ^ 2),
                                                   form = ~ 1 | in_out),
                          control = gnls_control, ...)
        # Use logs
#        my_init <- log(my_init)
#        names(my_init) <- c("log_sum_R", "log_frac_R", "log_C1")
#        fit <- nlme::gnls(Qsp ~ log_sum_both_reg_fn(x1, x23, Qsp1, log_sum_R,
#                                                    log_frac_R, log_C1,
#                                                in_out, tau),
#                          data = my_data, start = my_init,
#                          weights = nlme::varIdent(c(inside = sigma_ratio),
#                                                   form = ~ 1 | in_out),
#                          control = gnls_control, verbose = TRUE, ...)
        # Use logs / logit
#        my_init <- c(log(my_init[1]), log(my_init[2] / (1 - my_init[2])),
#                     log(my_init[3]))
#        names(my_init) <- c("log_sum_R", "logit_frac_R", "log_C1")
#        print(my_init)
#        fit <- nlme::gnls(Qsp ~ trans_sum_both_reg_fn(x1, x23, Qsp1, log_sum_R,
#                                                    logit_frac_R, log_C1,
#                                                    in_out, tau),
#                          data = my_data, start = my_init,
#                          weights = nlme::varIdent(c(inside = sigma_ratio),
#                                                   form = ~ 1 | in_out),
#                          control = gnls_control, verbose = TRUE, ...)
      }
      if (correlation) {
#        pjn <- corSymm(form = ~ 1 | the_time)
#        pjn <- Initialize(pjn, data = my_data)
#        print(pjn)
#        print("fm4 in")
#        print(fit)
#        print(coef(fit))
#        fm4 <- update(fit, correlation = nlme::corCompSymm(value = 0,
#                                              form = ~ 1 | the_time,
#                                              fixed = FALSE))
#                                              form = ~ in_out | the_time,
#                                              form = ~ the_time | the_time,
#                                              fixed = FALSE),
#                                              start = coef(fit),
#                                              evaluate = TRUE)
#        print("fm4 out")
#        print(fm4)
#        return(fm4)
        # Get the estimates of sigma for inside and outside
        in_out_sigma <- coef(fit$modelStruct$varStruct, uncons = FALSE,
                             allCoef = TRUE)
        out_sigma <- coef(fit$modelStruct$varStruct, uncons = FALSE,
                             allCoef = FALSE)
#        print("out_sigma")
#        print(out_sigma)
        in_sig <- in_out_sigma[1]
        out_sig <- in_out_sigma[2]
        sigma_ratio <- in_out_sigma[1] / in_out_sigma[2]
#        print("sigmas")
#        print(in_out_sigma)
#        print(sigma_ratio)
        if (sum_R) {
#          print("HERE 2")
#          ests <- coef(fit)
#          sum_init <- ests["R1"] + ests["R2"]
#          frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
#          my_init <- c(sum_init, frac_init, ests["C1"])
          # Coefs from sum_R = TRUE
          my_init <- coef(fit)
          names(my_init) <- c("log_sum_R", "log_frac_R", "log_C1")
#          print(my_init)
          fit <- nlme::gnls(Qsp ~ log_sum_both_reg_fn(x1, x23, Qsp1, log_sum_R,
                                                      log_frac_R, log_C1,
                                                  in_out, tau),
                            data = my_data, start = my_init,
                            weights = nlme::varIdent(
                              c(outside = out_sigma ^ 2),
                                                     form = ~ 1 | in_out),
                            correlation = nlme::corCompSymm(value = -0.1,
                                                            form = ~ 1 | the_time,
                                                            fixed = FALSE),
                            control = gnls_control, verbose = TRUE, ...)
        }
        else if (use_logs) {
          log_nls_init <- log(nls_init)
          names(log_nls_init) <- c("logR1", "logR2", "logC1")
          fit <- nlme::gnls(Qsp ~ log_both_reg_fn(x1, x23, Qsp1, logR1,
                                                   logR2, logC1, in_out, tau),
                            data = my_data, start = log_nls_init,
                            weights = nlme::varIdent(c(inside = sigma_ratio),
                                                     form = ~ 1 | in_out),
                            correlation = nlme::corCompSymm(value = 0,
                                                            form = ~ 1 | the_time,
                                                            fixed = TRUE),
                            control = gnls_control, ...)
        } else {
          gnls_init <- coef(fit)
#          print(gnls_init)
#          print(gnls_control)
#          print("TRY")
#          print(out_sigma ^ 2)
          fit <- nlme::gnls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
                                               in_out, tau),
                            data = my_data, start = gnls_init,
                            weights = nlme::varIdent(
                              c(outside = out_sigma ^ 2),
                                                     form = ~ 1 | in_out),
                            correlation = nlme::corCompSymm(value = 0,
                                                            form = ~ 1 | the_time,
                                                            fixed = TRUE),
                            control = gnls_control, ...)
        }
      }
    } else {
      # If hetero = FALSE then the Q_out data will tend to dominate the
      # fit, because the error variance tends to be much greater for the
      # Q_out data than the Q_in data.  Therefore, we use out_init as
      # initial estimates for R1, R2 and C1
      #
      #
      #
      #
      fit <- stats::nls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
                                             in_out, tau),
                        data = both_data_df, start = out_init,
                        lower = ep, algorithm = "port", ...)
      nls_init <- coef(fit)
      if (use_gnls) {
        fit <- nlme::gnls(Qsp ~ both_reg_fn(x1, x23, Qsp1, R1, R2, C1,
                                             in_out, tau), data = my_data,
                          start = nls_init,
                          control = gnls_control, ...)
      }
      # If sum_R = TRUE then output results for R1 + R2 and R1 / (R1 + R2)
      if (sum_R) {
        ests <- coef(fit)
        sum_init <- ests["R1"] + ests["R2"]
        frac_init <- ests["R1"] / (ests["R1"] + ests["R2"])
        my_init <- c(sum_init, frac_init, ests["C1"])
        names(my_init) <- c("sum_R", "frac_R", "C1")
        f <- Qsp ~ sum_both_reg_fn(x1, x23, Qsp1, sum_R, frac_R, C1, in_out, tau)
        fit <- stats::nls(f, data = my_data, start = my_init, algorithm = "port",
                          lower = pmax(my_init - ep, ep), upper = my_init + ep)
      }
    }
  }
  # Make "walls" the first class
  class(fit) <- c("walls", class(fit))
  # Manually add the data to the object returned
  fit$data <- my_data
  if (flux == "in") {
    which_data <- "Qin only"
  } else if (flux == "out") {
    which_data <- "Qin only"
  } else {
    which_data <- "Qin and Qout"
  }
  attr(fit$data, "which_data") <- "Qin and Qout"
  # If we used nlme::gnls() the reverse the order of the classes of the
  # object returned
  if (class(fit)[2] == "gnls") {
    class(fit) <- c("walls", "gls", "gnls")
  }
  # I have found that unless I do these things plot(object) doesn't work
  # in cases where it is necessary to access the original data frame used
  # in the call to nlme::gnls()
  # Save the name of the function, for use in summary.walls().
  fit$walls_fn <- "one_tm"
  return(fit)
}
paulnorthrop/walls documentation built on May 15, 2019, 10:02 p.m.