R/numspec_fulldistribution.R

Defines functions speciesnum.ModelSite.theta sumspeciesRV_raw predsumspeciesRV_newdata predsumspeciesRV

Documented in predsumspeciesRV predsumspeciesRV_newdata sumspeciesRV_raw

#' @title Compute predicted distributions of number species detected
#' @description Uses [discreteRV] to compute distribution of number of species 
#' @examples
#' fit <- readRDS("./tmpdata/7_3_02_clim_someclimate_year_woody500m_msnm_det1stO.rds")
#' indata <- readRDS("./private/data/clean/7_2_10_input_data.rds")
#' det_dist_2060 <- predsumspeciesRV_newdata(fit,
#'                          Xocc = indata$holdoutdata$Xocc[indata$holdoutdata$Xocc$ModelSiteID == 2060, , drop = FALSE],
#'                          Xobs = indata$holdoutdata$yXobs[indata$holdoutdata$yXobs$ModelSiteID == 2060, , drop = FALSE],
#'                          ModelSiteVars = "ModelSiteID")

#' @param fit A runjags fitted object
#' @param UseFittedLV If TRUE the fitted LV variables are used, if false then 100 LV values are simulated.
#' @param chains The chains of MCMC to use. Default is all chains.
#' @param cl A cluster object created by parallel::makeCluster. If NULL no cluster is used.
#' @param type If "draws" then a list of distributions for each site and draw is returned
#' If "marginal" then a single distribution for each site is returned and this distribution is marginalised over the draws.
#' @export
predsumspeciesRV <- function(fit, chains = NULL, UseFittedLV = TRUE, nLVsim = 100, type = "marginal", cl = NULL){
  stopifnot(type %in% c("draws", "marginal"))
  
  fit$data <- as_list_format(fit$data)
  if (is.null(chains)){chains <- 1:length(fit$mcmc)}
  draws <- do.call(rbind, fit$mcmc[chains])
  if ( (is.null(fit$data$nlv)) || (fit$data$nlv == 0)){ #LVs not in model, add dummy variables
    LVbugs <- matrix2bugsvar(matrix(0, nrow = nrow(fit$data$Xocc), ncol = 2), "LV")
    LVbugs.draws <- Rfast::rep_row(LVbugs, nrow(draws))
    colnames(LVbugs.draws) <- names(LVbugs)
    
    lv.coef.bugs <- matrix2bugsvar(matrix(0, nrow = fit$data$n, ncol = 2), "lv.coef")
    lv.coef.draws <- Rfast::rep_row(lv.coef.bugs, nrow(draws))
    colnames(lv.coef.draws) <- names(lv.coef.bugs)
    draws <- cbind(draws, lv.coef.draws, LVbugs.draws)
    fit$data$nlv <- 2
    UseFittedLV <- TRUE #calculations faster when not simulating 1000s of LV values, especially since they are all ignored here.
  } 
  
  if (UseFittedLV){nLVsim <- NULL} #don't pass number of simulations if not going to use them
  
  numspecRV_drawsites <- sumspeciesRV_raw(
    Xocc = fit$data$Xocc,
    Xobs = fit$data$Xobs,
    ModelSite = fit$data$ModelSite,
    numspecies = fit$data$n,
    nlv = fit$data$nlv,
    draws = draws,
    useLVindraws = UseFittedLV,
    nLVsim = nLVsim,
    cl = cl
  )
  # convert predictions for each site and theta into predictions for each site, marginal across theta distribution
  if (type == "draws") {out <- numspecRV_drawsites}
  if (type == "marginal") {
    numspecRVs <- numspecRV_drawsites[["numspecRVs"]]
    sitedrawidxs <- numspecRV_drawsites[["sitedrawidxs"]]
    sumRVs_margpost <- pbapply::pblapply(unique(sitedrawidxs[["siteidx"]]),
                             function(siteidx){
                               sumRV_margpost <- randselRV(numspecRVs[sitedrawidxs[["siteidx"]] == siteidx])
                               return(sumRV_margpost)
                             },
                             cl = cl)
    return(sumRVs_margpost)
  }
}

#' @describeIn predsumspeciesRV Distribution of species numbers for new ModelSite
predsumspeciesRV_newdata <- function(fit, Xocc, Xobs = NULL, ModelSiteVars = NULL, chains = NULL, nLVsim = 100, type = "marginal", cl = NULL){
  stopifnot(type %in% c("draws", "marginal"))
  
  if (!is.null(Xobs)) {datalist <- prep_new_data(fit, Xocc, Xobs, ModelSite = ModelSiteVars)}
  else{datalist <- list(
    Xocc = apply.designmatprocess(fit$XoccProcess, Xocc),
    Xobs = NULL,
    ModelSite = NULL
  )}
  UseFittedLV <- FALSE # no LV available for new model sites
  
  fit$data <- as_list_format(fit$data)
  if (is.null(chains)){chains <- 1:length(fit$mcmc)}
  draws <- do.call(rbind, fit$mcmc[chains])
  
  if ( (is.null(fit$data$nlv)) || (fit$data$nlv == 0)){ #LVs not in model, add dummy variables
    lv.coef.bugs <- matrix2bugsvar(matrix(0, nrow = fit$data$n, ncol = 2), "lv.coef")
    lv.coef.draws <- Rfast::rep_row(lv.coef.bugs, nrow(draws))
    colnames(lv.coef.draws) <- names(lv.coef.bugs)
    draws <- cbind(draws, lv.coef.draws)
    fit$data$nlv <- 2
    nLVsim = 2 #calculations faster when not simulating 1000s of LV values, especially since they are all ignored here.
  }
  
  numspecRV_drawsites <- sumspeciesRV_raw(
    Xocc = datalist$Xocc,
    Xobs = datalist$Xobs,
    ModelSite = datalist$ModelSite,
    numspecies = fit$data$n,
    nlv = fit$data$nlv,
    draws = draws,
    useLVindraws = FALSE,
    nLVsim = nLVsim,
    cl = cl
  )
  if (type == "draws") {out <- numspecRV_drawsites}
  if (type == "marginal") {
    numspecRVs <- numspecRV_drawsites[["numspecRVs"]]
    sitedrawidxs <- numspecRV_drawsites[["sitedrawidxs"]]
    sumRVs_margpost <- pbapply::pblapply(unique(sitedrawidxs[["siteidx"]]),
                                         function(siteidx){
                                           sumRV_margpost <- randselRV(numspecRVs[sitedrawidxs[["siteidx"]] == siteidx])
                                           return(sumRV_margpost)
                                         },
                                         cl = cl)
    return(sumRVs_margpost)
  }
  return(out)
}


#' @param Xocc A matrix of occupancy covariates. Must have a single row. Columns correspond to covariates.
#' @param Xobs A matrix of detection covariates, each row is a visit. If NULL then expected number of species in occupation is returned
#' @param theta A vector of model parameters, labelled according to the BUGS labelling convention seen in runjags
#' @param LVvals A matrix of LV values. Each column corresponds to a LV. To condition on specific LV values, provide a matrix of row 1.
#' @describeIn predsumspeciesRV For raw model information. Returns distributions per draw.
#' @export
sumspeciesRV_raw <- function(Xocc, Xobs = NULL, ModelSite = NULL, numspecies, nlv, draws, useLVindraws = TRUE, nLVsim = NULL, cl = NULL){
  # prepare parameters
  nspecall <- numspecies
  ndraws <- nrow(draws)
  nsites <- nrow(Xocc)
  noccvar <- ncol(Xocc)
  if (!is.null(Xobs)) {
    nobsvar <- ncol(Xobs)
    ModelSiteIdxs <- ModelSite
    stopifnot(length(ModelSiteIdxs) == nrow(Xobs))
    stopifnot(all(ModelSiteIdxs %in% 1:nsites))
    if (!all(1:nsites %in% ModelSiteIdxs)){warning("Some ModelSite do not have observation covariate information.")}
  }
  u.b <- bugsvar2array(draws, "u.b", 1:nspecall, 1:noccvar)
  if (!is.null(Xobs)) {v.b <- bugsvar2array(draws, "v.b", 1:nspecall, 1:nobsvar)}
  lv.coef <- bugsvar2array(draws, "lv.coef", 1:nspecall, 1:nlv)
  
  if (useLVindraws){stopifnot(is.null(nLVsim))}
  if (!useLVindraws){stopifnot(is.numeric(nLVsim))}
  
  
  sitedrawidxs <- expand.grid(siteidx = 1:nsites, drawidx = 1:ndraws) 
  
  if (!useLVindraws){ # predicting as if LVs not known, so simulate from their distribution
    lvsim <- matrix(rnorm(nlv * nLVsim), ncol = nlv, nrow = nLVsim)
  } else {
    LVvals <- bugsvar2array(draws, "LV", 1:nsites, 1:nlv)
  }
  
  
  # for each modelsite and each draw apply the following function:
  # note that I've had to use pblapply because pbapply was doing some weird simplifications
  numspecRVs <- pbapply::pblapply(1:nrow(sitedrawidxs), 
                               function(sitedrawidx_row){
                                 sitedrawidx <- sitedrawidxs[sitedrawidx_row, , drop = FALSE]
                                 Xocc <- Xocc[sitedrawidx[["siteidx"]], , drop = FALSE]
                                 if (!is.null(Xobs)) {Xobs <- Xobs[ModelSiteIdxs == sitedrawidx[["siteidx"]], , drop = FALSE]}
                                 u.b_theta <- matrix(u.b[,, sitedrawidx[["drawidx"]] ], nrow = nspecall, ncol = noccvar)
                                 if (!is.null(Xobs)) {v.b_theta <- matrix(v.b[,, sitedrawidx[["drawidx"]] ], nrow = nspecall, ncol = nobsvar)}
                                 else {v.b_theta = NULL}
                                 lv.coef_theta <- matrix(lv.coef[,, sitedrawidx[["drawidx"]] ], nrow = nspecall, ncol = nlv)
                                 if (useLVindraws){
                                   LVvals_thetasite <- matrix(LVvals[sitedrawidx[["siteidx"]], , sitedrawidx[["drawidx"]], drop = FALSE], nrow = 1, ncol = nlv)
                                 } else {
                                   LVvals_thetasite <- lvsim
                                 }
                                 sumRV <- speciesnum.ModelSite.theta(Xocc = Xocc, Xobs = Xobs,
                                                                                          u.b = u.b_theta,
                                                                                          v.b = v.b_theta,
                                                                                          lv.coef = lv.coef_theta,
                                                                                          LVvals = LVvals_thetasite)
                                 return(sumRV)
                               },
                               cl = cl)
  
  return(list(
    numspecRVs = numspecRVs,
    sitedrawidxs = sitedrawidxs
  ))
}

speciesnum.ModelSite.theta <- function(Xocc, Xobs = NULL, u.b, v.b = NULL, lv.coef = NULL, LVvals = NULL){
  ## Probability of Site Occupancy
  stopifnot(nrow(Xocc) == 1)
  if (is.null(Xobs)) {stopifnot(is.null(v.b))}
  Xocc <- as.matrix(Xocc)
  ModelSite.Occ.Pred.CondLV <- poccupy.ModelSite.theta(Xocc, u.b, lv.coef, LVvals)
  
  if (is.null(Xobs)){
    sum_occ_margLV <- sumRV_margrow(ModelSite.Occ.Pred.CondLV)
    return(sum_occ_margLV)
  }
  
  ## Probability of Detection
  if (!is.null(Xobs)){
    Detection.Pred.Cond <- pdetection_occupied.ModelSite.theta(Xobs, v.b) # probability conditional on occupied
    # probability of no detections, given occupied
    NoDetections.Pred.Cond <- Rfast::colprods(1 - Detection.Pred.Cond)
    
    
    ## probability of no detections, marginal on occupancy
    NoDetections.Pred.marg_1 <- Rfast::eachrow(ModelSite.Occ.Pred.CondLV, NoDetections.Pred.Cond, oper = "*")  #occupied component
    NoDetections.Pred.marg_2 <- 1 - ModelSite.Occ.Pred.CondLV  #plus unoccupied component
    AnyDetections.Pred.marg <- 1 - (NoDetections.Pred.marg_1 + NoDetections.Pred.marg_2)
    sum_det_margLV <- sumRV_margrow(AnyDetections.Pred.marg)
    return(sum_det_margLV)
  }
}
sustainablefarms/linking-data documentation built on Oct. 28, 2020, 2:41 a.m.