R/sarabc.R

Defines functions sim_sar sar_abc print.sarabc c.sarabc

Documented in sar_abc sim_sar

#' Simulates a SAR model
#' @param G Square matrix of class \code{dgCMatrix} of size \eqn{n\times n}{n*n}.
#' @param X Numeric matrix of covariates of size \eqn{n\times k}{n * k}.
#' @param rho Numeric scalar.
#' @param beta Numeric colvector of length \eqn{k}.
#' @param p Number of iterations in the approximation to \eqn{(I_n - \rho W)^{-1}}.
#' @param xmean Numeric vector of length \eqn{n}.
#' 
#' @details Follows a model SAR
#' 
#' \deqn{%
#' y = \rho \mathbf{W}y + X\beta + \varepsilon,\quad\varepsilon\sim \mbox{MVN}(0,\sigma^2 I_n)
#' }{%
#' y = rho * W * y + X*beta + eps, eps ~ MVN(0, sigma^2 *I_n)
#' }
#' 
#' @return A numeric vector of length \eqn{n}.
#' 
#' @export
sim_sar <- function(G, X, rho, beta, p=5, xmean=rep(0, nrow(G))) {
  eps <- mvtnorm::rmvnorm(1, xmean)
  
  dim(eps) <- c(nrow(G),1)
  
  out <- Matrix::Diagonal(nrow(G)) + rho*G
  for (i in 1:(p-1)) {
    G <- G %*% G
    rho <- rho*rho;
    out <- out + rho*G
  }
  
  as.vector(out %*% (X %*% beta + eps))
}

#' @rdname sim_sar
#' @export
sar_sim<-sim_sar

#' SAR estimation using Aproximate Bayesian Computation
#' @param y Numeric vector of length \eqn{n}. Dependent variable.
#' @param X Numeric matrix of size \eqn{n\times k}{n*k}. Covariates.
#' @param W Square matrix of class \code{dgCMatrix} of size \eqn{n\times n}{n*n}.
#' @param rho Numeric vector of length \eqn{N}. Prior of \eqn{\rho}{rho}.
#' @param beta Numeric matrix of size \eqn{N\times k}{N*k}. Prior of \eqn{\beta}{beta}.
#' @param N Integer scalar. Number of simulations to run.
#' @param p Integer scalar. See \code{\link{sar_sim}}.
#' @param no_inv Logical scalar. When \code{FALSE} runs the model approximating
#' the inverse of I_n - rho W. Otherwise naively predicts \eqn{y} as in OLS.
#' @param no_moran Logical scalar. When \code{TRUE} does not includes Moran's I
#' in the set of statistics to compute distances.
#' @param keep Numeric scalar between (0,1]. Sets what proportion of the simulated
#' data to keep after ranking according to distances.
#' @param cl An object generated by \code{\link[parallel:makeCluster]{makeCluster}}.
#' @param ... Ignored.
#' @param sweights Numeric vector of length 3. Weights for the distance.
#' @return An object of class sar_abc
#' @examples 
#' # Simple example ------------------------------------------------------------
#' set.seed(133)
#' 
#' # Parameters
#' n <- 200
#' rho  <- .25
#' beta <- -.6
#' 
#' # Dataset
#' W    <- netdiffuseR::rgraph_ws(n, 6, .15)
#' W    <- W/(Matrix::rowSums(W) + 1e-15)
#' X <- matrix(rnorm(n*1), ncol=1)
#' y <- sim_sar(W, X, rho, beta)
#' 
#' # Estimating
#' res <- sar_abc(y, X, W, N=1e4)
#' res
#' 
#' # Comparing with OLS
#' lm(y~0+I(as.matrix(W %*% y)) +X , data.frame(y,X))
#' 
#' # Comparing with sphet
#' \dontrun{
#' library(sphet)
#' spreg(y~X, data.frame(y,X), listw = mat2listw(W), model = 'lag')
#' }
#' @export
sar_abc <- function(
  y, X, W,
  rho  = runif(N,-1,1),
  beta = matrix(runif(N*ncol(X), -2,2), nrow=N),
  sweights = c(1,2,2),
  N=1e4L, p=5L, no_inv=FALSE, no_moran=FALSE, keep=1e3L, cl=NULL,  ...) {
  
  # Names of stats
  nam <- c("MSE", "Mean(y)", "Moran's I")
  
  # Normalizing weights
  sweights <- sweights/sum(sweights)
  
  if (length(cl)) {
    # Dividing the job
    Ns <- clusterSplit(cl, 1:N)
    
    # Calling the library
    res <- parallel::parLapply(cl, X=Ns, function(X,y,cov,W,rho,beta,p,no_inv,no_moran,sweights) {
      sar_abc_cpp(y, cov, W, rho[X], beta[X,,drop=FALSE], sweights, length(X), p, no_inv, no_moran)
    }, y=y, cov=X, W=W, rho=rho, beta=beta, p=p, no_inv=no_inv, no_moran=no_moran, sweights=sweights)
    
    res <- lapply(res, function(r) {
      r$keep    <- keep
      r$N       <- N
      r$include <- NA
      
      names(r$stats0)    <- nam
      colnames(r$stats1) <- nam
      
      colnames(r$beta) <- colnames(X)
      structure(r, class = 'sarabc')
    })
    
    res <- do.call(c, res)
    return(res)
    
  }
  
  # Serial
  res <- sar_abc_cpp(y, X, W, rho, beta, sweights, N, p, no_inv, no_moran)
  
  # Checking which to include
  res$include <- which(rank(res$distance) <= keep)
  res$keep <- keep
  res$N    <- N
  
  names(res$stats0)    <- nam
  colnames(res$stats1) <- nam
  
  colnames(res$beta) <- colnames(X)
  structure(res, class = 'sarabc')
  
}

#' @export
print.sarabc <- function(x, keep=x$keep, ...) {
  inc <- which(rank(x$distance) <= min(max(keep,1), x$N))
  
  cat(sprintf("-- Simulated parameters (%d/%d) --\n", length(inc), x$N))
  print(with(x, apply(data.frame(rho, beta)[inc,], 2, summary)))
  
  cat(sprintf("\n-- Target summary stats --\n Observed:\n"))
  print(x$stats0)
  cat("\n Simulated:\n")
  print(apply(x$stats1[inc,,drop=FALSE],2,summary))
  invisible(x)
}

#' @export
c.sarabc <- function(..., recursive=FALSE) {
  sars <- list(...)
  
  sars <- sars[sapply(sars, length)>0]
  
  test <- which(!sapply(sars, inherits, what = "sarabc"))
  if (length(test)) 
    stop("Some objects are not of class -sarabc-:\n\t", 
         paste0(test, collapse = "', "), ".")
  
  # Merging outcomes
  res <- sars[[1]]
  for (s in sars[-1]) {
    res[['rho']] <- c(res[['rho']], s[['rho']])
    res[['beta']] <- rbind(res[['beta']], s[['beta']])
    res[['distance']] <- c(res[['distance']], s[['distance']])
    res[['stats1']] <- rbind(res[['stats1']], s[['stats1']])
  }
  
  res[['N']] <- length(res[['rho']])
  res[['include']] <- which(rank(res[['distance']]) <= min(max(res[['keep']], 1), res[['N']]))
  
  res
}
gvegayon/sarabc documentation built on May 17, 2019, 9:30 a.m.