R/ash1.wgt.R

Defines functions wgt.lim bin1.wgt ash1.wgt

Documented in ash1.wgt

################################################################################
# Function: ash1.wgt.r
# Programmer: Tony Olsen
#	Based on original script by Susan Hornsby
# Date: February 1, 2005
# Last Revised: January 27, 2012
#
#' Compute the Average Shifted Histogram (ASH) for Weighted Data
#'
#' Calculate the average shifted histogram estimate of a density based on data
#'   from a survey design with weights.
#'
#' @param x Vector of data used to estimate the density. NAs are allowed
#'
#' @param wgt Vector of Weights for each observation from a probability sample.
#'   The default is equal weights (equal probability).
#'
#' @param m The number of empty bins to add to the ends when the range is not
#'   completely specified.  The default is 5.
#'
#' @param nbin The number of bins for density estimation.  The default is 50.
#'
#' @param ab Optional range for support associated with the density. Both
#'   values may be equal to NA.  If equal to NA, then corresponding limit will
#'   be based on nicerange(). The default is NULL.
#'
#' @param support The type of support.  If equal to "Continuous", then data are
#'   from a continuous distribution.  If equal to "Ordinal", then data are from
#'   a discrete distribution defined for integers only.  The default is
#'   "Continuous".
#'
#' @return A list containing the ASH density estimate.  List consists of
#'   \itemize{
#'     \item tcen - x-coordinate for center of bin
#'     \item f - y-coordinate for density estimate height
#'   }
#'
#' @keywords survey misc
#'
#' @author Tony Olsen \email{Olsen.tony@epa.gov}
#' 
#' @references 
#'   Scott, D. W. (1985). "Averaged shifted histograms: effective nonparametric 
#'   density estimators in several dimensions." The Annals of Statistics 13(3):
#'   1024-1040.
#'
#' @examples
#' x <- rnorm(100, 10, sqrt(10))
#' wgt <- runif(100, 10, 100)
#' rslt <- ash1.wgt(x, wgt)
#' plot(rslt)
#'
#' @export
################################################################################

ash1.wgt <- function(x, wgt = rep (1, length (x)), m = 5, nbin = 50, ab = NULL,
   support = "Continuous") {

# Bin the possibly weighted data
	v <- bin1.wgt(x, wgt, nbin, ab, support=support)

# Set delta based on range and number of bins
	delta <- attr(v, "delta")
	h <- m*delta

# Set up vectors for estimation
	nbin <- attr(v,"nbin")
	a <- attr(v,"ab")[1]
	b <- attr(v,"ab")[2]

# Add m-1 empty bins on ends when no ab boundary specified
	adj <- 0
	if( is.null(ab) ) {
		v <- c(rep(0,m-1), v, rep(0,m-1))
		nbin <- nbin + 2* (m - 1)
		adj <- m - 1
	}
	else if( is.na(ab[1])) {
		v <- c(rep(0, m-1), v)
		nbin <- nbin + (m - 1)
		adj <- m-1
	}
	else if( is.na(ab[2])) {
		v <- c(v, rep(0,m-1) )
		nbin <- nbin
	}

# Compute lower limit, center, and upper limit of bins
	tlow <- a - adj*delta + ( (1:nbin) - 1.0)*delta
	tcen <- a - adj*delta + ( (1:nbin) - 0.5)*delta
	tup  <- a - adj*delta + (1:nbin)*delta

# Compute density height
	f <- rep(0,nbin)
	for(i in 1:nbin) {
		mlow <- max(1,i-m+1)
		mhi <- min(nbin,i+m-1)
		for(k in mlow:mhi) {
			if(tup[k] >= a & tlow[k] <= b )
				f[i] <- f[i] + v[k]*wgt.lim(k-i,m, mlow=mlow-i, mhi=mhi-i)
		}
	}

# Adjust height so density area equals 1
	f <- f/(delta*sum(f))

# Construct output
	ash <- list(x=tcen, y=f)
	attr(ash, "delta") <- delta
	attr(ash, "m") <- m
	attr(ash, "h") <- h
	attr(ash, "support") <- support

# Return the result
	return(ash)
}


#
# BIN algorithm for unequal-probability sample,
#

bin1.wgt <- function(x, wgt=rep(1,length(x)), nbin=50,
                     ab=nicerange(x), support="Continuous") {

# Arguments
#   x  Vector of data to be used to estimate density. NAs are allowed.
#
#   wgt  vector of weights for each observation from a probability sample.
#            default is equal weights (equal probability)
#
#   nbin  Number of bins for density estimate
#
#   ab  optional range for support associated with the density.  Both values may
#       be equal to NA.  If equal to NA, then corresponding limit will be based
#       on nicerange().
#
#   support  If equal to "Continuous", then data are from a continuous
#            distribution.  If equal to "Ordinal", then data are from a discrete
#            distribution defined for integers only.

# Remove any missing data
	x <- x[!is.na(x)]
	wgt <- wgt[!is.na(x)]
	n <- length(x)

# Check that nbin is positive
	if (nbin <= 0)
		stop("\nNumber of bin intervals nonpositive")

# Check for ab range
	tmp <- nicerange(x)
	if(is.null(ab)) {
		ab <- tmp
	} else {
		if( is.na(ab[1]) ) ab[1] <- tmp[1]
		if( is.na(ab[2]) ) ab[2] <- tmp[2]
		if (ab[1] >= ab[2])
			stop("\nInterval vector has negative orientation")
	}

# Determine delta
# Continuous data case
	if(support == "Continuous")
	delta <- (ab[2] - ab[1])/	nbin
	if(support == "Ordinal") {
		delta <- 1
		ab[1] <- floor(ab[1]) - 0.5
		ab[2] <- ceiling(ab[2]) + 0.5
		nbin <- ab[2] - ab[1] + 1
	}

# Sum weighted data in bins
	v <- rep(0,nbin)
	for(k in 1:nbin) {
		for(i in 1:n) {
			v[k] <- ifelse(((ab[1]+(k-1)*delta)<=x[i]) & (x[i]<(ab[1]+k*delta)),
                     v[k] + wgt[i], v[k])
		}
	}
	attr(v, "nbin") <- nbin
	attr(v, "ab") <- ab
	attr(v, "delta") <- delta
	attr(v, "support") <- support

# Return the result
	return(v)
}


#
# Define weight function
#
wgt.lim <- function(i, m, mlow=(1-m), mhi=(m-1)) {
	K <- function(t) { (15/16)*(1-t^2)^2 }
	I <- mlow:mhi
	w <- m*K(i/m)/sum(K(I/m))
	return(w)
}


#
# Find nice range for binning
#

nicerange <- function (x, beta = 0.1) {
	ab <- range(x)
	del <- ((ab[2] - ab[1]) * beta)/2
	return(c(ab + c(-del, del)))
}
mhweber/spsurvey documentation built on Sept. 17, 2020, 4:24 a.m.