################################################################################
# Function: cdf.test
# Programmer: Tom Kincaid
# Date: August 23, 2000
# Last Revised: January 25, 2016
#
#' Test for Difference Between Two Estimated Cumulative Distribution Functions
#'
#' This function tests for differences between cumulative distribution functions
#' (CDFs) generated by probability surveys. The function returns a variety of
#' test statistics along with their degrees of freedom and p values. The
#' inferential procedures divide the CDFs into a discrete set of intervals
#' (classes) and then utilize procedures that have been developed for analysis
#' of categorical data from probability surveys. The function calculates the
#' Wald, Rao-Scott first order corrected (mean eigenvalue corrected), and
#' Rao-Scott second order corrected (Satterthwaite corrected) test statistics.
#' Both standard versions of the three statistics, which are distributed as
#' Chi-squared random variables, and alternate version of the statistics, which
#' are distributed as F random variables, are available. The default test
#' statistic is the F distribution version of the Wald statistic. The user
#' supplies the set of upper bounds that define the intervals (classes) into
#' which the CDFs are divided (binned). The minimum number of classes is three.
#' The Horvitz-Thompson ratio estimator, i.e., the ratio of two Horvitz-
#' Thompson estimators, is used to calculate estimates of the class proportions
#' for the CDFs. Variance estimates for the test statistics are calculated
#' using either the local mean variance estimator or the simple random sampling
#' (SRS) variance estimator. The choice of variance estimator is subject to
#' user control. The SRS variance estimator uses the independent random sample
#' approximation to calculate joint inclusion probabilities. The function can
#' accommodate a stratified sample. For a stratified sample, separate class
#' proportion estimates and associated covariance estimates are calculated for
#' each stratum, which are used to produce estimates for all strata combined.
#' Strata that contain a single value are removed. For a stratified sample,
#' when either the size of the resource or the sum of the size-weights of the
#' resource is provided for each stratum, those values are used as stratum
#' weights for calculating the estimates for all strata combined. For a
#' stratified sample when neither the size of the resource nor the sum of the
#' size-weights of the resource is provided for each stratum, estimated values
#' are used as stratum weights for calculating the estimates for all strata
#' combined. The function can accommodate single-stage and two-stage samples
#' for both stratified and unstratified sampling designs. Finite population and
#' continuous population correction factors can be utilized in variance
#' estimation. The function checks for compatability of input values and
#' removes missing values.
#'
#' @param bounds Vector of upper bounds that define classes for the CDFs,
#' which must contain at least two values.
#'
#' @param z_1 Vector of response value for each sample one site.
#'
#' @param wgt_1 Vector of final adjusted weight (inverse of the sample
#' inclusion probability) for each sample one site, which is either the weight
#' for a single- stage sample or the stage two weight for a two-stage sample.
#'
#' @param x_1 Vector of x-coordinate for location for each sample one site,
#' which is either the x-coordinate for a single-stage sample or the stage two
#' x-coordinate for a two-stage sample. The default is NULL.
#'
#' @param y_1 Vector of y-coordinate for location for each sample one site,
#' which is either the y-coordinate for a single-stage sample or the stage two
#' y-coordinate for a two-stage sample. The default is NULL.
#'
#' @param z_2 Vector of response value for each sample two site.
#'
#' @param wgt_2 Vector of final adjusted weight (inverse of the sample
#' inclusion probability) for each sample two site, which is either the weight
#' for a single- stage sample or the stage two weight for a two-stage sample.
#'
#' @param x_2 Vector of x-coordinate for location for each sample two site,
#' which is either the x-coordinate for a single-stage sample or the stage two
#' x-coordinate for a two-stage sample. The default is NULL.
#'
#' @param y_2 Vector of y-coordinate for location for each sample two site,
#' which is either the y-coordinate for a single-stage sample or the stage two
#' y-coordinate for a two-stage sample. The default is NULL.
#'
#' @param stratum_1 Vector of the stratum for each sample one site. The
#' default is NULL.
#'
#' @param stratum_2 Vector of the stratum for each sample two site. The
#' default is NULL.
#'
#' @param cluster_1 Vector of the stage one sampling unit (primary sampling
#' unit or cluster) code for each sample one site. The default is NULL.
#'
#' @param cluster_2 Vector of the stage one sampling unit (primary sampling
#' unit or cluster) code for each sample two site. The default is NULL.
#'
#' @param wgt1_1 Vector of the final adjusted stage one weight for each sample
#' one site. The default is NULL.
#'
#' @param x1_1 Vector of the stage one x-coordinate for location for each
#' sample one site. The default is NULL.
#'
#' @param y1_1 Vector of the stage one y-coordinate for location for each
#' sample one site. The default is NULL.
#'
#' @param wgt1_2 Vector of the final adjusted stage one weight for each sample
#' two site. The default is NULL.
#'
#' @param x1_2 Vector of the stage one x-coordinate for location for each
#' sample two site. The default is NULL.
#'
#' @param y1_2 Vector of the stage one y-coordinate for location for each
#' sample two site. The default is NULL.
#'
#' @param popsize_1 The known size of the sample one resource - the total
#' number of sampling units of a finite resource or the measure of an
#' extensive resource, which is required for calculation of finite and
#' continuous population correction factors for a single-stage sample. For a
#' stratified sample, this variable also is used to calculate strata weights.
#' For a stratified sample, this variable must be a vector containing a value
#' for each stratum and must have the names attribute set to identify the
#' stratum codes. The default is NULL.
#'
#' @param popsize_2 The known size of the sample two resource - the total
#' number of sampling units of a finite resource or the measure of an
#' extensive resource, which is required for calculation of finite and
#' continuous population correction factors for a single-stage sample. For a
#' stratified sample, this variable also is used to calculate strata weights.
#' For a stratified sample, this variable must be a vector containing a value
#' for each stratum and must have the names attribute set to identify the
#' stratum codes. The default is NULL.
#'
#' @param popcorrect_1 Logical value that indicates whether finite or
#' continuous population correction factors should be employed during variance
#' estimation for sample one, where TRUE = use the correction factor and FALSE
#' = do not use the correction factor. The default is FALSE. To employ the
#' correction factor for a single-stage sample, values must be supplied for
#' arguments pcfsize_1 and support_1. To employ the correction factor for a
#' two-stage sample, values must be supplied for arguments N.cluster_1,
#' stage1size_1, and support_1.
#'
#' @param pcfsize_1 Size of the sample one resource, which is required for
#' calculation of finite and continuous population correction factors for a
#' single-stage sample. For a stratified sample this argument must be a
#' vector containing a value for each stratum and must have the names
#' attribute set to identify the stratum codes. The default is NULL.
#'
#' @param N.cluster_1 The number of stage one sampling units in the sample one
#' resource, which is required for calculation of finite and continuous
#' population correction factors for a two-stage sample. For a stratified
#' sample this variable must be a vector containing a value for each stratum
#' and must have the names attribute set to identify the stratum codes. The
#' default is NULL.
#'
#' @param stage1size_1 Size of the stage one sampling units of a two-stage
#' sample for sample one, which is required for calculation of finite and
#' continuous population correction factors for a two-stage sample and must
#' have the names attribute set to identify the stage one sampling unit codes.
#' For a stratified sample, the names attribute must be set to identify both
#' stratum codes and stage one sampling unit codes using a convention where
#' the two codes are separated by the & symbol, e.g., "Stratum 1&Cluster 1".
#' The default is NULL.
#'
#' @param support_1 Vector of the support value for each sample one site - the
#' value one (1) for a site from a finite resource or the measure of the
#' sampling unit associated with a site from an extensive resource, which is
#' required for calculation of finite and continuous population correction
#' factors. The default is NULL.
#'
#' @param popcorrect_2 Logical value that indicates whether finite or
#' continuous population correction factors should be employed during variance
#' estimation for sample two, where TRUE = use the correction factor and FALSE
#' = do not use the correction factor. The default is FALSE. To employ the
#' correction factor for a single-stage sample, values must be supplied for
#' arguments pcfsize_2 and support_2. To employ the correction factor for a
#' two-stage sample, values must be supplied for arguments N.cluster_2,
#' stage1size_2, and support_2.
#'
#' @param pcfsize_2 Size of the sample two resource, which is required for
#' calculation of finite and continuous population correction factors for a
#' single-stage sample. For a stratified sample this argument must be a
#' vector containing a value for each stratum and must have the names
#' attribute set to identify the stratum codes. The default is NULL.
#'
#' @param N.cluster_2 The number of stage one sampling units in the sample two
#' resource, which is required for calculation of finite and continuous
#' population correction factors for a two-stage sample. For a stratified
#' sample this variable must be a vector containing a value for each stratum
#' and must have the names attribute set to identify the stratum codes. The
#' default is NULL.
#'
#' @param stage1size_2 Vector of the size of the stage one sampling units of a
#' two-stage sample for sample two, which is required for calculation of
#' finite and continuous population correction factors for a two-stage sample
#' and must have the names attribute set to identify the stage one sampling
#' unit codes. For a stratified sample, the names attribute must be set to
#' identify both stratum codes and stage one sampling unit codes using a
#' convention where the two codes are separated by the & symbol, e.g.,
#' "Stratum 1&Cluster 1". The default is NULL.
#'
#' @param support_2 Vector of the support value for each sample two site - the
#' value one (1) for a site from a finite resource or the measure of the
#' sampling unit associated with a site from an extensive resource, which is
#' required for calculation of finite and continuous population correction
#' factors. The default is NULL.
#'
#' @param sizeweight_1 Logical value that indicates whether size-weights
#' should be used in the analysis for sample one, where TRUE = use the
#' size-weights and FALSE = do not use the size-weights. The default is
#' FALSE.
#'
#' @param swgt_1 Vector of the size-weight for each sample one site, which is
#' the stage two size-weight for a two-stage sample. The default is NULL.
#'
#' @param swgt1_1 Vector of the stage one size-weight for each sample one
#' site. The default is NULL.
#'
#' @param sizeweight_2 Logical value that indicates whether size-weights
#' should be used in the analysis for sample two, where TRUE = use the
#' size-weights and FALSE = do not use the size-weights. The default is
#' FALSE.
#'
#' @param swgt_2 Vector of the size-weight for each sample two site, which is
#' the stage two size-weight for a two-stage sample. The default is NULL.
#'
#' @param swgt1_2 Vector of the stage one size-weight for each sample two
#' site. The default is NULL.
#'
#' @param vartype_1 The choice of variance estimator for sample one, where
#' "Local" = local mean estimator and "SRS" = SRS estimator. The default is
#' "Local".
#'
#' @param vartype_2 The choice of variance estimator for sample two, where
#' "Local" = local mean estimator and "SRS" = SRS estimator. The default is
#' "Local".
#'
#' @param check.ind Logical value that indicates whether compatability
#' checking of the input values is conducted, where TRUE = conduct
#' compatibility checking and FALSE = do not conduct compatibility checking.
#' The default is TRUE.
#'
#' @param warn.ind Logical value that indicates whether warning messages were
#' generated, where TRUE = warning messages were generated and FALSE = warning
#' messages were not generated. The default is NULL.
#'
#' @param warn.df A data frame for storing warning messages. The default is
#' NULL.
#'
#' @param warn.vec Vector that contains names of the population type, the
#' subpopulation, and an indicator. The default is NULL.
#'
#' @return An object in data frame format containing the test statistic, degrees
#' of freedom (two values labeled Degrees of Freedom_1 and Degrees of
#' Freedom_2), and p value for the Wald, mean eigenvalue, and Satterthwaite
#' test procedures, which includes both Chi-squared distribution and F
#' distribution versions of the procedures. For the Chi-squared versions of
#' the test procedures, Degrees of Freedom_1 contains the relevant value and
#' Degrees of Freedom_2 is set to missing (NA). For the F-based versions of
#' the test procedures Degrees of Freedom_1 contains the numerator degrees of
#' freedom and Degrees of Freedom_2 contains the denominator degrees of
#' freedom.
#'
#' @section Other Functions Required:
#' \describe{
#' \item{\code{\link{input.check}}}{check input values for errors,
#' consistency, and compatibility with analytical functions}
#' \item{\code{\link{wnas}}}{remove missing values}
#' \item{\code{\link{vecprint}}}{takes an input vector and outputs a
#' character string with line breaks inserted}
#' \item{\code{\link{cdf.test.prop}}}{calculates an estimate of the
#' population proportions in the set of classes}
#' \item{\code{\link{cdf.test.size.prop}}}{calculates a size-weighted
#' estimate of the population proportions in the set of classes}
#' \item{\code{\link{cdfvar.test}}}{calculates estimates of the
#' variance-covariance matrix of the population proportions in the set of
#' classes}
#' }
#'
#' @author Tom Kincaid \email{Kincaid.Tom@epa.gov}
#'
#' @keywords survey
#'
#' @examples
#' n <- 100
#' resp <- rnorm(n, 10, 1)
#' wgt <- runif(n, 10, 100)
#' sample1 <- list(z=resp, wgt=wgt)
#' sample2 <- list(z=resp+0.5, wgt=wgt)
#' bounds <- sort(c(sample1$z, sample2$z))[floor(seq((2*n)/3, (2*n),
#' length=3))]
#' cdf.test(bounds=bounds, z_1=sample1$z, wgt_1=sample1$wgt, z_2=sample2$z,
#' wgt_2=sample2$wgt, vartype_1="SRS", vartype_2="SRS")
#'
#' xcoord <- runif(n)
#' ycoord <- runif(n)
#' sample1 <- list(z=resp, wgt=wgt, x=xcoord, y=ycoord)
#' sample2 <- list(z=1.05*resp, wgt=wgt, x=xcoord, y=ycoord)
#' cdf.test(bounds=bounds, z_1=sample1$z, wgt_1=sample1$wgt, x_1=sample1$x,
#' y_1=sample1$y, z_2=sample2$z, wgt_2=sample2$wgt, x_2=sample2$x,
#' y_2=sample2$y)
#'
#' @export
################################################################################
cdf.test <- function(bounds, z_1, wgt_1, x_1 = NULL, y_1 = NULL, z_2, wgt_2,
x_2 = NULL, y_2 = NULL, stratum_1 = NULL, stratum_2 = NULL, cluster_1 = NULL,
cluster_2 = NULL, wgt1_1 = NULL, x1_1 = NULL, y1_1 = NULL, wgt1_2 = NULL,
x1_2 = NULL, y1_2 = NULL, popsize_1 = NULL, popsize_2 = NULL,
popcorrect_1 = FALSE, pcfsize_1 = NULL, N.cluster_1 = NULL,
stage1size_1 = NULL, support_1 = NULL, popcorrect_2 = FALSE,
pcfsize_2 = NULL, N.cluster_2 = NULL, stage1size_2 = NULL, support_2 = NULL,
sizeweight_1 = FALSE, swgt_1 = NULL, swgt1_1 = NULL, sizeweight_2 = FALSE,
swgt_2 = NULL, swgt1_2 = NULL, vartype_1 = "Local", vartype_2 = "Local",
check.ind = TRUE, warn.ind = NULL, warn.df = NULL, warn.vec = NULL) {
# As necessary, create a data frame for warning messages
if(is.null(warn.ind)) {
warn.ind <- FALSE
warn.df <- NULL
warn.vec <- rep(NA, 3)
}
fname <- "cdf.test"
# Check for existence of response variables and determine the number of
# values
if(is.null(z_1))
stop("\nValues must be provided for the sample one response variable.")
if(!is.numeric(z_1))
stop("\nValues for the sample one response variable must be numeric.")
nresp_1 <- length(z_1)
if(is.null(z_2))
stop("\nValues must be provided for the sample two response variable.")
if(!is.numeric(z_2))
stop("\nValues for the sample two response variable must be numeric.")
nresp_2 <- length(z_2)
# Ensure upper bounds that define classes for the CDFs are unique
ind <- any(duplicated(bounds))
if(ind) {
bounds <- unique(bounds)
warn.ind <- TRUE
warn <- paste("The argument providing upper bounds that define classes for the CDFs \nincludes duplicated values.\n", sep="")
act <- "The bounds argument was modified to remove duplicated values.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2],
indicator=warn.vec[3], stratum=NA, warning=I(warn),
action=I(act)))
}
# Check for the minimum number of classes
if(length(bounds) < 2)
stop("\nThe vector of upper bounds that define classes for the CDFs must include at \nleast two unique values.")
# Assign logical values to the indicator variables for stratified samples
stratum.ind_1 <- length(unique(stratum_1)) > 1
stratum.ind_2 <- length(unique(stratum_2)) > 1
# If sample one is stratified, convert stratum to a factor, determine stratum
# levels, and calculate number of strata,
if(stratum.ind_1) {
stratum_1 <- factor(stratum_1)
stratum.levels_1 <- levels(stratum_1)
nstrata_1 <- length(stratum.levels_1)
} else {
stratum.levels_1 <- NULL
nstrata_1 <- NULL
}
# If sample two is stratified, convert stratum to a factor, determine stratum
# levels, and calculate number of strata,
if(stratum.ind_2) {
stratum_2 <- factor(stratum_2)
stratum.levels_2 <- levels(stratum_2)
nstrata_2 <- length(stratum.levels_2)
} else {
stratum.levels_2 <- NULL
nstrata_2 <- NULL
}
# Assign logical values to the indicator variables for two stage samples
cluster.ind_1 <- length(unique(cluster_1)) > 1
cluster.ind_2 <- length(unique(cluster_2)) > 1
# Assign the values of popcorrect to the indicator variables for use of the
# population correction factor
pcfactor.ind_1 <- popcorrect_1
pcfactor.ind_2 <- popcorrect_2
# Assign the values of sizeweight to the indicator variables for use of size
# weights
swgt.ind_1 <- sizeweight_1
swgt.ind_2 <- sizeweight_2
# Begin the section that checks for compatibility of sample one input values
if(check.ind) {
# If the sample has two stages, convert cluster to a factor, determine cluster
# levels, and calculate number of clusters
if(cluster.ind_1) {
if(stratum.ind_1) {
cluster.in <- cluster_1
cluster_1 <- tapply(cluster_1, stratum_1, factor)
cluster.levels_1 <- sapply(cluster_1, levels, simplify=FALSE)
ncluster_1 <- sapply(cluster.levels_1, length)
} else {
cluster_1 <- factor(cluster_1)
cluster.levels_1 <- levels(cluster_1)
ncluster_1 <- length(cluster.levels_1)
}
}
# Check for compatibility of input values
temp <- input.check(nresp_1, wgt_1, NULL, NULL, x_1, y_1, stratum.ind_1,
stratum_1, stratum.levels_1, nstrata_1, cluster.ind_1, cluster_1,
cluster.levels_1, ncluster_1, wgt1_1, x1_1, y1_1, popsize_1,
pcfactor.ind_1, pcfsize_1, N.cluster_1, stage1size_1, support_1,
swgt.ind_1, swgt_1, swgt1_1, vartype_1, conf=95)
popsize_1 <- temp$popsize
pcfsize_1 <- temp$pcfsize
N.cluster_1 <- temp$N.cluster
stage1size_1 <- temp$stage1size
# If the sample was stratified and had two stages, then reset cluster to its
# input value
if(stratum.ind_1 && cluster.ind_1)
cluster_1 <- cluster.in
# End the section that checks for compatibility of sample one input values
}
# Begin the section that checks for compatibility of sample two input values
if(check.ind) {
# If the sample has two stages, convert cluster to a factor, determine cluster
# levels, and calculate number of clusters
if(cluster.ind_2) {
if(stratum.ind_2) {
cluster.in <- cluster_2
cluster_2 <- tapply(cluster_2, stratum_2, factor)
cluster.levels_2 <- sapply(cluster_2, levels, simplify=FALSE)
ncluster_2 <- sapply(cluster.levels_2, length)
} else {
cluster_2 <- factor(cluster_2)
cluster.levels_2 <- levels(cluster_2)
ncluster_2 <- length(cluster.levels_2)
}
}
# Check for compatibility of input values
temp <- input.check(nresp_2, wgt_2, NULL, NULL, x_2, y_2, stratum.ind_2,
stratum_2, stratum.levels_2, nstrata_2, cluster.ind_2, cluster_2,
cluster.levels_2, ncluster_2, wgt1_2, x1_2, y1_2, popsize_2,
pcfactor.ind_2, pcfsize_2, N.cluster_2, stage1size_2, support_2,
swgt.ind_2, swgt_2, swgt1_2, vartype_2, conf=95)
popsize_2 <- temp$popsize
pcfsize_2 <- temp$pcfsize
N.cluster_2 <- temp$N.cluster
stage1size_2 <- temp$stage1size
# If the sample was stratified and had two stages, then reset cluster to its
# input value
if(stratum.ind_2 && cluster.ind_2)
cluster_2 <- cluster.in
# End the section that checks for compatibility of sample two input values
}
# Remove missing values for sample one
if(vartype_1 == "Local") {
if(swgt.ind_1) {
if(stratum.ind_1) {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1,
stratum=stratum_1, cluster=cluster_1, wgt1=wgt1_1, x1=x1_1,
y1=y1_1, swgt=swgt_1, swgt1=swgt1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1,
stratum=stratum_1, swgt=swgt_1))
} else {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1,
cluster=cluster_1, wgt1=wgt1_1, x1=x1_1, y1=y1_1, swgt=swgt_1,
swgt1=swgt1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1, swgt=swgt_1))
}
z_1 <- temp$z
wgt_1 <- temp$wgt
x_1 <- temp$x
y_1 <- temp$y
if(stratum.ind_1)
stratum_1 <- temp$stratum
if(cluster.ind_1) {
cluster_1 <- temp$cluster
wgt1_1 <- temp$wgt1
x1_1 <- temp$x1
y1_1 <- temp$y1
swgt1_1 <- temp$swgt1
}
swgt_1 <- temp$swgt
} else {
if(stratum.ind_1) {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1,
stratum=stratum_1, cluster=cluster_1, wgt1=wgt1_1, x1=x1_1,
y1=y1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1,
stratum=stratum_1))
} else {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1,
cluster=cluster_1, wgt1=wgt1_1, x1=x1_1, y1=y1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, x=x_1, y=y_1))
}
z_1 <- temp$z
wgt_1 <- temp$wgt
x_1 <- temp$x
y_1 <- temp$y
if(stratum.ind_1)
stratum_1 <- temp$stratum
if(cluster.ind_1) {
cluster_1 <- temp$cluster
wgt1_1 <- temp$wgt1
x1_1 <- temp$x1
y1_1 <- temp$y1
}
}
} else {
if(swgt.ind_1) {
if(stratum.ind_1) {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, stratum=stratum_1,
cluster=cluster_1, wgt1=wgt1_1, swgt=swgt_1, swgt1=swgt1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, stratum=stratum_1,
swgt=swgt_1))
} else {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, cluster=cluster_1,
wgt1=wgt1_1, swgt=swgt_1, swgt1=swgt1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, swgt=swgt_1))
}
z_1 <- temp$z
wgt_1 <- temp$wgt
if(stratum.ind_1)
stratum_1 <- temp$stratum
if(cluster.ind_1) {
cluster_1 <- temp$cluster
wgt1_1 <- temp$wgt1
swgt1_1 <- temp$swgt1
}
swgt_1 <- temp$swgt
} else {
if(stratum.ind_1) {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, stratum=stratum_1,
cluster=cluster_1, wgt1=wgt1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1, stratum=stratum_1))
} else {
if(cluster.ind_1)
temp <- wnas(list(z=z_1, wgt=wgt_1, cluster=cluster_1,
wgt1=wgt1_1))
else
temp <- wnas(list(z=z_1, wgt=wgt_1))
}
z_1 <- temp$z
wgt_1 <- temp$wgt
if(stratum.ind_1)
stratum_1 <- temp$stratum
if(cluster.ind_1) {
cluster_1 <- temp$cluster
wgt1_1 <- temp$wgt1
}
}
}
# Remove missing values for sample two
if(vartype_2 == "Local") {
if(swgt.ind_2) {
if(stratum.ind_2) {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2,
stratum=stratum_2, cluster=cluster_2, wgt1=wgt1_2, x1=x1_2,
y1=y1_2, swgt=swgt_2, swgt1=swgt1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2,
stratum=stratum_2, swgt=swgt_2))
} else {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2,
cluster=cluster_2, wgt1=wgt1_2, x1=x1_2, y1=y1_2, swgt=swgt_2,
swgt1=swgt1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2, swgt=swgt_2))
}
z_2 <- temp$z
wgt_2 <- temp$wgt
x_2 <- temp$x
y_2 <- temp$y
if(stratum.ind_2)
stratum_2 <- temp$stratum
if(cluster.ind_2) {
cluster_2 <- temp$cluster
wgt1_2 <- temp$wgt1
x1_2 <- temp$x1
y1_2 <- temp$y1
swgt1_2 <- temp$swgt1
}
swgt_2 <- temp$swgt
} else {
if(stratum.ind_2) {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2,
stratum=stratum_2, cluster=cluster_2, wgt1=wgt1_2, x1=x1_2,
y1=y1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2,
stratum=stratum_2))
} else {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2,
cluster=cluster_2, wgt1=wgt1_2, x1=x1_2, y1=y1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, x=x_2, y=y_2))
}
z_2 <- temp$z
wgt_2 <- temp$wgt
x_2 <- temp$x
y_2 <- temp$y
if(stratum.ind_2)
stratum_2 <- temp$stratum
if(cluster.ind_2) {
cluster_2 <- temp$cluster
wgt1_2 <- temp$wgt1
x1_2 <- temp$x1
y1_2 <- temp$y1
}
}
} else {
if(swgt.ind_2) {
if(stratum.ind_2) {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, stratum=stratum_2,
cluster=cluster_2, wgt1=wgt1_2, swgt=swgt_2, swgt1=swgt1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, stratum=stratum_2,
swgt=swgt_2))
} else {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, cluster=cluster_2,
wgt1=wgt1_2, swgt=swgt_2, swgt1=swgt1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, swgt=swgt_2))
}
z_2 <- temp$z
wgt_2 <- temp$wgt
if(stratum.ind_2)
stratum_2 <- temp$stratum
if(cluster.ind_2) {
cluster_2 <- temp$cluster
wgt1_2 <- temp$wgt1
swgt1_2 <- temp$swgt1
}
swgt_2 <- temp$swgt
} else {
if(stratum.ind_2) {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, stratum=stratum_2,
cluster=cluster_2, wgt1=wgt1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2, stratum=stratum_2))
} else {
if(cluster.ind_2)
temp <- wnas(list(z=z_2, wgt=wgt_2, cluster=cluster_2,
wgt1=wgt1_2))
else
temp <- wnas(list(z=z_2, wgt=wgt_2))
}
z_2 <- temp$z
wgt_2 <- temp$wgt
if(stratum.ind_2)
stratum_2 <- temp$stratum
if(cluster.ind_2) {
cluster_2 <- temp$cluster
wgt1_2 <- temp$wgt1
}
}
}
# For a stratified sample one, check for strata that no longer contain any
# values, as necesssary adjust popsize, remove strata that contain a single
# value, and output a warning message
if(stratum.ind_1) {
stratum_1 <- factor(stratum_1)
stratum.levels.old <- stratum.levels_1
stratum.levels_1 <- levels(stratum_1)
nstrata.old <- nstrata_1
nstrata_1 <- length(stratum.levels_1)
if(nstrata_1 < nstrata.old) {
warn.ind <- TRUE
temp <- match(stratum.levels_1, stratum.levels.old)
temp.str <- vecprint(stratum.levels.old[-temp])
warn <- paste("The following sample one strata no longer contain any values and were removed from the analysis:\n", temp.str, sep="")
act <- "Strata were removed from the analysis.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2], indicator=warn.vec[3],
stratum=NA, warning=I(warn), action=I(act)))
if(!is.null(popsize_1))
popsize_1 <- popsize_1[temp]
}
ind <- FALSE
for(i in 1:nstrata_1) {
stratum.i <- stratum_1 == stratum.levels_1[i]
if(sum(stratum.i) == 1) {
warn.ind <- TRUE
warn <- paste("The sample one stratum named \"", stratum.levels_1[i], "\" contains a single value and was removed from the analysis.\n", sep="")
act <- "Stratum was removed from the analysis.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2],
indicator=warn.vec[3], stratum=NA, warning=I(warn),
action=I(act)))
z_1 <- z_1[!stratum.i]
wgt_1 <- wgt_1[!stratum.i]
if(vartype_1 == "Local") {
x_1 <- x_1[!stratum.i]
y_1 <- y_1[!stratum.i]
}
stratum_1 <- stratum_1[!stratum.i]
if(cluster.ind_1) {
cluster_1 <- cluster_1[!stratum.i]
wgt1_1 <- wgt1_1[!stratum.i]
if(vartype_1 == "Local") {
x1_1 <- x1_1[!stratum.i]
y1_1 <- y1_1[!stratum.i]
}
}
if(swgt.ind_1) {
swgt_1 <- swgt_1[!stratum.i]
if(cluster.ind_1)
swgt1_1 <- swgt1_1[!stratum.i]
}
if(!is.null(popsize_1))
popsize_1 <- popsize_1[names(popsize_1) != stratum.levels_1[i]]
ind <- TRUE
}
}
if(ind) {
stratum_1 <- factor(stratum_1)
stratum.levels_1 <- levels(stratum_1)
nstrata_1 <- length(stratum.levels_1)
}
# For a stratified sample two, check for strata that no longer contain any
# values, as necesssary adjust popsize, remove strata that contain a single
# value, and output a warning message
if(stratum.ind_2) {
stratum_2 <- factor(stratum_2)
stratum.levels.old <- stratum.levels_2
stratum.levels_2 <- levels(stratum_2)
nstrata.old <- nstrata_2
nstrata_2 <- length(stratum.levels_2)
if(nstrata_2 < nstrata.old) {
warn.ind <- TRUE
temp <- match(stratum.levels_2, stratum.levels.old)
temp.str <- vecprint(stratum.levels.old[-temp])
warn <- paste("The following sample two strata no longer contain any values and were removed from the analysis:\n", temp.str, sep="")
act <- "Strata were removed from the analysis.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2], indicator=warn.vec[3],
stratum=NA, warning=I(warn), action=I(act)))
if(!is.null(popsize_2))
popsize_2 <- popsize_2[temp]
}
ind <- FALSE
for(i in 1:nstrata_2) {
stratum.i <- stratum_2 == stratum.levels_2[i]
if(sum(stratum.i) == 1) {
warn.ind <- TRUE
warn <- paste("The sample two stratum named \"", stratum.levels_2[i], "\" contains a single value and was removed from the analysis.\n", sep="")
act <- "Stratum was removed from the analysis.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2],
indicator=warn.vec[3], stratum=NA, warning=I(warn),
action=I(act)))
z_2 <- z_2[!stratum.i]
wgt_2 <- wgt_2[!stratum.i]
if(vartype_2 == "Local") {
x_2 <- x_2[!stratum.i]
y_2 <- y_2[!stratum.i]
}
stratum_2 <- stratum_2[!stratum.i]
if(cluster.ind_2) {
cluster_2 <- cluster_2[!stratum.i]
wgt1_2 <- wgt1_2[!stratum.i]
if(vartype_2 == "Local") {
x1_2 <- x1_2[!stratum.i]
y1_2 <- y1_2[!stratum.i]
}
}
if(swgt.ind_2) {
swgt_2 <- swgt_2[!stratum.i]
if(cluster.ind_2)
swgt1_2 <- swgt1_2[!stratum.i]
}
if(!is.null(popsize_2))
popsize_2 <- popsize_2[names(popsize_2) != stratum.levels_2[i]]
ind <- TRUE
}
}
if(ind) {
stratum_2 <- factor(stratum_2)
stratum.levels_2 <- levels(stratum_2)
nstrata_2 <- length(stratum.levels_2)
}
# Check whether the number of sample one strata is one
if(nstrata_1 == 1) {
warn.ind <- TRUE
warn <- "Only a single sample one stratum was available for the analysis.\n"
act <- "An unstratified data analysis was used.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2], indicator=warn.vec[3],
stratum=NA, warning=I(warn), action=I(act)))
stratum.ind_1 <- FALSE
}
}
# Check whether the number of sample two strata is one
if(nstrata_2 == 1) {
warn.ind <- TRUE
warn <- "Only a single sample two stratum was available for the analysis.\n"
act <- "An unstratified data analysis was used.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2], indicator=warn.vec[3],
stratum=NA, warning=I(warn), action=I(act)))
stratum.ind_2 <- FALSE
}
}
# Check whether the vector of sample one response values is empty
if(length(z_1) == 0)
stop("\nEstimates cannot be calculated since the vector of sample one response values is empty.")
# Check whether the vector of sample two response values is empty
if(length(z_2) == 0)
stop("\nEstimates cannot be calculated since the vector of sample two response values is empty.")
# If the sample has two stages, determine whether there are any stage one
# sampling units with a sufficient number of sites to allow variance calculation
if(cluster.ind_1) {
temp <- sapply(split(cluster_1, cluster_1), length) == 1
if(all(temp)) {
stop("\nA variance estimate cannot be calculated since all of the stage one sampling \nunit(s) for sample one contain a single stage two sampling unit.")
}
if(any(temp)) {
temp.str <- vecprint(names(temp)[temp])
warn <- paste("Since the following stage one sampling units for sample one contain a single \nstage two sampling unit, a variance estimate cannot be calculated and the mean \nof the variance estimates for stage one sampling units with two or more sites \nwill be used:\n", temp.str, sep="")
act <- "The mean of the variance estimates will be used.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname), subpoptype=NA,
subpop=NA, indicator=NA, stratum=NA, warning=I(warn),
action=I(act)))
}
}
if(cluster.ind_2) {
temp <- sapply(split(cluster_2, cluster_2), length) == 1
if(all(temp)) {
stop("\nA variance estimate cannot be calculated since all of the stage one sampling \nunit(s) for sample two contain a single stage two sampling unit.")
}
if(any(temp)) {
temp.str <- vecprint(names(temp)[temp])
warn <- paste("Since the following stage one sampling units for sample two contain a single \nstage two sampling unit, a variance estimate cannot be calculated and the mean \nof the variance estimates for stage one sampling units with two or more sites \nwill be used:\n", temp.str, sep="")
act <- "The mean of the variance estimates will be used.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname), subpoptype=NA,
subpop=NA, indicator=NA, stratum=NA, warning=I(warn),
action=I(act)))
}
}
# Calculate additional required values for sample one
if(!is.null(popsize_1)) {
sum.popsize_1 <- sum(popsize_1)
} else {
if(stratum.ind_1) {
if(cluster.ind_1) {
popsize.hat_1 <- tapply(wgt_1*wgt1_1, stratum_1, sum)
sum.popsize.hat_1 <- sum(wgt_1*wgt1_1)
} else {
popsize.hat_1 <- tapply(wgt_1, stratum_1, sum)
sum.popsize.hat_1 <- sum(wgt_1)
}
} else {
if(cluster.ind_1)
popsize.hat_1 <- sum(wgt_1*wgt1_1)
else
popsize.hat_1 <- sum(wgt_1)
}
}
# Calculate additional required values for sample two
if(!is.null(popsize_2)) {
sum.popsize_2 <- sum(popsize_2)
} else {
if(stratum.ind_2) {
if(cluster.ind_2) {
popsize.hat_2 <- tapply(wgt_2*wgt1_2, stratum_2, sum)
sum.popsize.hat_2 <- sum(wgt_2*wgt1_2)
} else {
popsize.hat_2 <- tapply(wgt_2, stratum_2, sum)
sum.popsize.hat_2 <- sum(wgt_2)
}
} else {
if(cluster.ind_2)
popsize.hat_2 <- sum(wgt_2*wgt1_2)
else
popsize.hat_2 <- sum(wgt_2)
}
}
# Create the data frame for estimates
Results <- data.frame(array(0, c(6, 4)))
dimnames(Results) <- list(c("Wald", "Wald_F", "Mean_Eigenvalue",
"Mean_Eigenvalue_F", "Satterthwaite", "Satterthwaite_F"),
c("Test_Statistic", "Degrees_of_Freedom_1", "Degrees_of_Freedom_2",
"p_Value"))
# Assign the number of bins
n.bin <- length(bounds)
m <- n.bin - 1
# Assign length of the response vectors
n1 <- length(z_1)
n2 <- length(z_2)
# Branch to handle stratified and unstratified data for sample one
if(stratum.ind_1) {
# Begin the section for stratified data
# Initialize objects for storing estimates for all strata combined
sam1.nbin <- numeric(n.bin)
sam1.phat <- numeric(n.bin)
sam1.phatvar <- array(0, c(n.bin, n.bin))
sam1.df <- numeric(1)
# Begin the loop for individual strata
for(i in 1:nstrata_1) {
# Calculate estimates
stratum.i <- stratum_1 == stratum.levels_1[i]
if(swgt.ind_1) {
temp.phat <- cdf.test.size.prop(z_1[stratum.i], wgt_1[stratum.i],
bounds, cluster.ind_1, cluster_1[stratum.i], wgt1_1[stratum.i],
swgt_1[stratum.i], swgt1_1[stratum.i])
} else {
temp.phat <- cdf.test.prop(z_1[stratum.i], wgt_1[stratum.i], bounds,
cluster.ind_1, cluster_1[stratum.i], wgt1_1[stratum.i])
}
temp <- cdfvar.test(z_1[stratum.i], wgt_1[stratum.i], x_1[stratum.i],
y_1[stratum.i], bounds, temp.phat, stratum.ind_1,
stratum.levels_1[i], cluster.ind_1, cluster_1[stratum.i],
wgt1_1[stratum.i], x1_1[stratum.i], y1_1[stratum.i],
popsize_1[i], pcfactor.ind_1, pcfsize_1[i], N.cluster_1[i],
stage1size_1[[i]], support_1[stratum.i], swgt.ind_1,
swgt_1[stratum.i], swgt1_1[stratum.i], vartype_1, warn.ind, warn.df,
warn.vec)
sam1.nbin <- sam1.nbin + temp$nbin
sam1.df <- sam1.df + temp$df
warn.ind <- temp$warn.ind
warn.df <- temp$warn.df
# Add estimates to the objects for all strata combined
if(!is.null(popsize_1)) {
sam1.phat <- sam1.phat + (popsize_1[i]/sum.popsize_1)*temp.phat
sam1.phatvar <- sam1.phatvar +
((popsize_1[i]/sum.popsize_1)^2)*as.matrix(temp$varest)
} else {
sam1.phat <- sam1.phat +
(popsize.hat_1[i]/sum.popsize.hat_1)*temp.phat
sam1.phatvar <- sam1.phatvar +
((popsize.hat_1[i]/sum.popsize.hat_1)^2)*as.matrix(temp$varest)
}
# End the loop for individual strata
}
# End the section for stratified data
} else {
# Begin the section for unstratified data
# Check whether the vector of response values contains a single element
if(length(z_1) == 1)
stop("\nEstimates cannot be calculated since the vector of response values for sample \none contains a single element.")
# Calculate estimates
if(swgt.ind_1) {
sam1.phat <- cdf.test.size.prop(z_1, wgt_1, bounds, cluster.ind_1,
cluster_1, wgt1_1, swgt_1, swgt1_1)
} else {
sam1.phat <- cdf.test.prop(z_1, wgt_1, bounds, cluster.ind_1,
cluster_1, wgt1_1)
}
temp <- cdfvar.test(z_1, wgt_1, x_1, y_1, bounds, sam1.phat,
stratum.ind_1, stratum.levels_1, cluster.ind_1, cluster_1, wgt1_1,
x1_1, y1_1, popsize_1, pcfactor.ind_1, pcfsize_1, N.cluster_1,
stage1size_1, support_1, swgt.ind_1, swgt_1, swgt1_1, vartype_1,
warn.ind, warn.df, warn.vec)
sam1.nbin <- temp$nbin
sam1.phatvar <- as.matrix(temp$varest)
sam1.df <- temp$df
warn.ind <- temp$warn.ind
warn.df <- temp$warn.df
# End section for unstratified data
}
# Branch to handle stratified and unstratified data for sample two
if(stratum.ind_2) {
# Begin the section for stratified data
# Initialize objects for storing estimates for all strata combined
sam2.nbin <- numeric(n.bin)
sam2.phat <- numeric(n.bin)
sam2.phatvar <- array(0, c(n.bin, n.bin))
sam2.df <- numeric(1)
# Begin the loop for individual strata
for(i in 1:nstrata_2) {
# Calculate estimates
stratum.i <- stratum_2 == stratum.levels_2[i]
if(swgt.ind_2) {
temp.phat <- cdf.test.size.prop(z_2[stratum.i], wgt_2[stratum.i],
bounds, cluster.ind_2, cluster_2[stratum.i], wgt1_2[stratum.i],
swgt_2[stratum.i], swgt1_2[stratum.i])
} else {
temp.phat <- cdf.test.prop(z_2[stratum.i], wgt_2[stratum.i], bounds,
cluster.ind_2, cluster_2[stratum.i], wgt1_2[stratum.i])
}
temp <- cdfvar.test(z_2[stratum.i], wgt_2[stratum.i], x_2[stratum.i],
y_2[stratum.i], bounds, temp.phat, stratum.ind_2,
stratum.levels_2[i], cluster.ind_2, cluster_2[stratum.i],
wgt1_2[stratum.i], x1_2[stratum.i], y1_2[stratum.i], popsize_2[i],
pcfactor.ind_2, pcfsize_2[i], N.cluster_2[i], stage1size_2[[i]],
support_2[stratum.i], swgt.ind_2, swgt_2[stratum.i],
swgt1_2[stratum.i], vartype_2, warn.ind, warn.df, warn.vec)
sam2.nbin <- sam2.nbin + temp$nbin
sam2.df <- sam2.df + temp$df
warn.ind <- temp$warn.ind
warn.df <- temp$warn.df
# Add estimates to the objects for all strata combined
if(!is.null(popsize_2)) {
sam2.phat <- sam2.phat + (popsize_2[i]/sum.popsize_2)*temp.phat
sam2.phatvar <- sam2.phatvar +
((popsize_2[i]/sum.popsize_2)^2)*as.matrix(temp$varest)
} else {
sam2.phat <- sam2.phat +
(popsize.hat_2[i]/sum.popsize.hat_2)*temp.phat
sam2.phatvar <- sam2.phatvar +
((popsize.hat_2[i]/sum.popsize.hat_2)^2)*as.matrix(temp$varest)
}
# End the loop for individual strata
}
# End the section for stratified data
} else {
# Begin the section for unstratified data
# Check whether the vector of response values contains a single element
if(length(z_2) == 1)
stop("\nEstimates cannot be calculated since the vector of response values for sample \ntwo contains a single element.")
# Calculate estimates
if(swgt.ind_2) {
sam2.phat <- cdf.test.size.prop(z_2, wgt_2, bounds, cluster.ind_2,
cluster_2, wgt1_2, swgt_2, swgt1_2)
} else {
sam2.phat <- cdf.test.prop(z_2, wgt_2, bounds, cluster.ind_2,
cluster_2, wgt1_2)
}
temp <- cdfvar.test(z_2, wgt_2, x_2, y_2, bounds, sam2.phat,
stratum.ind_2, stratum.levels_2, cluster.ind_2, cluster_2, wgt1_2,
x1_2, y1_2, popsize_2, pcfactor.ind_2, pcfsize_2, N.cluster_2,
stage1size_2, support_2, swgt.ind_2, swgt_2, swgt1_2, vartype_2,
warn.ind, warn.df, warn.vec)
sam2.nbin <- temp$nbin
sam2.phatvar <- as.matrix(temp$varest)
sam2.df <- temp$df
warn.ind <- temp$warn.ind
warn.df <- temp$warn.df
# End section for unstratified data
}
# Determine whether the combined number of values in any bin is less than five
# and generate a warning message, as necessary
if(any((sam1.nbin + sam2.nbin) < 5)) {
warn.ind <- TRUE
warn <- "The combined number of values in at least one class is less than five.\n"
act <- "The user should consider using a smaller number of classes.\n"
warn.df <- rbind(warn.df, data.frame(func=I(fname),
subpoptype=warn.vec[1], subpop=warn.vec[2], indicator=warn.vec[3],
stratum=NA, warning=I(warn), action=I(act)))
}
# Calculate the Wald chi square statistic
difr <- sam1.phat[-n.bin] - sam2.phat[-n.bin]
tqr <- qr(sam1.phatvar[-n.bin,-n.bin] + sam2.phatvar[-n.bin,-n.bin])
if (tqr$rank < m) {
Results[1, 1] <- NA
Results[1, 2] <- NA
Results[1, 3] <- NA
Results[1, 4] <- NA
} else {
Results[1, 1] <- difr %*% solve(tqr) %*% difr
Results[1, 2] <- m
Results[1, 3] <- NA
Results[1, 4] <- 1 - pchisq(Results[1, 1], Results[1, 2])
}
# Calculate the F-based version of the Wald statistic
f.df <- sam1.df + sam2.df
if (tqr$rank < m) {
Results[2, 1] <- NA
Results[2, 2] <- NA
Results[2, 3] <- NA
Results[2, 4] <- NA
} else {
Results[2, 1] <- ((f.df - m + 1)/(f.df*m))*Results[1, 1]
Results[2, 2] <- m
Results[2, 3] <- f.df - m + 1
Results[2, 4] <- 1 - pf(Results[2, 1], Results[2, 2], Results[2, 3])
}
# Calculate the mean eigenvalue-corrected chi square statistic
phatmean <- ((n1 * sam1.phat) + (n2 * sam2.phat)) / (n1 + n2)
difr1 <- sam1.phat[-n.bin] - phatmean[-n.bin]
difr2 <- sam2.phat[-n.bin] - phatmean[-n.bin]
tqr <- qr(diag(phatmean[-n.bin], nrow=length(phatmean[-n.bin])) -
outer(phatmean[-n.bin], phatmean[-n.bin]))
if (tqr$rank < m) {
Results[3, 1] <- NA
Results[3, 2] <- NA
Results[3, 3] <- NA
Results[3, 4] <- NA
} else {
rmatinv <- solve(tqr)
chisqtmp <- (n1 * difr1 %*% rmatinv %*% difr1) + (n2 * difr2 %*%
rmatinv %*% difr2)
sam1.amat <- n1 * (rmatinv %*% sam1.phatvar[-n.bin,-n.bin])
sam2.amat <- n2 * (rmatinv %*% sam2.phatvar[-n.bin,-n.bin])
eigmat <- eigen(((n2*sam1.amat) + (n1*sam2.amat)) / (n1 + n2))
eigmean <- mean(eigmat$values)
Results[3, 1] <- chisqtmp / eigmean
Results[3, 2] <- m
Results[3, 3] <- NA
Results[3, 4] <- 1 - pchisq(Results[3, 1], Results[3, 2])
}
# Calculate the F-based version of the mean eigenvalue-corrected statistic
if (tqr$rank < m) {
Results[4, 1] <- NA
Results[4, 2] <- NA
Results[4, 3] <- NA
Results[4, 4] <- NA
} else {
Results[4, 1] <- Results[3, 1]/m
Results[4, 2] <- m
Results[4, 3] <- f.df*m
Results[4, 4] <- 1 - pf(Results[4, 1], Results[4, 2], Results[4, 3])
}
# Calculate the Satterthwaite-corrected chi square statistic
if (tqr$rank < m) {
Results[5, 1] <- NA
Results[5, 2] <- NA
Results[5, 3] <- NA
Results[5, 4] <- NA
} else {
cvsquare <- var(eigmat$values) / (eigmean ^ 2)
Results[5, 1] <- Results[3, 1] / (1 + cvsquare)
Results[5, 2] <- m / (1 + cvsquare)
Results[5, 3] <- NA
Results[5, 4] <- 1 - pchisq(Results[5, 1], Results[5, 2])
}
# Calculate the F-based version of the Satterthwaite-corrected statistic
if (tqr$rank < m) {
Results[6, 1] <- NA
Results[6, 2] <- NA
Results[6, 3] <- NA
Results[6, 4] <- NA
} else {
Results[6, 1] <- Results[4, 1]
Results[6, 2] <- Results[5, 2]
Results[6, 3] <- f.df*Results[5, 2]
Results[6, 4] <- 1 - pf(Results[6, 1], Results[6, 2], Results[6, 3])
}
# Depending on whether the function was called directly or was called by
# cdftest.analysis, return appropriate results
if(is.na(warn.vec[1])) {
# As necessary, output a message indicating that warning messages were generated
# during execution of the program
if(warn.ind) {
warn.df <<- warn.df
if(nrow(warn.df) == 1)
cat("During execution of the program, a warning message was generated. The warning \nmessage is stored in a data frame named 'warn.df'. Enter the following command \nto view the warning message: warnprnt()\n")
else
cat(paste("During execution of the program,", nrow(warn.df), "warning messages were generated. The warning \nmessages are stored in a data frame named 'warn.df'. Enter the following \ncommand to view the warning messages: warnprnt() \nTo view a subset of the warning messages (say, messages number 1, 3, and 5), \nenter the following command: warnprnt(m=c(1,3,5))\n"))
}
# Return the Results data frame
Results
} else {
# Return the Results data frame, the warn.ind logical value, and the warn.df
# data frame
list(Results=Results, warn.ind=warn.ind, warn.df=warn.df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.