Nothing
#' @title Cate-Nelson models for bivariate data
#'
#' @description Produces critical-x and critical-y values for bivariate data
#' according to a Cate-Nelson analysis.
#'
#' @param x A vector of values for the x variable.
#' @param y A vector of values for the y variable.
#' @param plotit If \code{TRUE}, produces plots of the output.
#' @param hollow If \code{TRUE}, uses hollow circles on the plot to indicate
#' data not fitting the model.
#' @param xlab The label for the x-axis.
#' @param ylab The label for the y-axis.
#' @param trend \code{"postive"} if the trend of y vs. x is generally
#' positive. \code{"negative"} if negative.
#' @param clx Indicates which of the listed critical x values
#' should be chosen as the critical x value for the final model.
#' @param cly Indicates which of the listed critical y values
#' should be chosen as the critical y value for the final model.
#' @param xthreshold Indicates the proportion of potential critical x values
#' to display in the output. A value of \code{1} would display
#' all of them.
#' @param ythreshold Indicates the proportion of potential critical y values
#' to display in the output. A value of \code{1} would display
#' all of them.
#' @param progress If \code{TRUE}, prints an indicator of progress as for loops
#' progress.
#' @param verbose If \code{FALSE}, suppresses printed output of tables.
#' @param listout If \code{TRUE}, outputs a list of data frames instead of a
#' a single data frame. This allows a data frame of critical
#' values and associated statistics to be extracted, for example
#' if one would want to sort by Cramer's V.
#'
#' @details Cate-Nelson analysis divides bivariate data into two groups.
#' For data with a positive trend, one group has a
#' large \code{x} value associated with a large \code{y} value, and
#' the other group has a small \code{x} value associated with a small
#' \code{y} value. For a negative trend, a small \code{x} is
#' associated with a large \code{y}, and so on.
#'
#' The analysis is useful for bivariate data which don't conform well
#' to linear, curvilinear, or plateau models.
#'
#' This function will fail if either of the largest two or smallest
#' two x values are identical.
#'
#' @note The method in this function follows \cite{Cate, R. B., & Nelson, L.A.
#' (1971). A simple statistical procedure for partitioning soil test
#' correlation data into two classes. Soil Science Society of America
#' Proceedings 35, 658-660.}
#'
#' An earlier version of this function was published in \cite{Mangiafico,
#' S.S. 2013. Cate-Nelson Analysis for Bivariate Data Using
#' R-project. J.of Extension 51:5, 5TOT1.}
#'
#' @author Salvatore Mangiafico, \email{mangiafico@njaes.rutgers.edu}
#'
#' @references \url{https://rcompanion.org/rcompanion/h_02.html}
#'
#' Cate, R. B., & Nelson, L.A. (1971). A simple statistical procedure
#' for partitioning soil test correlation data into two classes.
#' Soil Science Society of America Proceedings 35, 658–660.
#'
#' @seealso \code{\link{cateNelsonFixedY}}
#'
#' @concept Cate-Nelson
#' @concept agronomy
#'
#' @return A data frame of statistics from the analysis: number of observations,
#' critical level for x, sum of squares, critical value for y, the
#' number of observations in each of the quadrants (I, II, III, IV),
#' the number of observations that conform with the model,
#' the proportion of observations that conform with the model,
#' the number of observations that do not conform to the model,
#' the proportion of observations that do not conform to the model,
#' a p-value for the Fisher exact test for the data divided into
#' the groups indicated by the model,
#' and Cramer's V for the data divided into
#' the groups indicated by the model.
#'
#' Output also includes printed lists of critical values,
#' explanation of the values in the data frame,
#' and plots: y vs. x; sum of squares vs. critical x value;
#' the number of observations that do not conform to the model vs.
#' critical y value;
#' and y vs. x with the critical values shown as lines on the plot,
#' and the quadrants labeled.
#'
#' @examples
#' data(Nurseries)
#' cateNelson(x = Nurseries$Size,
#' y = Nurseries$Proportion,
#' plotit = TRUE,
#' hollow = TRUE,
#' xlab = "Nursery size in hectares",
#' ylab = "Proportion of good practices adopted",
#' trend = "positive",
#' clx = 1,
# cly = 1,
#' xthreshold = 0.10,
#' ythreshold = 0.15)
#'
#' @importFrom graphics plot abline text
#' @importFrom stats anova fisher.test lm
#'
#' @export
#'
cateNelson =
function(x, y, plotit=TRUE, hollow=TRUE, xlab="X", ylab="Y",
trend="positive", clx=1, cly=1,
xthreshold=0.10, ythreshold=0.10,
progress=TRUE, verbose=TRUE, listout=FALSE)
{
n = length(x)
l = n-1
dataset = data.frame(x=x, y=y,
xgroup=as.factor(c("a",rep("b", l))),
ygroup=as.factor(c("c",rep("d", l))),
Critical.x.value=as.numeric(rep(0.00, n)),
Sum.of.squares=as.numeric(rep(0.00, n)),
Critical.y.value=as.numeric(rep(0.00, n)),
q.i=as.integer(rep(0,n)),
q.ii=as.integer(rep(0,n)),
q.iii=as.integer(rep(0,n)),
q.iv=as.integer(rep(0,n)),
Q.i=as.integer(rep(0,n)),
Q.ii=as.integer(rep(0,n)),
Q.iii=as.integer(rep(0,n)),
Q.iv=as.integer(rep(0,n)),
Q.model=as.integer(rep(0,n)),
Q.err=as.integer(rep(0,n)),
Cramer.V=as.numeric(rep(0.00, n))
)
dataset = dataset[with(dataset, order(x, y)),]
for(i in (2:n)){
dataset$Critical.x.value[i] = (dataset$x[i] + dataset$x[i-1])/2
}
dataset$Critical.x.value[1] = dataset$Critical.x.value[1]/2
m = n-2
if(progress){tick=0}
for(j in (3:m)){
if(progress){tick=tick+1}
if(progress){if(tick>(m-3)/100){tick=0;cat(".")}}
for (i in (1:n)){
dataset$xgroup[i] = "b"
if(dataset$x[i] < dataset$Critical.x.value[j])
dataset$xgroup[i] = as.factor("a")
}
fit = lm(y ~ xgroup, data=dataset)
fit1 = anova(fit)
dataset$Sum.of.squares[j] = (fit1[1,2])
}
if(progress){cat("\n")}
dataset$Sum.of.squares[1] = min(dataset$Sum.of.squares[3:(n-2)])
dataset$Sum.of.squares[2] = min(dataset$Sum.of.squares[3:(n-2)])
dataset$Sum.of.squares[n-1] = min(dataset$Sum.of.squares[3:(n-2)])
dataset$Sum.of.squares[n] = min(dataset$Sum.of.squares[3:(n-2)])
max.ss = min(dataset$Sum.of.squares)+
(max(dataset$Sum.of.squares)-min(dataset$Sum.of.squares))*
(1-xthreshold)
if(plotit){dataset4 = dataset[2:length(dataset[[1]]),]}
dataset2 = dataset[with(dataset, order(-Sum.of.squares)),]
rownames(dataset2) <- 1:nrow(dataset2)
CLX = dataset2$Critical.x.value[clx]
for (i in (1:n)){
dataset$xgroup[i] = if(dataset$x[i] < CLX) "a" else "b"
}
if(verbose){cat("\n")
cat("Critical x that maximize sum of squares:","\n","\n")
print(dataset2[dataset2$Sum.of.squares >= max.ss,][c("Critical.x.value", "Sum.of.squares")])}
dataset = dataset[with(dataset, order(y, x)),]
dataset$Critical.y.value[1] = 0
for(k in c(2:n)){
dataset$Critical.y.value[k] = (dataset$y[k]+dataset$y[k-1])/2
}
dataset$Critical.y.value[1] = min(dataset$Critical.y.value[2:n])
if(progress){tick=0}
for(j in c(1:n)){
if(progress){tick=tick+1}
if(progress){if(tick>(m-3)/100){tick=0;cat(".")}}
for (i in c(1:n)){
dataset$ygroup[i] = "d"
if(dataset$y[i] < dataset$Critical.y.value[j])
dataset$ygroup[i] = "c"
}
for (i in c(1:n)){
dataset$q.i[i] = with(dataset, ifelse
((dataset$xgroup[i]=="a" & dataset$ygroup[i]=="d"), 1, 0))
dataset$q.ii[i] = with(dataset, ifelse
((dataset$xgroup[i]=="b" & dataset$ygroup[i]=="d"), 1, 0))
dataset$q.iii[i] = with(dataset, ifelse
((dataset$xgroup[i]=="b" & dataset$ygroup[i]=="c"), 1, 0))
dataset$q.iv[i] = with(dataset, ifelse
((dataset$xgroup[i]=="a" & dataset$ygroup[i]=="c"), 1, 0))
}
dataset$Q.i[j] = sum(dataset$q.i)
dataset$Q.ii[j] = sum(dataset$q.ii)
dataset$Q.iii[j] = sum(dataset$q.iii)
dataset$Q.iv[j] = sum(dataset$q.iv)
dataset$Cramer.V[j] = as.numeric(cramerV(matrix(c(dataset$Q.i[j],
dataset$Q.ii[j],
dataset$Q.iv[j],
dataset$Q.iii[j]),
nrow=2,
byrow=TRUE)))
if (trend=="positive")
dataset$Q.model[j] = dataset$Q.ii[j] + dataset$Q.iv[j]
if (trend=="negative")
dataset$Q.model[j] = dataset$Q.i[j] + dataset$Q.iii[j]
if (trend=="positive")
dataset$Q.err[j] = dataset$Q.i[j] + dataset$Q.iii[j]
if (trend=="negative")
dataset$Q.err[j] = dataset$Q.ii[j] + dataset$Q.iv[j]
}
if(progress){cat("\n")}
min.qerr = min(dataset$Q.err)+
(max(dataset$Q.err)-min(dataset$Q.err))*(ythreshold)
dataset3 = dataset[with(dataset, order(Q.err)),]
rownames(dataset3) <- 1:nrow(dataset3)
if(verbose){cat("\n")
cat("Critical y that minimize errors:","\n","\n")
print(dataset3[dataset3$Q.err <= min.qerr,][c("Critical.y.value",
"Q.i","Q.ii","Q.iii","Q.iv",
"Q.model","Q.err","Cramer.V")])}
CLY = dataset3$Critical.y.value[cly]
N = 1
Z = data.frame(n =as.numeric(rep(0.00, N)),
CLx=as.numeric(rep(0.00, N)),
SS =as.numeric(rep(0.00, N)),
CLy=as.numeric(rep(0.00, N)),
Q.I=as.integer(rep(0,N)),
Q.II=as.integer(rep(0,N)),
Q.III=as.integer(rep(0,N)),
Q.IV=as.integer(rep(0,N)),
Q.Model=as.integer(rep(0.00,N)),
p.Model=as.numeric(rep(0.00,N)),
Q.Error=as.integer(rep(0.00,N)),
p.Error=as.numeric(rep(0.00,N)),
Fisher.p.value=as.numeric(rep(0.00,N)),
Cramer.V=as.numeric(rep(0.00,N))
)
Z$n[1] = n
Z$CLx[1] = CLX
Z$SS[1] = dataset2$Sum.of.squares[1]
Z$CLy[1] = CLY
Z$Q.I[1] = dataset3$Q.i[cly]
Z$Q.II[1] = dataset3$Q.ii[cly]
Z$Q.III[1] = dataset3$Q.iii[cly]
Z$Q.IV[1] = dataset3$Q.iv[cly]
Z$Q.Model[1] = dataset3$Q.model[cly]
Z$p.Model[1] = dataset3$Q.model[cly]/n
Z$Q.Error[1] = dataset3$Q.err[cly]
Z$p.Error[1] = dataset3$Q.err[cly]/n
M=matrix(c(Z$Q.I[1],Z$Q.II[1],Z$Q.IV[1],Z$Q.III[1]),
nrow=2,
byrow=TRUE)
Z$Fisher.p.value = fisher.test(M)$p.value
Z$Cramer.V = dataset3$Cramer.V[cly]
if (plotit)
{
if (trend=="positive")
{
pchi=as.integer(rep(16,n))
pchi[(dataset$x<CLX)&(dataset$y<CLY)] = 16
pchi[(dataset$x>CLX)&(dataset$y>CLY)] = 16
pchi[(dataset$x>CLX)&(dataset$y<CLY)] = 1
pchi[(dataset$x<CLX)&(dataset$y>CLY)] = 1
}
if (trend=="negative")
{
pchi=as.integer(rep(16,n))
pchi[(dataset$x<CLX)&(dataset$y<CLY)] = 1
pchi[(dataset$x>CLX)&(dataset$y>CLY)] = 1
pchi[(dataset$x>CLX)&(dataset$y<CLY)] = 16
pchi[(dataset$x<CLX)&(dataset$y>CLY)] = 16
}
if (hollow==FALSE)
{
pchi=as.integer(rep(16,n))
}
}
if (plotit)
plot(dataset$x,dataset$y,pch=16,xlab=xlab,ylab=ylab)
if (plotit){
plot(Sum.of.squares~Critical.x.value,
data=dataset4,
xlab="Critical-x value",
ylab="Sum of squares")}
if (plotit)
plot(Q.err~Critical.y.value, data=dataset, xlab="Critical-y value",
ylab="Number of points in error quadrants")
if (plotit)
plot(Cramer.V~Critical.y.value, data=dataset, xlab="Critical-y value",
ylab="Cramer's V")
if (plotit)
{
plot(dataset$x,dataset$y,pch=pchi,
xlab=xlab,
ylab=ylab)
abline(v=CLX, col="blue")
abline(h=CLY, col="blue")
max.x = max(dataset$x)
max.y = max(dataset$y)
min.x = min(dataset$x)
min.y = min(dataset$y)
text (min.x+(max.x-min.x)*0.01, min.y+(max.y-min.y)*0.99,
labels="I")
text (min.x+(max.x-min.x)*0.99, min.y+(max.y-min.y)*0.99,
labels="II")
text (min.x+(max.x-min.x)*0.99, min.y+(max.y-min.y)*0.01,
labels="III")
text (min.x+(max.x-min.x)*0.01, min.y+(max.y-min.y)*0.01,
labels="IV")
}
if(verbose){cat("\n")
cat("n = Number of observations","\n")
cat("CLx = Critical value of x","\n")
cat("SS = Sum of squares for that critical value of x","\n")
cat("CLy = Critical value of y","\n")
cat("Q = Number of observations which fall into quadrants I, II, III, IV",
"\n")
cat("Q.Model = Total observations which fall into the quadrants predicted by the model",
"\n")
cat("p.Model = Percent observations which fall into the quadrants predicted by the model",
"\n")
cat("Q.Error = Observations which do not fall into the quadrants predicted by the model",
"\n")
cat("p.Error = Percent observations which do not fall into the quadrants predicted by the model",
"\n")
cat("Fisher.p = p-value from Fisher exact test dividing data into these quadrants",
"\n")
cat("Cramer.V = Cramer's V statistic from dividing data into these quadrants",
"\n")
cat("\n")
cat("Final model:","\n","\n")}
if(!listout){
return(Z)}
if(listout){
print(Z)
cat("\n")
Zed = list(Critical.x = dataset2[c("Critical.x.value", "Sum.of.squares")],
Critical.y = dataset3[c("Critical.y.value",
"Q.i","Q.ii","Q.iii","Q.iv",
"Q.model","Q.err","Cramer.V")],
Final.model = Z)
return(Zed)}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.