inst/doc/probout.R

## ---- eval = TRUE, message = FALSE--------------------------------------------
 require(GDAdata)

 data(Decathlon)
 
 x <- Decathlon[,c("m100","m400","m1500")]

## ---- fig.height = 7, fig.width = 7-------------------------------------------
plot(x[,1], x[,3], xlab = "100 meter timings", ylab = "1500 meter timings", 
     main = "", pch = 16, cex = .5)

## ---- message = FALSE---------------------------------------------------------
  require(probout)
  require(FNN)

## ---- eval = TRUE-------------------------------------------------------------
lead <- leader(x)

## ---- fig.height = 7, fig.width = 7-------------------------------------------
plot(x[,1], x[,3], xlab = "100 meter timings", ylab = "1500 meter timings", 
     main = "leader observations (blue)", pch = 16, cex = .5)
leads <- lead[[1]]$leaders
points(x[leads,1],x[leads,3],pch="+",cex=1.5,col="dodgerblue")

## ---- echo = FALSE, message = FALSE-------------------------------------------
  require(mclust)
  require(MASS)

## ---- eval = TRUE-------------------------------------------------------------
ntimes <- 100
P <- matrix( NA, length(leads), ntimes)
for (i in 1:ntimes) {
   P[,i] <- partProb( simData(lead[[1]]), method = "distance")
}
pprobs <- apply( P, 1, median)

## ---- fig.height = 7, fig.width = 7-------------------------------------------
thresh <- .95
plot(x[,1], x[,3], xlab = "100 meter timings", ylab = "1500 meter timings", 
     main = "leaders: outlier prob > .95 red else blue", pch = 16, cex = .5)
out <- leads[pprobs > thresh]
points(x[leads,1],x[leads,3],pch="+",cex=1.5,col="dodgerblue")
points(x[out,1],x[out,3],pch="*",cex=1.5,col="red")

## ---- eval = TRUE-------------------------------------------------------------
probs <- allProb(lead[[1]],pprobs)

## ---- fig.height = 7, fig.width = 7-------------------------------------------
plot(x[,1], x[,3], xlab = "100 meter timings", ylab = "1500 meter timings", 
     main = "outlier prob > .95 (red)", pch = 16, cex = .5)
out <- (1:nrow(x))[probs > thresh]
points(x[out,1],x[out,3],pch=1,col="red")

## ---- eval = TRUE-------------------------------------------------------------
nsim <-	 (1:15)*1000
nleads <- length(leads)
ntimes <- 100

P <- array( NA, c(nleads, length(nsim), ntimes))
dimnames(P) <- list(NULL, nsim, NULL)

for (i in 1:ntimes) {
   for (j in seq(along = nsim)) {
      P[,j,i] <- partProb( simData(lead[[1]], nsim[j]), method = "distance")
   }
}
probs <- apply( P, c(1,2), median)

## ---- fig.height = 7, fig.width = 7-------------------------------------------
plot( nsim, sample(range(probs),size=length(nsim),replace=T), ylim = c(.90,1),
 type="n", xlab = "# simulated observations", ylab = "leader group probability")
dummy <- apply( probs, 1, function(p) lines(nsim,p))
abline( v = 8000, lty = 2)

Try the probout package in your browser

Any scripts or data that you put into this service are public.

probout documentation built on Feb. 11, 2022, 5:10 p.m.