curveRep <- function(x, y, id, kn=5, kxdist=5, k=5, p=5, force1=TRUE,
metric=c('euclidean','manhattan'),
smooth=FALSE, extrap=FALSE, pr=FALSE) {
metric <- match.arg(metric)
id <- as.character(id)
omit <- is.na(x + y)
missfreq <- NULL; nomit <- sum(omit)
if(nomit) {
m <- tapply(omit, id, sum)
missfreq <- table(m)
x <- x[!omit]; y <- y[!omit]; id <- id[!omit]
}
n <- length(x)
ns <- table(id)
nunique <- length(unique(ns))
if(nunique==1 || nunique <= kn) ncuts <- c(sort(unique(ns)),Inf) else {
grouped.n <- cut2(ns, g=kn)
ncuts <- cut2(ns, g=kn, onlycuts=TRUE)
if(force1 && ncuts[2] > 1 && min(ns)==1)
ncuts <- sort(unique(c(1:2, ncuts)))
}
nlev <- length(ncuts)-1
res <- vector('list', nlev)
names(res) <- as.character(ncuts[-length(ncuts)])
clust <- function(x, k)
if(diff(range(x))==0 || NROW(x) < k+1) rep(1, NROW(x)) else
clara(x, k, metric=metric)$clustering
interp <- if(extrap)
function(x, y=NULL, xout) approxExtrap(x, y, xout=xout)$y else
function(x, y=NULL, xout) approx(x, y, xout=xout, rule=2)$y
## Cluster by sample size first
if(pr) cat('Creating',nlev,'sample size groups\n\n')
for(i in 1:nlev) {
## Get list of curve ids in this sample size group
if(i==nlev) {
below <- ns <= ncuts[i+1]
brack <- ']'
} else {
below <- ns < ncuts[i+1]
brack <- ')'
}
ids <- names(ns)[ns >= ncuts[i] & below]
if(pr) cat('Processing sample size [',ncuts[i],',',ncuts[i+1],
brack,' containing ', length(ids),' curves\n',sep='')
if(length(ids) < kxdist) res[[i]] <- list(ids) else {
## Cluster by distribution of x within sample size group
## Summarize these ids by clustering on range of x,
## plus the largest gap if minimum sample size > 2
## Use only the x position is min sample size is 1
s <- id %in% ids
ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))
z <- tapply((1:n)[s], id[s],
function(j) if(ssize==1) x[j][1] else
if(ssize==2) range(x[j]) else
c(range(x[j]),max(diff(sort(x[j])))))
z <- matrix(unlist(z), nrow=length(z), byrow=TRUE)
if(kxdist > nrow(z) - 1)
stop('number of curves to cluster must be >= kxdist+1')
distclusters <- clust(z, kxdist)
if(pr) {
cat(' Number of curves in each x-dist cluster:\n')
print(table(distclusters))
}
resi <- list()
## Within x distribution and within sample size interval,
## cluster on linearly interpolated y at p equally spaced x points
## unless <2 unique x-points for some curve
for(clus in 1:max(distclusters)) {
idc <- ids[distclusters==clus]
if(pr) cat(' Processing x-distribution group', clus,
'containing', length(idc),'curves\n')
s <- id %in% idc
ssize <- min(tapply(x[s], id[s], function(w) length(unique(w))))
if(ssize > 1) {
xrange <- range(x[s])
xseq <- seq(xrange[1], xrange[2], length.out=p)
}
g <- if(ssize==1) function(j) c(mean(x[j]), mean(y[j])) else
if(smooth && ssize > 2)
function(j) interp(clowess(x[j],y[j]), xout=xseq) else
function(j) interp(x[j], y[j], xout=xseq)
z <- tapply((1:n)[s], id[s], g)
z <- matrix(unlist(z), nrow=length(idc), byrow=TRUE)
yclusters <- clust(z, min(k, max(length(idc)-2,1)))
names(yclusters) <- idc
resi[[clus]] <- yclusters
}
res[[i]] <- resi
}
}
structure(list(res=res, ns=table(ns), nomit=nomit, missfreq=missfreq,
ncuts=ncuts, kn=kn, kxdist=kxdist, k=k, p=p,
smooth=smooth, x=x, y=y, id=id),
class='curveRep')
}
print.curveRep <- function(x, ...) {
sm <- if(x$smooth) 'smooth' else 'not smoothed'
ncuts <- x$ncuts
cat('kn:',x$kn, ' kxdist:',x$kxdist, ' k:',x$k,
' p:',x$p, ' ', sm, '\n\n', sep='')
cat('Frequencies of number of non-missing values per curve:\n')
print(x$ns)
if(length(x$missfreq)) {
cat(x$nomit, 'missing values excluded.\n\n')
cat('\nFrequency of number of missing values per curve:\n')
print(x$missfreq)
}
cat('\nSample size cuts:', paste(ncuts, collapse=' '),'\n')
cat('Number of x distribution groups per sample size group:',
paste(sapply(x$res, length), collapse=' '),'\n\n')
res <- x$res
ng <- length(res)
for(i in 1:ng) {
ngroup <- res[[i]]
maxclus <- max(unlist(ngroup))
w <- matrix(NA, nrow=maxclus, ncol=length(ngroup),
dimnames=list(paste('Cluster',1:maxclus),
paste('x-Dist', 1:length(ngroup))))
j <- 0
for(xdistgroup in ngroup) {
j <- j+1
w[,j] <- tabulate(xdistgroup, nbins=maxclus)
}
brack <- if(i==ng) ']' else ')'
z <- if(is.infinite(ncuts[i+1])) ncuts[i] else
paste('[', ncuts[i], ',', ncuts[i+1], brack, sep='')
cat('\nNumber of Curves for Sample Size ', z, '\n',sep='')
print(w)
}
invisible()
}
plot.curveRep <- function(x, which=1:length(res),
method=c('all','lattice','data'),
m=NULL, probs=c(.5,.25,.75),
nx=NULL, fill=TRUE,
idcol=NULL, freq=NULL, plotfreq=FALSE,
xlim=range(x), ylim=range(y),
xlab='x', ylab='y', colorfreq=FALSE, ...) {
method <- match.arg(method)
retdat <- FALSE
if(method == 'data') {
retdat <- TRUE
method <- 'lattice'
sRequire('lattice')
}
ncuts <- x$ncuts
res <- x$res; id <- x$id; y <- x$y; k <- x$k; x <- x$x
nng <- length(res)
samp <- function(ids)
if(!length(m) || is.character(m) ||
length(ids) <= m) ids else sample(ids, m)
if(is.character(m) &&
(m != 'quantiles' || method != 'lattice'))
stop('improper value of m')
if(method=='lattice') {
if(length(which) != 1)
stop('must specify one n range to plot for method="lattice" or "data"')
nres <- names(res)
nname <- if(length(nres)==1) NULL else
if(nres[which]=='1' & nres[which+1]=='2') 'n=1' else {
brack <- if(which==length(nres)) ']' else ')'
z <- if(is.infinite(ncuts[which+1])) ncuts[which] else
paste('[',ncuts[which],',',ncuts[which+1],brack,sep='')
paste('n ',z, sep='')
}
res <- res[[which]]
n <- length(x)
X <- Y <- xdist <- cluster <- sizecluster <- numeric(n)
curve <- character(n)
if(length(freq)) {
unique.cats <- unique(freq)
Freqtab <- matrix(0, nrow=n, length(unique.cats),
dimnames=list(NULL, unique.cats))
}
st <- 1
for(jx in 1:length(res)) {
xgroup <- res[[jx]]
ids <- names(xgroup)
for(jclus in 1:max(xgroup)) {
all.ids.in.cluster <- ids[xgroup==jclus]
if(length(freq)) {
freqtab <- table(freq[all.ids.in.cluster])
nfreqtab <- names(freqtab)
}
plotted.ids.in.cluster <- samp(all.ids.in.cluster)
for(cur in plotted.ids.in.cluster) {
s <- id %in% cur
np <- sum(s)
i <- order(x[s])
en <- st+np-1
if(en > n) stop('program logic error 1')
X[st:en] <- x[s][i]
Y[st:en] <- y[s][i]
xdist[st:en] <- jx
cluster[st:en] <- jclus
curve[st:en] <- cur
sizecluster[st:en] <- sum(xgroup==jclus)
if(length(freq)) Freqtab[st:en, nfreqtab] <- rep(freqtab, each=np)
st <- st+np
}
}
}
Y <- Y[1:en]; X <- X[1:en]
distribution <- xdist[1:en]; cluster <- cluster[1:en]
curve <- curve[1:en]; sizecluster <- sizecluster[1:en]
if(length(freq)) Freqtab <- Freqtab[1:en,,drop=FALSE]
textfun <- function(subscripts, groups=NULL) {
if(!length(subscripts)) return()
txt <- if(length(freq) && length(groups)) {
tab <- Freqtab[subscripts[1],]
if(plotfreq) {
mx <- max(Freqtab, na.rm=TRUE)
f <- mx/(.1*plotfreq)
y <- 1
fnam <- names(tab)
long <- fnam[nchar(fnam)==max(nchar(fnam))][1]
lx <- convertX(unit(1, 'strwidth', long), 'npc', valueOnly=TRUE)
for(i in 1:length(tab)) {
y <- y - .075
grid.text(fnam[i], x=lx-.005, y=y+.025, just=c(1,.5),
gp=gpar(fontsize=7, col=gray(.4)))
if(tab[i] > 0)
grid.polygon(x=c(lx, lx+tab[i]/f, lx+tab[i]/f, lx, lx),
y=c(y, y, y+.05, y+.05, y),
gp=gpar(fill=gray(.7), col=gray(.7)))
if(tab[i]==mx)
grid.text(mx, x=lx+mx/f + .01, y=y+.025,
just=c(0,.5), gp=gpar(fontsize=7, col=gray(.4)))
}
return()
}
txt <- paste(names(tab), tab, sep=':')
txt2 <- txt
paste(txt, collapse=';')
} else {
size <- sizecluster[subscripts[1]]
paste('N=',size,sep='')
}
if (!colorfreq | is.null(idcol) | is.null(freq)) #Do same as original
{
grid.text(txt, x = 0.005, y = 0.99, just = c(0, 1),
gp = gpar(fontsize = 9, col = gray(0.25)))
}
else #color freq text using idcol
{
mycolors<-data.frame(idcol,freq)
mycolors<-unique(mycolors)
curtext<-txt2[1]
curtrt<-strsplit(curtext,':')[[1]][1]
curcol<-as.character( mycolors[ curtrt==as.character( mycolors[,2]) ,1] )
grid.text(curtext,
x = 0.005,
y = 0.99,
just = c(0, 1),
gp = gpar(fontsize = 9, col =curcol)
)
emspace<-2*strwidth('M', units="figure")
for (i in 2:length(txt2))
{
curtext<-txt2[i]
curtrt<-strsplit(curtext,':')[[1]][1]
curcol<-as.character( mycolors[ curtrt==as.character( mycolors[,2]) ,1] )
grid.text(curtext,
x = emspace+strwidth(txt2[i-1], units="figure") ,
y = 0.99, just = c(0, 1),
gp = gpar(fontsize = 9, col = curcol)
)
}
}
}
pan <- if(length(idcol))
function(x, y, subscripts, groups, type, ...) {
groups <- as.factor(groups)[subscripts]
textfun(subscripts, groups)
for(g in levels(groups)) {
idx <- groups == g
xx <- x[idx]; yy <- y[idx]; ccols <- idcol[g]
if (any(idx)) {
switch(type,
p = lattice::lpoints(xx, yy, col = ccols),
l = lattice::llines(xx, yy, col = ccols),
b = { lattice::lpoints(xx, yy, col = ccols)
lattice::llines(xx, yy, col = ccols) })
}
}
} else function(x, y, subscripts, groups, ...) {
lattice::panel.superpose(x, y, subscripts, groups, ...)
textfun(subscripts, groups)
}
if(retdat) return(data.frame(x=X, y=Y, distribution, cluster,
curve=curve, ninterval=nname))
if(is.character(m))
print(xYplot(Y ~ X | distribution*cluster,
method='quantiles', probs=probs, nx=nx,
xlab=xlab, ylab=ylab,
xlim=xlim, ylim=ylim,
main=nname, as.table=TRUE,
panel=function(x, y, subscripts, ...) {
if(length(subscripts)) {
panel.xYplot(x, y, subscripts, ...)
textfun(subscripts)
}
})) else
print(lattice::xyplot(Y ~ X | distribution*cluster, groups=curve,
xlab=xlab, ylab=ylab,
xlim=xlim, ylim=ylim,
type=if(nres[which]=='1')'b' else 'l',
main=nname, panel=pan, as.table=TRUE))
return(invisible())
}
for(jn in which) {
ngroup <- res[[jn]]
for(jx in 1:length(ngroup)) {
xgroup <- ngroup[[jx]]
ids <- names(xgroup)
for(jclus in 1:max(xgroup)) {
rids <- ids[xgroup==jclus]
nc <- length(rids)
ids.in.cluster <- samp(rids)
for(curve in 1:length(ids.in.cluster)) {
s <- id %in% ids.in.cluster[curve]
i <- order(x[s])
type <- if(length(unique(x[s]))==1)'b' else 'l'
if(curve==1) {
plot(x[s][i], y[s][i], xlab=xlab, ylab=ylab,
type='n', xlim=xlim, ylim=ylim)
brack <- if(jn==nng) ']' else ')'
z <- if(is.infinite(ncuts[jn+1])) ncuts[jn] else
paste('[', ncuts[jn],',',ncuts[jn+1],brack,sep='')
title(paste('n ', z, ' x=',jx,
' c=',jclus,' ',nc,' curves', sep=''), cex=.5)
}
lines(x[s][i], y[s][i], type=type,
col=if(length(idcol))
idcol[ids.in.cluster[curve]] else curve)
}
}
if(fill && max(xgroup) < k)
for(i in 1:(k - max(xgroup)))
plot(0, 0, type='n', axes=FALSE, xlab='', ylab='')
}
}
}
curveSmooth <- function(x, y, id, p=NULL, pr=TRUE) {
omit <- is.na(x + y)
if(any(omit)) {
x <- x[!omit]; y <- y[!omit]; id <- id[!omit]
}
uid <- unique(id)
m <- length(uid)
pp <- length(p)
if(pp) {
X <- Y <- numeric(p*m)
Id <- rep(id, length.out=p*m)
}
st <- 1
en <- 0
ncurve <- 0
for(j in uid) {
if(pr) {
ncurve <- ncurve + 1
if((ncurve %% 50) == 0) cat(ncurve,'')
}
s <- id==j
xs <- x[s]
ys <- y[s]
if(length(unique(xs)) < 3) {
if(pp) {
en <- st + length(xs) - 1
X[st:en] <- xs
Y[st:en] <- ys
Id[st:en] <- j
}
} else {
if(pp) {
uxs <- sort(unique(xs))
xseq <- if(length(uxs) < p) uxs else
seq(min(uxs), max(uxs), length.out=p)
ye <- approx(clowess(xs, ys), xout=xseq)$y
n <- length(xseq)
en <- st + n - 1
X[st:en] <- xseq
Y[st:en] <- ye
Id[st:en] <- j
} else y[s] <- approx(clowess(xs, ys), xout=xs)$y
}
st <- en + 1
}
if(pr) cat('\n')
if(pp) {
X <- X[1:en]
Y <- Y[1:en]
Id <- Id[1:en]
list(x=X, y=Y, id=Id)
} else list(x=x, y=y, id=id)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.