Nothing
### MAIN sparseKMA FUNCTION (with the normalized L^2 distance)
sparseKMA <- function(data, x, K, m.prop = .3, perc=0.03, tol=0.01, iter.max=50,
n.out = 500, vignette=TRUE){
# data is the matrix representing the functions (n x p)
# [I assume to have a vectorized version of the data AFTER smoothing]
# x is a (n x p) matrix giving the domain of each function,
# or a p-dimensional vector giving the common domain
# K is the number of clusters
# m.prop is the sparsity parameter (proportion of unrelevant domain
# where w(x) = 0) --> (default 30%)
# perc is the alignment parameter (max proportion of shift / dilation at each iter
# of the warping procedure) --> (3% default)
# WARNING: 5% is already extreme; don't set this above 8-10%
# tol is the tolerance criterion on the weighting function to exit the loop (1% default)
# iter.max is the maximum number of iteration (50 default)
# n.out is the number of abscissa points on which w(x) is estimated
# vignette is a boolean (should the algorithm progress be reported?)
# preliminaries
n.obs <- dim(data)[1]
n.abs <- dim(data)[2]
if(length(dim(x))==0){
x.reg <- matrix(x,n.obs,length(x),byrow=TRUE)
}
if(length(dim(x))==2){
x.reg <- x
}
index <- rep(1000,n.obs)
index.old <- rep(5000,n.obs)
labels <- sample.int(K,n.obs,replace=TRUE)
labels.old <- sample.int(K,n.obs,replace=TRUE)
grid.shift <- seq(-perc,perc,len=9)
grid.dil <- 1 + seq(-perc,perc,len=9)
grid.val <- t(expand.grid(grid.shift, grid.dil))
xout <- seq(min(x.reg), max(x.reg), len=n.out)
w <- rep(1, length(xout))
w.old <- rep(10, length(xout))
b.old <- b <- rep(1, length(x))
iter <- 0
# initialize the template
mytmp <- colMeans(matapprox.Y(x.reg, data, xout), na.rm = TRUE)
template <- NULL
for(k in 1:K)template <- rbind(template, mytmp)
while( (mean(abs(index - index.old)) > tol | sum(abs(labels - labels.old)) > 0 ) & iter < iter.max ){
iter <- iter + 1
index.old <- index
labels.old <- labels
b.old <- b
xout.old <- xout
xout <- seq(min(x.reg), max(x.reg), len=n.out)
if(iter>1)w <- as.vector(matapprox.y(rbind(xout.old), w, xout))
w.old <- w
#this is not necessary for the algorithm:
#D <- diff(range(xout))
#m <- D*m.prop
# alignment step
warp.def <- matrix(NA,n.obs,2)
for(i in (1:n.obs)){
cand <- scale(t(x.reg[i,,drop=FALSE])%*%grid.val[2,,drop=FALSE], center=-grid.val[1,], scale=FALSE)
datir <- matapprox.y(t(cand),data[i,],xout=xout)# rows are possible warpings, columns are time instances
index.temp <- NULL
for(k in 1:K){# normalized L^2 norm between functions, with sparsity weighting function
matrice.norma <- scale(scale(datir, center=template[k,], scale=FALSE)^2, center = FALSE, scale = 1/w)
normak <- apply(matrice.norma,1,integral,x=xout)/(diff(range(xout))*rowSums(!is.na(matrice.norma))/dim(matrice.norma)[2])
# index.temp: matrix with normalized L2 norm to the template, for every possible warping (rows) and cluster (columns)
index.temp <- cbind(index.temp, normak)
}
quale <- which(index.temp==min(index.temp), arr.ind=TRUE)
quale <- quale[sample(dim(quale)[1],1),]
index[i] <- index.temp[quale[1],quale[2]]
warp.def[i,] <- t(grid.val[,quale[1]])
labels[i] <- quale[2]
}
# within-cluster normalization step
for(k in 1:K){
ind <- which(labels == k)
warp.def[ind,1] <- (warp.def[ind,1] - colMeans(warp.def[ind,])[1])/colMeans(warp.def[ind,])[2]
warp.def[ind,2] <- warp.def[ind,2]/colMeans(warp.def[ind,])[2]
}
x.reg <- x.reg*warp.def[,2]+warp.def[,1]
# update the weighting function
b <- GetWCSSalign(matapprox.Y(x.reg, data, xout), labels, warp.def, matapprox.x(x.reg, xout), xout)$bcss.perfeature
b.ord <- sort(b)
c.star <- b.ord[ceiling(length(b.ord)*m.prop)]
w <- GetOptimalW(b, c.star)
# update the templates
mytmp <- colMeans(matapprox.Y(x.reg, data, xout), na.rm = TRUE)
ind.mod <- which(w!=0)
template <- NULL
for(k in 1:K){
ksel <- which(labels==k)
mytmp2 <- colMeans(matapprox.Y(x.reg[ksel,,drop=FALSE], data[ksel,,drop=FALSE], xout), na.rm = TRUE)
mytmp[ind.mod] <- mytmp2[ind.mod]
#mytmp2[which(colSums(!is.na(mytmp)) < 2)] <- NA
template <- rbind(template, mytmp)
}
if(vignette) {
cat('Iteration: ', iter, ', mean distance: ', mean(index), '\n')
}
}
return(list(template=template, temp.abscissa=xout, labels=labels, warping=warp.def, reg.abscissa=x.reg, distance=index, w=w, x.bcss=b))
}
### MAIN sparseKMA FUNCTION (with the \rho(f1^\prime, f2^\prime) similarity of Sangalli et al., 2010)
sparseKMArho <- function(data, x, K, m.prop=0.3, perc=0.03, tol=0.01, template.est = 'raw',
n.out=500, iter.max=50, vignette=TRUE){
# data is the matrix representing the functions (n x p)
# NOTE: data can be an array (n x p x d) in case of multidimensional functions R -> R^d
# [I assume to have a vectorized version of the data AFTER smoothing]
# [here I also assume that these are the functions FIRST DERIVATIVES]
# [because the similarity index is ONLY based on derivatives, so no reasons to treat the original functions]
# x is a (n x p) matrix giving the domain of each function,
# or a p-dimensional vector giving the common domain
# K is the number of clusters
# m.prop is the sparsity parameter (proportion of unrelevant domain
# where w(x) = 0) --> (default 30%)
# perc is the alignment parameter (max proportion of shift / dilation at each iter
# of the warping procedure) --> (3% default)
# WARNING: 5% is already extreme; don't set this above 8-10%
# tol is the tolerance criterion on the weighting function to exit the loop (1% default)
# template.est is a text string giving choices for the template estimation method
# [currently 2 choices are supported:'raw' or 'loess']
# ['raw' just computes the vector means across functions]
# ['loess' estimates the template via the R loess function]
# n.out is the number of abscissa points on which w(x) is estimated
# iter.max is the maximum number of iteration (50 default)
# vignette is a boolean (should the algorithm progress be reported?)
# preliminaries
n.obs <- dim(data)[1]
n.abs <- dim(data)[2]
if(length(dim(data))<3){
data <- array(data, dim=c(n.obs,n.abs,1))
}
n.dim <- dim(data)[3]
if(length(dim(x))==0){
x.reg <- matrix(x,n.obs,length(x),byrow=TRUE)
}
if(length(dim(x))==2){
x.reg <- x
}
index <- rep(1000,n.obs)
index.old <- rep(5000,n.obs)
n.opt <- 9 # keep value small for computational reasons
labels <- sample.int(K,n.obs,replace=TRUE)
labels.old <- sample.int(K,n.obs,replace=TRUE)
grid.shift <- seq(-perc,perc,len=n.opt)
grid.dil <- 1 + seq(-perc,perc,len=n.opt)
grid.val <- t(expand.grid(grid.shift, grid.dil))
xout <- seq(min(x.reg, na.rm = TRUE), max(x.reg, na.rm = TRUE), len=n.out)
w <- rep(1, length(xout))
w.old <- rep(10, length(xout))
b.old <- b <- rep(1, length(x))
iter <- 0
if(template.est == 'loess')loess.span <- 0.15
# initialize the template
template <- array(NA, dim=c(K, n.out, n.dim))
for(d in 1:n.dim){
if(template.est == 'raw'){
mytmp <- colMeans(matapprox.Y(x.reg, data[,,d], xout), na.rm = TRUE)
}else if(template.est == 'loess'){
loess.temp <- loess(as.numeric(data[,,d])~as.numeric(x.reg), span=loess.span)
mytmp <- predict(loess.temp,xout)
}
for(k in 1:K)template[k,,d] <- mytmp
}
while( mean(abs(index - index.old)/index.old) > tol & sum(abs(labels - labels.old)) > 0 & iter < iter.max ){
iter <- iter + 1
index.old <- index
labels.old <- labels
b.old <- b
xout.old <- xout
xout <- seq(min(x.reg, na.rm = TRUE), max(x.reg, na.rm = TRUE), len=n.out)
if(iter>1)w <- as.vector(matapprox.y(rbind(xout.old), w, xout))
w.old <- w
#this is not necessary for the algorithm:
#D <- diff(range(xout))
#m <- D*m.prop
# alignment step
warp.def <- matrix(NA,n.obs,2)
for(i in (1:n.obs)){# loop over samples
cand <- scale(t(x.reg[i,,drop=FALSE])%*%grid.val[2,,drop=FALSE], center=-grid.val[1,], scale=FALSE)
index.temp <- array(NA, dim=c(n.opt^2, K, n.dim))
for(d in 1:n.dim){# loop over dimensions
datir <- matapprox.y(t(cand),data[i,,d],xout=xout)# rows are possible warpings, columns are abscissa instances
datirnorm <- apply(datir, 1, L2norm, x=xout)# compute also the normalization term of each warped function
for(k in 1:K){# loop over clusters
matrice.norma <- scale(datir, center=FALSE, scale=L2norm(xout, template[k,,d])/(template[k,,d]*w))/datirnorm # similarity index rho between functions, with sparsity weighting function
normak <- apply(matrice.norma,1,integral,x=xout)
index.temp[,k,d] <- normak
# index.temp: array with rho similarity to the template, for every possible warping (rows) and cluster (columns)
# the third dimension is the functions' original dimension
}
}
# average the similarity index over dimensions
index.temp <- apply(index.temp, c(1,2), mean)
# maximize the similarity to the template
quale <- which(index.temp==max(index.temp), arr.ind=TRUE)
quale <- quale[sample(dim(quale)[1],1),]
index[i] <- index.temp[quale[1],quale[2]]
warp.def[i,] <- t(grid.val[,quale[1]])
labels[i] <- quale[2]
}
# normalization step:
# warping functions must have identity mean within each cluster
for(k in 1:K){
ind <- which(labels == k)
warp.def[ind,1] <- (warp.def[ind,1] - colMeans(warp.def[ind,,drop=FALSE])[1])/colMeans(warp.def[ind,,drop=FALSE])[2]
warp.def[ind,2] <- warp.def[ind,2]/colMeans(warp.def[ind,,drop=FALSE])[2]
}
x.reg <- x.reg*warp.def[,2]+warp.def[,1]
# update the weighting function:
# w(x) is non-zero on the portion of the domain with maximal within-cluster similarity
b <- GetWCSSalignRho(matapprox.multiY(x.reg, data, xout), labels, warp.def, matapprox.x(x.reg, xout), xout)$wcss.perfeature
b.ord <- sort(b)
c.star <- b.ord[ceiling(length(b.ord)*m.prop)]
w <- GetOptimalW(b, c.star)
# update the templates
ind.mod <- which(w!=0)
template <- array(NA, dim=c(K, n.out, n.dim))
for(k in 1:K){
ksel <- which(labels==k)
if(length(ksel) > 0){
if(template.est == 'raw'){
mytmp2 <- apply(matapprox.multiY(x.reg[ksel,,drop=FALSE], data[ksel,,,drop=FALSE], xout), c(2,3), mean, na.rm = TRUE)
}else if(template.est == 'loess'){
mytmp2 <- matrix(NA, n.out, n.dim)
for(d in 1:n.dim){
loess.temp <- loess(as.numeric(data[ksel,,d,drop=FALSE])~as.numeric(x.reg[ksel,,drop=FALSE]), span=loess.span)
mytmp2[,d] <- predict(loess.temp,xout)
}
}
}else{
pickatrandom <- sample(1:n.obs,1)
mytmp2 <- matrix(NA, n.out, n.dim)
for(d in 1:n.dim)mytmp2[,d] <- approx(x.reg[pickatrandom,], data[pickatrandom,,d], xout,ties='ordered')$y
labels[pickatrandom] <- k
}
template[k,,] <- mytmp2
}
if(vignette) {
cat('Iteration: ', iter, ', mean distance: ', mean(index), '\n')
}
}
return(list(template=template, temp.abscissa=xout, labels=labels, warping=warp.def, reg.abscissa=x.reg,
distance=index, w=w, x.bcss=b))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.