Nothing
#' @importFrom grDevices n2mfrow rainbow gray.colors adjustcolor gray
#' @importFrom graphics abline boxplot lines matlines matplot par plot polygon points axis mtext
#' @importFrom utils menu combn txtProgressBar setTxtProgressBar getFromNamespace tail
#' @importFrom stats as.dist cutree hclust model.matrix predict quantile splinefun terms dist cor
#' @importFrom fda create.bspline.basis Data2fd eval.fd int2Lfd pca.fd
#' @import qrcm cluster ggpubr ggplot2
# clusters of effects in pseudo multi-type response model: applications and theory
#' @export
clustEff <- function(Beta, Beta.lower = NULL, Beta.upper = NULL,
k = c(2, min(5, (ncol(Beta)-1))), ask = FALSE,
diss.mat, alpha = .5,
step = c("both", "shape", "distance"),
cut.method = c("mindist", "length", "conf.int"),
method = "ward.D2", approx.spline = FALSE, nbasis = 50,
conf.level = 0.9, stand = FALSE, plot = TRUE, trace = TRUE){
if(!is.matrix(Beta)) Beta <- as.matrix(Beta)
if(stand) Beta <- scale(Beta)
n <- nrow(Beta); q <- ncol(Beta)
if(q < 2) stop("At least two curves to use this procedure!")
p <- 1:n / n; lenP <- length(p)
if(approx.spline){
splines <- create.bspline.basis(rangeval=range(p), nbasis=nbasis, norder=3)
fd <- Data2fd(Beta, argvals=p, basisobj=splines)
temp.p <- seq(fd$basis$rangeval[1], fd$basis$rangeval[2], l=max(c(501, 50*10+1)))
Beta <- eval.fd(temp.p, fd, int2Lfd(0))
if(!is.null(Beta.lower) & !is.null(Beta.upper)){
fd.lower <- Data2fd(Beta.lower, argvals=p, basisobj=splines)
fd.upper <- Data2fd(Beta.upper, argvals=p, basisobj=splines)
Beta.lower <- eval.fd(temp.p, fd.lower, int2Lfd(0))
Beta.upper <- eval.fd(temp.p, fd.upper, int2Lfd(0))
}
p <- temp.p
}
nms <- colnames(Beta)
if(is.null(nms)) nms <- paste0("X", 1:q)
colnames(Beta) <- nms
if(length(k) > 2) stop("The length of k have to be 1 or 2")
if(length(k) == 2 & min(k) == 1) stop("The minimum value of clusters to look for have to be 2")
step <- match.arg(step)
cut.method <- match.arg(cut.method)
if(is.null(Beta.lower) & is.null(Beta.lower) & cut.method == "conf.int") stop("cut.method `conf.int' can not be used without Beta.lower and Beta.upper")
if(cut.method == "conf.int"){
code <- 0
if((nrow(Beta.lower) != n) | (nrow(Beta.upper) != n)) code <- 1
if((ncol(Beta.lower) != q) | (ncol(Beta.upper) != q)) code <- 2
if(code != 0) stop("Dimensions of the matrices mismatched!")
}
if(alpha < 0 | alpha > 1) stop("alpha must be in (0,1)!")
METHODS <- c("ward.D", "single", "complete", "average", "mcquitty", "median", "centroid", "ward.D2")
i.meth <- pmatch(method, METHODS)
if(is.na(i.meth)) stop("invalid clustering method", paste("", method), "\n use one of this methods:", paste("", METHODS))
if(i.meth == -1) stop("ambiguous clustering method", paste("", method))
mat <- if(missing(diss.mat)) distshape(Beta, alpha = alpha, step = step, trace = trace) else diss.mat
ogg <- hclust(mat, method=method)
if(plot){
par(mar = c(4,3.3,1.5,1.5)+.1, mfrow = c(1, 3))
plot(ogg, main="", xlab="", ylab="", ann=TRUE, sub="")
mtext(side=2, line=2, text="Height")
}
distance <- double(max(1, length(k)))
if(length(k) == 1){
pick <- k
}
else{
if(ask){
par(mfrow=c(1, 1))
plot(ogg, main="", xlab="", ylab="", ann=TRUE, sub="")
mtext(side=2, line=2, text="Height")
pick <- 1
while(pick > 0 && pick < (q+1)){
pick <- menu(1:q, title = "\n Select how many clusters (or 0 to exit):\n")
if(pick > 0 && pick < (q+1)){
break
}
if(pick == 0) stop("Cluster algorithm aborted!!!")
}
}
else{
if(trace) cat("\nChoosing the optimal number of clusters: ", sep="")
pick <- switch(cut.method,
"mindist"={
for(pick in k[1]:k[2]){
oc <- cutree(ogg, k = pick)
distance[pick - 1] <- .get_ave_sil_width(mat, oc)
}
names(distance) <- k[1]:k[2]
(k[1]:k[2])[which.max(distance)]},
"length"={
distance <- diff(rev(ogg$height))
names(distance) <- 2:(length(distance)+1)
tempIndex <- which(distance < 0)
tempIndex2 <- which.min(distance[tempIndex])
as.integer(names(tempIndex2))},
"conf.int"={
for(pick in k[1]:k[2]){
oc <- cutree(ogg, k = pick)
num.clust <- length(unique(oc))
tempDist <- vector(length = num.clust)
for(.i in unique(oc)){
indCl <- (oc == .i)
BetaLmean <- rowMeans(Beta.lower[, indCl, drop = FALSE])
BetaRmean <- rowMeans(Beta.upper[, indCl, drop = FALSE])
xx1 <- Beta[, indCl, drop = FALSE] - BetaRmean
xx2 <- Beta[, indCl, drop = FALSE] - BetaLmean
tempDist[.i] <- mean((colMeans(xx1 < 0) + colMeans(xx2 >= 0)) / 2)
}
distance[pick - 1] <- sum(tempDist * summary(silhouette(oc, mat))$clus.avg.widths) / sum(tempDist)
}
names(distance) <- k[1]:k[2]
(k[1]:k[2])[which.max(distance)]})
if(trace) cat(paste0(pick, "\n\n"), sep="")
}
}
if(plot) abline(h = mean(c(max(ogg$height), rev(ogg$height), 0)[c(pick, (pick+1))]), col=2, lty=2)
oc <- cutree(ogg, k = pick)
num.clust <- length(unique(oc))
tab <- table(oc); ord <- order(tab); oc2 <- rep(0, length(oc))
for(i in 1:num.clust) oc2 <- replace(oc2, oc == ord[i], values = i)
oc <- oc2
BetaUpper <- BetaLower <- BetaMedio <- Signif.interval <- NULL
for(i in 1:num.clust){
BetaMedio <- cbind(BetaMedio, rowMeans(Beta[, oc == i, drop = FALSE]))
if(!is.null(Beta.lower) & !is.null(Beta.upper)){
BetaLower <- cbind(BetaLower, rowMeans(Beta.lower[, oc == i, drop = FALSE]))
BetaUpper <- cbind(BetaUpper, rowMeans(Beta.upper[, oc == i, drop = FALSE]))
Signif.interval <- cbind(Signif.interval, apply(cbind(BetaLower[, i], BetaUpper[, i]), 1, prod))
}
else{
a <- (1 - conf.level)/2; a <- c(a, 1 - a)
BetaLower <- cbind(BetaLower, apply(Beta[, oc == i, drop = FALSE], 1, quantile, probs = .1))
BetaUpper <- cbind(BetaUpper, apply(Beta[, oc == i, drop = FALSE], 1, quantile, probs = .9))
}
# if(plot){
# if(i == 1) {
# matplot(p, Beta[, oc == i, drop = FALSE], type="l", col=i, lty=2, ylim=rangeBeta, lwd=.8, xlab="", ylab="", axes=FALSE)
# axis(1); axis(2); mtext(side=1, line=2, text="p"); mtext(side=2, line=2, text="s(p)")
# lines(p, BetaMedio[, 1], col=1, lwd=1)
# }
# else{
# matlines(p, Beta[, which(oc == i), drop = FALSE], col=i, lwd=.8, lty=2)
# lines(p, BetaMedio[, i], col=i, lwd=1)
# }
# if(!is.null(Beta.lower) & !is.null(Beta.upper)){
# yy <- c(BetaLower[, i], tail(BetaUpper[, i], 1), rev(BetaUpper[, i]), BetaLower[1, i])
# xx <- c(p, tail(p, 1), rev(p), p[1])
# polygon(xx, yy, col = adjustcolor(i, alpha.f = 0.25), border = NA)
# }
# }
}
# BetaDist <- sapply(1:num.clust, function(.i) (Beta[, oc == .i, drop = FALSE] - BetaMedio[, .i])^2, simplify=FALSE)
# BetaDistMedio <- lapply(BetaDist, function(.x) sqrt(colSums(.x)))
BetaDistMedio <- sapply(1:num.clust, function(.i) c(sqrt(2 * (1 - suppressWarnings(cor(Beta[, oc == .i, drop = FALSE], BetaMedio[, .i]))))), simplify = FALSE)
oggSil <- if(length(unique(oc)) > 1) silhouette(oc, mat) else NULL
DissMedio <- sapply(seq_len(num.clust), function(.i){
tt <- which(oc == .i)
tempcont <- NULL
if(length(tt) == 1){
tempcont <- c(tempcont, 0)
}
else{
tempcont <- as.matrix(mat)[tt,tt][upper.tri(as.matrix(mat)[tt,tt])]
}
tempcont
}, simplify=FALSE)
names(BetaDistMedio) <- names(DissMedio) <- seq_len(num.clust) #names(BetaDist) <-
if(plot){
if(length(BetaDistMedio) > 0){
boxplot(split(oggSil[,3], oc), names=as.integer(names(table(oc))), ylab="", axes=FALSE)
axis(1, at=1:num.clust, labels=1:num.clust); axis(2); mtext(side=2, line=2, text="Silhouette")
}
if(ncol(BetaMedio) > 1){
rangeBeta <- range(cbind(BetaMedio, BetaLower, BetaUpper))
matplot(p, BetaMedio, type="l", lty=1, lwd=1, axes=FALSE, xlab="", ylab="", ylim=rangeBeta)
axis(1); axis(2); mtext(side=1, line=2, text="p"); mtext(side=2, line=2, text="s(p)")
for(i in 1:num.clust){
yy <- c(BetaLower[, i], tail(BetaUpper[, i], 1), rev(BetaUpper[, i]), BetaLower[1, i])
xx <- c(p, tail(p, 1), rev(p), p[1])
polygon(xx, yy, col = adjustcolor(i, alpha.f = 0.25), border = NA)
}
}
par(mfrow=c(1,1), mar=c(5,4,4,2)+.1)
}
obj <- list(call = match.call(), p = p, X = Beta, clusters = oc,
X.mean = BetaMedio, X.mean.dist = BetaDistMedio,
X.lower = Beta.lower, X.mean.lower = BetaLower,
X.upper = Beta.upper, X.mean.upper = BetaUpper,
Signif.interval = (Signif.interval > 0), k = pick,
diss.matrix = mat, X.mean.diss = DissMedio,
oggSilhouette = oggSil, oggHclust = ogg,
distance = distance, step = step, method = method, cut.method = cut.method, alpha = alpha)
class(obj) <- "clustEff"
return(obj)
}
#' @export
distshape <- function(Beta, alpha=.5, step=c("both", "shape", "distance"), trace=TRUE){
if(!is.matrix(Beta)) Beta <- as.matrix(Beta)
n <- nrow(Beta)
q <- ncol(Beta)
p <- 1:n/n
step <- match.arg(step)
id <- combn(1:q, 2)
mat <- matrix(NA, q, q)
matShape <- matDist <- matrix(NA, n, ncol(id), dimnames = list(1:n, paste0(id[1,], "-", id[2,])))
alphaDist <- NULL
tem <- tem2 <- TRUE
if(trace) cat("\nComputing Dissimilarity matrix: \n", sep="")
if(step %in% c("both", "shape")){
Smat <- apply(Beta, 2, splinefun, x=p)
Smat2 <- sign(sapply(1:length(Smat), function(.i) Smat[[.i]](1:n/n, deriv=1)))
}
if(trace) pb <- txtProgressBar(min=0, max=ncol(id), style=3)
for(i in 1:ncol(id)){
if(trace) setTxtProgressBar(pb, i)
if(step %in% c("both", "distance")){
matDist[, i] <- abs(Beta[, id[1, i]] - Beta[, id[2, i]])
# matDist[i, ] <- sqrt(2*(1-cor(s11, s22)))
}
if(step %in% c("both", "shape")){
matShape[,i] <- (Smat2[, id[1, i]] * Smat2[, id[2, i]] == 1)
}
}
if(step %in% c("both", "distance")){
alphaDist <- apply(matDist, 1, function(.x) quantile(.x, alpha))
matDist <- (matDist <= alphaDist)
}
if(trace) close(pb)
mat[t(id)] <- .5*(colMeans(matDist) + colMeans(matShape))
mat <- 1 - as.dist(t(mat))
return(mat)
}
#' @export
print.clustEff <- function(x, digits=max(3L, getOption("digits") - 3L), ...){
cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
tabClust <- as.integer(table(x$clusters))
cat("Clustering Effects algorithm with ", x$k, " clusters of sizes ",
paste(tabClust, collapse = ", "), "\n", sep = "")
cat("\n")
invisible(x)
}
#' @export
summary.clustEff <- function(object, ...){
nClust <- as.integer(table(object$clusters))
avClust <- unlist(lapply(object$X.mean.dist, mean, na.rm=TRUE))
avDiss <- unlist(lapply(object$X.mean.diss, mean))
avSilhouette <- summary(object$oggSilhouette)$clus.avg.widths
if(any(nClust == 1)) avSilhouette[nClust == 1] <- 1
avClust <- c(avClust, sum(avClust * prop.table(nClust)))
avDiss <- c(avDiss, sum(avDiss * prop.table(nClust)))
avSilhouette <- c(avSilhouette, sum(avSilhouette * prop.table(nClust)))
names(avSilhouette) <- names(avDiss) <- names(avClust) <- c(paste0("Cluster", seq_len(object$k)), "Average")
obj <- list(call = object$call, k = object$k, n = nrow(object$X), p = ncol(object$X),
step = object$step, alpha = object$alpha, method = object$method,
cut.method = object$cut.method, tabClust = table(object$clusters),
avClust = avClust, avSilhouette = avSilhouette, avDiss = avDiss)
class(obj) <- "summary.clustEff"
return(obj)
}
#' @export
print.summary.clustEff <- function(x, digits=max(3L, getOption("digits") - 3L), ...){
cat("\n######################", "\n\n")
cat("Selected n. of clusters:", format(x$k, digits=digits), "\n")
cat("n. of observations:", x$n, "\n")
cat("n. of curves:", x$p, "\n")
cat("Step selected:", x$step, "\n")
cat("alpha-percentile selected:", x$alpha, "\n")
cat("Cut method selected:", x$cut.method, "\n")
cat("Clustering method selected:", x$method, "\n\n")
cat("######################", "\n\n")
cat("Clustering table:")
print(x$tabClust, ...)
cat("\n######################", "\n\n")
cat("Average cluster distance:\n")
print(round(x$avClust, digits), ...)
cat("\n######################", "\n\n")
cat("Average cluster dissimilarity:\n")
print(round(x$avDiss, digits), ...)
cat("\n######################", "\n\n")
if(!is.null(x$avSilhouette)){
cat("Individual silhouette widths:\n")
print(round(x$avSilhouette, digits), ...)
cat("\n######################", "\n\n")
}
invisible(x)
}
#' @export
plot.clustEff <- function(x, xvar=c("clusters", "dendrogram", "boxplot", "numclust"), which,
polygon=TRUE, dissimilarity=TRUE, par=FALSE, ...){
xvar = match.arg(xvar)
if(!missing(which)){
if(any(which <= 0) | any(which > x$k)){
stop("Which values in 1-", x$k)
}
}
L <- list(...)
switch(xvar,
"dendrogram"={
if(is.null(L$main)) L$main <- "Dendrogram"
if(is.null(L$col)) L$col <- "red"
if(is.null(L$xlab)) L$xlab <- ""
if(is.null(L$ylab)) L$ylab <- "Height"
if(is.null(L$lty)) L$lty <- 2
if(is.null(L$mar)) L$mar <- c(2,3,4,1)+.1
par(mar=L$mar)
plot(x$oggHclust, main=L$main, xlab="", ylab="", ann=TRUE, sub="")
mtext(side=1, line=2, text=L$xlab)
mtext(side=2, line=2, text=L$ylab)
abline(h=mean(c(max(x$oggHclust$height), rev(x$oggHclust$height), 0)[c(x$k, (x$k+1))]), col=L$col, lty=L$lty)
},
"clusters"={
if(missing(which)) which <- seq(x$k)
yRange <- range(x$X.mean[, which])
if(!is.null(x$X.mean.lower) & !is.null(x$X.mean.upper)){
temp <- range(cbind(x$X.mean.lower, x$X.mean.upper))
yRange <- range(c(temp, yRange))
}
tabClust <- table(x$clusters)
if(is.null(L$xlab)) L$xlab <- "p"
if(is.null(L$ylab)) L$ylab <- "s(p)"
if(is.null(L$main)) L$main <- ""
if(is.null(L$lwd)) L$lwd <- c(1, 1.5)
if(length(L$lwd) == 1) L$lwd <- rep(L$lwd, 2)
if(is.null(L$type)) L$type <- "l"
if(is.null(L$ylim)) L$ylim <- yRange
if(is.null(L$lty)){L$lty <- c(1, 3)}
if(is.null(L$col)) L$col <- gray.colors(length(which))
if(length(L$col) == 1) L$col <- rep(L$col, length(which))
if(length(which) == 1){
if(is.null(L$mar)) L$mar <- c(4,3.3,2,1)+.1
par(mar=L$mar)
tempInd <- x$clusters == which
matplot(x$p, x$X[, tempInd], xlab="", ylab="", type=L$type, ylim=L$ylim, lwd=L$lwd[1], main=L$main, col=L$col[1], lty=L$lty[2], axes=FALSE)
axis(1); axis(2); mtext(side=1, line=2, text=L$xlab); mtext(side=2, line=2, text=L$ylab)
lines(x$p, x$X.mean[, which], lwd=L$lwd[2], col=L$col[1], lty = L$lty[1])
if(!is.null(x$X.mean.lower) & !is.null(x$X.mean.upper)){
if(!is.null(polygon)){
if(polygon){
yy <- c(x$X.mean.lower[, which], tail(x$X.mean.upper[, which], 1), rev(x$X.mean.upper[, which]), x$X.mean.lower[1, which])
xx <- c(x$p, tail(x$p, 1), rev(x$p), x$p[1])
polygon(xx, yy, col = adjustcolor(L$col[1], alpha.f = .25), border = NA)
}
else{
points(x$p, x$X.mean.lower[, which], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[1])
points(x$p, x$X.mean.upper[, which], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[1])
}
matlines(x$p, x$X[, tempInd], xlab="", ylab="", type=L$type, ylim=L$ylim, lwd=L$lwd[1], main=L$main, col=L$col[1], lty=L$lty[2], axes=FALSE)
lines(x$p, x$X.mean[, which], lwd=L$lwd[2], col=L$col[1], lty = L$lty[1])
}
}
abline(h=0, lty=3, col=1)
}
else{
if(is.null(L$mar)) L$mar <- c(4,3.3,1,1)+.1
par(mar=L$mar)
matplot(x$p, x$X.mean, xlab="", ylab="", type=L$type, ylim=L$ylim,
lwd=L$lwd[1], main=L$main, col=L$col, lty=L$lty[1], axes=FALSE)
# matplot(x$p, x$X[, x$clusters == which[1]], xlab="", ylab="", type=L$type, ylim=L$ylim, lwd=L$lwd[1], main=L$main, col=L$col[1], lty=L$lty, axes=FALSE)
axis(1); axis(2); mtext(side=1, line=2, text=L$xlab); mtext(side=2, line=2, text=L$ylab)
if(!is.null(x$X.mean.lower) & !is.null(x$X.mean.upper)){
if(!is.null(polygon)){
for(i in which){
if(polygon){
yy <- c(x$X.mean.lower[, i], tail(x$X.mean.upper[, i], 1), rev(x$X.mean.upper[, i]), x$X.mean.lower[1, i])
xx <- c(x$p, tail(x$p, 1), rev(x$p), x$p[1])
polygon(xx, yy, col = adjustcolor(L$col[i], alpha.f = .25), border = NA)# adjustcolor(L$col[1], alpha.f = 0.25)
}else{
points(x$p, x$X.mean.lower[, i], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[i])
points(x$p, x$X.mean.upper[, i], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[i])
}
}
matlines(x$p, x$X.mean, xlab="", ylab="", type=L$type, ylim=L$ylim,
lwd=L$lwd[2], main=L$main, col=L$col, lty=L$lty[1], axes=FALSE)
}
}
}
},
"boxplot"={
tabClust <- table(x$clusters)
k <- length(tabClust)
X <- if(dissimilarity) x$X.mean.diss else split(x$oggSilhouette[, 3], x$clusters)
if(is.null(L$main)) L$main <- if(dissimilarity) "Average dissimilarity within cluster" else "Silhouette within cluster"
if(is.null(L$labels)) L$labels <- as.integer(names(tabClust))[tabClust > 1]
if(is.null(L$ylab)) L$ylab <- if(dissimilarity) "Dissimilarity" else "Silhouette"
if(is.null(L$ylim)) L$ylim <- c(0, 1)
if(!dissimilarity) L$ylim <- c(0, max(unlist(X)))
if(is.null(L$mar)) L$mar <- c(3.3,3.3,3.3,1)+.1
par(mar=L$mar)
# par(mfrow=c(1,1))
if(length(X) > 0){
boxplot(X, names=L$labels, ylim=L$ylim, main=L$main, ylab="", axes=FALSE)
axis(1, at=1:k, labels=1:k); axis(2); mtext(side=2, line=2, text=L$ylab)
}
},
"numclust"={
if(is.null(L$xlab)) L$xlab <- "N. of clusters"
if(is.null(L$ylab)) L$ylab <- expression(pi[out]^k-pi[out]^(k+1))
if(is.null(L$main)) L$main <- ""
if(is.null(L$lwd)) L$lwd <- 1
if(is.null(L$pch)) L$pch <- 19
if(is.null(L$type)) L$type <- "b"
if(is.null(L$col)) L$col <- 1
if(is.null(L$mar)) L$mar <- c(4,4,2,1)+.1
par(mar=L$mar)
plot(diff(x$distance), type=L$type, axes=FALSE, col=L$col, lwd=L$lwd, xlab="", ylab="", pch=L$pch)
axis(1, at=1:length(diff(x$distance)), labels=names(x$distance)[-1])
axis(2)
mtext(side=1, line=2, text=L$xlab)
mtext(side=2, line=2, text=L$ylab)
})
par(mar=c(5,4,4,2)+.1)
}
#' @export
extract.object <- function(Y, X, intercept=TRUE, formula.p=~slp(p, 3), s, object, p, which){
if(missing(p)) p <- seq(.01, .99, .01)
if(!missing(object)){
if(!inherits(object, "iqr")){
stop("Wrong class object!")
}
else{
if(is.data.frame(object$mf)){
X <- as.matrix(object$mf[, -1])
}else{
X <- object$mf[[2]]
}
n <- nrow(X)
q <- nrow(object$coefficients)
intercept <- attr(attr(object$mf, "terms"), "intercept")
if(missing(which)) which <- 1:q
labels <- rownames(object$coefficients)
tempX <- tempXl <- tempXr <- list()
index <- 0
for(i in which){
index <- index + 1
predObj <- predict(object, type="beta", p=p)
tempX[[index]] <- predObj[[i]][, 2]
tempXl[[index]] <- predObj[[i]][, 4]
tempXr[[index]] <- predObj[[i]][, 5]
}
names(tempX) <- names(tempXl) <- names(tempXr) <- labels
}
}else{
Y <- as.matrix(Y)
X <- as.matrix(X)
qY <- ncol(Y)
qX <- ncol(X) + intercept
labels <- colnames(X)
if(is.null(labels)){
labels <- paste0(X, 1:(qX - 1))
colnames(X) <- labels
}
labels <- if(intercept) c("(Intercept)", labels) else labels
n <- nrow(X)
if(n != nrow(Y)) stop("Dimension mismatched!")
is.slp <- getFromNamespace("is.slp", "qrcm")
if(use.slp <- is.slp(formula.p)){
k <- attr(use.slp, "k")
intB <- attr(use.slp, "intB")
termlabelsB <- paste("slp", 1:k, sep = "")
k <- k + intB
coefnamesB <- (if (intB) c("(Intercept)", termlabelsB) else termlabelsB)
}
else{
B <- model.matrix(formula.p, data = data.frame(p=p))
k <- ncol(B)
termlabelsB <- attr(terms(formula.p), "term.labels")
coefnamesB <- colnames(B)
}
if(missing(s)) s <- matrix(1, nrow=qX, ncol=k)
colnames(s) <- coefnamesB
rownames(s) <- labels
if(missing(which)) which <- 1:(qX-intercept)
if(qY == 1){
object <- if(intercept) iqr(Y[, 1] ~ X, formula.p=formula.p, s=s) else iqr(Y[, 1] ~ -1 + X, formula.p=formula.p, s=s)
predObj <- predict(object, type="beta", p=p)
tempX <- sapply((2-!intercept):qX, function(i) predObj[[i]][,2])[, which]
tempXl <- sapply((2-!intercept):qX, function(i) predObj[[i]][,4])[, which]
tempXr <- sapply((2-!intercept):qX, function(i) predObj[[i]][,5])[, which]
colnames(tempX) <- colnames(tempXl) <- colnames(tempXr) <- if(intercept) labels[-1][which] else labels[which]
}
else{
predObj <- list()
for(i in 1:qY){
object <- if(intercept) iqr(Y[, i] ~ X, formula.p=formula.p, s=s) else iqr(Y[, i] ~ -1 + X, formula.p=formula.p, s=s)
predObj[[i]] <- predict(object, type="beta", p=p)
}
predObjX <- lapply(predObj, function(.x) simplify2array(lapply(.x, function(.x2) .x2[,2]))[, which+intercept, drop=FALSE])
predObjXl <- lapply(predObj, function(.x) simplify2array(lapply(.x, function(.x2) .x2[,4]))[, which+intercept, drop=FALSE])
predObjXr <- lapply(predObj, function(.x) simplify2array(lapply(.x, function(.x2) .x2[,5]))[, which+intercept, drop=FALSE])
tempX <- tempXl <- tempXr <- list()
for(i in 1:ncol(predObjX[[1]])){
tempX[[i]] <- sapply(1:qY, function(j) predObjX[[j]][, i])
tempXl[[i]] <- sapply(1:qY, function(j) predObjXl[[j]][, i])
tempXr[[i]] <- sapply(1:qY, function(j) predObjXr[[j]][, i])
}
names(tempX) <- names(tempXl) <- names(tempXr) <- if(intercept) labels[-1][which] else labels[which]
}
}
return(list(p=p, X=tempX, Xl=tempXl, Xr=tempXr))
}
# extract.object.interaction <- function(obj, extr_obj, p=1:99/100, interaction){
# n_list <- length(interaction)
# n_obj <- length(extr_obj$X)
# temp_temp <- summary(obj, p=p, cov=T)
# for(i in 1:n_list){
# temp_int <- interaction[[i]]
# se1 <- sapply(1:length(p), function(.i){
# temp_cov <- temp_temp[[.i]]$cov[temp_int, temp_int]
# sum(diag(temp_cov)) - 2*temp_cov[1,2]
# })
# extr_obj$X$interaction <- extr_obj$X[[temp_int[1]]]+ extr_obj$X[[temp_int[2]]]
# extr_obj$Xl$interaction <- extr_obj$X$interaction - 1.96*se1
# extr_obj$Xr$interaction <- extr_obj$X$interaction + 1.96*se1
# names(extr_obj$X)[n_obj+i] <- names(extr_obj$Xl)[n_obj+i] <- names(extr_obj$Xr)[n_obj+i] <- paste0("interaction",i)
# }
# return(extr_obj)
# }
#' @export
fpcac <- function(X, K = 2, fd = NULL, nbasis = 5, norder = 3, nharmonics = 3,
alpha = 0, niter = 30, Ksteps = 25, conf.level = 0.9, seed, disp = FALSE){
if(is.null(fd)) {
Xorig <- as.matrix(X)
bspl <- create.bspline.basis(rangeval = c(0, 1), nbasis = nbasis, norder = norder)
fd <- Data2fd(argvals = seq(0, 1, length.out = nrow(Xorig)), y = Xorig, basisobj = fd(basisobj = bspl))
}
else{
Xorig <- eval.fd(fd[[3]][[1]]/max(fd[[3]][[1]]), fd, Lfdobj=0)
}
X <- pca.fd(fd, nharm = nharmonics)$scores
n <- dim(X)[1]
p <- dim(X)[2]
no.trim <- floor(n*(1-alpha))
ll <- double(K)
ind <- integer(n)
dist <- ind
seqK <- seq.int(K)
seqN <- seq.int(n)
seqNoTrim <- seq.int(no.trim)
# nit = Number or random restarting (larger values provide more accurate solutions
# Ksteps = Number of k-Mean steps (not too many ksteps are needed)
# Initialize the objective function by a large enough value
vopt <- 1e+6
# set random seed if not missing
if(!missing(seed)) set.seed(seed)
#Ramdon restarts
for (iter in 1:niter){
# Randomly choose the K initial centers
cini <- X[sample(n, size=K, replace=F), ]
dim(cini) <- c(K, p)
# C-steps
for (t in 1:Ksteps){
# Distances of each data point to its closest center
ll <- sapply(seqK, function(k) rowSums((X - rep(cini[k,], rep.int(n, p)))^2))
dist <- apply(ll, 1, min)
ind <- apply(ll, 1, which.min)
# Modified data (Xmod) with the non-trimmed points and last
# column equal to the clusters allocations
qq <- seqN[dist <= sort(dist)[no.trim]]
xmod <- cbind(X[qq, ], ind[qq])
#Calculus of the new k centers
cini <- t(sapply(seqK, function(k) apply(xmod[xmod[, p+1] == k, 1:p, drop=FALSE], 2, mean, na.rm=TRUE)))
}
if(any(is.na(cini))) next
# Calculus of the trimmed k-variance
obj <- mean(sapply(seqNoTrim, function(l) sum((xmod[l, 1:p] - cini[xmod[l, p+1], ])^2)))
# Change the optimal value and the optimal centers (copt)
# if a reduction in the objective function happens
if (obj < vopt){
vopt <- obj
# Improvements in the objective functions are printed
if(disp){
cat("\n")
cat("Iter ", iter, "\t Objective function = ",
formatC(vopt, digits=5, width=8, format="f"),
"<----")
}
copt <- cini
}
else{
if(disp){
cat("\n")
cat("Iter ", iter, "\t Objective function = ", formatC(obj, digits=5, width=8, format="f"))
}
}
}
## Obtain the final cluster allocations (this is necesary, because a final
# cluster assigment movement is possible)
asig <- ind
ll <- sapply(seqK, function(k) rowSums((X - rep(copt[k,], rep.int(n, p)))^2))
dist <- apply(ll, 1, min)
ind <- apply(ll, 1, which.min)
# Compute the radius of the optimal ball ropt
ord <- sort(dist)
ropt <- ord[no.trim]
# Assign every observation to each cluster and 0 for the trimmed observations
asig <- ifelse(dist > ropt, 0, ind)
tab <- table(asig); if(length(tab) > K) {tab <- tab[-1]}
ord <- order(tab); copt <- copt[ord, ]; asig2 <- rep(0, length(asig))
for(i in 1:K) asig2 <- replace(asig2, asig == ord[i], values = i)
asig <- asig2
# Print the clusters results
# if(disp){
# cat("\n")
# for (k in seqK){
# # Group
# cat("\nCluster ", k, ":\n")
# print(seqN[asig == k])
# }
# # Trimmed observations
# if(sum(asig == 0) == 0)
# cat("\nNo trimmed observations!")
# else{
# cat("\nTrimmed observations:")
# print(seqN[asig == 0])
# }
# }
# calculate mean curves and silhouette
X.mean <- sapply(1:K, function(i) rowMeans(Xorig[, asig == i, drop = FALSE]))
X.mean.dist <- sapply(1:K, function(.i) c(sqrt(2 * (1 - suppressWarnings(cor(Xorig[, asig == .i, drop = FALSE], X.mean[, .i]))))), simplify = FALSE)
a <- (1 - conf.level)/2; a <- c(a, 1 - a)
X.mean.lower <- sapply(1:K, function(i) apply(Xorig[, asig == i, drop = FALSE], 1, quantile, probs = a[1]))
X.mean.upper <- sapply(1:K, function(i) apply(Xorig[, asig == i, drop = FALSE], 1, quantile, probs = a[2]))
# diss <- dist(X)
# oggSil <- silhouette(asig, diss)
# Function trimm returns the objetive value (vopt), the trimmed k-means centers (copt),
# the optimal radius (ropt) and the assignation of each observation (asig=0 are the
# trimmed ones
ris <- list("obj.function" = vopt, "centers" = copt, "radius" = ropt, "clusters" = asig,
"Xorig" = Xorig, "fd" = fd, "X" = X , "X.mean" = X.mean, "X.mean.dist" = X.mean.dist,
"X.mean.lower" = X.mean.lower, "X.mean.upper" = X.mean.upper,
# "diss.matrix" = diss, "oggSilhouette" = oggSil,
"call"=match.call())
class(ris) <- "fpcac"
return(ris)
}
#' @export
print.fpcac <- function(x, digits=max(3L, getOption("digits") - 3L), ...){
cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")
tabClust <- as.integer(table(x$clusters[x$clusters > 0]))
trimmed <- sum(x$clusters == 0)
cat("###################################", "\n")
cat("n. of clusters:", format(nrow(x$centers), digits=digits), "\n")
cat("n. of curves:", length(x$clusters), "\n")
cat("n. of harmonics:", ncol(x$centers), "\n")
cat("n. of trimmed curves:", trimmed, "\n")
cat("###################################", "\n\n")
cat("FPCAC algorithm with ", nrow(x$centers), " clusters of sizes ",
paste(tabClust, collapse = ", "), "\n", sep = "")
cat("\n")
invisible(x)
}
#' @export
summary.fpcac <- function(object, ...){
tabClust <- table(object$clusters)
trimmed <- sum(object$clusters == 0)
if(trimmed > 0) tabClust <- tabClust[-1]
nClust <- length(tabClust)
avClust <- unlist(lapply(object$X.mean.dist, mean, na.rm=TRUE))
# if(trimmed > 0) avClust <- avClust[-1]
# if(any(nClust == 1)) avClust[nClust == 1] <- 1
avClust <- c(avClust, sum(avClust * prop.table(tabClust)))
names(avClust) <- c(paste0("Cluster", seq_len(nClust)), "Average")
obj <- list(call = object$call, k = nClust, n = nrow(object$X), p = ncol(object$X),
trimmed = trimmed, tabClust = tabClust, avClust = avClust)
class(obj) <- "summary.fpcac"
return(obj)
}
#' @export
print.summary.fpcac <- function(x, digits=max(3L, getOption("digits") - 3L), ...){
cat("\n######################", "\n\n")
cat("Selected n. of clusters:", format(x$k, digits=digits), "\n")
cat("n. of curves:", x$n, "\n")
cat("n. of harmonics:", x$p, "\n")
cat("n. of trimmed curves:", x$trimmed, "\n")
cat("\n######################", "\n\n")
cat("Clustering table:")
print(x$tabClust, ...)
cat("\n######################", "\n\n")
cat("Average cluster distance:\n")
print(round(x$avClust, digits), ...)
cat("\n######################", "\n\n")
# if(!is.null(x$avSilhouette)){
# cat("Individual silhouette widths:\n")
# print(round(x$avSilhouette, digits), ...)
# cat("\n######################", "\n\n")
# }
invisible(x)
}
#' @export
plot.fpcac <- function(x, which, polygon=TRUE, conf.level, ...){
if(!missing(which)){
if(any(which <= 0) | any(which > x$k)){
stop("Which values in 1-", x$k)
}
}
x$k <- nrow(x$centers)
x$p <- seq_len(nrow(x$X.mean))
L <- list(...)
if(!missing(conf.level)){
a <- (1 - conf.level)/2; a <- c(a, 1 - a)
x$X.mean.lower <- sapply(1:x$k, function(i) apply(x$Xorig[, x$clusters == i, drop = FALSE], 1, quantile, probs = a[1]))
x$X.mean.upper <- sapply(1:x$k, function(i) apply(x$Xorig[, x$clusters == i, drop = FALSE], 1, quantile, probs = a[2]))
}
if(missing(which)) which <- seq(x$k)
yRange <- range(x$X.mean[, which])
temp <- range(cbind(x$X.mean.lower, x$X.mean.upper))
yRange <- range(c(temp, yRange))
tabClust <- table(x$clusters)
if(is.null(L$xlab)) L$xlab <- "Time"
if(is.null(L$ylab)) L$ylab <- "F(time)"
if(is.null(L$main)) L$main <- ""
if(is.null(L$lwd)) L$lwd <- c(1, 1.5)
if(length(L$lwd) == 1) L$lwd <- rep(L$lwd, 2)
if(is.null(L$type)) L$type <- "l"
if(is.null(L$ylim)) L$ylim <- yRange
if(is.null(L$lty)){L$lty <- c(1, 3)}
if(is.null(L$col)) L$col <- gray.colors(length(which))
if(length(L$col) == 1) L$col <- rep(L$col, length(which))
if(length(which) == 1){
if(is.null(L$mar)) L$mar <- c(4,3.3,2,1)+.1
par(mar=L$mar)
tempInd <- x$clusters == which
matplot(x$p, x$Xorig[, tempInd, drop = FALSE], xlab="", ylab="", type=L$type, ylim=L$ylim, lwd=L$lwd[1], main=L$main, col=L$col[1], lty=L$lty[2], axes=FALSE)
axis(1); axis(2); mtext(side=1, line=2, text=L$xlab); mtext(side=2, line=2, text=L$ylab)
lines(x$p, x$X.mean[, which], lwd=L$lwd[2], col=L$col[1], lty = L$lty[1])
if(!is.null(polygon)){
if(polygon){
yy <- c(x$X.mean.lower[, which], tail(x$X.mean.upper[, which], 1), rev(x$X.mean.upper[, which]), x$X.mean.lower[1, which])
xx <- c(x$p, tail(x$p, 1), rev(x$p), x$p[1])
polygon(xx, yy, col = adjustcolor(L$col[1], alpha.f = .25), border = NA)
}
else{
points(x$p, x$X.mean.lower[, which], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[1])
points(x$p, x$X.mean.upper[, which], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[1])
}
matlines(x$p, x$Xorig[, tempInd, drop = FALSE], xlab="", ylab="", type=L$type, ylim=L$ylim, lwd=L$lwd[1], main=L$main, col=L$col[1], lty=L$lty[2], axes=FALSE)
lines(x$p, x$X.mean[, which], lwd=L$lwd[2], col=L$col[1], lty = L$lty[1])
}
abline(h=0, lty=3, col=1)
}
else{
if(is.null(L$mar)) L$mar <- c(4,3.3,1,1)+.1
par(mar=L$mar)
matplot(x$p, x$X.mean, xlab="", ylab="", type=L$type, ylim=L$ylim,
lwd=L$lwd[1], main=L$main, col=L$col, lty=L$lty[1], axes=FALSE)
axis(1); axis(2); mtext(side=1, line=2, text=L$xlab); mtext(side=2, line=2, text=L$ylab)
if(!is.null(polygon)){
for(i in which){
if(polygon){
yy <- c(x$X.mean.lower[, i], tail(x$X.mean.upper[, i], 1), rev(x$X.mean.upper[, i]), x$X.mean.lower[1, i])
xx <- c(x$p, tail(x$p, 1), rev(x$p), x$p[1])
polygon(xx, yy, col = adjustcolor(L$col[i], alpha.f = .25), border = NA)# adjustcolor(L$col[1], alpha.f = 0.25)
}else{
points(x$p, x$X.mean.lower[, i], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[i])
points(x$p, x$X.mean.upper[, i], lty = 2, lwd = L$lwd[2], type = "l", col = L$col[i])
}
}
matlines(x$p, x$X.mean, xlab="", ylab="", type=L$type, ylim=L$ylim,
lwd=L$lwd[2], main=L$main, col=L$col, lty=L$lty[1], axes=FALSE)
}
}
par(mar=c(5,4,4,2)+.1)
}
#' @export
opt.fpcac <- function(X, k.max = 5, method = c("silhouette", "wss"),
fd = NULL, nbasis = 5, norder = 3, nharmonics = 3,
alpha = 0, niter = 30, Ksteps = 10, seed,
diss = NULL, trace=FALSE){
if(!missing(seed)) set.seed(seed)
method <- match.arg(method)
if(trace) pb <- txtProgressBar(min=0, max=k.max, style=3)
objTemp <- sapply(seq_len(k.max), function(.k) {
if(trace) setTxtProgressBar(pb, .k)
if(is.null(fd)) fpcac(X, K=.k, alpha=alpha, niter=niter, Ksteps=Ksteps, nbasis=nbasis, norder=norder, nharmonics=nharmonics, disp=FALSE)
else fpcac(fd=fd, K=.k, alpha=alpha, niter=niter, Ksteps=Ksteps, nbasis=nbasis, norder=norder, nharmonics=nharmonics, disp=FALSE)
}, simplify = FALSE)
if(trace) close(pb)
names(objTemp) <- seq_len(k.max)
clusters <- simplify2array(lapply(objTemp, function(.x) .x$clusters))
nfolds <- dim(objTemp[[1]]$X)[1] %/% 5000
nfolds <- pmax(nfolds, 1)
foldid <- sort(rep(seq(nfolds), length = dim(objTemp[[1]]$X)[1]))
if(is.null(diss)) {
v <- NULL
for(i in 1:nfolds) {
# diss <- dist(objTemp[[1]]$X[foldid == i, ])
diss <- as.dist(sqrt(2*(1-cor(t(objTemp[[1]]$X[foldid == i, ])))))
temp <- if (method == "silhouette")
c(0, apply(clusters[foldid == i, -1], 2, .get_ave_sil_width, d=diss))
else
apply(clusters, 2, .get_withinSS, d=diss)
v <- rbind(v, temp)
}
v <- colMeans(v)
}
else{
v <- if (method == "silhouette")
c(0, apply(clusters[,-1], 2, .get_ave_sil_width, d=diss))
else
apply(clusters, 2, .get_withinSS, d=diss)
}
K.opt <- if(method == "wss") as.integer(names(which.min(diff(v)))) else which.max(v)
df <- data.frame(clusters = as.factor(1:k.max), y = v)
linecolor <- "steelblue"
ylab <- "Total Within Sum of Square"
if(method == "silhouette") ylab <- "Average silhouette width"
p <- ggline(df, x = "clusters", y = "y", group = 1,
color = linecolor, ylab = ylab, xlab = "Number of clusters k",
main = "Optimal number of clusters")
if(method == "silhouette")
p <- p + geom_vline(xintercept = which.max(v), linetype = 2, color = linecolor)
print(p)
out <- list(obj.function=v, clusters=clusters, K=seq_len(k.max), K.opt=K.opt, plot=p)
return(out)
}
.get_ave_sil_width <- function (d, cluster){
if (!requireNamespace("cluster", quietly = TRUE)) {
stop("cluster package needed for this function to work. Please install it.")
}
ss <- silhouette(cluster, d)
mean(ss[, 3])
}
.get_withinSS <- function (d, cluster){
d <- as.dist(d)
cn <- max(cluster)
clusterf <- as.factor(cluster)
clusterl <- levels(clusterf)
cnn <- length(clusterl)
if (cn != cnn) {
warning("cluster renumbered because maximum != number of clusters")
for (i in 1:cnn) cluster[clusterf == clusterl[i]] <- i
cn <- cnn
}
cwn <- cn
dmat <- as.matrix(d)
within.cluster.ss <- 0
for (i in 1:cn) {
cluster.size <- sum(cluster == i)
di <- as.dist(dmat[cluster == i, cluster == i])
within.cluster.ss <- within.cluster.ss + sum(di^2)/cluster.size
}
within.cluster.ss
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.