Nothing
gsac <- function(x, nc = Inf, maxiter = 40, zero = TRUE, r0 = 0, force.cs = FALSE, force.rs = FALSE, resort = "complete", method = "Kendall", tau0 = 0.5, stack = "max" , clean = TRUE, clean.Is = TRUE, cutoff = -20, ... ){
# x = matrix
# zero = set sparse areas to 0 or r0*expectation
# cs, rs = force row or column split (subtables do not overlap, rs&cs == TRUE is the TDP/SAC)
# resort = method for reordering the subtables ("complete" matrix, individual "subtable"s, "none")
# method = TDP method
# cutoff = minimum acceptable residuals. rows/columns with less than ... avg residual are removed ??
# list for the non-finished submatrices
# indices
Is <- list(lapply(dim(x),function(z) 1:z))
RM <- itab(x)
R0 <- r0*RM
x <- (1-r0)*x + R0
clusters <- list()
k <- 0
# stop if max. number of clusters is reached
while(length(Is) > 0){
# remove redundant clusters during algorithm
# better not?
if(clean.Is & length(Is) > 2){
ic <- getIsClusters(Is,dim(x))
sc <- setcover(ic)
Is <- Is[sc]
}
# reorder the current matrix
# z/Is always refers to the original matrix x
sz <- sapply(Is,function(y){
length(y[[1]])*length(y[[2]])
})
##print("sizes")
##print(sz)
#id <- which.max(sz)
Is <- Is[sz > 0]
#id <- length(Is)
if(stack %in% c(1,"first","f","fifo","FIFO","1st")){
id <- 1
}
if(stack %in% c("last","l","lifo","LIFO")){
id <- length(Is)
}
if(stack %in% c("max","m","highest","h","high","size")){
id <- which.max(sz)
}
if(stack %in% c("min","m","smallest","s","small")){
id <- which.min(sz)
}
##print(id)
z <- Is[[id]]
Is <- Is[-id]
##print(z)
##print("check01")
##print(any(is.na(x)))
if(any(sapply(z,length) == 1)){
if(length(z[[1]]) > 1){
z[[1]] <- z[[1]][ x[z[[1]],z[[2]]] > R0[z[[1]],z[[2]]] ]
}
if(length(z[[2]]) > 1){
z[[2]] <- z[[2]][ x[z[[1]],z[[2]]] > R0[z[[1]],z[[2]]] ]
}
if(all(sapply(z,length) > 0)){
clusters[[length(clusters)+1]] <- z
}else{
# else the cluster is empty
print("cluster empty!")
}
}else{
if(resort %in% c(FALSE,"none","n")){
# do not reorder subtables
x0 <- x[ z[[1]], z[[2]] , drop = FALSE]
}
if(resort %in% c("sub","s","subtable")){
#reorder subtables independently
##print(dim(xxx<-x[ z[[1]], z[[2]] ,drop=FALSE]))
##print(table(is.na(xxx)))
##print(any( apply(xxx,1,sum) == 0 ))
##print(any( apply(xxx,2,sum) == 0 ))
x0 <- optile(x[ z[[1]], z[[2]] ,drop=FALSE], ... )
z[[1]] <- z[[1]][attr(x0,"orders")[[1]]]
z[[2]] <- z[[2]][attr(x0,"orders")[[2]]]
}
if(resort %in% c("complete","c")){
# reorder complete matrix
ords <- optile(x, return.data = FALSE, ... )[[3]]
z[[1]] <- z[[1]][order(match(z[[1]],ords[[1]]))]
z[[2]] <- z[[2]][order(match(z[[2]],ords[[2]]))]
x0 <- x[ z[[1]], z[[2]] ,drop=FALSE]
}
##print("check02")
#remove zero rows / columns
rx0 <- (x0-RM[ z[[1]], z[[2]] ,drop=FALSE]) > 1e-12
rs <- apply(rx0,1,sum)
cs <- apply(rx0,2,sum)
if(any(rs == 0)){
rs0 <- which(rs==0)
z[[1]] <- z[[1]][-rs0]
}
if(any(cs == 0)){
cs0 <- which(cs==0)
z[[2]] <- z[[2]][-cs0]
}
##print("check03")
if(any(sapply(z,length)==1)){
# reduce, add to clusters, next z
# reduce subtable by all empty rows and columns
if(length(z[[1]]) > 1){
z[[1]] <- z[[1]][ x[z[[1]],z[[2]]] > R0[z[[1]],z[[2]]] ]
}
if(length(z[[2]]) > 1){
z[[2]] <- z[[2]][ x[z[[1]],z[[2]]] > R0[z[[1]],z[[2]]] ]
}
# if the subtable has only 1 row and/or column accept it as a cluster
if(all(sapply(z,length) > 0)){
clusters[[length(clusters)+1]] <- z
}else{
# else the cluster is empty
print("cluster empty!")
}
##print("check04a")
}else{
x0 <- x[ z[[1]], z[[2]] ,drop=FALSE]
##print(BCI(100*x0))
##print(BCI(x0))
cf <- cfluctile(x0, maxsplit = 1, plot = FALSE, tau0 = tau0, method = method)
##print("check04b")
## print(clusters)
if(length(cf[[1]]) == 1){
#print(z)
#print(clusters)
# no split >>>> reduce subtable by removing sparse rows and columns
# rx0 <- ((x0-extracat:::itab(x0))>0)+0
# rs <- apply(rx0,1,sum)
# cs <- apply(rx0,2,sum)
# if(any(rs == 0)){
# rs0 <- which(rs==0)
# x[z[[1]][rs0], z[[2]]] <- R0[z[[1]][rs0], z[[2]]]
# z[[1]] <- z[[1]][-rs0]
# }
# if(any(cs == 0)){
# cs0 <- which(cs==0)
# x[z[[1]], z[[2]][cs0]] <- R0[z[[1]], z[[2]][cs0]]
# z[[2]] <- z[[2]][-cs0]
# }
# remove all rows and columns with a negative avg residual
# ix0 <- extracat:::itab(x0)
# rx0 <- (x0-ix0)/sqrt(ix0)
rx0 <- (x0-RM[ z[[1]], z[[2]] ,drop=FALSE])/sqrt(RM[ z[[1]], z[[2]] ,drop=FALSE])
rm <- apply(rx0,1,mean)
cm <- apply(rx0,2,mean)
if(any(rm < cutoff)){
rs0 <- which(rm < cutoff)
x[z[[1]][rs0], z[[2]]] <- R0[z[[1]][rs0], z[[2]]]
z[[1]] <- z[[1]][-rs0]
}
if(any(cm < cutoff )){
cs0 <- which(cm < cutoff)
x[z[[1]], z[[2]][cs0]] <- R0[z[[1]], z[[2]][cs0]]
z[[2]] <- z[[2]][-cs0]
}
## print("check05a")
#print(z)
#print(clusters)
#print("5a")
# if any useful rows and columns are left accept the subtable as a cluster
if(all(sapply(z,length) > 0)){
clusters[[length(clusters)+1]] <- z
}# else the cluster is empty
##print(clusters)
}else{
# partition accepted: decide which subtables to keep
##print(attr(cf,"tau.values"))
r1 <- attr(cf, "orders")[[1]][[1]]
r2 <- attr(cf, "orders")[[1]][[2]]
c1 <- attr(cf, "orders")[[2]][[1]]
c2 <- attr(cf, "orders")[[2]][[2]]
#print(list(r1,r2,c1,c2))
m1 <- mean( x0[r1,c2] )
m2 <- mean( x0[r2,c1] )
# the "corner" with the smallest avg is dropped
sum1 <- sum( x0[r1,c1] )
sum2 <- sum( x0[r2,c1] )
sum3 <- sum( x0[r1,c2] )
sum4 <- sum( x0[r2,c2] )
##print(matrix(c(sum1,sum2,sum3,sum4),2,2))
if(m1 < m2){
z1 <- list( z[[1]] , z[[2]][c1] )
z2 <- list( z[[1]][r2] , z[[2]] )
if(force.rs){
z1[[1]] <- z1[[1]][r1]
}
if(force.cs){
z2[[2]] <- z2[[2]][c2]
}
if(zero){
x[z[[1]][r1], z[[2]][c2]] <- R0[z[[1]][r1], z[[2]][c2]]
##cat("killing ",length(z[[1]][r1]),"x", length(z[[2]][c2]))
if(force.rs&force.cs){
x[z[[1]][r2], z[[2]][c1]] <- R0[z[[1]][r2], z[[2]][c1]]
}
}
}else{
z1 <- list( z[[1]] , z[[2]][c2] )
z2 <- list( z[[1]][r1] , z[[2]] )
if(force.rs){
z1[[1]] <- z1[[1]][r2]
}
if(force.cs){
z2[[2]] <- z2[[2]][c1]
}
if(zero){
x[z[[1]][r2], z[[2]][c1]] <- R0[z[[1]][r2], z[[2]][c1]]
##cat("killing ",length(z[[1]][r2]),"x", length(z[[2]][c1]))
if(force.rs&force.cs){
x[z[[1]][r1], z[[2]][c2]] <- R0[z[[1]][r1], z[[2]][c2]]
}
}
}
Is[[length(Is)+1]] <- z1
Is[[length(Is)+1]] <- z2
##print("check05b")
}
}# end ifelse dim = 1
}# end ifelse dim=1
#print(Is)
#print(clusters)
#print("------------------")
# cleaning section: use setcover to remove redundant clusters (if nc < Inf)
tot <- length(Is) + length(clusters)
if(clean & tot >= nc & length(clusters) > 1){
## print("c1")
# remove redundant clusters
ic <- getIsClusters(clusters,dim(x))
sc <- setcover(ic)
##print(sc)
clusters <- clusters[sc]
tot <- length(Is) + length(clusters)
}
if(clean & tot >= nc & length(Is) > 1){
## print("c2")
# remove redundant Is-entries before accepting them as clusters
ic <- getIsClusters(Is,dim(x))
sc <- setcover(ic)
##print(sc)
Is <- Is[sc]
tot <- length(Is) + length(clusters)
}
if(clean & tot >= nc & length(clusters)*length(Is) > 0){
## print("c3")
# remove redundant clusters in clusters+Is
ic <- getIsClusters(c(clusters,Is),dim(x))
sc <- setcover(ic)
##print(sc)
sc1 <- which(sc > length(clusters))
Is <- Is[sc[sc1] - length(clusters)]
sc2 <- which(sc <= length(clusters))
##print(sc2)
clusters <- clusters[sc[sc2]]
tot <- length(Is) + length(clusters)
}
if(tot >= nc){
# no redundancies left (clean == TRUE)
clusters <- c(clusters,Is)
Is <- list()
print("Maximum number of clusters (nc) reached.")
}
if(k >= maxiter){
clusters <- c(clusters,Is)
Is <- list()
print("Maximum number of iteriations (maxiter) reached.")
}
}# end while
##print("finishing")
clusters <- rapply(clusters,f=base::sort,how="list")
##print(clusters)
# final "cleaning": remove redundant clusters
if(clean){
ic <- getIsClusters(clusters,dim(x))
sc <- setcover(ic)
clusters <- clusters[sc]
}
return(clusters)
}
getIsClusters <- function(Is,dim){
n <- length(Is)
idv <- sapply(Is,function(z){
M <- matrix(0,dim[1],dim[2])
M[z[[1]],z[[2]]] <- 1
return(as.vector(M))
})
return(idv)
}
setcover <- function(x, k = NULL, rat = 1, s = NULL, w = NULL, check = TRUE){
if(is.null(dim(x))){ dim(x) <- c(length(x),1)}
if(check){
x0 <- x
}
if(is.null(w)){
w <- rep(1,nrow(x))
}
w <- w/sum(w)
if(is.null(s)){
s <- sum( apply(x,1,max)*w )
}
if(is.null(k)) k <- ncol(x)
i <- 0
ids <- NULL
cov <- 0
while(i < k & cov/s < rat){
i <- i+1
#rs <- apply(x,2,sum)
rs <- w %*% x
ids <- c(ids,j<-which.max(rs))
cov <- cov + rs[j]
drp <- which(x[,j]==1)
w <- w[-drp]
x <- x[-drp,,drop=FALSE]
if(any(dim(x) < 2)){
i <- k
}
}
#cat("coverage = ", cov/s)
attr(ids,"coverage") <- cov/s
if(check){
# test for redundancy
# are all entries in a cluster in at least one other cluster?
all.ok <- FALSE
while(!all.ok){
cs <- apply(x0[,ids,drop=FALSE],1,sum)==1
un <- apply(x0[,ids,drop=FALSE],2,function(z){
table(cs,z)[2,2] == 0
})
if(any(un)){
ids <- ids[-which(un)[1]]
}else{
all.ok <- TRUE
}
}
}
return(ids)
}
# cseqmat <- function(x){
# # matrix to compute cluster sequences
# blc<-apply(x,2,function(z) which.max(z==1))
# return(outer(blc,blc,">")+0)
# }
# aggregate.table = function(x,compress = c(TRUE,FALSE), aggregate = sum , order = NULL){
# nd <- length(dim(x))
# #aggregate = as.list(rep(aggregate,nd)[1:nd])
# compress = as.list(rep(compress,nd)[1:nd])
# eq <- sapply(compress,"==",TRUE)
# if(is.null(order)){
# order <- order(eq)
# }
# x <- as.data.table(as.data.frame(x))
# # setnames(x,"N","Freq")
# x <- x[Freq > 0]
# fi <- ncol(x)
# xc <- x[,list(Freq)]
# #for(i in 1:nd){
# # x[,i,with=FALSE] <- as.factor(unlist(x[,i,with=FALSE]))
# #}
# for(i in order){
# if( is.logical(compress[[i]])){
# if(compress[[i]][1] == TRUE){
# # compress identical profiles
# f <- paste("Freq ~ ",names(x)[i],sep="")
# #setkeyv(x, names(x)[-c(i,fi)])
# #nmz <- names(x)[-c(i,fi)]
# #CM <- x[,as.list(xtabs(as.formula(f))),by=key(x)]
# #CM <- x[,as.list(xtabs(Freq~.,data=.SD)),by=nmz]
# a <- unlist(x[,i,with=FALSE])
# b <- apply(x[,-1,with=FALSE],1,paste,collapse=":")
# CM <- xtabs( x[,Freq] ~ a + b)
# #tt <- t(CM[,-c(1:(nd-1)),with=FALSE])
# require(amap)
# st <- subtree( suppressWarnings(hcluster(CM)),h=0)$data
# xc[,eval(quote(names(x)[i])) := as.factor(paste("C",i,st,sep=":")[unlist(x[,i,with=FALSE])])]
# }else{
# xc[,eval(quote(names(x)[i])) := x[,i,with=FALSE]]
# }
# }else{
# lvl <- paste("C",i,levels(x[,i,with=FALSE]),sep=":")
# stopifnot(length(lvl) == length(compress[[i]]))
# st <- compress[[i]]
# #yy <- sapply(unique(st),function(z) lvl[which(st==z)])
# xc[,eval(quote(names(x)[i])) := paste("C",i,st,sep=":")[as.integer(x[,i,with=FALSE])]]
# }
# }
# #mapply(function(y,z){
# # if(is.logical(z)){
# # if(z){}
# # }
# #
# #
# #},y = aggregate, z = compress)
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.