# R/CECb.R In IntClust: Integrated Data Analysis via Clustering

#### Documented in CECb

```CECb<-function(List,distmeasure=c("tanimoto","tanimoto"),normalize=FALSE,method=NULL,nrclusters=seq(5,25,1),weight=NULL,clust="agnes",linkage=c("flexible","flexible"),alpha=0.625,WeightClust=0.5,StopRange=FALSE){

if(class(List) != "list"){
stop("Data must be of type list")
}

if(is.null(nrclusters)){
stop("Give a number of clusters to cut the dendrogram into for each data modality.")
}

#Step 1: Take all features from A1 and A2
#Notation facility:

#Put all data in the same order
OrderNames=rownames(List[])
for(i in 1:length(List)){
List[[i]]=List[[i]][OrderNames,]
}

#Put up Incidence matrix for each data modality
Incidence=list()
for (i in 1:length(List)){
Incidence[[i]]=matrix(0,dim(List[[i]]),dim(List[[i]]))
rownames(Incidence[[i]])=rownames(List[[i]])
colnames(Incidence[[i]])=rownames(List[[i]])
}

#Repeat for t iterations: not necessary here since only thing that changes is the number of clusters the tree is cut into

#Step 2: apply hierarchical clustering on A1_prime and A2_prime + cut tree into nrclusters

DistM=lapply(seq(length(List)),function(i) Distance(List[[i]],distmeasure=distmeasure[i],normalize,method))
CheckDist<-function(Dist,StopRange){
if(StopRange==FALSE & !(0<=min(Dist) & max(Dist)<=1)){
message("It was detected that a distance matrix had values not between zero and one. Range Normalization was performed to secure this. Put StopRange=TRUE if this was not necessary")
Dist=Normalization(Dist,method="Range")
}
else{
Dist=Dist
}
}

DistM=lapply(seq(length(DistM)),function(i) CheckDist(DistM[[i]],StopRange))

for(k in 1:length(nrclusters)){
message(k)
MembersofClust=list()
for (i in 1:length(HClust_A)){
Temp=cutree(HClust_A[[i]],nrclusters[k])
MembersofClust[[i]]=matrix(1,dim(List[[i]]),dim(List[[i]]))

for(l in 1:length(Temp)){
label=Temp[l]
sameclust=which(Temp==label)
MembersofClust[[i]][l,sameclust]=0
}
Incidence[[i]]=Incidence[[i]]+MembersofClust[[i]]
}
}

if(is.null(weight)){
equalweights=1/length(List)
weight=list(rep(equalweights,length(List)))
}
else if(class(weight)=='list' & length(weight[])!=length(List)){
stop("Give a weight for each data matrix or specify a sequence of weights")
}
else{
message('The weights are considered to be a sequence, each situation is investigated')
}

if(class(weight)!="list"){
condition<-function(l){
l=as.numeric(l)
if( sum(l)==1 ){  #working with characters since with the numeric values of comb or permutations something goes not the way is should: 0.999999999<0.7+0.3<1??
#return(row.match(l,t1))
return(l)
}
else(return(0))
}

if(all(seq(1,0,-0.1)!=weight)){
for(i in 1:length(weight)){
rest=1-weight[i]
if(!(rest%in%weight)){
weight=c(weight,rest)
}
}
}

t1=permutations(n=length(weight),r=length(List),v=as.character(weight),repeats.allowed = TRUE)
t2=lapply(seq_len(nrow(t1)), function(i) if(sum(as.numeric(t1[i,]))==1) return(as.numeric(t1[i,])) else return(0)) #make this faster: lapply on a list or adapt permutations function itself: first perform combinations under restriction then perform permutations
t3=sapply(seq(length(t2)),function(i) if(!all(t2[[i]]==0)) return (i) else return(0))
t4=t2[which(t3!=0)]
weight=lapply(seq(length(t4)),function(i) rev(t4[[i]]))

}

if(class(weight)=="list" & "x" %in% weight[]){ #x indicates a free weight
for(i in 1:length(weight)){
w=weight[[i]]
weightsfordata=which(w!="x") #position of the provided weight = position of the data to which the weight is given
givenweights=as.numeric(w[weightsfordata])

stilltodistribute=1-sum(givenweights)

newweights=seq(stilltodistribute,0,-0.1)

t1=permutations(n=length(newweights),r=length(List)-length(weightsfordata),v=as.character(newweights),repeats.allowed = TRUE)
Input1=as.list(seq_len(nrow(t1)))
Input2=lapply(seq(length(Input1)),function(i) {Input1[[i]][length(Input1[[i]])+1]=stilltodistribute
return(Input1[[i]])})
t2=lapply(seq(length(Input2)), FUN=function(i){if(sum(as.numeric(t1[Input2[[i]],])+0.00000000000000002775)==Input2[[i]]) return(as.numeric(t1[i,])) else return(0)}) #make this faster: lapply on a list or adapt permutations function itself: first perform combinations under restriction then perform permutations
t3=sapply(seq(length(t2)),function(i) if(!all(t2[[i]]==0)) return (i) else return(0))
weightsforotherdata=t2[which(t3!=0)]

new=list()
for(i in 1:length(weightsforotherdata)){
w1=weightsforotherdata[[i]]
new[[i]]=rep(0,length(List))
new[[i]][weightsfordata]=givenweights
new[[i]][which(new[[i]]==0)]=w1
}

weight=new
}
}

weightedcomb<-function(w,Dist){
temp=lapply(seq_len(length(Dist)),function(i) w[i]*Dist[[i]])
temp=Reduce("+",temp)
return(temp)
}
IncidenceComb=lapply(weight,weightedcomb,Incidence)

namesweights=c()
CEC=list()
for (i in 1:length(IncidenceComb)){
CEC[[i]]=agnes(IncidenceComb[[i]],diss=TRUE,method="ward")
namesweights=c(namesweights,paste("Weight",weight[i],sep=" "))
if(all(weight[[i]]==WeightClust)){
Clust=CEC[i]
DistClust=IncidenceComb[i]
}
}

Results=lapply(seq(1,length(CEC)),function(i) return(c("DistM"=IncidenceComb[i],"Clust"=CEC[i])))
names(Results)=namesweights

out=list(Incidence=Incidence,Results=Results,Clust=c("DistM"=DistClust,"Clust"=Clust))
attr(out,'method')<-'CEC'
return(out)

}
```

## Try the IntClust package in your browser

Any scripts or data that you put into this service are public.

IntClust documentation built on May 2, 2019, 5:23 p.m.