Nothing
DetermineWeight_SimClust<-function(List,type=c("data","dist","clusters"),weight=seq(0,1,by=0.01),nrclusters=NULL,distmeasure=c("tanimoto","tanimoto"),normalize=FALSE,method=NULL,clust="agnes",linkage=c("ward","ward"),alpha=0.625,gap=FALSE,maxK=50,names=c("B","FP"),StopRange=FALSE,plottype="new",location=NULL){
CheckDist<-function(Dist,StopRange){
if(StopRange==FALSE & !(0<=min(Dist) & max(Dist)<=1)){
message("It was detected that a distance matrix had values not between zero and one. Range Normalization was performed to secure this. Put StopRange=TRUE if this was not necessary")
Dist=Normalization(Dist,method="Range")
}
else{
Dist=Dist
}
}
type<-match.arg(type)
if(type=="data"){
#for(a in 1:length(distmeasure)){
# if(distmeasure[[a]]=='euclidean'){
# stand<-function(c){
# minc=min(c)
# maxc=max(c)
# c1=(c-minc)/(maxc-minc)
# return(c1)
# }
# List[[a]]=apply(List[[a]],2,stand)
# }
#
#}
OrderNames=rownames(List[[1]])
for(i in 1:length(List)){
List[[i]]=List[[i]][OrderNames,]
}
Clusterings=lapply(seq(length(List)),function(i) Cluster(List[[i]],type,distmeasure[i],normalize,method,clust,linkage[i],alpha,gap,maxK,StopRange))
Dist=lapply(seq(length(List)),function(i) Clusterings[[i]]$DistM)
Dist=lapply(seq(length(Dist)),function(i) CheckDist(Dist[[i]],StopRange))
if(is.null(nrclusters)){
if(gap==FALSE){
stop("Specify a number of clusters of put gap to TRUE")
}
else{
clusters=sapply(seq(length(List)),function(i) Clusterings[[i]]$k$Tibs2001SEmax)
nrclusters=ceiling(mean(clusters))
}
}
for(i in 1:length(Clusterings)){
names(Clusterings)[i]=paste("Clust",i,sep=' ')
}
out<-list(Clusterings)
names(out)="ClusterSep"
}
else if(type=="dist"){
OrderNames=rownames(List[[1]])
for(i in 1:length(List)){
List[[i]]=List[[i]][OrderNames,OrderNames]
}
Clusterings=lapply(seq(length(List)),function(i) Cluster(List[[i]],type,distmeasure[i],normalize,method,clust,linkage[i],alpha,gap,maxK,StopRange))
Dist=List
Dist=lapply(seq(length(Dist)),function(i) CheckDist(Dist[[i]],StopRange))
if(is.null(nrclusters)){
if(gap==FALSE){
stop("Specify a number of clusters of put gap to TRUE")
}
else{
clusters=sapply(seq(length(List)),function(i) Clusterings[[i]]$k$Tibs2001SEmax)
nrclusters=ceiling(mean(clusters))
}
}
for(i in 1:length(Clusterings)){
names(Clusterings)[i]=paste("Clust",i,sep=' ')
}
out<-list(Clusterings)
names(out)="ClusterSep"
}
else{
Dist=lapply(seq(length(List)),function(i) return(List[[i]]$DistM))
Dist=lapply(seq(length(Dist)),function(i) CheckDist(Dist[[i]],StopRange))
OrderNames=rownames(Dist[[1]])
for(i in 1:length(Dist)){
Dist[[i]]=Dist[[i]][OrderNames,OrderNames]
}
Clusterings=List
for(i in 1:length(Clusterings)){
names(Clusterings)[i]=paste("Clust",i,sep=' ')
}
out<-list(Clusterings)
names(out)="ClusterSep"
}
namesw=c()
for(i in 1:length(names)){
namesw=c(namesw,paste("w_",names[i],sep=""))
}
namesJ=c()
for(i in 1:length(names)){
namesJ=c(namesJ,paste("J(sim",names[i],",simW)",sep=""))
}
namesR=c()
combs=combn(seq(length(List)),m=2,simplify=FALSE)
for(i in 1:length(combs)){
namesR=c(namesR,paste("J_",names[combs[[i]][1]],"/J_",names[combs[[i]]][2],sep=""))
}
labels<-c(namesw,namesJ,namesR)
ResultsWeight<-matrix(0,ncol=length(labels),nrow=length(weight))
#data.frame(col1=numeric(),col2=numeric(),col3=numeric(),col4=numeric())
colnames(ResultsWeight)=labels
if(is.null(weight)){
equalweights=1/length(List)
weight=list(rep(equalweights,length(List)))
}
else if(class(weight)=='list' & length(weight[[1]])!=length(List)){
stop("Give a weight for each data matrix or specify a sequence of weights")
}
else{
message('The weights are considered to be a sequence, each situation is investigated')
}
if(class(weight)!="list"){
condition<-function(l){
l=as.numeric(l)
if( sum(l)==1 ){ #working wit characters since with the numeric values of comb or permutations something goes not the way is should: 0.999999999<0.7+0.3<1??
#return(row.match(l,t1))
return(l)
}
else(return(0))
}
t1=permutations(n=length(weight),r=length(List),v=as.character(weight),repeats.allowed = TRUE)
t2=lapply(seq_len(nrow(t1)), function(i) if(sum(as.numeric(t1[i,]))==1) return(as.numeric(t1[i,])) else return(0)) #make this faster: lapply on a list or adapt permutations function itself: first perform combinations under restriction then perform permutations
t3=sapply(seq(length(t2)),function(i) if(!all(t2[[i]]==0)) return (i) else return(0))
weight=t2[which(t3!=0)]
}
if(class(weight)=="list" & "x" %in% weight[[1]]){ #x indicates a free weight
for(i in 1:length(weight)){
w=weight[[i]]
weightsfordata=which(w!="x") #position of the provided weight = position of the data to which the weight is given
givenweights=as.numeric(w[weightsfordata])
stilltodistribute=1-sum(givenweights)
newweights=seq(stilltodistribute,0,-0.1)
t1=permutations(n=length(newweights),r=length(List)-length(weightsfordata),v=as.character(newweights),repeats.allowed = TRUE)
Input1=as.list(seq_len(nrow(t1)))
Input2=lapply(seq(length(Input1)),function(i) {Input1[[i]][length(Input1[[i]])+1]=stilltodistribute
return(Input1[[i]])})
t2=lapply(seq(length(Input2)), FUN=function(i){if(sum(as.numeric(t1[Input2[[i]][1],])+0.00000000000000002775)==Input2[[i]][2]) return(as.numeric(t1[i,])) else return(0)}) #make this faster: lapply on a list or adapt permutations function itself: first perform combinations under restriction then perform permutations
t3=sapply(seq(length(t2)),function(i) if(!all(t2[[i]]==0)) return (i) else return(0))
weightsforotherdata=t2[which(t3!=0)]
new=list()
for(i in 1:length(weightsforotherdata)){
w1=weightsforotherdata[[i]]
new[[i]]=rep(0,length(List))
new[[i]][weightsfordata]=givenweights
new[[i]][which(new[[i]]==0)]=w1
}
weight=new
}
}
weightedcomb<-function(w,Dist){
temp=lapply(seq_len(length(Dist)),function(i) w[i]*Dist[[i]])
temp=Reduce("+",temp)
}
DistM=lapply(weight,weightedcomb,Dist)
hclustOr=lapply(seq(length(List)),function(i) cutree(Clusterings[[i]]$Clust,nrclusters))
hclustW=lapply(seq(length(weight)),function(i) cutree(agnes(DistM[[i]],diss=TRUE,method="ward"),nrclusters))
Counts=function(clusterlabs1,clusterlabs2){
index=c(1:length(clusterlabs1))
allpairs=combn(index,2,simplify=FALSE) #all pairs of indices: now check clutserlabels for every pair==> only 1 for loop
n11=n10=n01=n00=0
counts<-function(pair){
if(clusterlabs1[pair[1]]==clusterlabs1[pair[2]]){
if(clusterlabs2[pair[1]]==clusterlabs2[pair[2]]){
n11=n11+1
}
else{
n10=n10+1
}
}
else{
if(clusterlabs2[pair[1]]==clusterlabs2[pair[2]]){
n01=n01+1
}
else{
n00=n00+1
}
}
return(c(n11,n10,n01,n00))
}
n=lapply(seq(length(allpairs)),function(i) counts(allpairs[[i]]))
nn=Reduce("+",n)
#2: compute jaccard coefficient
Jac=nn[1]/(nn[1]+nn[2]+nn[3])
return(Jac)
}
Jaccards<-function(hclust){
jacs=lapply(seq(length(hclustOr)),function(i) Counts(clusterlabs1=hclustOr[[i]],clusterlabs2=hclust))
return(unlist(jacs))
}
AllJacs=lapply(hclustW,Jaccards) #make this faster:lapply + transfrom to data frame with plyr package
Ratios<-function(Jacs){
combs=combn(seq(length(List)),m=2,simplify=FALSE)
ratio<-function(v,Jacs){
return(Jacs[v[1]]/Jacs[v[2]])
}
ratios=lapply(seq(length(combs)),function(i) ratio(v=combs[[i]],Jacs=Jacs))
}
AllRatios=lapply(seq(length(AllJacs)),function(i) unlist(Ratios(AllJacs[[i]])))
ResultsWeight=t(mapply(c,weight,AllJacs,AllRatios))
colnames(ResultsWeight)=labels
#Choose weight with ratio closest to one==> smallest where this happens: ##### START HERE WITH OPTIMIZATION #####
ResultsWeight=cbind(ResultsWeight,rep(0,nrow(ResultsWeight)))
colnames(ResultsWeight)[ncol(ResultsWeight)]="trick"
Weight=ResultsWeight[which.min(rowSums(abs(ResultsWeight[,c(namesR,"trick")]-1))),c(1:length(List))]
plottypein<-function(plottype,location){
if(plottype=="pdf" & !(is.null(location))){
pdf(paste(location,".pdf",sep=""))
}
if(plottype=="new"){
dev.new()
}
if(plottype=="sweave"){
}
}
plottypeout<-function(plottype){
if(plottype=="pdf"){
dev.off()
}
}
plottypein(plottype,location)
plot(x=0,y=0,type="n",xlim=c(0,dim(ResultsWeight)[1]),ylim=c(min(ResultsWeight[,namesR]),max(ResultsWeight[,namesR])),xlab="",ylab="Ratios")
if(is.null(ncol(ResultsWeight[,namesR]))){
L=1
}
else{
L=ncol(ResultsWeight[,namesR])
}
for(i in 1:L){
lines(1:dim(ResultsWeight)[1],y=ResultsWeight[,namesR[i]],col=i)
}
abline(h=0,v=which.min(rowSums(abs(ResultsWeight[,c(namesR,"trick")]-1))),col="black",lwd=2)
mtext("Weight Combinations", side=1, line=3)
axis(1,labels=paste("Optimal weights:", paste(Weight,collapse=", "),sep=" "), at=which.min(rowSums(abs(ResultsWeight[,c(namesR,"trick")]-1))),line=1,tck=1,lwd=2)
plottypeout(plottype)
ResultsWeight=ResultsWeight[,-ncol(ResultsWeight)]
out[[2]]=ResultsWeight
out[[3]]=Weight
names(out)=c("ClusterSep","Result","Weight")
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.