Edit: 07/24/2020 TKDD REV2

Code running block: the framework execution chunk starts here!

Instructions - Please run all chunks above (Ctrl+Alt+P) - Then set the parameters below (inputs: DataT and gamma) - Run all chunks below to start the framework

Explanation: FindMaxHomoOptimalPartitions(DataT, gamma) - INPUT: DataT$X[i,j] is the value of the j-th independent variable of the i-th individual. - INPUT: DataT$Y[i] is the value of the dependent variable of the i-th individual. - INPUT: DataT$clsLayer[i,k] is the cluster label of the i-th individual in the k-th cluster layer.

# ========= Test: one run of the framework on a single simulated dataset
library(MRReg)
source("supportExpFunctions.R")
# T4GTCopt (used for the F-scores below) is not defined in this script;
# presumably it is one of the objects restored by this load() -- confirm.
load(file = "../data/SimGT.RData")

# Alternative dataset generators, kept for reference:
# DataT <- clusterSimpleGenT1Func(10000)
# DataT <- clusterSimpleGenT2Func(10000)
# DataT <- clusterSimpleGenT3Func(10000)
# DataT <- clusterSimpleGenT4Func(10000)
DataT <- SimpleSimulation(1000, type = 4) # type selects the simulation dataset

gamma <- 0.05 # Gamma parameter passed to the partition search

# Partition search, followed by the greedy alternative that reuses its output.
out <- FindMaxHomoOptimalPartitions(DataT, gamma, expFlag = FALSE)
out2 <- greedyAlgo(out$DataT, out)
CoptGreedy <- out2$Copt

# Residual vectors for the four approaches being compared.
H0Residuals <- DataT$Y - mean(DataT$Y) # deviation of Y from its mean
RegResiduals <- out$models[[1]][[1]]$residuals # first model of the first layer
OPTresiduals <- getResidualFromCopt(out$Copt, out$models)$residuals
GreedyResiduals <- getResidualFromCopt(CoptGreedy, out$models)$residuals

cat("\014") # form feed: clears the console in RStudio

# Report the root-mean-square error of each residual vector.
print(sprintf("OPT Residuals: RMSE=%g", sqrt(mean(OPTresiduals^2))))
print(sprintf("Greedy Residuals: RMSE=%g", sqrt(mean(GreedyResiduals^2))))
print(sprintf("Reg Residuals: RMSE=%g", sqrt(mean(RegResiduals^2))))
# NOTE: "\b" in the literal below is R's backspace escape; print() renders
# control characters escaped, so the console still shows \bar{Y}.
print(sprintf("\bar{Y} Residuals: RMSE=%g", sqrt(mean(H0Residuals^2))))

# Partition F-scores against T4GTCopt; both results auto-print at top level.
FscoreOut <- getPartitionFscore(T4GTCopt, out$Copt, DataT$clsLayer)
FscoreOut
getPartitionFscore(T4GTCopt, CoptGreedy, DataT$clsLayer)

TEST: igraph output display

# Visualise and summarise the partition result held in `out`, which must
# already exist from an earlier FindMaxHomoOptimalPartitions() call.
# Presumably this draws the cluster tree via igraph, per the section
# heading -- confirm against the MRReg documentation.
plotOptimalClustersTree(out)
# selFeature = TRUE is passed through as-is; its exact effect is not visible
# from this script -- TODO confirm.
PrintOptimalClustersResult(out, selFeature = TRUE)

Mixture Model

# Fit a finite mixture of regressions on the current DataT and compare the
# prediction for the first individual with its observed response.
library("flexmix")
# DataT <- clusterSimpleGenT4Func(10000)
x <- DataT$X
y <- DataT$Y
df <- data.frame(y, x)

# k = 4 mixture components; minprior = 0.2 is forwarded to flexmix's control
# list unchanged.
m2 <- flexmix(y ~ ., data = df, k = 4, control = list(minprior = 0.2))

# Reuse the first row of the training frame (response column dropped) as the
# new data. This guarantees the predictor column names match the ones the
# model was fitted on, including when X has a single column, where rebuilding
# the row with data.frame(t(x[1, ])) would produce a different column name.
predict(m2, df[1, -1, drop = FALSE])
y[1] # observed response of the first individual, for comparison

Experiment: mixture model as a baseline method

# Baseline: fit the mixture model on one simulated dataset and score the
# resulting cluster assignment against the simulation's true features.
library("flexmix")
library(MRReg)
source("supportExpFunctions.R")

DataT <- MRReg::SimpleSimulation(100, type = 5)
x <- DataT$X
y <- DataT$Y

# k = 13 components; expFlag = TRUE is passed to the helper unchanged.
mixOut <- getRMSEFromMixtureModel(x, y, k = 13, expFlag = TRUE)

# Auto-prints the partition F-score of the mixture clustering.
getMixturePartitionFscore(DataT$TrueFeature, mixOut$clsVec)

Experiment: 100 simulation runs comparing the framework, the greedy algorithm and the mixture model

# Repeated-simulation experiment: for each run, simulate a dataset, fit the
# mixture-model baseline, the optimal-partition framework and the greedy
# algorithm, then record RMSE and partition precision/recall/F-score.
library(MRReg)
source("supportExpFunctions.R")
load(file = "../data/SimGT.RData")

gamma <- 0.05
nRuns <- 100 # number of independent simulation runs (rows of result tables)
simType <- 6
polyDegree <- 3

# RMSEtableRes columns:
#   1 = optimal partition, 2 = greedy, 3 = out$models[[1]][[1]] regression,
#   4 = mixture model, 5 = mean-of-Y baseline
RMSEtableRes <- matrix(0, nRuns, 5)
# F1tableRes columns (precision, recall, F-score triples):
#   1-3 = optimal partition, 4-6 = greedy, 7-9 = mixture model
F1tableRes <- matrix(0, nRuns, 9)

for (i in seq_len(nRuns)) {
  # 100 here is the number of simulated individuals, not the run count.
  DataT <- MRReg::SimpleSimulation(100, type = simType, degree = polyDegree)
  x <- DataT$X
  y <- DataT$Y

  # Mixture-model baseline.
  mixOut <- getRMSEFromMixtureModel(x, y, k = 13, expFlag = FALSE)
  RMSEtableRes[i, 4] <- mixOut$RMSE
  outMX <- getMixturePartitionFscore(DataT$TrueFeature, mixOut$clsVec)
  F1tableRes[i, 9] <- outMX$Fscore
  F1tableRes[i, 7] <- outMX$prcVal
  F1tableRes[i, 8] <- outMX$recal

  # Optimal-partition framework and the greedy algorithm built on its output.
  out <- FindMaxHomoOptimalPartitions(DataT, gamma, expFlag = FALSE)

  out2 <- greedyAlgo(out$DataT, out)
  CoptGreedy <- out2$Copt

  OPTresiduals <- getResidualFromCopt(out$Copt, out$models)$residuals
  GreedyResiduals <- getResidualFromCopt(CoptGreedy, out$models)$residuals
  RegResiduals <- out$models[[1]][[1]]$residuals
  H0Residuals <- DataT$Y - mean(DataT$Y)
  cat("\014") # form feed: clears the console in RStudio before the progress line
  RMSEtableRes[i, 1] <- sqrt(mean(OPTresiduals^2))
  RMSEtableRes[i, 2] <- sqrt(mean(GreedyResiduals^2))
  RMSEtableRes[i, 3] <- sqrt(mean(RegResiduals^2))
  RMSEtableRes[i, 5] <- sqrt(mean(H0Residuals^2))

  # NOTE(review): the ground truth is T4GTCopt while simType is 6 -- confirm
  # this is the intended ground-truth partition for this simulation type.
  FscoreOut <- getPartitionFscore(T4GTCopt, out$Copt, DataT$clsLayer)
  F1tableRes[i, 3] <- FscoreOut$Fscore
  F1tableRes[i, 1] <- FscoreOut$prcVal
  F1tableRes[i, 2] <- FscoreOut$recal

  GreedyFscoreOut <- getPartitionFscore(T4GTCopt, CoptGreedy, DataT$clsLayer)
  F1tableRes[i, 6] <- GreedyFscoreOut$Fscore
  F1tableRes[i, 4] <- GreedyFscoreOut$prcVal
  F1tableRes[i, 5] <- GreedyFscoreOut$recal

  print(sprintf("Type%d #%d", simType, i))
}

# Per-column averages over all runs.
F1rowRes <- colMeans(F1tableRes)
RMSErowRes <- colMeans(RMSEtableRes)

# Clear the console first, then save and report completion, so the final
# message is not wiped immediately after being printed.
cat("\014")
save(RMSEtableRes, F1tableRes, F1rowRes, RMSErowRes,
     file = sprintf("T%dResout.rdata", simType))
print("Finish")

Data distribution

# Earlier bar-plot variants, kept for reference:
# barplot(table(DataT$X), xlab = "Value", ylab = "Frequency")
# barplot(table(DataT$Y), xlab = "Value", ylab = "Frequency")

# Simulate a fresh type-5 dataset and draw one untitled histogram for the
# independent variables (X) followed by one for the dependent variable (Y).
DataT <- SimpleSimulation(indvN = 100, type = 5)
invisible(lapply(
  list(DataT$X, DataT$Y),
  function(values) hist(values, xlab = "Value", ylab = "Frequency", main = "")
))


DarkEyes/MRReg documentation built on Aug. 24, 2022, 5:47 p.m.