# Packages and data -----------------------------------------------------------
# NOTE: the workspace is deliberately NOT wiped with rm(list = ls()); that call
# destroys whatever the caller had loaded. Run this script in a fresh R session
# when a clean environment is needed.
library(tidyverse)
library(TFTSA)
options(digits = 3)
data("jdzzl", "jdzzl_loess")

# Every figure below is written into "plot/"; create it when it is missing so
# jpeg() and ggsave() do not fail on a fresh checkout.
if (!dir.exists("plot")) {
  dir.create("plot")
}

# TML experiment ----

# Raw data - Hierarchical clustering - Euclidean ----

# Element 10 of each data list: the raw series and its loess-smoothed version
# (presumably the "TML" station named in the section header - confirm).
tml <- jdzzl[[10]]
tml_loess <- jdzzl_loess[[10]]

# Hierarchical clustering of the rows of `tml` on Euclidean distances
# (dist() and hclust() defaults).
fit_hc1 <- hclust(dist(tml))
fit_hc1

# Dendrogram: once into a JPEG file, once on the active device.
jpeg("plot/04_raw_hc_eu_tree.jpeg")
plot(fit_hc1)
dev.off()
plot(fit_hc1)

# Cut the tree into two clusters and turn the result into a lookup table
# keyed by Date (taken from the row names of the cutree result).
group_k2 <- cutree(fit_hc1, k = 2)
group_k2 <- as.data.frame(group_k2)
group_k2$group_k2 <- factor(group_k2$group_k2)
group_k2$Date <- rownames(group_k2)

# Long format: one row per (Date, Timestamp) with its cluster label attached.
tml_long <- reshape2::melt(tml)
names(tml_long) <- c("Date", "Timestamp", "Traffic_volume")
tml_long <- merge(tml_long, group_k2, by = "Date")
tml_long <- arrange(tml_long, Date, Timestamp)

# `group` (lower case) is ggplot2's grouping aesthetic; the former `Group`
# is not an aesthetic ggplot2 knows, so it never reliably split the data
# into one line per Date.
ggplot(
  tml_long,
  aes(x = Timestamp, y = Traffic_volume, group = Date, color = group_k2)
) +
  geom_line(alpha = 0.4) +
  geom_smooth(span = 0.2, se = FALSE) +
  labs(color = "Cluster") +
  scale_x_continuous(breaks = seq(0, 288, 24)) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme_bw()
# Saves the plot just drawn (ggsave() defaults to the last plot).
ggsave(
  filename = "plot/04_raw_hc_eu.jpg",
  width = 7.29, height = 4.5, dpi = 600
)

# Raw data - Kmeans - Euclidean ----

# Two-cluster k-means on the raw series. kmeans() starts from randomly chosen
# centres, so cluster numbering and the ratios below can vary between runs.
fit_km1 <- kmeans(tml, centers = 2)
fit_km1$cluster
fit_km1$betweenss / fit_km1$totss

# Between-cluster share of the total sum of squares for k = 2..19.
# Slot 1 keeps its initial 0 because no model is fitted for k = 1.
result <- rep(0, 19)
result[2:19] <- vapply(
  2:19,
  function(k) {
    fit_km <- kmeans(tml, centers = k)
    fit_km$betweenss / fit_km$totss
  },
  numeric(1)
)
round(result, 2)

# Elbow plot; the x label now states the range that is actually drawn (1-19).
plot(
  1:19, result,
  type = "b",
  main = "Choosing the Optimal Number of Cluster",
  xlab = "number of cluster: 1 to 19",
  ylab = "betweenss / totss"
)
axis(1, at = seq(1, 19))
# Cluster membership taken straight from the k-means fit, keyed by Date.
# Colouring the series by the k-means assignment (instead of the hierarchical
# `group_k2` plus a hard-coded 1 <-> 2 label flip) keeps line and centre
# colours consistent however the random start numbers the clusters.
km1_groups <- as.data.frame(fit_km1$cluster)
names(km1_groups) <- "km_cluster"
km1_groups$km_cluster <- factor(km1_groups$km_cluster)
km1_groups$Date <- rownames(km1_groups)

# Long format of the raw series with the k-means label attached.
tml_km_long <- reshape2::melt(tml)
names(tml_km_long) <- c("Date", "Timestamp", "Traffic_volume")
tml_km_long <- merge(tml_km_long, km1_groups, by = "Date")
tml_km_long <- arrange(tml_km_long, Date, Timestamp)

# Cluster centres in long format: one row per (cluster, Timestamp).
fit_km1_center <- reshape2::melt(fit_km1$centers)
names(fit_km1_center) <- c("Center_of_cluster", "Timestamp", "Traffic_volume")
fit_km1_center$Center_of_cluster <- factor(fit_km1_center$Center_of_cluster)

# Faint per-Date series with the two centre profiles drawn on top.
# `group` (lower case) is the ggplot2 grouping aesthetic.
ggplot(
  data = tml_km_long,
  aes(x = Timestamp, y = Traffic_volume, group = Date, color = km_cluster)
) +
  geom_line(alpha = 0.2) +
  geom_line(
    data = fit_km1_center,
    aes(
      x = Timestamp, y = Traffic_volume,
      group = Center_of_cluster, color = Center_of_cluster
    ),
    size = 1
  ) +
  labs(color = "Centers of clusters") +
  scale_x_continuous(breaks = seq(0, 288, 24)) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme_bw()
ggsave(
  filename = "plot/04_raw_km_eu.jpg",
  width = 7.29, height = 4.5, dpi = 600
)

# Loess - Hierarchical - Euclidean ----

# Hierarchical clustering of the loess-smoothed series on Euclidean distances
# (dist() and hclust() defaults).
fit_hc2 <- hclust(dist(tml_loess))
fit_hc2
plot(fit_hc2)

# Two-cluster cut as a lookup table keyed by Date (row names of the result).
group_k2 <- cutree(fit_hc2, k = 2)
group_k2 <- as.data.frame(group_k2)
group_k2$group_k2 <- factor(group_k2$group_k2)
group_k2$Date <- rownames(group_k2)

# Long format of the smoothed series with the cluster label attached.
tml_long <- reshape2::melt(tml_loess)
names(tml_long) <- c("Date", "Timestamp", "Traffic_volume")
tml_long <- merge(tml_long, group_k2, by = "Date")
tml_long <- arrange(tml_long, Date, Timestamp)

# `group` (lower case) is the ggplot2 grouping aesthetic: one line per Date.
ggplot(
  tml_long,
  aes(x = Timestamp, y = Traffic_volume, group = Date, color = group_k2)
) +
  geom_line(alpha = 0.8, size = 1) +
  labs(color = "Cluster") +
  scale_x_continuous(breaks = seq(0, 288, 24)) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme_bw()
ggsave(
  filename = "plot/04_loess_hc_eu.jpg",
  width = 7.29, height = 4.5, dpi = 600
)

# Loess - Kmeans - Euclidean ----

# Two-cluster k-means on the loess-smoothed series. kmeans() starts from
# randomly chosen centres, so cluster numbering can vary between runs.
fit_km2 <- kmeans(tml_loess, centers = 2)
fit_km2$cluster
fit_km2$betweenss / fit_km2$totss

# Cluster membership taken from the k-means fit itself, keyed by Date, so the
# series are coloured by the same labelling as the centres drawn on top
# (previously they were coloured by the hierarchical `group_k2`).
km2_groups <- as.data.frame(fit_km2$cluster)
names(km2_groups) <- "km_cluster"
km2_groups$km_cluster <- factor(km2_groups$km_cluster)
km2_groups$Date <- rownames(km2_groups)

# Long format of the smoothed series with the k-means label attached.
tml_long <- reshape2::melt(tml_loess)
names(tml_long) <- c("Date", "Timestamp", "Traffic_volume")
tml_long <- merge(tml_long, km2_groups, by = "Date")
tml_long <- arrange(tml_long, Date, Timestamp)

# Cluster centres in long format: one row per (cluster, Timestamp).
fit_km2_center <- reshape2::melt(fit_km2$centers)
names(fit_km2_center) <- c("Center_of_cluster", "Timestamp", "Traffic_volume")
fit_km2_center$Center_of_cluster <- factor(fit_km2_center$Center_of_cluster)

# Per-Date series with the two centre profiles drawn on top.
# `group` (lower case) is the ggplot2 grouping aesthetic.
ggplot(
  data = tml_long,
  aes(x = Timestamp, y = Traffic_volume, group = Date, color = km_cluster)
) +
  geom_line(alpha = 0.5) +
  geom_line(
    data = fit_km2_center,
    aes(
      x = Timestamp, y = Traffic_volume,
      group = Center_of_cluster, color = Center_of_cluster
    ),
    size = 1
  ) +
  labs(color = "Centers of clusters") +
  scale_x_continuous(breaks = seq(0, 288, 24)) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme_bw()
ggsave(
  filename = "plot/04_loess_km_eu.jpg",
  width = 7.29, height = 4.5, dpi = 600
)

# Raw data - Hierarchical - Enhanced ----

# Hierarchical clustering of the raw series on the TFTSA "enhanced" distance.
# NOTE(review): the meaning of the two 0.5 arguments is defined by
# TFTSA::enhanced_euclidean - confirm against its documentation.
fit_hc3 <- hclust(enhanced_euclidean(tml, 0.5, 0.5))
plot(fit_hc3)

# Cut the tree into three clusters. This call was missing, so `group_k3` was
# used before it existed and the section stopped with "object not found".
group_k3 <- cutree(fit_hc3, k = 3)
group_k3 <- as.data.frame(group_k3)
group_k3$group_k3 <- factor(group_k3$group_k3)
group_k3$Date <- rownames(group_k3)

# Long format of the raw series with the cluster label attached.
tml_long <- reshape2::melt(tml)
names(tml_long) <- c("Date", "Timestamp", "Traffic_volume")
tml_long <- merge(tml_long, group_k3, by = "Date")
tml_long <- arrange(tml_long, Date, Timestamp)

# `group` (lower case) is the ggplot2 grouping aesthetic: one line per Date.
ggplot(
  tml_long,
  aes(x = Timestamp, y = Traffic_volume, group = Date, color = group_k3)
) +
  geom_line(alpha = 0.2) +
  geom_smooth(span = 0.2, se = FALSE) +
  labs(colour = "Cluster") +
  scale_x_continuous(breaks = seq(0, 288, 24)) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme_bw()
ggsave(
  filename = "plot/04_raw_hc_en.jpg",
  width = 7.29, height = 4.5, dpi = 600
)

# Loess - Hierarchical - Enhanced ----

# Hierarchical clustering of the loess-smoothed series on the TFTSA
# "enhanced" distance.
# NOTE(review): the meaning of the two 0.5 arguments is defined by
# TFTSA::enhanced_euclidean - confirm against its documentation.
fit_hc3 <- hclust(enhanced_euclidean(tml_loess, 0.5, 0.5))
plot(fit_hc3)

# Three-cluster cut as a lookup table keyed by Date (row names of the result).
group_k3 <- cutree(fit_hc3, k = 3)
group_k3 <- as.data.frame(group_k3)
group_k3$group_k3 <- factor(group_k3$group_k3)
group_k3$Date <- rownames(group_k3)

# Long format of the smoothed series with the cluster label attached.
tml_long <- reshape2::melt(tml_loess)
names(tml_long) <- c("Date", "Timestamp", "Traffic_volume")
tml_long <- merge(tml_long, group_k3, by = "Date")
tml_long <- arrange(tml_long, Date, Timestamp)

# `group` (lower case) is the ggplot2 grouping aesthetic: one line per Date.
ggplot(
  tml_long,
  aes(x = Timestamp, y = Traffic_volume, group = Date, color = group_k3)
) +
  geom_line(alpha = 1, size = 1.1) +
  labs(color = "Cluster") +
  scale_x_continuous(breaks = seq(0, 288, 24)) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  theme_bw()
ggsave(
  filename = "plot/04_loess_hc_en.jpg",
  width = 7.29, height = 4.5, dpi = 600
)


# ahorawzy/TFTSA documentation built on May 13, 2019, 12:18 p.m.