library(ggplot2)
library(TFTSA)
library(reshape2)

1. 读取数据

tmlzzloess <- read.csv("D://data//thesis//201610//tmlzzloess.csv",header = T)
tmlzznew <- read.csv("D://data//thesis//201610//tmlzznew.csv",header = T)

rownames(tmlzzloess) <- tmlzzloess[,1]
tmlzzloess <- tmlzzloess[,-1]
rownames(tmlzznew) <- tmlzznew[,1]
tmlzznew <- tmlzznew[,-1]

2. 设定object flow和flow database

2.1 将flow database设为原始数据

tmlobj <- tmlzznew[6,]
tmlbase <- tmlzznew[-6,]
pre1006 <- TFTSA::flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 22,fore_duration = 6)
flow_forecastplot(tmlobj,pre1006)+theme_bw()
df <- rbind(tmlobj,pre1006)
df <- t(df)
df <- as.data.frame(df)
df[3] <- 1:288
names(df) <- c("real value","forecast value","timestamp")
df <- melt(df,id.vars = "timestamp")
forecastplot <- ggplot(df,aes(x=df$timestamp,y=df$value,group=df$variable,color=df$variable))+geom_line()+geom_point()+
  scale_color_manual(values=c("steelblue","red"))+
  xlab("Timestamp")+ylab("Traffic volume")+labs(color="Legend")+
  scale_x_continuous(breaks = seq(0,288,24))+
  scale_y_continuous(breaks = seq(0,120,20))+
  theme_bw()
forecastplot
ggsave("D:\\交大云同步\\论文\\大论文\\实验\\绘图\\K近邻原始数据预测.jpg",width=7.29,height=4.5,dpi=600)
ggsave("D:\\交大云同步\\论文\\小论文\\4_基于KNN方法的短时交通流序列非对称损失预测\\绘图\\K近邻原始数据预测V2.jpg",width=7.29,height=4.5,dpi=600)
ggsave(file="plot/05_balance_raw.jpg",width=7.29,height=4.5,dpi=600)
flow_evaluate(tmlobj,pre1006)

不必要的波动很大,这种微小波动可能没用。

2.2 将flow database设为LOESS后的数据

tmlobj <- tmlzznew[6,]
tmlbase <- tmlzzloess[-6,]
pre1006 <- TFTSA::flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 22,fore_duration = 6,
                           save_detail = "D:\\交大云同步\\论文\\大论文\\实验\\detail2.csv")
df <- rbind(tmlobj,pre1006)
df <- t(df)
df <- as.data.frame(df)
df[3] <- 1:288
names(df) <- c("real value","forecast value","timestamp")
df <- melt(df,id.vars = "timestamp")
forecastplot <- ggplot(df,aes(x=df$timestamp,y=df$value,group=df$variable,color=df$variable))+geom_line()+geom_point()+
  scale_color_manual(values=c("steelblue","red"))+
  xlab("Timestamp")+ylab("Traffic volume")+labs(color="Legend")+
  scale_x_continuous(breaks = seq(0,288,24))+
  scale_y_continuous(breaks = seq(0,120,20))+
  theme_bw()
forecastplot
ggsave("D:\\交大云同步\\论文\\大论文\\实验\\绘图\\K近邻LOESS数据预测.jpg",width=7.29,height=4.5,dpi=600)
ggsave("D:\\交大云同步\\论文\\小论文\\4_基于KNN方法的短时交通流序列非对称损失预测\\绘图\\K近邻LOESS数据预测V2.jpg",width=7.29,height=4.5,dpi=600)
ggsave(file="plot/05_balance_loess.jpg",width=7.29,height=4.5,dpi=600)
flow_evaluate(tmlobj,pre1006)

可以看到除了不光滑外,预测效果整体良好。就是对于高峰值预测偏低。

3. 残差分析

3.1 0均值性

res1006 <- tmlobj[73:288] - pre1006[73:288]
x <- plot(73:288,res1006,xlab = "Timestamp",ylab="residual")
x <- abline(h=0)

3.2 正态性

qqnorm(res1006)
qqline(res1006)

3.3 自相关性

res1006 <- as.numeric(res1006)
acf(res1006)

3.4 纯随机性

Box.test(res1006)

4. 参数的灵敏度分析

4.1 关于k的灵敏度分析

pre1006_k5 <- flow_knn(tmlobj,tmlbase,start = 73,k = 5,lag_duration = 24,fore_duration = 12)
flow_forecastplot(tmlobj,pre1006_k5)
flow_evaluate(tmlobj,pre1006_k5)

4.1.1 参数取值检验

opti_k <- function(from,to){
  result <- data.frame(matrix(NA,10,3))
  for(i in 2:10){
    pre_k <- flow_knn(tmlobj,tmlbase,start = 73,k = i,lag_duration = 24,fore_duration = 12)
    result[i,1:3] <- flow_evaluate(tmlobj,pre_k)
  }
  result <- na.omit(result)
  names(result) <- c("mse","rmse","mae")
  return(result)
}
optik <- opti_k()
write.csv(optik,file="D:\\交大云同步\\论文\\大论文\\实验\\结果表\\optik.csv")

4.2 关于lag_duration的灵敏度分析

4.2.1 ld4

pre1006_ld4 <- flow_knn(tmlobj,tmlbase,start = 73,k=3,lag_duration = 4,fore_duration = 12)
flow_forecastplot(tmlobj,pre1006_ld4)
ggsave("D:\\交大云同步\\论文\\大论文\\实验\\绘图\\K近邻参数ld4.jpg",width=7.29,height=4.5,dpi=600)
ggsave(file="plot/05_lag_duration_4min.jpg",,width=7.29,height=4.5,dpi=600)
flow_evaluate(tmlobj,pre1006_ld4)

4.2.2 ld36

pre1006_ld36 <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 36,fore_duration = 12)
flow_forecastplot(tmlobj,pre1006_ld36)
flow_evaluate(tmlobj,pre1006_ld36)

4.2.3 ld48

pre1006_ld48 <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = 12)
flow_forecastplot(tmlobj,pre1006_ld48)
flow_evaluate(tmlobj,pre1006_ld48)

4.2.4 参数取值检验

opti_ld <- function(from,to){
  result <- data.frame(matrix(NA,10,3))
  for(i in seq(from,to,2)){
    pre_k <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = i,fore_duration = 12)
    result[i,1:3] <- flow_evaluate(tmlobj,pre_k)
  }
  result <- na.omit(result)
  names(result) <- c("mse","rmse","mae")
  return(result)
}
optild <- opti_ld(4,48)
write.csv(optild,file="D:\\交大云同步\\论文\\大论文\\实验\\结果表\\optild.csv")

4.3 关于fore_duration的灵敏度分析

4.3.1 fd6

pre1006_fd6 <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = 6)
flow_forecastplot(tmlobj,pre1006_fd6)
flow_evaluate(tmlobj,pre1006_fd6)

4.3.2 fd4

pre1006_fd4 <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = 4)
flow_forecastplot(tmlobj,pre1006_fd4)
flow_evaluate(tmlobj,pre1006_fd4)

4.3.3 fd2

pre1006_fd2 <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = 2)
flow_forecastplot(tmlobj,pre1006_fd2)
flow_evaluate(tmlobj,pre1006_fd2)

4.3.4 参数取值检验

opti_fd <- function(from,to){
  result <- data.frame(matrix(NA,20,3))
  for(i in seq(from,to,4)){
    pre_fd <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = i)
    result[i,1:3] <- flow_evaluate(tmlobj,pre_fd)
  }
  result <- na.omit(result)
  names(result) <- c("mse","rmse","mae")
  return(result)
}
pre_fd <- flow_knn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = 27,
                  save_detail = "D:\\交大云同步\\论文\\大论文\\实验\\结果表\\fd27.csv")


ahorawzy/TFTSA documentation built on May 13, 2019, 12:18 p.m.