library(ggplot2)
tml1001 <- read.csv("D:\\data\\thesis\\201610\\tmldata\\tml1001all.csv",header = T) dim(tml1001)
str(tml1001)
names(tml1001)
tml1001 <- tml1001[,c(5,15)] names(tml1001)
ggplot(data=tml1001,aes(tml1001$时间序号,tml1001$机动车当量))+ geom_point(colour="steelblue")+ geom_line(colour="steelblue")
rm(tml1001)
tmlall <- read.csv("D:\\data\\thesis\\201610\\tmldata\\tmlall.csv",header = T) dim(tmlall)
table(tmlall$日期)
此为没有填补缺失值的数据,没有利用价值,删除
rm(tmlall)
tmlallnew <- read.csv("D:\\data\\thesis\\201610\\tmldata\\tmlallnew.csv",header = T) table(tmlallnew$日期)
dim(tmlallnew)
names(tmlallnew)
此为填补了缺失值的数据
tmlzz <- read.csv("D:\\data\\thesis\\201610\\tmldata\\tmlzz.csv",header = T) dim(tmlzz)
rownames(tmlzz) <- tmlzz[,1] tmlzz <- tmlzz[,-1] dim(tmlzz)
查看是否有缺失值
sum(is.na(tmlzz))
缺失值定位
which(is.na(tmlzz),arr.ind = T)
tmlzz[8,232] = 28.625 sum(is.na(tmlzz))
缺失值填补完
写入新数据
write.csv(tmlzz,"D:\\data\\thesis\\201610\\tmldata\\tmlzznew.csv")
可以猜想,可以利用宽型数据作为数据库
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$日期))+ geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.1)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("日期")
tmlallnew[tmlallnew$时间序号==332,2] = 232
write.csv(tmlallnew,"D:\\data\\thesis\\201610\\tmldata\\tmlallnew.csv")
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$日期))+ geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.1)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("日期")+ scale_x_continuous(breaks = seq(0,288,24))+ scale_y_continuous(breaks = seq(0,120,20))
tmlallnew$是否为节假日 <- as.factor(tmlallnew$是否为节假日)
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$是否为节假日))+ #geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.2,se = F)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("是否为节假日")+ scale_x_continuous(breaks = seq(0,288,24),limits = c(96,120))+ scale_y_continuous(breaks = seq(0,120,20))
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$是否为节假日))+ #geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.2,se = F)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("是否为节假日")+ scale_x_continuous(breaks = seq(0,288,24),limits = c(120,144))+ scale_y_continuous(breaks = seq(0,120,20))
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$是否为节假日))+ #geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.2,se = F)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("是否为节假日")+ scale_x_continuous(breaks = seq(0,288,24),limits = c(144,168))+ scale_y_continuous(breaks = seq(0,120,20))
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$是否为节假日))+ #geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.2,se = F)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("是否为节假日")+ scale_x_continuous(breaks = seq(0,288,24),limits = c(168,192))+ scale_y_continuous(breaks = seq(0,120,20))
ggplot(tmlallnew,aes(tmlallnew$时间序号,tmlallnew$机动车当量,group=tmlallnew$日期,color=tmlallnew$是否为节假日))+ #geom_line(alpha=2/5)+ geom_smooth(method = "loess",span=0.2,se = F)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("日期")+ scale_x_continuous(breaks = seq(0,288,24),limits = c(192,216))+ scale_y_continuous(breaks = seq(0,120,20))
fit_km1 <- kmeans(tmlzz[,1:120],centers = 5) fit_km1$cluster
as.matrix(dist(tmlzz[96:120]))[,1]
sort(as.matrix(dist(tmlzz[96:120]))[,1])
结果不尽如人意,有非节假日混入
sort(as.matrix(dist(tmlzz[120:144]))[,1])
选择目标序列,例如10月6日;生成序列库;
rownames(tmlzz)
dim(tmlzz)
tmlobj <- tmlzz[6,] rownames(tmlobj)
tmlbase <- tmlzz[-6,] rownames(tmlbase)
dim(tmlbase)
flowknn <- function(obj,base,start,k,lag_duration,fore_duration){ ld = lag_duration fd = fore_duration st = start foreflow <- obj foreflow[,] <- 0 foreflow[,1:st] = obj[,1:st] fl = st flowall = rbind(obj,base) while(fl<(ncol(obj)-1)){ fwin = fl - ld bwin = fl + fd - 1 knnames = names(sort(as.matrix(dist(flowall[,fwin:fl-1]))[,1]))[2:(2+k-1)] kn <- base[knnames,fl:bwin] foreflow[,fl:bwin] <- colMeans(kn) fl = bwin+1 } return(foreflow) }
pre <- flowknn(tmlobj,tmlbase,start = 73,k = 3,lag_duration = 24,fore_duration = 12) pre
plot(1:288,pre,type="l",col="red") points(1:288,tmlobj,col="blue")
tmlboth <- rbind(tmlobj,pre) tmlboth <- as.data.frame(tmlboth) tmlboth <- t(tmlboth) names(tmlboth) <- c("true","pre")
ggplot(tmlboth,aes(1:288,tmlboth$`06-10月-16`))+geom_point(colour="steelblue")+ geom_line(aes(1:288,tmlboth$`06-10月-161`),colour="red",size=1)+ xlab("时间序号")+ylab("车流量")+scale_color_hue("日期")+ scale_x_continuous(breaks = seq(0,288,24))+ scale_y_continuous(breaks = seq(0,120,20))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.