rm(list=ls())
library(TFTSA) library(tidyverse)
jd201610 <- read.csv("D:\\data\\thesis\\201610\\201610.csv",stringsAsFactors = F) dim(jd201610)
names(jd201610)
给变量改名字
names(jd201610) <- c("station_index","station_name","date","hour","minute","CDH","SX", "timestamp","small_pass_car","big_pass_car","small_fre_car", "mid_fre_car","big_fre_car","large_fre_car","box_car","mt_car","tlj_car")
table(jd201610$CDH,jd201610$SX)
车道号11,12,13属于上行方向;车道号31,32,33属于下行方向
head(jd201610,10)
unique(jd201610$station_name)
一共有12个观测站
table(jd201610$station_name)
黑水/乌沙数据量不够充足,可以剔除
jd201610 <- filter(jd201610,station_name != "黑水",station_name != "乌沙") unique(jd201610$station_name)
dim(jd201610)
数据量减少了不到1万行
10月13/14/15/16的数据,除了松坎观测站外,普遍缺失。
以“-”为分隔符,拆分字符串
str_split_todf <- function(strvec,sep){ tep <- strsplit(strvec,sep) len <- length(tep[[1]]) return(as.data.frame(matrix(unlist(tep),ncol=len,byrow = T),stringsAsFactors=F)) }
jd201610 <- cbind(jd201610,str_split_todf(jd201610$date,"-")) names(jd201610)[18:20] <- c("day","month","year")
jd201610[jd201610$month=="9月 ",19] <- "Sep" jd201610[jd201610$month=="10月",19] <- "Oct"
jd201610 <- mutate(jd201610,date=str_c(month,day,sep = "-")) jd201610 <- select(jd201610,-(day:year))
date_order <- unique(jd201610$date) date_order
head(jd201610)
names(jd201610)
jd201610 <- mutate(jd201610, volume=small_pass_car+small_fre_car+ 1.5*big_pass_car+1.5*mid_fre_car+ 3*big_fre_car+4*large_fre_car+4*box_car+ mt_car+tlj_car) %>% select(station_index:timestamp,volume) %>% select(-(hour:minute)) names(jd201610)
save(jd201610,file="D:\\data\\jd201610.RData")
by_timestamp <- group_by(jd201610,station_name,date,timestamp) jd201610tt <- summarise(by_timestamp,ttvolume=sum(volume,na.rm = T)) head(jd201610tt)
unique(jd201610tt$station_name)
tml <- filter(jd201610tt,station_name=="桐木岭站") tml <- arrange(tml, date, timestamp) x <- reshape2::dcast(tml,date~timestamp) rownames(x) <- x[[1]] x <- x[-1]
jdzzl <- plyr::dlply(jd201610tt,"station_name", function(x) reshape2::dcast(x,date~timestamp))
jdzzl <- lapply(jdzzl, function(x) { rownames(x) <- x[[1]] x <- x[-1] })
rowSums(is.na(x))
剔除缺失值在15个以上的日期
rowSums(is.na(x)) < 15
按日期排序,并剔除缺失值多的日期
x <- x[date_order,] x <- x[rowSums(is.na(x)) < 15,]
y <- as.matrix(x) y <- t(y) y <- as.vector(y)
na_loc <- which(is.na(y)) notna_loc <- which(!is.na(y)) for(i in na_loc){ y[i] <- mean(y[notna_loc[which(abs(i-notna_loc)<5)]]) }
y <- matrix(y,ncol = 288,byrow = T) #y <- as.data.frame(matrix(y,ncol = 288,byrow = T)) rownames(y) <- rownames(x) colnames(y) <- 1:288 y <- as.data.frame(y)
rm(x,y)
lapply(jdzzl, function(x) rowSums(is.na(x)))
jdzzl <- lapply(jdzzl, function(x){ x <- x[date_order,] x <- x[rowSums(is.na(x))<15,] })
lapply(jdzzl, function(x) rowSums(is.na(x)))
jdzzl <- lapply(jdzzl, function(x){ x <- as.matrix(x) y <- t(x) y <- as.vector(y) })
jdzzl_try <- jdzzl fillupna <- function(y){ na_loc <- which(is.na(y)) notna_loc <- which(!is.na(y)) for(i in na_loc){ y[i] <- mean(y[notna_loc[which(abs(i-notna_loc)<7)]]) } return(y) } jdzzl_try <- lapply(jdzzl_try, fillupna) lapply(jdzzl_try, function(x) sum(is.na(x)))
jdzzl <- lapply(jdzzl,function(y){ na_loc <- which(is.na(y)) notna_loc <- which(!is.na(y)) for(i in na_loc){ y[i] <- mean(y[notna_loc[which(abs(i-notna_loc)<7)]]) } return(y) }) lapply(jdzzl, function(x) sum(is.na(x)))
jdzzl <- plyr::dlply(jd201610tt,"station_name", function(x) reshape2::dcast(x,date~timestamp)) jdzzl <- lapply(jdzzl, function(x) { rownames(x) <- x[[1]] x <- x[-1] })
jdzzl <- lapply(jdzzl, function(x){ # 缺失值剔除 x <- x[date_order,] x <- x[rowSums(is.na(x))<15,] # 展开矩阵 x <- as.matrix(x) dates <- rownames(x) x <- t(x) x <- as.vector(x) # 缺失值插补 na_loc <- which(is.na(x)) notna_loc <- which(!is.na(x)) for(i in na_loc){ x[i] <- mean(x[notna_loc[which(abs(i-notna_loc)<7)]]) } # 重构矩阵 x <- matrix(x,ncol = 288,byrow = T) rownames(x) <- dates colnames(x) <- 1:288 return(x) })
曾经使用的原始版的handle_loess
handle_loess <- function(df,exp,resp,sp){ result <- rep(NA,nrow(df)) result <- stats::loess(df[,resp]~df[,exp],span=sp) return(result$fitted) }
这个handle_loess有一些问题:
names(jdzzl) x <- jdzzl[[10]]
reshape2::melt(x) %>% arrange(Var1,Var2) %>% ggplot(aes(x = Var2, y = value, group = Var1, color = Var1))+ geom_line()+ labs(x="timestamp",y="traffic volume")
handle_loess_fordf <- function(df,sp){ ndays <- nrow(df) timestamps <- 1:ncol(df) for(i in 1:ndays){ loess_result <- stats::loess(df[i,]~timestamps,span=sp) df[i,] <- loess_result$fitted } return(df) }
smoothed_x <- handle_loess_fordf(x,sp=0.2)
reshape2::melt(smoothed_x) %>% arrange(Var1,Var2) %>% ggplot(aes(x = Var2, y = value, group = Var1, color = Var1))+ geom_line()+ labs(x="timestamp",y="traffic volume")
rm(x)
jdzzl_loess <- lapply(jdzzl, handle_loess_fordf, sp=0.2)
检验
reshape2::melt(jdzzl_loess[[10]]) %>% arrange(Var1,Var2) %>% ggplot(aes(x = Var2, y = value, group = Var1, color = Var1))+ geom_line()+ labs(x="timestamp",y="traffic volume")
没什么问题,将jdzzl和jdzzl_loess写入包中
修改交调站名
names(jdzzl) <- c("丙妹","黑石","麻尾","南宁","平关","平胜","普宜","松坎","台盘","桐木岭") names(jdzzl_loess) <- c("丙妹","黑石","麻尾","南宁","平关","平胜","普宜","松坎","台盘","桐木岭")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.