只将df改为dt不能实现明显的加速,考虑改变搜索策略。
将用str_subset字符串匹配
caculate_natureflow <- function(objlink,sflinkcol){ t <- as.character(lapply(sflinkcol,stringr::str_subset,objlink)) l <- length(t[t!="character(0)"]) return(l) }
修改为查找是否%in%
#' @export caculate_natureflow <- function(objlink,sflinkcol){ t <- sapply(sflinkcol, function(x,y) y %in% x, y = objlink) return(sum(t)) }
继续将sapply改为vapply
#' @export caculate_natureflow <- function(objlink,sflinkcol){ t <- vapply(sflinkcol, function(x,y) y %in% x, y = objlink,logical(1)) return(sum(t)) }
继续将vapply简化为Filter
#' @export caculate_natureflow <- function(objlink,sflinkcol){ t <- Filter(function(x) objlink %in% x, sflinkcol) return(length(t)) }
最后并行化实现
library(parallel) cores <- detectCores() cluster <- makePSOCKcluster(cores) l <- parLapply(cluster,all_link,caculate_natureflow,demosf_dt$link) names(l) <- all_link simplify2array(l)
library(HVS) library(lubridate) library(magrittr) library(data.table)
得到路网邻接权重矩阵
load("D:/R/packages/HVS/data-raw/road_net.RData") road_net
根据权重矩阵计算里程矩阵和路径矩阵
result <- dijkstra_matrix(road_net) mileage <- result[[1]] path <- result[[2]]
核心的有两列:O列和D列,O和D在1~10中随机抽样10万次。
set.seed(1234) demosf <- data.frame(O=sample(1:10,100000,replace = T),D=sample(1:10,100000,replace = T)) dim(demosf)
去掉O和D相同的行
demosf[demosf$O == demosf$D,] %>% nrow()
demosf <- demosf[demosf$O != demosf$D,] dim(demosf)
demosf$path <- mapply(function(O,D) path[[O]][[D]],demosf$O,demosf$D) head(demosf)
demosf$link <- lapply(demosf$path,extract_link) head(demosf)
(all_link <- all_link(road_net))
caculate_natureflow("1-2",demosf$link)
sapply(all_link, caculate_natureflow,demosf$link)
system.time(sapply(all_link, caculate_natureflow,demosf$link))
时间从一分钟减少到了6秒多。
demosf_dt <- as.data.table(demosf) dim(demosf_dt)
setkey(demosf_dt,O,D)
demosf_dt$path <- mapply(function(O,D) path[[O]][[D]],demosf_dt$O,demosf_dt$D) head(demosf_dt)
demosf_dt$link <- lapply(demosf_dt$path,extract_link) head(demosf_dt)
(all_link <- all_link(road_net))
caculate_natureflow("1-2",demosf_dt$link)
sapply(all_link, caculate_natureflow,demosf_dt$link)
system.time(sapply(all_link, caculate_natureflow,demosf_dt$link))
时间提升到了6秒之内。
library(parallel) cores <- detectCores() cluster <- makePSOCKcluster(cores) l <- parLapply(cluster,all_link,caculate_natureflow,demosf_dt$link) names(l) <- all_link simplify2array(l)
system.time(parLapply(cluster,all_link,caculate_natureflow,demosf_dt$link))
速度继续大大提升。
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.