knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(tidyverse) library(stringr) library(leaflet) library(fuzzyjoin) library(corrplot) bt <- openPoznan::tram_bus_stops()
Number_of_string <- str_count(bt$Stop_Headsigns, ",") + 1 Max_number_of_machines <- max(Number_of_string) bt_machines <- bt %>% separate(Stop_Headsigns, into=paste0("stop",1:Max_number_of_machines)) bt_machines_id <- bt_machines %>% select(ID, 4:(Max_number_of_machines+3)) %>% gather(stop, line, -ID) %>% filter(!is.na(line)) bt_tram <- bt_machines %>% select(ID,Route_Type, Stop_Name, 4:(Max_number_of_machines+3)) %>% gather(stop, line, -ID, -Route_Type, -Stop_Name) %>% filter(Route_Type == 0) %>% filter(!is.na(line)) %>% mutate(line = gsub('T',"0.",line)) %>% transform(line = as.numeric(line)) bt_bus <- bt_machines %>% select(ID,Route_Type, Stop_Zone, Stop_Name, 4:(Max_number_of_machines+3)) %>% gather(stop, line, -ID, -Route_Type, -Stop_Zone, -Stop_Name) %>% filter(Route_Type == 3) %>% filter(!is.na(line)) %>% transform(line = as.numeric(line)) bt_machines_leaflet <- bt_machines %>% select(ID, Stop_Name, Longitude, Latitude, 4:(Max_number_of_machines+3)) %>% gather(stop, line, -ID, -Stop_Name, -Longitude, -Latitude) %>% filter(!is.na(line)) bt_machines_nd <- bt_machines %>% select(Stop_Name,Longitude, Latitude, 4:(Max_number_of_machines+3)) %>% gather(stop, line, -Stop_Name, -Longitude, -Latitude ) %>% filter(!is.na(line)) #Min bus number => Using zones #From all min_bus <- bt_bus %>% summarise(min = min(line)) %>% .$min #Zone A min_bus_A <- bt_bus %>% filter(Stop_Zone == "A") %>% summarise(min = min(line)) %>% .$min #Zone B min_bus_B <- bt_bus %>% filter(Stop_Zone == "B") %>% summarise(min = min(line)) %>% .$min #Zone C min_bus_C <- bt_bus %>% filter(Stop_Zone == "B") %>% summarise(min = min(line)) %>% .$min #max bus number => using zones #from all max_bus <- bt_bus %>% summarise(max = max(line)) %>% .$max #From A max_bus_A <- bt_bus %>% filter(Stop_Zone == "A") %>% summarise(max = max(line)) %>% .$max #From B max_bus_B <- bt_bus %>% filter(Stop_Zone == "B") %>% summarise(max = max(line)) %>% .$max #From C max_bus_C <- bt_bus %>% filter(Stop_Zone == "C") %>% summarise(max = max(line)) %>% .$max #max tram number in town max_day_tram <- bt_tram %>% filter(line < min_bus) %>% summarise(max = max(line)) %>% .$max #min tram number in towe min_day_tram <- bt_tram %>% filter(line < min_bus) %>% summarise(min = min(line)) %>% .$min # Bus for trams whole_number <- function (x) { x %% 1 == 0 } bus_for_trams <- bt_tram %>% filter(whole_number(line) == FALSE) # Night Trams night_trams <- bt_tram %>% filter(line > min_bus)
# Max number of machines on 1 stop bt_machines_id %>% group_by(ID) %>% mutate(line = gsub('T',"0.",line)) %>% count() %>% arrange(desc(n)) # Max number of tram on 1 stop bt_tram %>% group_by(ID) %>% mutate(line = gsub('T',"0.",line)) %>% count() %>% arrange(desc(n)) # Max number of bus on 1 stop bt_bus %>% group_by(ID) %>% count() %>% arrange(desc(n))
# Min stops of 1 machine bt_machines_id %>% group_by(line) %>% count() %>% arrange(n) # Min stops of 1 tram bt_tram %>% group_by(line) %>% count() %>% arrange(n) # Min stops of 1 bus bt_bus %>% group_by(line) %>% count() %>% arrange(n)
# Min machines on 1 stop bt_machines_id %>% group_by(ID) %>% count() %>% arrange(n) #Min tram on 1 stop bt_tram %>% group_by(ID) %>% count() %>% arrange(n) #Min bus on 1 stop bt_bus %>% group_by(ID) %>% count() %>% arrange(n)
#Max stops for 1 machine bt_machines_id %>% group_by(line) %>% count() %>% arrange(desc(n)) #Max stops for 1 tram bt_tram %>% group_by(line) %>% count() %>% arrange(desc(n)) #Max stops for 1 bus bt_bus %>% group_by(line) %>% count() %>% arrange(desc(n))
#leaflet with circles #This function counts stops in two directions (i.e. At stop with ID PONA71 i PONA72 (Polna) stops only tram number 2 and it's counted twice.) # Grouping for Machines (buses + trams) Group_by_stop <- bt_machines_leaflet %>% group_by(Stop_Name) %>% mutate(line = gsub('T',"0.",line)) %>% count(vars = "line") %>% arrange(desc(n)) bt_machines_leaflet_nd <- bt_machines_nd[!duplicated(bt_machines_nd$Stop_Name),] Similarities <- Group_by_stop %>% stringdist_right_join(bt_machines_leaflet_nd, by = "Stop_Name", max_dist = 0, distance_col = NULL, method = "osa") Similarities$Stop_Name.y <- NULL Similarities$stop <- NULL Similarities$line <- NULL colnames(Similarities)[which(names(Similarities) == "Stop_Name.x")] <- "Stop_Name" #Grouping only for trams Group_by_stop_trams <- bt_tram %>% group_by(Stop_Name) %>% mutate(line = gsub('T',"0.",line)) %>% count(vars = "line") %>% arrange(desc(n)) colnames(Group_by_stop_trams)[which(names(Group_by_stop_trams) == "n")] <- "Number_of_trams" Similarities_tram <- Group_by_stop_trams%>% stringdist_right_join(Similarities, by = "Stop_Name", max_dist = 0, distance_col = NULL, method = "osa") colnames(Similarities_tram)[which(names(Similarities_tram) == "Stop_Name.y")] <- "Stop_Name" #Grouping only for buses Group_by_stop_bus <- bt_bus %>% group_by(Stop_Name) %>% mutate(line = gsub('T',"0.",line)) %>% count(vars = "line") %>% arrange(desc(n)) colnames(Group_by_stop_bus)[which(names(Group_by_stop_bus) == "n")] <- "Number_of_buses" #Creating DF with number of Machines, buses and trams at given stop. Then doing some cleaning with DF. Similarities_tram_bus <- Group_by_stop_bus %>% stringdist_right_join(Similarities_tram, by = "Stop_Name", max_dist = 0, distance_col = NULL, method = "osa") colnames(Similarities_tram_bus)[which(names(Similarities_tram_bus) == "Stop_Name.y")] <- "Stop_Name" colnames(Similarities_tram_bus)[which(names(Similarities_tram_bus) == "n")] <- "Number_of_Machines" Similarities_tram_bus[1:2] <- NULL Similarities_tram_bus[2:3] <- NULL Similarities_tram_bus$vars.y <- NULL Similarities_tram_bus <- Similarities_tram_bus[,c(3,4,1,2,5,6)] Similarities_tram_bus[is.na(Similarities_tram_bus)] <- 0 #Drawing at Leaflet map leaflet(Similarities_tram_bus) %>% addTiles() %>% addCircles(lng = ~Longitude, lat = ~Latitude, weight = 1, radius = ~sqrt(Similarities_tram_bus$Number_of_Machines) * 25, popup = paste("Stop Name:", Similarities_tram_bus$Stop_Name, "<br>", "Number of Machines:", Similarities_tram_bus$Number_of_Machines, "<br>", "Number of Trams:", Similarities_tram_bus$Number_of_trams, "<br>", "Number of buses", Similarities_tram_bus$Number_of_buses))
# Usuwa duplikaty jeżdżące na danym przystanku bt_machines_leaflet_nd_line <- bt_machines_nd[!duplicated(bt_machines_nd[c(1,5)]),] Group_by_stop_nd_line<- bt_machines_leaflet_nd_line %>% group_by(Stop_Name) %>% mutate(line = gsub('T',"0.",line)) %>% count() %>% arrange(desc(n)) Similarities_nd_line <- Group_by_stop_nd_line %>% stringdist_right_join(bt_machines_leaflet_nd, by = "Stop_Name", max_dist = 0, distance_col = NULL, method = "osa") Similarities_nd_line$Stop_Name.y <- NULL Similarities_nd_line$stop <- NULL Similarities_nd_line$line <- NULL leaflet(Similarities_nd_line) %>% addTiles() %>% addCircles(lng = ~Longitude, lat = ~Latitude, weight = 1, radius = ~sqrt(Similarities$n) * 25, popup = ~Similarities$Stop_Name.x)
#Get names for matrix nameVals <- sort(unique(unlist(bt_tram[5]))) #Create empty matrix myMat <- matrix(0, length(nameVals), length(nameVals), dimnames = list(nameVals, nameVals)) #Count how many stops of given tram coincide with other trams. Tram_list_with_stops_name <- count(bt_tram, Stop_Name, line) nameVals_df <- data.frame(nameVals) max_numer_of_stops_for_tram <- data.frame() for (i in nameVals) { Max_for_one_tram <- sum (Tram_list_with_stops_name$line == i) result <- data.frame(Max_for_one_tram ) max_numer_of_stops_for_tram <- rbind(max_numer_of_stops_for_tram, result) } max_numer_of_stops_for_tram <- cbind(max_numer_of_stops_for_tram, nameVals_df) #Add Max values to row/col titles, so user can see with is higher. MyNames <- paste(nameVals, max_numer_of_stops_for_tram$Max_for_one_tram, sep=" [") MyNames <- paste0(MyNames, ']') myMat <- matrix(0, length(MyNames), length(MyNames), dimnames = list(MyNames, MyNames)) #Add max values to main diagonal myMat[col(myMat) == row(myMat)] <- myMat[col(myMat) == row(myMat)] + max_numer_of_stops_for_tram$Max_for_one_tram #ExpandGrid function Tests
similarities_percent <- expand.grid(line1 = unique(bt_tram$line), line2 = unique(bt_tram$line)) %>% arrange(line1, line2) %>% mutate(stops_common=0, stops_sum=0) for(i in 1:nrow(similarities_percent)){ #i <- 2 stops_line1 <- bt_tram %>% filter(line==similarities_percent$line1[i]) %>% select(ID) stops_line2 <- bt_tram %>% filter(line==similarities_percent$line2[i]) %>% select(ID) common <- inner_join(stops_line1, stops_line2, by="ID") sum <- length(unique(c(stops_line1$ID, stops_line2$ID))) similarities_percent$stops_common[i] <- nrow(common) similarities_percent$stops_sum[i] <- sum } similarities_percent <- similarities_percent %>% mutate(stops_percent = stops_common/stops_sum*100) cols = c(1,2) newdf = similarities_percent[,cols] for (i in 1:nrow(similarities_percent)){ newdf[i, ] = sort(similarities_percent[i, cols]) } similarities_percent <- similarities_percent[!duplicated(newdf),] # Corrplot similarities_percent_cor <- similarities_percent[-c(3:4)] wide = reshape(similarities_percent_cor[,1:3], idvar = c("line1"), timevar="line2", direction = "wide") rownames(wide) = wide$line1 wide = wide[,-1] colnames(wide) = sub("stops_percent.", "", colnames(wide)) wide = as.matrix(wide) wide[lower.tri(wide)] <- t(wide)[lower.tri(wide)] corrplot (wide, is.corr=FALSE, method = "square")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.