# Export Collisions Data
#functions
# Convert the columns of a data frame to compact integer codes for export.
#
# For each column of `x`, in order:
#   * a character column (other than one named "geotext") is turned into a
#     factor;
#   * a factor column is replaced by its integer codes, and a lookup table
#     with columns `code` and `label` is written to
#     "<lookup_dir>/<prefix>_<column name>.csv";
#   * a plain numeric (double) column whose values are all NA or whole
#     numbers in 0..10000 is converted to integer.
# Any other column (dates, logicals, integers, ...) is left untouched.
#
# Args:
#   x:          data frame to clean.
#   prefix:     string placed at the start of every lookup file name.
#   lookup_dir: directory the lookup CSVs are written to; it must exist.
#
# Returns: `x` with the converted columns.
clean.vars <- function(x, prefix, lookup_dir = "../cyipt-bigdata/forDB/lookup") {
  for (b in seq_len(ncol(x))) {
    col_name <- names(x)[b]
    # `[[` always extracts the column itself, and the is.*() predicates give a
    # single TRUE/FALSE even for columns carrying several classes (POSIXct,
    # ordered factors), where `class(col) == "..."` would not.
    if (is.character(x[[b]]) && col_name != "geotext") {
      x[[b]] <- as.factor(x[[b]])
      message("Changing ", col_name, " from character to factor")
    }
    if (is.factor(x[[b]])) {
      message("Changing ", col_name, " from factor to interger")
      # Create a lookup table; seq_len() keeps it empty (rather than c(1, 0))
      # when the column has no levels, e.g. an all-NA character column.
      lookup <- data.frame(
        code = seq_len(nlevels(x[[b]])),
        label = levels(x[[b]])
      )
      # Save out the lookup table
      write.csv(
        lookup,
        file.path(lookup_dir, paste0(prefix, "_", col_name, ".csv")),
        row.names = FALSE
      )
      x[[b]] <- as.integer(x[[b]])
    } else if (identical(class(x[[b]]), "numeric")) {
      # Only plain doubles qualify; classed doubles such as dates are skipped.
      if (all(unique(x[[b]]) %in% c(NA, 0:10000))) {
        message("Changing ", col_name, " from numeric to interger")
        x[[b]] <- as.integer(x[[b]])
      }
    }
  }
  return(x)
}
# Drop every column of a data frame that contains only NA values.
#
# A message "removing <column name>" is emitted for each dropped column.
#
# Args:
#   x: data frame to clean.
#
# Returns: `x` without its all-NA columns; a data frame with no such columns
#   (including one with no columns at all) is returned unchanged.
clean.nas <- function(x) {
  # One TRUE/FALSE per column; vapply() copes with a zero-column input, where
  # looping over 1:ncol(x) would try to read a non-existent column.
  all_na <- unname(vapply(x, function(col) all(is.na(col)), logical(1)))
  for (col_name in names(x)[all_na]) {
    message(paste0("removing ", col_name))
  }
  # List-style indexing keeps the result a data frame even if one column is left.
  x[!all_na]
}
# Collect the collision-to-road matches of every region ----
# For each region in `regions.todo`, read its collisions.Rds (when the file
# exists), keep the accident reference with its matched line and junction,
# drop repeated accident references and tag the rows with the region name.
# Regions without an input file are reported and skipped.
regions <- regions.todo
acc.files <- file.path("../cyipt-bigdata/osm-prep", regions, "collisions.Rds")
acc.list <- vector("list", length(regions))
# seq_along() makes the loop a no-op when there are no regions
for (b in seq_along(regions)) {
  if (file.exists(acc.files[b])) {
    # Get file
    acc <- readRDS(acc.files[b])
    message(paste0(Sys.time(), " Processing collisions ", regions[b], " with ", nrow(acc), " collisions"))
    acc <- as.data.frame(acc)
    acc <- acc[, c("AccRefGlobal", "CollisionLine", "CollisionJunc")]
    acc <- acc[!duplicated(acc$AccRefGlobal), ]
    # The three-column subset above never contains `region`, so it is always
    # set here. The former `"region" %in% names(acc)` test could not be TRUE
    # and printed "Region Missing" for every region.
    acc$region <- regions[b]
    acc.list[[b]] <- acc
    rm(acc)
  } else {
    message(paste0("Input File Missing for ", regions[b], " at ", Sys.time()))
  }
}
# Stack the per-region tables; entries left NULL for missing files are skipped
acc.list <- bind_rows(acc.list)
# Load the accident, casualty and vehicle tables ----
acc.all <- readRDS("../cyipt-bigdata/collisions/acc.Rds")
cas.all <- readRDS("../cyipt-bigdata/collisions/cas.Rds")
veh.all <- readRDS("../cyipt-bigdata/collisions/veh.Rds")

# Change geometry for DB: reproject to EPSG:4326
acc.all <- st_transform(acc.all, 4326)

# Reduce precision of the coordinates to reduce file size, by a round trip
# through binary geometry with precision = 1000000
acc.all$geometry <- st_as_binary(acc.all$geometry, precision = 1000000)
acc.all$geometry <- st_as_sfc(acc.all$geometry)

# Change to PostGIS WKT: keep the geometry as text in `geotext` and drop the
# geometry column, leaving a plain data frame
acc.all$geotext <- st_as_text(acc.all$geometry)
acc.all <- as.data.frame(acc.all)
acc.all$geometry <- NULL

# Rows where coordinates are not valid are only counted, not removed
# (disabled) acc.all <- acc.all[acc.all$geotext != "POINT(NaN NaN)",]
summary(acc.all$geotext == "POINT(NaN NaN)")
summary(acc.all$geotext == "POINT(0 0)")

# Combine in the line and junction matching ----
# Diagnostics: column names of both tables and repeated line ids
names(acc.all)
names(acc.list)
summary(duplicated(acc.list$CollisionLine))

# Translate the per-region line id into the global road id
osm.all <- read.csv("../cyipt-bigdata/forDB/roads_16Jan2018.csv", stringsAsFactors = FALSE)
osm.all <- osm.all[, c("idGlobal", "id", "region")]
acc.list <- left_join(acc.list, osm.all, by = c("CollisionLine" = "id", "region" = "region"))

# Keep the accident reference, the junction and the global road id, and
# expose the global id as `CollisionLine`. Columns are picked and renamed by
# name so the result does not depend on the column order left by the join.
acc.list <- acc.list[, c("AccRefGlobal", "CollisionJunc", "idGlobal")]
names(acc.list)[names(acc.list) == "idGlobal"] <- "CollisionLine"

# Attach the matches to the accident table
acc.all <- left_join(acc.all, acc.list, by = c("AccRefGlobal" = "AccRefGlobal"))
# Clean up for DB ----
# Keep only the columns that are exported, in export order
acc.all <- acc.all[, c(
  "AccRefGlobal", "DateTime", "Severity", "nVehicles", "nCasualties",
  "RoadClass1", "RoadNumber1", "RoadType", "SpeedLimit",
  "JunctionDetail", "JunctionControl", "RoadClass2", "RoadNumber2",
  "CrossingControl", "CrossingFacilities", "Light", "Weather", "Surface",
  "SpecialConditions", "Hazards", "CollisionJunc", "CollisionLine", "geotext"
)]
cas.all <- cas.all[, c(
  "AccRefGlobal", "VehicleRef", "CasualtyRef", "CasualtyClass", "CasSex", "Age",
  "PedestrianMovement", "PedestrianDirection", "SchoolPupil", "SeatBelt",
  "CarPassenger", "BusPassenger", "CasualtyType", "MaintenanceWorker",
  "HomeArea", "CasualtyIMD"
)]
veh.all <- veh.all[, c(
  "AccRefGlobal", "VehicleRef", "VehicleType",
  "TowingArticulation", "Manoeuvre", "VehFrom", "VehTo",
  "LocationRoad", "LocationRestrictedAway", "Junction", "SkiddingOverturning",
  "ObjectInCarriageway", "LeavingCarriageway", "ObjectOffCarriageway",
  "VehicleLetter", "PointofImpact", "OtherVehicle", "CombinedDamage",
  "RoofUndersideDamage", "SexDriver", "AgeDriver", "VehAgeBand",
  "HitRun", "ForeignVehicle", "LeftHandDrive",
  "EngineSize", "Propulsion", "AgeVehicle", "DriverIMD",
  "DriverArea", "VehicleIMD", "JourneyPurpose"
)]

# Remove "data missing" labels (currently disabled)
#cas.all[cas.all == "Data missing or out of range"] <- NA
#acc.all[acc.all == "Data missing or out of range"] <- NA
#veh.all[veh.all == "Data missing or out of range"] <- NA

# Remove any all-NA columns (vehicles and casualties only; the accident
# table keeps every selected column)
veh.all <- clean.nas(veh.all)
cas.all <- clean.nas(cas.all)

# Recode each table to integers, writing one lookup CSV per factor column.
# object.size() before and after shows the memory saved.
object.size(acc.all)
acc.all <- clean.vars(acc.all, "acc")
object.size(acc.all)

object.size(veh.all)
veh.all <- clean.vars(veh.all, "veh")
object.size(veh.all)

object.size(cas.all)
cas.all <- clean.vars(cas.all, "cas")
object.size(cas.all)

# Keep the first row of every accident reference; nrow() before and after
# shows how many repeated rows were dropped
nrow(acc.all)
acc.all <- acc.all[!duplicated(acc.all$AccRefGlobal), ]
nrow(acc.all)

# Write the tables for the database, with NA written as an empty field
write.csv(acc.all, "../cyipt-bigdata/forDB/accidents.csv", row.names = FALSE, na = "")
write.csv(cas.all, "../cyipt-bigdata/forDB/casualties.csv", row.names = FALSE, na = "")
write.csv(veh.all, "../cyipt-bigdata/forDB/vehicles.csv", row.names = FALSE, na = "")
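# NOTE: the two lines below are residue from the web page this script was
# copied from; they are not part of the R code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.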