# scripts/visualise/export_collisions2.R

# Export Collisions Data

#functions
# Recode the columns of a data frame into compact integer types for export.
#
# Arguments:
#   x          data frame to recode.
#   prefix     string used at the start of every lookup file name; each file
#              is written as "<prefix>_<column name>.csv".
#   lookup.dir directory that receives the lookup tables. The default is the
#              location this script has always written to.
#
# Column by column:
#   * character columns, except one named "geotext", are turned into factors;
#   * factor columns are replaced by their integer codes, after a code/label
#     lookup table has been written to lookup.dir;
#   * plain numeric columns whose values are all NA or whole numbers in
#     0..10000 are stored as integer.
# Every other column (dates, date-times, logicals, ...) is left untouched.
#
# Returns x with the recoded columns.
clean.vars <- function(x, prefix, lookup.dir = "../cyipt-bigdata/forDB/lookup"){
  # seq_len() makes a zero-column input a no-op instead of looping over c(1, 0)
  for(b in seq_len(ncol(x))){
    col.name <- names(x)[b]

    # x[[b]] always yields the bare column (x[, b] stays a one-column table for
    # tibbles), and is.*() gives a single TRUE/FALSE even for columns with
    # several classes such as POSIXct, where class(col) == "..." has
    # length > 1 and cannot be used as an if() condition.
    if(is.character(x[[b]]) && col.name != "geotext"){
      x[[b]] <- as.factor(x[[b]])
      message("Changing ",col.name," from character to factor")
    }

    if(is.factor(x[[b]])){
      message("Changing ",col.name," from factor to interger")
      # Create a lookup table; seq_len() keeps it empty when there are no levels
      lookup <- data.frame(code = seq_len(nlevels(x[[b]])), label = levels(x[[b]]))
      # Save out the lookup table
      write.csv(lookup, file.path(lookup.dir, paste0(prefix,"_",col.name,".csv")), row.names = FALSE)
      x[[b]] <- as.integer(x[[b]])
    }else if(identical(class(x[[b]]), "numeric")){
      # Only convert when no value would be changed by the conversion
      if(all(unique(x[[b]]) %in% c(NA, 0:10000) )){
        message("Changing ",col.name," from numeric to interger")
        x[[b]] <- as.integer(x[[b]])
      }
    }
  }
  return(x)
}

# Drop every column of a data frame that contains nothing but NA.
#
# Arguments:
#   x  data frame to clean.
#
# A "removing <column>" message is emitted for each column that is dropped.
# Returns x without the all-NA columns; x is returned unchanged when there is
# nothing to drop, including when it has no columns at all.
clean.nas <- function(x){
  # One flag per column; seq_len() keeps a zero-column input from being indexed
  all.na <- vapply(seq_len(ncol(x)), function(i) all(is.na(x[[i]])), logical(1))
  togo <- which(all.na)

  for(i in togo){
    message(paste0("removing ",names(x)[i]))
  }

  # Only assign when there is something to remove
  if(length(togo) > 0){
    x[togo] <- NULL
  }
  return(x)
}




# Gather, for every region in regions.todo, the road (CollisionLine) and
# junction (CollisionJunc) matched to each collision, and stack the results
# into one table tagged with the region the rows came from.
regions <- regions.todo
acc.list <- list()

# seq_along() skips the loop for an empty vector; 1:length() would run over c(1, 0)
for(b in seq_along(regions)){
  collisions.path <- paste0("../cyipt-bigdata/osm-prep/",regions[b],"/collisions.Rds")
  if(file.exists(collisions.path)){
    #Get file
    acc <- readRDS(collisions.path)
    message(paste0(Sys.time()," Processing collisions ",regions[b]," with ",nrow(acc)," collisions"))

    # Keep only the match columns, one row per collision
    acc <- as.data.frame(acc)
    acc <- acc[,c("AccRefGlobal","CollisionLine","CollisionJunc")]
    acc <- acc[!duplicated(acc$AccRefGlobal),]

    # The three-column subset above can never contain "region", so a check for
    # a missing region column at this point is always true and would report
    # "Region Missing" for every file. Tag the rows unconditionally instead.
    acc$region <- regions[b]

    acc.list[[b]] <- acc
    rm(acc)

  }else{
    message(paste0("Input File Missing for ",regions[b]," at ",Sys.time()))
  }
}

# Regions without an input file leave NULL entries, which bind_rows() skips
acc.list <- bind_rows(acc.list)


# Load the accident (acc), casualty (cas) and vehicle (veh) tables.
# acc.all is treated as a spatial table below, with its geometry stored in a
# column named "geometry".
acc.all <- readRDS("../cyipt-bigdata/collisions/acc.Rds")
cas.all <- readRDS("../cyipt-bigdata/collisions/cas.Rds")
veh.all <- readRDS("../cyipt-bigdata/collisions/veh.Rds")

# Reproject the accident geometry to EPSG:4326 for the database
acc.all <- st_transform(acc.all, 4326)

# Reduce the precision of the coordinates to reduce file size: encode the
# geometry as binary with precision = 1000000, then decode it again.
# NOTE(review): presumably this rounds coordinates to 6 decimal places --
# confirm against the sf documentation for `precision`.
acc.all$geometry <- st_as_binary(acc.all$geometry, precision = 1000000)
acc.all$geometry <- st_as_sfc(acc.all$geometry)

# Store the geometry as WKT text in "geotext", then convert to a plain data
# frame and drop the geometry column so only the text form is exported
acc.all$geotext <- st_as_text(acc.all$geometry)
acc.all <- as.data.frame(acc.all)
acc.all$geometry <- NULL

# Diagnostics only: count collisions whose coordinates are missing or (0, 0).
# No rows are removed here; the filter below is left disabled.
#cc.all <- acc.all[acc.all$geotext != "POINT(NaN NaN)",]
# WKT writers differ on whether a space follows the type name ("POINT(0 0)"
# versus "POINT (0 0)"), and a point without coordinates may be written as
# "POINT EMPTY". Normalise the spacing and test both spellings so the counts
# do not silently read zero.
geotext.check <- sub("^POINT \\(", "POINT(", acc.all$geotext)
summary(geotext.check == "POINT(NaN NaN)" | geotext.check == "POINT EMPTY")
summary(geotext.check == "POINT(0 0)")
rm(geotext.check)

# Combine in the line and junction matching

names(acc.all)
names(acc.list)
summary(duplicated(acc.list$CollisionLine))

# Road table: idGlobal is looked up from the (id, region) pair
osm.all <- read.csv("../cyipt-bigdata/forDB/roads_16Jan2018.csv", stringsAsFactors = FALSE)
osm.all <- osm.all[,c("idGlobal","id","region")]

# Swap the per-region road id held in CollisionLine for the matching idGlobal
acc.list <- left_join(acc.list,osm.all, by = c("CollisionLine" = "id", "region" = "region"))
acc.list$CollisionLine <- NULL
acc.list$region <- NULL
# Rename by name rather than by position, so the result does not depend on
# the column order produced by the join
names(acc.list)[names(acc.list) == "idGlobal"] <- "CollisionLine"

# Attach the junction and road match to every accident record
acc.all <- left_join(acc.all, acc.list, by = c("AccRefGlobal" = "AccRefGlobal"))

# Keep only the columns that are exported, in the order they are exported

# Accident table
acc.keep <- c(
  "AccRefGlobal", "DateTime", "Severity", "nVehicles", "nCasualties",
  "RoadClass1", "RoadNumber1", "RoadType", "SpeedLimit",
  "JunctionDetail", "JunctionControl", "RoadClass2", "RoadNumber2",
  "CrossingControl", "CrossingFacilities", "Light", "Weather", "Surface",
  "SpecialConditions", "Hazards", "CollisionJunc", "CollisionLine", "geotext"
)
acc.all <- acc.all[, acc.keep]

# Casualty table
cas.keep <- c(
  "AccRefGlobal", "VehicleRef", "CasualtyRef", "CasualtyClass", "CasSex", "Age",
  "PedestrianMovement", "PedestrianDirection", "SchoolPupil", "SeatBelt",
  "CarPassenger", "BusPassenger", "CasualtyType", "MaintenanceWorker",
  "HomeArea", "CasualtyIMD"
)
cas.all <- cas.all[, cas.keep]

# Vehicle table
veh.keep <- c(
  "AccRefGlobal", "VehicleRef", "VehicleType",
  "TowingArticulation", "Manoeuvre", "VehFrom", "VehTo",
  "LocationRoad", "LocationRestrictedAway", "Junction", "SkiddingOverturning",
  "ObjectInCarriageway", "LeavingCarriageway", "ObjectOffCarriageway",
  "VehicleLetter", "PointofImpact", "OtherVehicle", "CombinedDamage",
  "RoofUndersideDamage", "SexDriver", "AgeDriver", "VehAgeBand",
  "HitRun", "ForeignVehicle", "LeftHandDrive",
  "EngineSize", "Propulsion", "AgeVehicle", "DriverIMD",
  "DriverArea", "VehicleIMD", "JourneyPurpose"
)
veh.all <- veh.all[, veh.keep]



# Remove "data missing" labels (currently disabled)
#cas.all[cas.all == "Data missing or out of range"] <- NA
#acc.all[acc.all == "Data missing or out of range"] <- NA
#veh.all[veh.all == "Data missing or out of range"] <- NA

# Drop columns that hold nothing but NA (vehicle and casualty tables)
veh.all <- clean.nas(veh.all)
cas.all <- clean.nas(cas.all)

# Recode each table to integer codes, writing the lookup tables under the
# "acc", "veh" and "cas" prefixes; object.size() is reported before and after
object.size(acc.all)
acc.all <- clean.vars(acc.all, "acc")
object.size(acc.all)

object.size(veh.all)
veh.all <- clean.vars(veh.all, "veh")
object.size(veh.all)

object.size(cas.all)
cas.all <- clean.vars(cas.all, "cas")
object.size(cas.all)

# Keep the first row for each accident reference; row counts before and after
nrow(acc.all)
is.repeat <- duplicated(acc.all$AccRefGlobal)
acc.all <- acc.all[!is.repeat,]
rm(is.repeat)
nrow(acc.all)

# Write the three tables, with NA written as an empty field
write.csv(acc.all, "../cyipt-bigdata/forDB/accidents.csv", row.names = FALSE, na = "")
write.csv(cas.all, "../cyipt-bigdata/forDB/casualties.csv", row.names = FALSE, na = "")
write.csv(veh.all, "../cyipt-bigdata/forDB/vehicles.csv", row.names = FALSE, na = "")
# cyipt/cyipt documentation built on Aug. 16, 2020, 10:24 p.m.