myTableCardiff<-read.csv("~/desktop/Cardiff.csv")
summary(myTableCardiff)
col_headings <- c("year","cp", "estimationmethod", "estimationmethoddetailed",
"region", "localauthority","road","roadcategory","easting",
"northing","startjunction","endjunction","linklengthmiles",
"pedalcycles","motorcycles","carstaxis","busescoaches",
"lightgoodsvehicles","v2axlerigidhgv","v3axlerigidhgv",
"v4or5axlerigidhgv","v3or4axleartichgv","v5axleartichgv",
"v6ormoreaxleartichgv","allhgvs","allmotorvehicles")
myTableCardiff
names(myTableCardiff) <- col_headings
attach(myTableCardiff)
myTableCardiff$routes <- paste(myTableCardiff$startjunction,
myTableCardiff$endjunction)
as.data.frame(table(myTableCardiff$routes))
busescoaches <- myTableCardiff$busescoaches
linklengthmiles <- myTableCardiff$linklengthmiles
frequency=busescoaches/linklengthmiles
dataset<-data.frame(busescoaches,linklengthmiles,frequency)
attach(dataset)
alldata<-data.frame(myTableCardiff$routes,busescoaches,linklengthmiles,
frequency)
attach(alldata)
#Max-Min Normalization
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x)))
}
# normalize our dataset
#maxmindf is a shrinked version of original dataset
maxmindf <- as.data.frame(lapply(dataset, normalize))
#pca
column_names<-colnames(maxmindf)
#scale, with default settings, will calculate the mean and
#standard deviation of the entire vector,
#then "scale" each element by those values by subtracting the mean
#and dividing by the sd.
pcamatrix<-scale(maxmindf[,column_names])
#clustering
# C(1501,2), calculating the elucidean distance between every pair of points
d<-dist(pcamatrix, method="euclidean")
#Ward's minimum variance method aims at finding compact, spherical clusters
pfit<-hclust(d,method="ward.D")
#try to plot the clustering tree, does not go well
#plot(pfit,labels=myTableLeicester$routes)
# draw rectangles around clusters around pfit (an object returned by hclust)
#rect.hclust(pfit,k=3)
# k = the number of clusters the tree should be cut into.
groups<-cutree(pfit,k=3)
dataset$lab <- as.factor(groups)
a <- prVis (dataset, labels = T, saveOutput = T)
#sometimes for some unknown reason, x.pca.prout$x has no row names, this line
#make up to that deficit
row.names(a$prout$x) <- as.character(1:nrow(a$prout$x))
addRowNums(16)
library("umap")
z <- umap(dataset[,-4])
plot(z$layout,col=dataset[,4])
#cluster1:Short road length/low traffic for edinburge case
#cluster2:Long road length/high traffic
#cluster3:Short road length/high traffic
# green = c3
# red = c2
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.