# Script to cluster programmes according to user matrix
library(jsonlite)
library(RPostgreSQL)
library(tidyverse)
library(NbClust)
# Read the Redshift connection details from a local JSON file.
redshift_creds <- fromJSON("~/Documents/Admin/redshift_credentials.json")
driver <- dbDriver("PostgreSQL")
# `[[` extracts each credential as a bare value; `[` would hand dbConnect()
# a one-element list instead.
db_connection <- dbConnect(
  driver,
  host = redshift_creds[["host_name"]],
  port = redshift_creds[["port_num"]],
  dbname = redshift_creds[["db_name"]],
  user = redshift_creds[["user_name"]],
  password = redshift_creds[["password"]]
)
# Pull the whole matrix table. The `finally` clause closes the connection
# whether the query succeeds or fails, so it is never left open.
user_matrix <- tryCatch(
  dbGetQuery(db_connection, "
SELECT * FROM central_insights_sandbox.dh_user_matrix
"),
  finally = dbDisconnect(db_connection)
)
# Keep only the latent-feature columns, i.e. those whose names match the
# regular expression "f[0-9]"
user_matrix_reduced <- select(user_matrix, tidyselect::matches("f[0-9]"))
# Run the model
# Seed the RNG once so the NbClust run and the k-means fit are reproducible.
set.seed(1234)
# Score candidate cluster counts with the silhouette index. The candidate
# range is currently the single value 10 (min.nc == max.nc).
nbclust_k <- NbClust(
  data = user_matrix_reduced,
  distance = "euclidean",
  method = "kmeans",
  index = "silhouette",
  min.nc = 10,
  max.nc = 10
)
# First element of Best.nc is the selected number of clusters.
optimum_k <- nbclust_k$Best.nc[[1]]
# Fit the final model with the selected k rather than a second hard-coded 10,
# so widening min.nc/max.nc above automatically carries through to the fit.
kmodel <- kmeans(user_matrix_reduced, centers = optimum_k, nstart = 25)
# Append data frame with cluster
# `item_matrix` was never created earlier in this script. The model was fitted
# on the feature columns of `user_matrix`, so kmodel$cluster holds one label
# per row of that data frame; build the output from it.
item_matrix <- user_matrix
item_matrix$cluster <- as.factor(kmodel$cluster)
# Save to disk
# NOTE(review): output path kept unchanged; confirm "item-matrix.csv" is the
# intended file name for data read from dh_user_matrix.
write.csv(
  item_matrix,
  "~/Desktop/latent-variables/item-matrix.csv",
  row.names = FALSE
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.