##############################
# STAGE 1: SETUP #
##############################
library(tidyverse)
library(dbplyr)
library(rjson)
library(aws.secrets)
library(httr)
library(RPostgreSQL)
library(NbClust)
# Read the IAM role credentials served by the instance metadata endpoint
# (169.254.169.254) and split the parsed JSON into its individual fields.
# NOTE(review): token, secret_access_key and access_key_id are not referenced
# again in the visible script - confirm whether they are still needed.
s3credentials <- GET("http://169.254.169.254/latest/meta-data/iam/security-credentials/live-r-server-r-server-ComponentRole-1P8OXESQT6GMR") %>%
  content()
s3credentials2 <- fromJSON(s3credentials)
access_key_id <- s3credentials2$AccessKeyId
secret_access_key <- s3credentials2$SecretAccessKey
token <- s3credentials2$Token
# The region has to be set before the secret lookup below
Sys.setenv(AWS_DEFAULT_REGION = "eu-west-1")
# Fetch the stored Redshift credentials and decode the JSON secret string
secret <- get_secret_value("servers/r_server/prod/credentials/scv_redshift")$SecretString %>%
  fromJSON()
# Open the Redshift connection using the username/password from the secret
redshift <- src_postgres(
  dbname = "redshiftdb",
  host = "live-idl-prod-redshift-component-redshiftcluster-1q6vyltqf8lth.ctm1v7db0ubd.eu-west-1.redshift.amazonaws.com",
  port = "5439",
  user = secret$redshift_username,
  password = secret$redshift_password
)
# Pull the whole enriched item matrix table into memory
item_matrix <- dbGetQuery(
  redshift$con,
  "SELECT * FROM central_insights_sandbox.dh_item_matrix_enriched"
)
##############################
# STAGE 2: DATA PROCESSING #
##############################
# Keep only the columns whose names match "f" followed by a digit
# (the latent feature columns)
item_matrix_reduced <- select(item_matrix, tidyselect::matches("f[0-9]"))
# Seed the RNG before the randomised clustering steps
set.seed(1234)
# Evaluate k-means solutions for 5 to 20 clusters using the silhouette index
nbclust_k <- NbClust(
  data = item_matrix_reduced,
  min.nc = 5,
  max.nc = 20,
  method = "kmeans",
  index = "silhouette"
)
# First element of Best.nc - presumably the recommended number of clusters;
# confirm against the NbClust documentation
optimum_k <- nbclust_k$Best.nc[1]
# Fit the final k-means model with 25 random starts
kmodel <- kmeans(x = item_matrix_reduced, centers = optimum_k, nstart = 25)
# Keep the fitted cluster sizes and centres for the distance calculation
cluster_size <- kmodel$size
cluster_centres <- kmodel$centers
# Record each row's cluster assignment alongside its features
item_matrix_reduced$cluster <- as.factor(kmodel$cluster)
# Function to calculate distance from vector of cluster centres
#
# For every row of `features`, computes the Euclidean distance between the
# row's feature values and the centre of the cluster that row is assigned to.
#
# Arguments:
#   features  Data frame holding the feature columns plus a `cluster` column
#             with each row's cluster label. Defaults to the global
#             `item_matrix_reduced`, so `calculate_distance()` still works
#             when called without arguments.
#   centres   Numeric matrix of cluster centres, one row per cluster (row i is
#             the centre of cluster i). Defaults to the global
#             `cluster_centres`.
#
# Returns:
#   Unnamed numeric vector with one distance per row of `features`, in the
#   same row order as `features`, so it can be assigned straight onto a data
#   frame with matching rows.
calculate_distance <- function(features = item_matrix_reduced,
                               centres = cluster_centres) {
  stopifnot(
    "cluster" %in% names(features),
    is.matrix(centres)
  )
  # Labels may be stored as a factor; go through character so we recover the
  # label itself rather than the internal level code.
  assigned <- as.integer(as.character(features$cluster))
  stopifnot(!anyNA(assigned), assigned >= 1L, assigned <= nrow(centres))
  # Compare on exactly the columns the centres were computed for instead of
  # assuming a fixed number of feature columns.
  feature_cols <- colnames(centres)
  if (is.null(feature_cols)) {
    feature_cols <- seq_len(ncol(centres))
  }
  feature_values <- as.matrix(features[, feature_cols, drop = FALSE])
  # Subtract the full centre row of each row's own cluster; drop = FALSE keeps
  # the matrix shape even when there is a single row or column.
  offsets <- feature_values - centres[assigned, , drop = FALSE]
  # Return vector of distances from cluster centres
  unname(sqrt(rowSums(offsets^2)))
}
# Run calculate_distance() with its defaults and store the result, together
# with the k-means cluster labels, as new columns on the full item matrix.
# NOTE(review): the `dist` assignment relies on calculate_distance() returning
# one value per row of item_matrix, in item_matrix's row order - confirm.
distances <- calculate_distance()
item_matrix$dist <- distances
item_matrix$cluster <- as.factor(kmodel$cluster)
##############################
# STAGE 3: WRITE TO SERVER #
##############################
# Serialise the enriched item matrix to the path below
saveRDS(item_matrix, "/efs/shiny-server/latent-feature-explorer/item_matrix.RDS")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.