Nothing
# Real Data Analysis
library(magrittr)
library(scales)
library(tidyverse)
library(xts)
library(mvMonitoring)
###### False Alarm Rates ######
# Read in the NOC data from before the pH fault
NOC_1min <- read_csv("/Users/gabriel_odom/Box Sync/Consulting/Dr. Hering/MV_Process_Control/Kazor Code/NOC_1min_clean_Interpolated_Inner.txt")
NOC_1min %>% str
# xts Matrix
NOC_xts <- xts(NOC_1min[,-1], order.by = NOC_1min[[1]])
NOC_xts %>% head(1) # 13 June at 7:10AM
NOC_xts %>% tail(1) # 2 September at 6:00AM
# Check for near-constant columns and NAs
sapply(1:ncol(NOC_xts), function(i){
length(unique(NOC_xts[,i])) < 20
})
# Potential low-variance offenders
NOC_xts[,2] %>% unique() # Bio1 Phase Switching
NOC_xts[,3] %>% unique() # Bio2 Phase Switching
NOC_xts[,5] %>% unique() # MBR1 State (usually in 1)
NOC_xts[,8] %>% unique() # MBR2 State (usually in 1)
NOC_xts[,15] %>% unique() # MBR1 Flux Mode (usually in 0)
NOC_xts[,16] %>% unique() # MBR1 Mode Switching
NOC_xts[,18] %>% unique() # MBR2 Mode Switching
NOC_xts[,20] %>% unique() # Blower1 Switching
NOC_xts[,21] %>% unique() # Blower2 Switching
NOC_xts[,22] %>% unique() # Either Blower Switching
### BioX Blowers ###
# There are three rows of Blower columns with values equal to 1 + 1/3. Get rid
# of these. Also, recode blower_x as an actual blower state vector. Code
# bio_x_blower as 0 = neither blower on, 1 = blower 1 on, 2 = blower 2 on, and
# 3 = both blowers on.Because there are a few minutes where both blowers are on,
# roll anything in 3 into a 2 (blower 2 on) or a 1 (blower 1 on) at random. This
# only happens in 246 of the over 116k rows, so we aren't losing much.
which(NOC_xts[,22] == 1 + 1/3)
NOC_xts[,20] <- round(NOC_xts[,20])
NOC_xts[,21] <- round(NOC_xts[,21])
NOC_xts[,22] <- (NOC_xts[,20] - 1) + 2 * (NOC_xts[,21] - 1)
NOC_xts[NOC_xts[,22] > 2,22] <- round(runif(246)) + 1
### BioX Phases ###
# The features bio_1_phase and bio_2_phase alternate between 1, 2, and 3. There
# is 1 record (row 9496) that has a 0 recorded for both. We'll turn this into a
# 3 for bio_1_phase and a 2 for bio_2_phase (the last recorded values).
NOC_xts[9496,2] <- 3
NOC_xts[9496,3] <- 2
### Remove NAs ###
nrow(NOC_xts) # 116570
NOC_xts <- na.omit(NOC_xts)
nrow(NOC_xts) # 111336
# We lost 5,234 observations
### Split the Label Columns ###
# Split off the Switching Indicators, and remove columns 5, 8, and 15
NOC_xts_labels <- NOC_xts[,c(2, 3, 16, 18, 20, 21, 22)]
NOC_xts <- NOC_xts[,-c(2, 3, 5, 8, 15, 16, 18, 20, 21, 22)]
# Did we get all the problem columns?
sapply(1:ncol(NOC_xts), function(i){
length(unique(NOC_xts[,i])) < 20
})
# Looks like
rm(NOC_1min)
### Save the Clean Data ###
# setwd("/Users/gabriel_odom/Box Sync/Consulting/Dr. Hering/MV_Process_Control/Kazor Code")
# write_csv(NOC_xts, "NOC_Continuous_Clean_20170329.csv")
# write_csv(NOC_xts_labels, "NOC_Continuous_Clean_Labels_20170329.csv")
###### Implement the mspTrain Function ######
# My memory can't hold all 111k observations, so I want to run this on the
# last 60 days. Kazor et al used the first 40 days, so do that too.
### True Multi-state Observations ###
trainMins <- 1:(60 * 24 * 20)
Sys.time()
NOC_MS_blower_ls <- mspTrain(data = NOC_xts[trainMins,],
# Split on the blowers
labelVector = NOC_xts_labels[trainMins,7] + 1,
trainObs = 10080,
updateFreq = 4320,
alpha = 0.001,
faultsToTriggerAlarm = 5,
lagsIncluded = 0:1,
var.amnt = 0.90)
Sys.time() # 14.5 minutes for the first 40 days, 46.5 minutes for the last 60
# days, but we increased phyisical memory use from 18% to 80%. We don't have
# the memory for the entire data set.
setwd("~/GitHub/mvMonitoring/mvMonitoring/data")
saveRDS(NOC_MS_blower_ls, file = "NOC_Results_byBlower2_20170329.rds")
NOC_MS_blower_ls <- readRDS("NOC_Results_byBlower2_20170329.rds")
# False Alarm rate MS
nrow(NOC_MS_blower_ls$FaultChecks)
# T2
length(which(NOC_MS_blower_ls$Alarms[,85] %in% c(1,3))) / 45721
# SPE
length(which(NOC_MS_blower_ls$Alarms[,85] %in% c(2,3))) / 45721
### No Multi-state Observations ###
Sys.time()
NOC_SS_blower_ls <- mspTrain(data = NOC_xts[trainMins,],
# Split on the blowers
labelVector = rep(1, length(trainMins)),
trainObs = 10080,
updateFreq = 4320,
alpha = 0.001,
faultsToTriggerAlarm = 5,
lagsIncluded = 0:1,
var.amnt = 0.90)
trainMins2 <- (60 * 24 * 20 + 1 - 10080):(60 * 24 * 40)
NOC_SS_blower_ls2 <- mspTrain(data = NOC_xts[trainMins2,],
# Split on the blowers
labelVector = rep(1, length(trainMins2)),
trainObs = 10080,
updateFreq = 4320,
alpha = 0.001,
faultsToTriggerAlarm = 5,
lagsIncluded = 0:1,
var.amnt = 0.90)
Sys.time() # 14.5 minutes for the first 40 days, 56 minutes for the last 60
# days, but we increased phyisical memory use from 18% to 80%. We don't have
# the memory for the entire data set.
setwd("~/GitHub/mvMonitoring/mvMonitoring/data")
saveRDS(NOC_SS_blower_ls, file = "NOC_SS_Results_byBlower2_20170330.rds")
NOC_SS_blower_ls <- readRDS("NOC_SS_Results_byBlower2_20170330.rds")
# False Alarm rate sS
nrow(NOC_SS_blower_ls$FaultChecks) + nrow(NOC_SS_blower_ls2$FaultChecks)
# T2
(length(which(NOC_SS_blower_ls$Alarms[,85] %in% c(1,3))) +
length(which(NOC_SS_blower_ls2$Alarms[,85] %in% c(1,3)))) / 47518
# SPE
(length(which(NOC_SS_blower_ls$Alarms[,85] %in% c(2,3))) +
length(which(NOC_SS_blower_ls2$Alarms[,85] %in% c(2,3)))) / 47518
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.