Interpret Topics

```r, slideshow={'slide_type': 'skip'}}

# Packages used throughout the notebook. Each call sits on its own line:
# several calls on one line without `;` do not parse.
library("oildata")
library("incidentmodels")
suppressMessages(library("tidyverse"))
library("tidytext")
suppressMessages(library("magrittr"))
suppressMessages(library("glue"))
suppressMessages(library("here"))

# Path helper: data_folder("x.rds") is here("data-raw", ".temp", "data", "x.rds").
data_folder <- purrr::partial(here, "data-raw", ".temp", "data")

```r, slideshow={'slide_type': 'subslide'}}
# Load the incident/topic gamma table and add two empty columns that are
# filled in topic by topic further down.
gammas <- readRDS(data_folder("gammas.rds"))
gammas$label <- NA_character_  # character column, all missing for now
gammas$cause_topic <- NA       # logical column, all missing for now
head(gammas)

```r, slideshow={'slide_type': 'subslide'}} head(betas)

<!-- #region slideshow={"slide_type": "slide"} -->
## Functions
<!-- #endregion -->

```r}
# Keep the eight highest-beta terms of every topic, then order the result by
# topic and, within a topic, by descending beta.
top_terms <- arrange(betas, desc(beta)) %>%
    group_by(topic) %>%
    slice_head(n = 8) %>%
    ungroup() %>%
    arrange(topic, desc(beta))

head(top_terms)
# Return the rows of `top_terms` that belong to one topic.
#
# topic: topic number to look up.
#
# The comparison uses `.env$topic` so the right-hand side is always the
# function argument. An embraced `{{ topic }}` is evaluated against the data
# first, so a caller writing `get_terms(topic)` with a variable named `topic`
# would compare the column with itself and get every row back.
get_terms <- function(topic) {
    filter(top_terms, .data$topic == .env$topic)
}
# Incident narratives keyed by a character incident_ID, ready to be joined
# onto `gammas` by that column.
narratives <- incidents %>%
    select(incident_ID, narrative) %>%
    mutate(incident_ID = as.character(incident_ID))
# Format the narrative of the incident with the highest gamma for a topic.
#
# topic: topic number to look up in `gammas`.
#
# Returns a glue string holding the incident ID, the operator ID and the
# narrative text; if several incidents tie for the highest gamma, one element
# per incident is returned.
get_narrative <- function(topic) {
    gammas %>%
        # `.env$topic` pins the right-hand side to the function argument. An
        # embraced `{{ topic }}` would resolve to the `topic` column whenever
        # the caller passes a variable that is itself called `topic`.
        filter(.data$topic == .env$topic) %>%
        filter(gamma == max(gamma)) %>%
        left_join(narratives, by = "incident_ID") %$%
        glue("Incident ID: {incident_ID}
              Operator ID: {ID}

              {narrative}")
}

Topics 1-5

Topic 1

get_terms(topic = 1)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(1))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic seems to be related to the management of incident reports**
<!-- #endregion -->

```r}
# Topic 1: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 1] <- "report_mngt"
gammas$cause_topic[gammas$topic == 1] <- FALSE

Topic 2

get_terms(topic = 2)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(2))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to the management of spills**
<!-- #endregion -->

```r}
# Topic 2: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 2] <- "spill_mngt"
gammas$cause_topic[gammas$topic == 2] <- FALSE

Topic 3

get_terms(topic = 3)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(3))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to contractors and excavation**
<!-- #endregion -->

```r}
# Topic 3: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 3] <- "excavation"
gammas$cause_topic[gammas$topic == 3] <- TRUE

Topic 4

get_terms(topic = 4)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(4))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to storms, water and related damages**
<!-- #endregion -->

```r}
# Topic 4: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 4] <- "water"
gammas$cause_topic[gammas$topic == 4] <- TRUE

Topic 5

get_terms(topic = 5)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(5))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to remote operation of pipelines**
<!-- #endregion -->

```r}
# Topic 5: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 5] <- "control_center"
gammas$cause_topic[gammas$topic == 5] <- FALSE

Topics 6-10

Topic 6

get_terms(topic = 6)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(6))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to service and repair**
<!-- #endregion -->

```r}
# Topic 6: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 6] <- "service"
gammas$cause_topic[gammas$topic == 6] <- FALSE

Topic 7

get_terms(topic = 7)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(7))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to pumps and their components**
<!-- #endregion -->

```r}
# Topic 7: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 7] <- "pumps"
gammas$cause_topic[gammas$topic == 7] <- TRUE

Topic 8

get_terms(topic = 8)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(8))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to procedures**
<!-- #endregion -->

```r}
# Topic 8: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 8] <- "procedures"
gammas$cause_topic[gammas$topic == 8] <- TRUE

Topic 9

get_terms(topic = 9)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(9))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to the commodity transported**
<!-- #endregion -->

```r}
# Topic 9: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 9] <- "commodity"
gammas$cause_topic[gammas$topic == 9] <- FALSE

Topic 10

get_terms(topic = 10)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(10))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to testing and monitoring**
<!-- #endregion -->

```r}
# Topic 10: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 10] <- "monitoring"
gammas$cause_topic[gammas$topic == 10] <- TRUE

Topics 11-15

Topic 11

get_terms(topic = 11)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(11))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to flow, pressure and relief**
<!-- #endregion -->

```r}
# Topic 11: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 11] <- "pressure"
gammas$cause_topic[gammas$topic == 11] <- TRUE

Topic 12

get_terms(topic = 12)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(12))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to corrosion of pipes**
<!-- #endregion -->

```r}
# Topic 12: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 12] <- "corrosion"
gammas$cause_topic[gammas$topic == 12] <- TRUE

Topic 13

get_terms(topic = 13)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(13))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to gaskets and related components**
<!-- #endregion -->

```r}
# Topic 13: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 13] <- "gaskets"
gammas$cause_topic[gammas$topic == 13] <- TRUE

Topic 14

get_terms(topic = 14)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(14))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to reporting of the spill**
<!-- #endregion -->

```r}
# Topic 14: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 14] <- "report"
gammas$cause_topic[gammas$topic == 14] <- FALSE

Topic 15

get_terms(topic = 15)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(15))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to the personnel's response on site**
<!-- #endregion -->

```r}
# Topic 15: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 15] <- "response"
gammas$cause_topic[gammas$topic == 15] <- FALSE

Topics 16-20

Topic 16

get_terms(topic = 16)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(16))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to valves and related equipment**
<!-- #endregion -->

```r}
# Topic 16: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 16] <- "valve"
gammas$cause_topic[gammas$topic == 16] <- TRUE

Topic 17

get_terms(topic = 17)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(17))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to the leak and clean up**
<!-- #endregion -->

```r}
# Topic 17: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 17] <- "leak"
gammas$cause_topic[gammas$topic == 17] <- FALSE

Topic 18

get_terms(topic = 18)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(18))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to soil, contamination and cleanup**
<!-- #endregion -->

```r}
# Topic 18: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 18] <- "contamination"
gammas$cause_topic[gammas$topic == 18] <- FALSE

Topic 19

get_terms(topic = 19)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(19))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to the release of oil**
<!-- #endregion -->

```r}
# Topic 19: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 19] <- "release"
gammas$cause_topic[gammas$topic == 19] <- FALSE

Topic 20

get_terms(topic = 20)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(20))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic contains words to describe a spill**
<!-- #endregion -->

```r}
# Topic 20: store the label and the cause_topic flag on its rows.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 20] <- "spill"
gammas$cause_topic[gammas$topic == 20] <- FALSE

Topics 21-23

Topic 21

get_terms(topic = 21)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(21))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to fire and emergencies**
<!-- #endregion -->

```r}
# Topic 21: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 21] <- "fire"
gammas$cause_topic[gammas$topic == 21] <- TRUE

Topic 22

get_terms(topic = 22)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(22))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to tanks and related facilities and equipment**
<!-- #endregion -->

```r}
# Topic 22: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 22] <- "tanks"
gammas$cause_topic[gammas$topic == 22] <- TRUE

Topic 23

get_terms(topic = 23)

```r, slideshow={'slide_type': 'subslide'}} print(get_narrative(23))

<!-- #region slideshow={"slide_type": "subslide"} -->
**This topic is related to cracks and other kinds of failures**
<!-- #endregion -->

```r}
# Topic 23: store the label and the cause_topic flag on its rows.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
gammas$label[gammas$topic == 23] <- "crack"
gammas$cause_topic[gammas$topic == 23] <- TRUE

Save results

head(gammas)

```r, slideshow={'slide_type': 'subslide'}} table(unique(paste(gammas$topic, gammas$label)))

```r, slideshow={'slide_type': 'subslide'}}
# One row per distinct (topic, label, cause_topic) combination, with the topic
# stored as character; the lookup table is then written to labels.rds.
labels <- gammas %>%
    distinct(topic, label, cause_topic) %>%
    mutate(topic = as.character(topic))
head(labels)
head(gammas)
write_rds(labels, data_folder("labels.rds"))
# Read the merged incident table and attach one gamma column per topic
# (topic_1, topic_2, ...) to it.
incidents <- readRDS(data_folder("incidents_merged.rds"))
incidents_gammas <- gammas %>%
    select(incident_ID, topic, gamma) %>%
    # `id_cols` is passed by name: in current tidyr the second positional slot
    # of pivot_wider() is `...`, not `id_cols`.
    pivot_wider(
        id_cols = incident_ID,
        names_from = topic,
        values_from = gamma,
        names_prefix = "topic_"
    ) %>%
    # No `by` is given, so the join uses every column name the tables share.
    # NOTE(review): presumably that is only incident_ID - confirm the key and
    # that its type matches on both sides.
    right_join(incidents) %>%
    select(-starts_with("topic_"), starts_with("topic_")) # Move topic columns to back
head(incidents_gammas)

r, slideshow={'slide_type': 'subslide'}} write_rds(incidents_gammas, data_folder("incidents_topics.rds"))



julianbarg/oildata documentation built on Nov. 27, 2020, 4 p.m.