knitr::opts_chunk$set(cache = FALSE)
last_year <- lubridate::year(Sys.time()) - 1
last_month <- lubridate::month(Sys.time()) - 1
last_month <- ifelse(last_month == 0, 12, last_month)
last_month_lab <- lubridate::month(last_month, label = T)

this_year <- lubridate::year(Sys.time())



Aim

The following report summarizes the peskAAS survey activity in each landing site during r this_year. The purpose of this report is to provide an overview of the number of surveys submitted, the number of working days, and the estimated working hours per day.


library(ggplot2)
library(RColorBrewer)
library(magrittr)

setwd("../..")
pars <- peskas.timor.data.pipeline::read_config()

metadata <- peskas.timor.data.pipeline::get_preprocessed_sheets(pars)

metadata_stations <- metadata$stations %>%
  dplyr::select(landing_site = station_code, station_name) %>%
  dplyr::mutate(landing_site = as.character(landing_site))

landings_raw <-
  peskas.timor.data.pipeline::get_merged_landings(pars, "_weight") %>%
  dplyr::mutate(date = as.Date(date)) %>%
  dplyr::mutate(Year = lubridate::year(date)) %>%
  dplyr::filter(Year >= last_year) %>%
  dplyr::select(
    landing_id = `_id`,
    landing_site = landing_site_name,
    data_activity = Ita_koleta_dadus_husi_atividad,
    n_boats = no_boats,
    reason_zero_boats = reason_no_activity,
    date,
    today,
    start,
    end
  ) %>%
  dplyr::left_join(metadata_stations) %>%
  dplyr::select(-landing_site) %>%
  dplyr::rename(landing_site = station_name) %>%
  dplyr::mutate(
    landing_site = trimws(.data$landing_site, which = "both"),
    landing_site = dplyr::case_when(
      landing_site == "Oenunu/Citrana/Bona/Baoknana" ~ "Oenunu/Citrana",
      landing_site == "Beto Tasi/Bebonuk/Fatuhada/Kampung Alor" ~ "Beto Tasi/Bebonuk",
      landing_site == "Sentru/Liarafa/Sika/Rau Moko" ~ "Sentru/Liarafa",
      landing_site == "Oebone/Sakato/Bausiu/Mahata/Oebau/Oenunu/Kmusa/Bokos/Posto Sika" ~ "Oebone/Sakato",
      landing_site == "Dolok Oan/Hera/Metinaro/Manleu" ~ "Dolok Oan/Hera",
      landing_site == "Fatu'u/Doru/Iliana/Arlo" ~ "Fatu'u/Doru",
      landing_site == "Seiçal/Buruma/Fatuk Bo'ot/Uai'ca" ~ "Seiçal/Buruma",
      landing_site == "Beacou/Sulilaran/Palaka" ~ "Beacou/Sulilaran",
      landing_site == "Bonuk/Fatumeta/Nunumera" ~ "Bonuk/Fatumeta",
      landing_site == "Bidau Santana/Metiaut" ~ "Bidau Santana",
      T ~ landing_site
    )
  )

landings_raw_prep <-
  landings_raw %>%
  dplyr::mutate(
    landing_id = as.integer(landing_id),
    n_boats = as.integer(n_boats),
    today = as.Date(today),
    start = lubridate::as_datetime(start, tz = "Asia/Dili"),
    end = lubridate::as_datetime(end, tz = "Asia/Dili"),
    start_end_diff = round(difftime(end, start, units = "hours"), 2)
  ) %>%
  dplyr::select(
    landing_id,
    landing_site = landing_site,
    data_activity,
    n_boats,
    date,
    reason_zero_boats,
    start_end_diff
  )

landings <-
  peskas.timor.data.pipeline::get_merged_trips(pars) %>%
  dplyr::mutate(
    landing_date = as.Date(landing_date),
    Year = lubridate::year(landing_date)
  ) %>%
  dplyr::filter(Year >= last_year) %>%
  dplyr::select(landing_id, municipality, landing_date, landing_site) %>%
  dplyr::mutate(
    landing_site = trimws(.data$landing_site, which = "both"),
    landing_site = dplyr::case_when(
      landing_site == "Oenunu/Citrana/Bona/Baoknana" ~ "Oenunu/Citrana",
      landing_site == "Beto Tasi/Bebonuk/Fatuhada/Kampung Alor" ~ "Beto Tasi/Bebonuk",
      landing_site == "Sentru/Liarafa/Sika/Rau Moko" ~ "Sentru/Liarafa",
      landing_site == "Oebone/Sakato/Bausiu/Mahata/Oebau/Oenunu/Kmusa/Bokos/Posto Sika" ~ "Oebone/Sakato",
      landing_site == "Dolok Oan/Hera/Metinaro/Manleu" ~ "Dolok Oan/Hera",
      landing_site == "Fatu'u/Doru/Iliana/Arlo" ~ "Fatu'u/Doru",
      landing_site == "Seiçal/Buruma/Fatuk Bo'ot/Uai'ca" ~ "Seiçal/Buruma",
      landing_site == "Beacou/Sulilaran/Palaka" ~ "Beacou/Sulilaran",
      landing_site == "Bonuk/Fatumeta/Nunumera" ~ "Bonuk/Fatumeta",
      landing_site == "Bidau Santana/Metiaut" ~ "Bidau Santana",
      T ~ landing_site
    )
  )

regions_sites <- landings %>%
  dplyr::select(municipality, landing_site) %>%
  dplyr::distinct() %>%
  dplyr::mutate(municipality = ifelse(municipality == "Oecusse", "RAEOA", municipality))


tf <- data.frame(date = seq.Date(
  from = as.Date(paste0(lubridate::year(Sys.time()) - 1, "-01-01")),
  to = Sys.Date(),
  by = "day"
))

label_date <- function(x) {
  format(x, "%b")
}

make_color_pal <- function(colors, bias = 1) {
  get_color <- colorRamp(colors, bias = bias)
  function(x) rgb(get_color(x), maxColorValue = 255)
}

good_color <- make_color_pal(c("#ffffff", "#f2fbd2", "#c9ecb4", "#93d3ab", "#35b0ab"), bias = 2)

df <-
  dplyr::left_join(landings, landings_raw_prep, by = c("landing_id", "landing_site")) %>%
  dplyr::filter(!is.na(landing_site)) %>%
  dplyr::select(-municipality) %>%
  dplyr::mutate(id = seq(1, nrow(.))) %>%
  tidyr::pivot_wider(names_from = landing_site, values_from = landing_id) %>%
  dplyr::right_join(tf) %>%
  dplyr::filter(!landing_date == lubridate::floor_date(Sys.Date())) %>%
  dplyr::mutate(id = seq(1, nrow(.))) %>%
  tidyr::pivot_longer(
    cols = c(8:ncol(.)),
    names_to = "landing_site", values_to = "landing_id"
  ) %>%
  dplyr::arrange(date) %>%
  dplyr::mutate(
    year = lubridate::year(date),
    month = lubridate::month(date, label = T),
    landing_id_index = ifelse(!is.na(landing_id), 1, 0)
  ) %>%
  dplyr::select(
    landing_id, year, landing_id_index, landing_site,
    landing_date = date, month, n_boats, start_end_diff
  )

Summary tables

The following tables show the number of submissions for each landing site on a monthly basis.

r this_year

submissions_month <-
  df %>%
  dplyr::filter(year == this_year) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(submissions = sum(landing_id_index)) %>%
  dplyr::filter(submissions > 0) %>%
  dplyr::group_by(landing_site, landing_date) %>%
  dplyr::summarise(
    act = sum(landing_id_index),
    month = dplyr::first(month)
  ) %>%
  dplyr::group_by(landing_site, month) %>%
  dplyr::mutate(
    submissions = sum(act)
  ) %>%
  dplyr::mutate(act = ifelse(act == 0, 0, 1)) %>%
  dplyr::group_by(landing_site, month) %>%
  dplyr::summarise(
    n = sum(act),
    submissions = dplyr::first(submissions)
  ) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(mean_days = mean(n)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(submissions = ifelse(submissions == 0, NA_real_, submissions))

tab <-
  submissions_month %>%
  dplyr::select(-c(n, mean_days)) %>%
  tidyr::pivot_wider(names_from = month, values_from = submissions) %>%
  dplyr::left_join(regions_sites) %>%
  janitor::adorn_totals(where = "col") %>%
  dplyr::rename(
    Station = landing_site,
    Region = municipality
  ) %>%
  dplyr::select(Region, Station, dplyr::everything())
rows <- nrow(tab)

reactable::reactable(tab,
  defaultSorted = "Region", striped = TRUE,
  groupBy = "Region",
  defaultPageSize = rows,
  defaultColDef = reactable::colDef(
    aggregate = "sum",
    align = "center",
    minWidth = 55
  ),
  columns = list(
    Region = reactable::colDef(minWidth = 120),
    Station = reactable::colDef(minWidth = 125),
    Total = reactable::colDef(
      style = function(value) {
        normalized <- (value - min(tab$Total)) / (max(tab$Total) - min(tab$Total))
        color <- good_color(normalized)
        list(background = color)
      }
    )
  )
)

r last_year

submissions_month <-
  df %>%
  dplyr::filter(year == last_year) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(submissions = sum(landing_id_index)) %>%
  dplyr::filter(submissions > 0) %>%
  dplyr::group_by(landing_site, landing_date) %>%
  dplyr::summarise(
    act = sum(landing_id_index),
    month = dplyr::first(month)
  ) %>%
  dplyr::group_by(landing_site, month) %>%
  dplyr::mutate(
    submissions = sum(act)
  ) %>%
  dplyr::mutate(act = ifelse(act == 0, 0, 1)) %>%
  dplyr::group_by(landing_site, month) %>%
  dplyr::summarise(
    n = sum(act),
    submissions = dplyr::first(submissions)
  ) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(mean_days = mean(n)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(submissions = ifelse(submissions == 0, NA_real_, submissions))

tab <-
  submissions_month %>%
  dplyr::select(-c(n, mean_days)) %>%
  tidyr::pivot_wider(names_from = month, values_from = submissions) %>%
  dplyr::left_join(regions_sites) %>%
  janitor::adorn_totals(where = "col") %>%
  dplyr::rename(
    Station = landing_site,
    Region = municipality
  ) %>%
  dplyr::select(Region, Station, dplyr::everything())

rows <- nrow(tab)

reactable::reactable(tab,
  defaultSorted = "Region", striped = TRUE,
  groupBy = "Region",
  defaultPageSize = rows,
  defaultColDef = reactable::colDef(
    aggregate = "sum",
    align = "center",
    minWidth = 55
  ),
  columns = list(
    Region = reactable::colDef(minWidth = 120),
    Station = reactable::colDef(minWidth = 125),
    Total = reactable::colDef(
      style = function(value) {
        normalized <- (value - min(tab$Total)) / (max(tab$Total) - min(tab$Total))
        color <- good_color(normalized)
        list(background = color)
      }
    )
  )
)

r this_year detailed summary

Landing sites active during r this_year:

stations <-
  df %>%
  dplyr::filter(year == this_year & !is.na(landing_id)) %>%
  magrittr::extract2("landing_site") %>%
  unique()

stations

\pagebreak

Overall r this_year activity

The graph below show the monthly activity in the different sites in r this_year where each cells represent a calendar day. Cell colors indicate the number of submissions. The vertical axis is ranked by the most active landing sites.

df %>%
  dplyr::filter(year == this_year) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(submissions = sum(landing_id_index)) %>%
  dplyr::filter(submissions > 0) %>%
  dplyr::group_by(landing_site, landing_date) %>%
  dplyr::summarise(nsub = sum(landing_id_index)) %>%
  dplyr::mutate(
    wday = lubridate::wday(landing_date, label = T, abbr = T),
    day = lubridate::day(landing_date),
    week = lubridate::week(landing_date),
    month = lubridate::month(landing_date, label = T),
    nsub_act = ifelse(nsub > 0, "activity", "no activity"),
    activity_sum = sum(nsub_act == "activity")
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    # nsub = ifelse(nsub == 0, NA_real_, nsub),
    nsub_act = ifelse(nsub_act == "0", NA_real_, nsub_act)
  ) %>%
  ggplot(aes(x = day, y = reorder(landing_site, activity_sum))) +
  geom_tile(aes(fill = nsub), alpha = 0.8, height = 0.65, color = "white", size = 0.25) +
  theme_minimal(13.5) +
  facet_grid(~month, scales = "free") +
  scale_fill_gradient(low = "#8dc0dd", high = "#214b63", limits = c(1, NA), na.value = "white") +
  # scale_x_date(date_breaks = "1 month", minor_breaks = NULL, labels = label_date) +
  coord_cartesian(expand = F) +
  # scale_x_continuous(breaks = scales::pretty_breaks()) %>%
  labs(x = "Month day", y = "", fill = "N. submissions") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )

\pagebreak

Monthly activity

The graph below show the number of days of activity in each month in each site. A day of activity means that in that specific day at least 1 submission was recorded. The black vertical line in each panel indicates the average of the days of activity, the number in the bars indicates the number of submissions for that specific month.

nmonths <-
  df %>%
  dplyr::filter(year == this_year) %>%
  magrittr::extract2("month") %>%
  unique() %>%
  length()

colw <- ifelse(nmonths < 6, 0.25, 0.75)

df %>%
  dplyr::filter(year == this_year) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(submissions = sum(landing_id_index)) %>%
  dplyr::filter(submissions > 0) %>%
  dplyr::group_by(landing_site, landing_date) %>%
  dplyr::summarise(
    act = sum(landing_id_index),
    month = dplyr::first(month)
  ) %>%
  dplyr::group_by(landing_site, month) %>%
  dplyr::mutate(
    submissions = sum(act)
  ) %>%
  dplyr::mutate(act = ifelse(act == 0, 0, 1)) %>%
  dplyr::group_by(landing_site, month) %>%
  dplyr::summarise(
    n = sum(act),
    submissions = dplyr::first(submissions)
  ) %>%
  dplyr::group_by(landing_site) %>%
  dplyr::mutate(mean_days = mean(n)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(submissions = ifelse(submissions == 0, NA_real_, submissions)) %>%
  ggplot() +
  theme_bw(13) +
  geom_col(mapping = aes(n, month), alpha = 0.6, width = colw, fill = "#457b9d") +
  geom_text(mapping = aes(n, month, label = submissions), hjust = 1) +
  geom_vline(mapping = aes(xintercept = mean_days), linetype = "dotted", size = 0.75) +
  facet_wrap(landing_site ~ ., ncol = 5) +
  scale_x_continuous(n.breaks = 7) +
  labs(x = "Days of activity", y = "") +
  theme(strip.background = element_rect(fill = "white"))

\pagebreak

target_month <- lubridate::floor_date(Sys.Date(), "month")
target_month <- seq(target_month, length = 2, by = "-1 months")[2]
ref_month <- seq(target_month, length = 2, by = "-1 months")[2]

target_month_lab <- lubridate::month(target_month, label = T, abbr = F)
ref_month_lab <- lubridate::month(ref_month, label = T, abbr = F)

Monthly variation

The table below shows the overall activity in each landing site referred to the last complete month r target_month_lab compared to the previous one r ref_month_lab. Colours and numbers in brackets in each cell refer to the percentage variation of each indicator from the previous month. Grey cells indicate no possible comparison due to lack of data. The indicator "Working hours" is estimated assuming a duration of 6 minutes for each submission.

df_tab <-
  df %>%
  dplyr::mutate(month = lubridate::floor_date(landing_date, "month")) %>%
  dplyr::filter(month %in% c(target_month, ref_month)) %>%
  dplyr::group_by(month, landing_site) %>%
  dplyr::mutate(submissions = sum(landing_id_index)) %>%
  dplyr::filter(submissions > 0) %>%
  dplyr::group_by(landing_site, month, landing_date) %>%
  dplyr::summarise(N.submissions = sum(!is.na(landing_id))) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(`Days of activity` = ifelse(N.submissions > 0, 1, 0)) %>%
  dplyr::group_by(month, landing_site) %>%
  dplyr::summarise(
    N.submissions = sum(N.submissions),
    `Days of activity` = sum(`Days of activity`)
  ) %>%
  dplyr::mutate(
    `Daily work hours` = ((N.submissions * 6) / 60) / 220,
    `Daily work hours` = round(`Daily work hours`, 3)
  ) %>%
  dplyr::arrange(dplyr::desc(N.submissions)) %>%
  dplyr::ungroup() %>%
  # dplyr::mutate(id = seq(1, dplyr::n())) %>%
  tidyr::pivot_wider(names_from = month, values_from = 3:5) %>%
  dplyr::ungroup() %>%
  dplyr::rename(
    nsub_before = 3,
    nsub_now = 2,
    days_before = 5,
    days_now = 4,
    wh_before = 7,
    wh_now = 6
  ) %>%
  dplyr::mutate(dplyr::across(is.integer, ~ as.numeric(.))) %>%
  dplyr::mutate(
    diff_nsub = ((nsub_now - nsub_before) / nsub_before) * 100,
    diff_days = ((days_now - days_before) / days_before) * 100,
    diff_wh = ((wh_now - wh_before) / wh_before) * 100,
    dplyr::across(is.numeric, ~ round(., 2))
  ) %>%
  dplyr::select(-dplyr::contains("before")) %>%
  tidyr::pivot_longer(-c(landing_site, dplyr::starts_with("diff")))

df_tab_cols <-
  df_tab %>%
  dplyr::select(-c(name, value)) %>%
  dplyr::rename(
    days_now = diff_days,
    nsub_now = diff_nsub,
    wh_now = diff_wh
  ) %>%
  tidyr::pivot_longer(dplyr::contains("now"), values_to = "perc_val") %>%
  dplyr::distinct() %>%
  dplyr::mutate(perc_val = ifelse(is.infinite(perc_val), NA_real_, perc_val))


dplyr::full_join(df_tab, df_tab_cols) %>%
  dplyr::select(-dplyr::contains("diff")) %>%
  dplyr::mutate(name = dplyr::case_when(
    name == "nsub_now" ~ "N. submissions",
    name == "days_now" ~ "Days of activity",
    name == "wh_now" ~ "Est. Working hours"
  )) %>%
  dplyr::mutate(
    perc_val_d = as.character(perc_val),
    perc_val_d =
      dplyr::case_when(
        as.numeric(perc_val_d) > 100 ~ "> 100",
        is.na(perc_val_d) ~ "-",
        TRUE ~ perc_val_d
      )
  ) %>%
  dplyr::mutate(perc_val = ifelse(perc_val > 100, 100, perc_val)) %>%
  ggplot() +
  geom_tile(
    mapping = aes(name, landing_site, fill = perc_val),
    color = "white", alpha = 0.75, size = 1.5
  ) +
  geom_text(
    mapping = aes(name, landing_site,
      label = paste0(value, " (", perc_val_d, "%", ")")
    ),
    color = "black"
  ) +
  theme_minimal() +
  scale_fill_viridis_c() +
  scale_x_discrete(position = "top") +
  coord_cartesian(expand = F) +
  labs(y = "", x = "", fill = "% variation\n(from previuos month)") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(face = "bold", color = "black", size = 12.5),
    panel.grid = element_blank()
  )


WorldFishCenter/peskas.timor.data.pipeline documentation built on April 14, 2025, 1:47 p.m.