# Packages
library(dplyr)
library(lubridate)
library(purrr)
library(stringr)
library(rvest)
library(geosphere)
source(file = "inst/extdata/analysis/utils.R", encoding = "UTF-8")
# Reading the dataset
# The strings "NA", "NaN", "--" and empty fields are all read as missing values.
# fill = TRUE pads rows that have fewer fields than the header.
# NOTE(review): read.table() keeps its default quote ("\"'") and comment ("#")
# characters here; if ship names can contain ' or #, confirm rows are not
# being merged or truncated (read.csv() uses safer defaults for CSV files).
marine <- read.table(
  file = "data/raw/ships.csv", header = TRUE,
  sep = ",", dec = ".", fill = TRUE,
  na.strings = c("NA", "NaN", "--", "")
)
# Cleaning the raw table. In order:
# - drop SHIP_ID 4666609 and the "[SAT-AIS]" ship name
#   (see 01_data_exploration.md)
# - drop the upper-case PORT and SHIPTYPE columns; the file also carries
#   lower-case `port` and `ship_type` columns, which would clash once every
#   name is lower-cased
# - lower-case all column names
# - keep only rows with a non-missing ship id
# - parse `datetime` into a date-time object
# - build the image links for each flag and vessel
# - use title case for the categorical text columns
# - keep (and order) only the columns used downstream
# NOTE(review): filter() discards rows where a condition evaluates to NA, so
# rows with a missing SHIPNAME are removed by the first step as well --
# confirm that this is intended.
marine <- marine %>%
  dplyr::filter(SHIP_ID != 4666609 & SHIPNAME != "[SAT-AIS]") %>%
  dplyr::select(-c(PORT, SHIPTYPE)) %>%
  dplyr::rename_all(.funs = stringr::str_to_lower) %>%
  dplyr::filter(!is.na(ship_id)) %>%
  dplyr::mutate(
    # Text clean-up
    ship_name = stringr::str_to_title(shipname),
    port = stringr::str_to_title(port),
    destination = stringr::str_to_title(destination),
    # Timestamp parsing
    datetime = lubridate::ymd_hms(datetime),
    # Image links (helpers come from utils.R)
    flag_img = get_flag_link(flag),
    vessel_img = get_vessel_link(ship_id)
  ) %>%
  dplyr::select(
    datetime, lat, lon, speed,
    port, destination,
    flag, flag_img,
    ship_type, ship_name, ship_id, vessel_img,
    length, dwt,
    is_parked
  )
# From here on the "ship_*" columns are called "vessel_*".
names(marine) <- gsub(
  pattern = "ship",
  replacement = "vessel",
  x = names(marine)
)
# A "sail" is assumed to be one (vessel_id, port, destination) combination;
# a change in any of the three starts a new sail. Rows with a missing
# destination are kept and form their own group.
# For every message, attach the previous position and timestamp of the same
# sail (in chronological order) and the minutes elapsed since that message.
# The first message of each sail has no predecessor, so its lag columns are NA.
marine <- marine %>%
  dplyr::arrange(datetime) %>%
  dplyr::group_by(vessel_id, port, destination) %>%
  dplyr::mutate(
    lon_lag1 = dplyr::lag(lon),
    lat_lag1 = dplyr::lag(lat),
    datetime_lag1 = dplyr::lag(datetime),
    time_diff = as.numeric(difftime(datetime, datetime_lag1, units = "mins"))
  ) %>%
  dplyr::ungroup()
# Haversine distance between each position and the previous one of the sail.
marine[["dist"]] <- geosphere::distHaversine(
  p1 = cbind(marine[["lon"]], marine[["lat"]]),
  p2 = cbind(marine[["lon_lag1"]], marine[["lat_lag1"]])
)
# Statistics presented in the final map: one row per vessel.
# Step 1: for every sail (vessel_id, port, destination) keep the observation
#         with the longest hop between two consecutive messages, together
#         with a few summary figures for that sail.
# Step 2: for every vessel keep the sail whose longest hop is the largest.
marine_stats <- marine %>%
  dplyr::group_by(vessel_id, port, destination) %>%
  # arrange() ignores the grouping, but the resulting row order is preserved
  # inside each group, so first() below returns the values of the row with
  # the largest `dist` (the most recent one when distances tie).
  dplyr::arrange(dplyr::desc(dist), dplyr::desc(datetime)) %>%
  dplyr::summarise(
    vessel_name = dplyr::first(vessel_name),
    vessel_type = dplyr::first(vessel_type),
    vessel_img = dplyr::first(vessel_img),
    flag = dplyr::first(flag),
    flag_img = dplyr::first(flag_img),
    start_date = min(datetime),
    final_date = max(datetime),
    n_msg = dplyr::n(),
    n_msg_mov = sum(!is_parked),
    # Parentheses are required: unary `!` has lower precedence than `*`, so
    # `!is_parked * speed` would be read as `!(is_parked * speed)` and count
    # rows instead of adding up the speed of the moving messages.
    speed_mov_sum = sum((!is_parked) * speed),
    # Mean speed while moving; NaN (0 / 0) when the sail has no moving message.
    speed_mov = speed_mov_sum / n_msg_mov,
    length = dplyr::first(length),
    dwt = dplyr::first(dwt),
    lat = dplyr::first(lat),
    lon = dplyr::first(lon),
    lat_lag1 = dplyr::first(lat_lag1),
    lon_lag1 = dplyr::first(lon_lag1),
    dist = dplyr::first(dist),
    time_diff = dplyr::first(time_diff)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(vessel_id) %>%
  # Longest hop first, most recent sail first on ties; slice(1) then keeps
  # exactly one sail per vessel.
  dplyr::arrange(dplyr::desc(dist), dplyr::desc(start_date)) %>%
  dplyr::slice(1) %>%
  dplyr::ungroup()
# Persisting the cleaned messages and the per-vessel statistics as .rda files
save(list = "marine", file = "data/marine.rda")
save(list = "marine_stats", file = "data/marine_stats.rda")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.