R/get_driving_distances.R

#' @importFrom oddsandsods customQuery
#' @importFrom fields rdist.earth.vec
#' @importFrom magrittr %>% %<>%
#' @import dplyr
#' @import RMySQL
NULL

#' @export
getDrivingDistances <- function(year=2017){
  options(sqldf.driver = "SQLite", gsubfn.engine = "R")

  con <<- dbConnect(drv = MySQL(), username="juicystat", dbname="speedway", host="127.0.0.1")
  trips <- customQuery({"
    SELECT
      e.date,
      competition,
      rider_name,
      e.name event,
      p.name place,
      X(coords) lon,
      Y(coords) lat,
      s.points,
      s.heats
    FROM speedway.events e
    LEFT JOIN speedway.places p on e.place = p.name
    LEFT JOIN speedway.event_squads s on e.id = s.event_id
    WHERE
    date >= '2017-01-01' and
    heats > 0
    "})
  dbDisconnect(con)

  # reshaping
  trips$date %<>% as.Date
  rider_trips <-
    trips %>%
    arrange(date) %>%
    group_by(rider_name) %>%
    mutate(
      from_date = lag(date),
      to_date = date,
      from_place = lag(place),
      to_place = place,
      from_x = lag(lon),
      from_y = lag(lat),
      to_x   = lon,
      to_y   = lat
    ) %>%
    filter( !is.na(from_x) ) %>%
    select(rider_name, starts_with("from_"),starts_with("to_"), heats, points, competition)

  # differences (distances and time)
  rider_trips %<>%
    mutate(
      distance = rdist.earth.vec( cbind(from_x, from_y), cbind(to_x, to_y), miles = F),
      date_diff = as.integer(to_date - from_date)
    )

  # summarization
  competitions <-
    rider_trips %>%
    group_by(rider_name, competition) %>%
    summarize( n = n() ) %>%
    group_by(rider_name) %>%
    arrange(competition) %>%
    summarize(
      competitions = paste( paste0(competition, " (", n,")"), collapse= ", " )
    )

  distances <-
    rider_trips %>%
    group_by(rider_name) %>%
    summarize(
      events = n(),
      heats = sum(heats, na.rm=T),
      points = sum(points, na.rm=T),
      mean_time_break = mean(date_diff, na.rm=T),
      total_distance = sum(distance, na.rm=T)
    ) %>%
    arrange(desc(total_distance))

  summary <- left_join(competitions, distances) %>% filter(events >= 10)
  table <-
    summary %>%
    arrange(desc(total_distance)) %>%
    select( rider_name , heats, mean_time_break , total_distance) %>%
    mutate(total_distance = round(total_distance,0),
           mean_time_break = round(mean_time_break,1)) %>%
    as.data.frame

  return(table)
}
elo2zero/juicystat.vis documentation built on May 31, 2019, 8:06 a.m.