```{R Compare events} require('parkrun')

courses <- c('riverside', 'durham', 'gibside', 'cotsfordfields')

courseResults <- lapply(courses, function (course) { eventHistory <- EventHistory(course) eventResults <- list.files(EventDirectory(course), pattern="\d+", full.names=TRUE) ret <- suppressWarnings(dplyr::bind_rows(lapply(eventResults, read.table))) ret$date <- as.Date(eventHistory[as.character(ret$runSeqNumber), 'date'], origin='1970-01-01') ret }) results <- suppressWarnings(dplyr::bind_rows(courseResults)) results <- results[!is.na(results$athleteNumber), ] results$runSeqNumber <- paste0(results$eventName, results$runSeqNumber) results$eventName <- relevel(as.factor(results$eventName), ref=courses[1])

uniqueAthletes <- na.omit(unique(results$athleteNumber)) athletesAtEvent <- vapply(courseResults, function (x) uniqueAthletes %in% x$athleteNumber, logical(length(uniqueAthletes))) tourists <- uniqueAthletes[rowSums(athletesAtEvent) > 1]

usefulResults <- results[results$athleteNumber %in% tourists, ]

outliers <- as.character(unlist(by(usefulResults, usefulResults$athleteNumber, function(x) { athleteSpeeds <- 5000 / x$timeInSeconds normalRange <- median(athleteSpeeds) + (sd(athleteSpeeds) * c(-4, +4)) rownames(x)[athleteSpeeds < normalRange[1] | athleteSpeeds > normalRange[2]] }))) usefulResults <- usefulResults[!rownames(usefulResults) %in% outliers, ]

athleteResults <- table(usefulResults$athleteNumber) athleteEntries <- rev(table(athleteResults))

tooManyAthletes <- 500 minRuns <- names(athleteEntries[sum(cumsum(athleteEntries) < tooManyAthletes)]) usefulAthletes <- rownames(athleteResults)[athleteResults >= as.integer(minRuns)] usefulResults <- usefulResults[usefulResults$athleteNumber %in% usefulAthletes, ]

eventSeason <- EventSeason(usefulResults$date)

timeModel <- lm(I(5000 / timeInSeconds) ~ (eventName * (sin(eventSeason) + cos(eventSeason))) + (as.factor(athleteNumber) * eventSeason), data=usefulResults)

timeModel <- lm(I(5000 / timeInSeconds) ~ eventName + sin(eventSeason) + cos(eventSeason) + (as.factor(athleteNumber) * eventSeason), data=usefulResults)

BIC prefers single sin/cos for all events

step(timeModel)

timeResid <- resid(timeModel)

#plot(time)

The passage of time makes no significant contribution

oldModel <- lm(I(5000 / usefulResults$timeInSeconds) ~ usefulResults$eventName + as.factor(usefulResults$athleteNumber)) oldCoefs <- summary(model)$coefficients coefs <- summary(timeModel)$coefficients

intercept <- coefs[1, 1] exampleTimes <- c(17, 18.5, 19, 19.5, 20, 21, 22, 25, 28, 30, 35) * 60 exampleSpeeds <- 5000 / exampleTimes names(exampleSpeeds) <- SecondsToMinutes(exampleTimes)

eventCoefs <- rbind(coefs[1L + seq_along(courses[-1]), c(1, 2), drop=FALSE]) ret <- apply(eventCoefs, 1, function(x) { target <- 5000 / (exampleSpeeds + x[1]) max <- 5000 / (exampleSpeeds + (x[1] - x[2])) min <- 5000 / (exampleSpeeds + (x[1] + x[2])) paste0(SecondsToMinutes(target), " ± ", round(colMeans(rbind(target - min, max - target))), 's') })

sinTerm <- coefs['sin(eventSeason)', 'Estimate'] cosTerm <- coefs['sin(eventSeason)', 'Estimate'] seasonVariance <- sqrt((sinTerm * sinTerm) + (cosTerm * cosTerm)) slowPhase <- acos(cosTerm / seasonVariance) fastPhase <- slowPhase + pi yearToRad <-365.25 / (2 * pi)

ret <- cbind(SecondsToMinutes(exampleTimes), ret, paste0(round((5000 / (exampleSpeeds - seasonVariance)) - (5000 / (exampleSpeeds + seasonVariance)), 1), 's')) eventSequence <- sub('.eventName(.)', '\1', rownames(eventCoefs)) colnames(ret) <- c(courses[1], eventSequence, paste0('Diff, ', format(as.Date('1999-01-01') + slowPhase * yearToRad, "%b %d") , ' to ', format(as.Date('1999-01-01') + fastPhase * yearToRad, "%b %d")) )

Return:

knitr::kable(ret) ```



ms609/parkrun documentation built on June 5, 2019, 11:32 a.m.