knitr::opts_chunk$set( warning = FALSE, message = FALSE, fig.width = 7, fig.height = 5, collapse = TRUE, comment = "#>" ) library(giro2019) library(DT)
In this blog post I will be looking at different stats from Giro roadbook(s). If you enjoy it and would like to play with the data more, you can download the whole package including the data and all the reports with devtools::install_github("schubertjan/giro2019")
.
#scrape data from wiki stages <- stage.scrape() #calculate cummulative sum after stage 6 dist.after.6 <- sapply(stages, function(x){ x <- x[!is.na(x[, 4]), ] d <- x[, 4] cumsum(d)[6] }) #plot barplot(height = dist.after.6, names.arg = names(dist.after.6), main = "Distance (km) Covered After Stage 6", ylab = "Distance (km)", cex.names = .7, cex.axis = .7, las=2) abline(h = tail(dist.after.6,1), lty = 2) legend("topright", legend = c("Giro 2019 - 1046 (km)"), lty = 2, cex = .5)
dat <- list(Roglic = tt[tt$rider == "Roglic", ], Dumoulin = tt[tt$rider == "Dumoulin", ], Yates = tt[tt$rider == "Yates S.", ]) #we will remove tour de romandie 2018 ITT as that was uphill and an outlier dat$Roglic <- dat$Roglic[- c(5), ] par(mfrow = c(3,1), mar = c(3,4,2,4)) x <- lapply(1:length(dat), function(i) { d <- dat[[i]] plot(d$len, d$time, xlab = "", ylab = "", main = paste0("ITT 2018/2019 - ", names(dat)[i])) abline(lm(time ~ len -1, data = d, w = 1/d$len)) text(d$len, d$time,labels = d$race, cex = .8) title(ylab="Time (min)", xlab = "Length (km)", line=2) }) #we will remove tour de romandie 2018 ITT as that was uphill and an outlier m <- list(roglic_m = lm(time ~ len - 1, data = dat$Roglic, w = 1/dat$Roglic$len), dumoulin_m = lm(time ~ len - 1, data = dat$Dumoulin, w = 1/dat$Dumoulin$len), yates_m = lm(time ~ len - 1, data = dat$Yates, w = 1/dat$Yates$len)) lapply(m, summary) pred <- sapply(m, predict, data.frame(len = c(5.9))) obs <- c("6:48", "7:06", "6:59") s <- as.numeric(format(strptime(obs, format = "%M:%S"), format = "%S")) * 1/60^2 m <- as.numeric(format(strptime(obs, format = "%M:%S"), format = "%M")) * 1/60 t <- (m+s)*60 (t - pred)*60/100 t/pred
dat <- list(Roglic = tt[tt$rider == "Roglic", ], Dumoulin = tt[tt$rider == "Dumoulin", ], Yates = tt[tt$rider == "Yates S.", ]) #we will remove tour de romandie 2018 ITT as that was uphill and an outlier dat$Roglic <- dat$Roglic[- c(5), ] par(mfrow = c(3,1), mar = c(3,4,2,4)) x <- lapply(1:length(dat), function(i) { d <- dat[[i]] plot(d$len, d$time, xlab = "", ylab = "", main = paste0("ITT 2018/2019 - ", names(dat)[i])) abline(lm(time ~ len -1, data = d, w = 1/d$len)) text(d$len, d$time,labels = d$race, cex = .8) title(ylab="Time (min)", xlab = "Length (km)", line=2) }) #we will remove tour de romandie 2018 ITT as that was uphill and an outlier m <- list(roglic_m = lm(time ~ len - 1, data = dat$Roglic, w = 1/dat$Roglic$len), dumoulin_m = lm(time ~ len - 1, data = dat$Dumoulin, w = 1/dat$Dumoulin$len), yates_m = lm(time ~ len - 1, data = dat$Yates, w = 1/dat$Yates$len)) lapply(m, summary) par(mfrow = c(1,1), mar = c(5,4,5,4)) plot(c(7,35), c(8,45), type = "n", xlab = "Length (km)", ylab = "Time (min)", main = "Expected Time in ITT") abline(m$roglic_m, col = "gold", lwd = 2) abline(m$dumoulin_m, col = "firebrick3", lwd = 2) abline(m$yates_m, col = "black", lwd = 2) abline(v = c(8,34.8,17), lty = 2) text(10,45,labels = "ITT1 - 8km", cex = .8) text(32.5,45,labels = "ITT2 - 34.8km", cex = .8) text(19.5,45,labels = "ITT3 - 17km", cex = .8) legend("bottomright", legend = c("Roglic", "Dumoulin", "Yates"), col = c("gold", "firebrick3", "black"), lwd = c(2,2,2), cex = .8) out <- sapply(m, predict, data.frame(len = c(8,34.8,17))) m <- round( floor((out %% 1)*100) * 0.6, 0) m <- ifelse(nchar(m) == 1, paste0("0",m),m) out_print <- matrix(paste0(floor(out), ":", m), ncol = 3, nrow = 3) #out_print <- as.data.frame(out_print) colnames(out_print) <- c("Roglic", "Dumoulin", "Yates") row.names(out_print) <- c("ITT1", "ITT2", "ITT3") knitr::kable(out_print) out_diff <- apply(out, 2, function(x) out[, 3]-x) out_diff <- as.data.frame(out_diff[, -3]) out_diff[4, ] <- colSums(out_diff) out_diff <- as.matrix(out_diff) m <- round(floor((out_diff %% 1)*100) * 0.6, 0) m <- ifelse(nchar(m) == 1, paste0("0",m),m) out_d_print <- matrix(paste0(floor(out_diff), ":", m), ncol = 2, nrow = 4) colnames(out_d_print) <- c("Roglic", "Dumoulin") row.names(out_d_print) <- c("ITT1", "ITT2", "ITT3", "Total") knitr::kable(out_d_print)
Today's data comes from Giro road book and has been downloaded using the tabulizer package: tabulizer::extract_tables("http://www.giroditalia.it/wp-content/uploads/2019/04/Garibaldi_Giro_italia_2019_low_res.pdf", pages = c(17, 18))
tabulizer::extract_tables("https://static2.giroditalia.it/wp-content/uploads/2017/04/Garibaldi_2017.pdf", pages = c(10))
tabulizer::extract_tables("https://legaciclismo.files.wordpress.com/2018/05/garibaldi-2018.pdf", pages = c(19, 20))
par(mfcol = c(3,2), mar = c(2,4,2,4)) hist(climbs_2017$p.media, breaks = 10, xlim = c(2, 13), cex.main = 1, cex.lab = .8, cex.axis = .8, xlab = "Average Gradient", main = "Distribution of average gradient\n categorized climbs") hist(climbs_2018$p.media, breaks = 10, xlim = c(2, 13), cex.main = 1, cex.lab = .8, cex.axis = .8, main = "") hist(climbs$p.med, breaks = 10, xlim = c(2, 13), cex.main = 1, cex.lab = .8, cex.axis = .8, main = "") hist(climbs_2017$p.max, breaks = 10, xlim = c(5, 22), cex.main = 1, cex.lab = .8, cex.axis = .8, xlab = "Max Gradient", main = "Distribution of max gradient\n categorized climbs") hist(climbs_2018$p.max, breaks = 10, xlim = c(5, 22), cex.main = 1, cex.lab = .8, cex.axis = .8, main = "") hist(climbs$p.max, breaks = 10, xlim = c(5, 22), cex.main = 1, cex.lab = .8, cex.axis = .8, main = "")
l <- list(giro_2017 = climbs_2018[, c("length.km", "p.media","p.max")], giro_2018 = climbs_2017[, c("length.km", "p.media","p.max")], giro_2019 = climbs[, c("lungh.(km)", "p.med","p.max")] ) lapply(l, function(x) summary(x)[4, ])
Today's data comes from Giro road book and has been downloaded using the tabulizer package: tabulizer::extract_tables("http://www.giroditalia.it/wp-content/uploads/2019/04/Garibaldi_Giro_italia_2019_low_res.pdf", pages = c(17, 18))
tabulizer::extract_tables("https://static2.giroditalia.it/wp-content/uploads/2017/04/Garibaldi_2017.pdf", pages = c(10))
tabulizer::extract_tables("https://legaciclismo.files.wordpress.com/2018/05/garibaldi-2018.pdf", pages = c(19, 20))
Riders did an extra 3000m of climbing in 2017 compared with 2019. 2019 sees 1800m more categorized climbing than last year, or one big category 1 climb. All is building up for week 2 and 3. Unusually late GC action for Giro. On paper, stage 20 looks like the hardest in the last 3 years. Despite little GC action in week 1, riders will end week 2 with about the same amount of gained elevation as in previous 2 years. Week 3 is tougher than last year. Will be extra important to time the form right!
#fill the data with all stages fill <- data.frame(tappa = 1:21) climbs_2019 <- merge(fill, climbs, by = "tappa", all.x = TRUE) fill <- data.frame(stage = 1:21) climbs_2018 <- merge(fill, climbs_2018, by = "stage", all.x = TRUE) climbs_2017 <- merge(fill, climbs_2017, by = "stage", all.x = TRUE) #replace NA climbs_2019$`disl.(m)` <- ifelse(is.na(climbs_2019$`disl.(m)`), 0, climbs_2019$`disl.(m)`) climbs_2018$disl.m <- ifelse(is.na(climbs_2018$disl.m), 0, climbs_2018$disl.m) climbs_2017$disl.m <- ifelse(is.na(climbs_2017$disl.m), 0, climbs_2017$disl.m) #create colours col_2017 <- rgb(200, 79, 178, maxColorValue = 255) col_2018 <- rgb(105, 147, 45, maxColorValue = 255) col_2019 <- rgb(85, 130, 169, maxColorValue = 255) comp <- data.frame(Year = factor(c("2017", "2018", "2019")), Total.elev = c(sum(climbs_2017$disl.m, na.rm = TRUE), sum(climbs_2018$disl.m, na.rm = TRUE), sum(climbs_2019$`disl.(m)`, na.rm = TRUE))) with(comp,barplot(Total.elev, names.arg = Year, xlab = "Stage", ylab = "Total Elevation (m)", main = "Total Elevation Gained in Categorized Climbs", cex.axis = .8, cex.names = .8, col = c(col_2017, col_2018, col_2019))) DT::datatable(comp, rownames = FALSE) #summary by stage s_2019 <- with(climbs_2019, tapply(`disl.(m)`, tappa, sum)) s_2018 <- with(climbs_2018, tapply(disl.m, stage, sum)) s_2017 <- with(climbs_2017, tapply(disl.m, stage, sum)) par(mfrow = c(3,1), mar = c(2,4,2,4)) plot(names(s_2017), s_2017, type = "b", lwd = 3, col = col_2017, ylim = c(0, max(c(s_2017, s_2018, s_2019))), main = "Total Elevation Gained in Categorized Climbs By Stages", ylab = "Total Elevation (m)", xlab = "Stage", cex.axis = .85, cex.lab = .85, cex.main = 1.2, xaxt="n", pch = 16) axis(1, labels = names(s_2017), at = names(s_2017), cex.axis = .85) legend("topleft", legend = c("2017", "2018", "2019", "Rest days"), col = c(col_2017, col_2018, col_2019, "black"), lwd = c(3,3,3,1), lty = c(1,1,1,2), cex = .8) abline(v = 3.5, lty = 2) abline(v = 9.5, lty = 2) abline(v = 15.5, lty = 2) plot(names(s_2017), s_2018, type = "b", lwd = 3, col = col_2018, ylim = c(0, max(c(s_2017, s_2018, s_2019))), ylab = "Total Elevation (m)", cex.axis = .85, cex.lab = .85, xaxt="n", pch = 16) axis(1, labels = names(s_2017), at = names(s_2017), cex.axis = .85) abline(v = 3.5, lty = 2) abline(v = 9.5, lty = 2) abline(v = 15.5, lty = 2) plot(names(s_2017), s_2019, type = "b", lwd = 3, col = col_2019, ylim = c(0, max(c(s_2017, s_2018, s_2019))), ylab = "Total Elevation (m)", cex.axis = .85, cex.lab = .85, xaxt="n", pch = 16) axis(1, labels = names(s_2017), at = names(s_2017), cex.axis = .85) abline(v = 9.5, lty = 2) abline(v = 15.5, lty = 2) #summary by stage cs_2017 <- cumsum(s_2017) cs_2018 <- cumsum(s_2018) cs_2019 <- cumsum(s_2019) #replace NA par(mfrow = c(1,1), mar = c(5,4,4,2)) plot(names(s_2017), cs_2017, type = "b", lwd = 3, col = col_2017, ylim = c(0, max(c(cs_2017, cs_2018, cs_2019))), main = "Cumulative Total Elevation Gained in Categorized Climbs By Stages", ylab = "Cumulative Total Elevation (m)", xlab = "Stage", cex.main = .8, cex.axis = .8, cex.lab = .8, xaxt="n", pch = 16) legend("topleft", legend = c("2017", "2018", "2019"), col = c(col_2017, col_2018, col_2019), lwd = c(3,3,3), cex = .8) lines(cs_2018, lwd = 3, pch = 16,col = col_2018, type = "b") lines(cs_2019, lwd = 3, pch = 16, col = col_2019, type = "b") axis(1, labels = names(s_2017), at = names(s_2017), cex.axis = .8) legend("topleft", legend = c("2017", "2018", "2019"), col = c(col_2017, col_2018, col_2019), lwd = c(3,3,3), lty = c(1,1,1), cex = .8) DT::datatable(data.frame(giro2017 = s_2017, giro2018 = s_2018, giro2019 = s_2019))
The data comes from 2019 Giro road book and has been downloaded using the tabulizer package tabulizer::extract_tables("http://www.giroditalia.it/wp-content/uploads/2019/04/Garibaldi_Giro_italia_2019_low_res.pdf", pages = c(17, 18))
.
Below is a detailed overview of all classified climbs by stages. The x axis shows the length of a climb (km), the y axis elevation difference (m). All charts use the same axis to allow relative comparison. The colour represents climb's category.
Most of climbing action is packed between 1 and 2 rest day. Stage 20 is packed with climbing.
#pre-process data climbs_l <- split(climbs,climbs$tappa) n <- lapply(climbs_l,function(x) nrow(x) + 1) #creare NA for stages with < 5 climbs for(i in 1:length(climbs_l)){ if(n[i] != 6){ for(j in n[[i]]:5){ climbs_l[[i]][j, ] <- NA } } } climbs_full <- do.call(rbind, climbs_l) par(mfrow = c(19,5), mar = c(1,1,2,1)) x <- lapply(1:nrow(climbs_full), function(i) { #create data x <- c(0, climbs_full$`lungh.(km)`[i]) y <- c(0, climbs_full$`disl.(m)`[i]) #if no data plot empty else plot climb if(is.na(x[2])) { plot(0,0,type = "n", axes = FALSE,ann = FALSE) } else { dat <- giro2019::createhilldata(x = climbs_full$`lungh.(km)`[i], y = climbs_full$`disl.(m)`[i], cat = climbs_full$cat[i]) giro2019::hillplot(x = dat$x, y = dat$y, col_plot = dat$plot_col, x_lim = max(climbs_full$`lungh.(km)`, na.rm = TRUE), y_lim = max(climbs_full$`disl.(m)`, na.rm = TRUE), stage = climbs_full$tappa[i], climb_name = climbs_full$gpm[i], showaxis = FALSE) } }) DT::datatable(climbs[, c(1,4,5,11,12,13)], rownames = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.