# Functions for processing HTML files raw.
process.pl.old <- function (pl.html) {
date <- unlist(strsplit (gsub(" *([A-Za-z]+), ([A-Za-z]+) ([0-9]+), ([0-9]+).*",
"\\1;\\2;\\3;\\4",
pl.html[grep("[A-Za-z]+, [A-Za-z]+ [0-9]+, [0-9]+", pl.html)]),
";"))
nums <- range(grep("[Pp][Rr][Ee]>", pl.html))
if (nums[1] == nums[2]) nums[2] <- length(pl.html)
extract <- pl.html[(nums[1]+3):(nums[2]-1)]
extract <- gsub("<i>(.*)</i>", "\\1", extract)
extract <- gsub("<I>(.*)</I>", "\\1", extract)
extract <- gsub("<strong>", "", extract)
extract <- gsub("</strong>", "", extract)
extract <- gsub("<STRONG>", "", extract)
extract <- gsub("</STRONG>", "", extract)
extract <- gsub("Penalty Shot,", "Penalty Shot", extract)
extract <- gsub("\\t ", "", extract)
extract <- gsub("\\t", "", extract)
goal.lines <- which(substr(extract,5,5) == " ")
#event.rec <- extract[gsub(" +([0-9]+).*", "\\1", extract) != extract]
if (length(goal.lines)>0) event.rec <- extract[-goal.lines] else event.rec <- extract
event.rec <- event.rec[substr(event.rec,5,5) %in% as.character(0:9) &
substr(event.rec,1,1) == " "]
goal.players <- extract[goal.lines]
goal.players <- goal.players[grep("^ *[A-Z\\.]{3}", goal.players)]
goal.players <- t(matrix(goal.players[!grepl(" +SO +", goal.players)], nrow=2))
new.event <- data.frame(event=0, period=0, seconds=0, etype="", etext="",
a1="", a2="", a3="", a4="", a5="", a6="",
h1="", h2="", h3="", h4="", h5="", h6="",
ev.team="",
ev.player.1="", ev.player.2="", ev.player.3="",
distance=NA, type="", homezone="",
stringsAsFactors=FALSE)
playbyplay <- NULL; for (kk in 1:length(event.rec)) playbyplay <- rbind(playbyplay, new.event)
for (kk in c(4:20,22:24)) playbyplay[,kk] <- as.character(playbyplay[,kk])
playbyplay$event <- as.numeric(substr(event.rec,1,5))
playbyplay$period <- as.numeric(substr(event.rec,6,9))
playbyplay$seconds <- 1200*(playbyplay$period-1)+60*as.numeric(substr(event.rec,12,13))+as.numeric(substr(event.rec,15,16))
playbyplay$etype <- substr(event.rec,18,34)
playbyplay$ev.team <- substr(event.rec,35,37)
playbyplay$etext <- substr(event.rec,43,nchar(event.rec))
#BLOCK, GOAL, MISS, SHOT FAC, GIVE, TAKE PENL HIT
#STOP PEND
event.types <- matrix(c("BLOCK", "BLOCKED SHOT ", "GOAL", "GOAL ",
"MISS", "MISSED SHOT ", "SHOT", "SHOT ",
"FAC", "FACE-OFF ", "GIVE", "GIVEAWAY ",
"TAKE", "TAKEAWAY ", "PENL", "PENALTY ",
"HIT", "HIT ", "STOP", "STOPPAGE ",
"", "GOALIE ", "HIT", "HIT (!) ",
"HIT", "HIT (*) ", "SHOT", "SHOT (!) ",
"SHOT", "SHOT (*) "), nrow=2)
playbyplay$etype <- event.types[1, match(playbyplay$etype, event.types[2,])]
playbyplay$etype[is.na(playbyplay$etype)] <- ""
playbyplay <- playbyplay[!(playbyplay$etype %in% c("STOP","", "PEND")),]
#playbyplay[playbyplay$etype=="GOAL",-(6:17)]
# FAC.
#c1 <- which(playbyplay$etype=="FAC")
#teamwon;homezone;ateam;anum;aname;hteam;hnum;hname
p1 <- sapply(gsub(" *([A-Z'\\.\\-]+) won - ([a-z]+) zone\\. ([A-Z'\\.\\-]+) +([0-9]+) ([A-Z' \\.\\-]+) vs ([A-Z'\\.\\-]+) +([0-9]+) ([A-Z' \\.\\-]+)",
"\\1;\\2;\\3;\\4;\\5;\\6;\\7;\\8",
playbyplay$etext[playbyplay$etype=="FAC"]),
function(tt) unlist(strsplit (tt, ";")))
teams <- p1[c(3,6), 1]
playbyplay$ev.team[playbyplay$etype=="FAC"] <- p1[1,]
playbyplay$homezone[playbyplay$etype=="FAC"] <- p1[2,]
playbyplay$ev.player.1[playbyplay$etype=="FAC"][p1[1,]==p1[3,]] <- paste(p1[4,], p1[5,])[p1[1,]==p1[3,]]
playbyplay$ev.player.1[playbyplay$etype=="FAC"][p1[1,]!=p1[3,]] <- paste(p1[7,], p1[8,])[p1[1,]!=p1[3,]]
playbyplay$ev.player.2[playbyplay$etype=="FAC"][p1[1,]==p1[3,]] <- paste(p1[7,], p1[8,])[p1[1,]==p1[3,]]
playbyplay$ev.player.2[playbyplay$etype=="FAC"][p1[1,]!=p1[3,]] <- paste(p1[4,], p1[5,])[p1[1,]!=p1[3,]]
#BLOCK
#num;name
if (sum(playbyplay$etype=="BLOCK") > 0) {
p1 <- sapply(gsub(" *([0-9]+) +([A-Z' \\.\\-]+)", "\\1;\\2",
playbyplay$etext[playbyplay$etype=="BLOCK"]),
function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[playbyplay$etype=="BLOCK"] <- paste(p1[1,], p1[2,])
}
#MISS
#num;name
if (sum(playbyplay$etype=="MISS") > 0) {
p1 <- sapply(gsub(" *([0-9]+) +([A-Z' \\.\\-]+),.*", "\\1;\\2",
playbyplay$etext[playbyplay$etype=="MISS"]),
function(tt) {
text1 <- unlist(strsplit (tt, ";"))
if (length(text1) == 1) text1 <- unlist(strsplit(gsub(" *([0-9]+) +([A-Z' \\.\\-]+) .*", "\\1;\\2", text1), ";"))
text1
})
playbyplay$ev.player.1[playbyplay$etype=="MISS"] <- paste(p1[1,], p1[2,])
}
#HIT, TAKE, GIVE
#num;name
if (sum(playbyplay$etype %in% c("HIT", "GIVE", "TAKE")) > 0) {
p1 <- sapply(gsub(" *([0-9]+) +([A-Z' \\.\\-]+)", "\\1;\\2",
playbyplay$etext[playbyplay$etype %in% c("HIT", "TAKE", "GIVE")]),
function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[playbyplay$etype %in% c("HIT", "TAKE", "GIVE")] <- paste(p1[1,], p1[2,])
}
#PENL
#num;name;type
if (sum(playbyplay$etype=="PENL") > 0) {
p1 <- sapply(gsub(" *([0-9]+) +([A-Z' \\.\\-]+), (.*)", "\\1;\\2;\\3",
playbyplay$etext[playbyplay$etype=="PENL"]),
function(tt) {
text1 <- unlist(strsplit (tt, ";"))
if (length(text1) != 3) text1 <- c("","","Bench Minor")
text1
})
playbyplay$ev.player.1[playbyplay$etype=="PENL"] <- paste(p1[1,], p1[2,])
playbyplay$type[playbyplay$etype=="PENL"] <- p1[3,]
}
#SHOT
#name happens one time in mixed case: 38 Robinson, 0304-2-0333
if (sum(playbyplay$etype=="SHOT") > 0) {
p1 <- sapply(gsub(" *([0-9]+) +([A-Za-z' \\.\\-]+), +([A-Za-z '\\-]+), +([0-9]+) .*", "\\1;\\2;\\3;\\4",
playbyplay$etext[playbyplay$etype=="SHOT"]),
function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[playbyplay$etype=="SHOT"] <- paste(p1[1,], toupper(p1[2,]))
playbyplay$type[playbyplay$etype=="SHOT"] <- p1[3,]
playbyplay$distance[playbyplay$etype=="SHOT"] <- as.numeric(p1[4,])
}
#GOAL
#2 assists
c2 <- which(playbyplay$etype=="GOAL" & grepl("A: +[0-9]+ +[A-Z' \\.\\-]+, +[0-9]+", playbyplay$etext))
if (length(c2)>0) {
p1 <- sapply(gsub(" *([0-9]+) +([A-Z' \\.\\-]+),? +A: +([0-9]+) +([A-Z' \\.\\-]+), +([0-9]+) +([A-Z' \\.\\-]+),? *([A-Za-z '\\-]+), +([0-9]+) .*",
"\\1;\\2;\\3;\\4;\\5;\\6;\\7;\\8",
playbyplay$etext[c2]),
function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[c2] <- paste(p1[1,], p1[2,])
playbyplay$ev.player.2[c2] <- paste(p1[3,], p1[4,])
playbyplay$ev.player.3[c2] <- paste(p1[5,], p1[6,])
playbyplay$type[c2] <- p1[7,]
playbyplay$distance[c2] <- as.numeric(p1[8,])
}
#1 assist
#name happens one time in mixed case: 22 Pock- 0304-2-1136
c2 <- which(playbyplay$etype=="GOAL" & grepl("A: +[0-9]+ +[A-Z' \\.\\-]+, +[A-Za-z]+", playbyplay$etext))
if (length(c2)>0) {
p1 <- sapply(gsub(" *([0-9]+) ([A-Za-z' \\.\\-]+), A: +([0-9]+) ([A-Z' \\.\\-]+), ([A-Za-z '\\-]+), +([0-9]+) .*",
"\\1;\\2;\\3;\\4;\\5;\\6",
playbyplay$etext[c2]),
function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[c2] <- paste(p1[1,], toupper(p1[2,]))
playbyplay$ev.player.2[c2] <- paste(p1[3,], toupper(p1[4,]))
playbyplay$type[c2] <- p1[5,]
playbyplay$distance[c2] <- as.numeric(p1[6,])
}
#unassisted
c2 <- which(playbyplay$etype=="GOAL" & grepl("A: +[A-Za-z'\\-]+, +[A-Za-z]+", playbyplay$etext))
if (length(c2)>0) {
p1 <- sapply(gsub(" *([0-9]+) ([A-Z' \\.\\-]+), A: [A-Za-z'\\-]+, ([A-Za-z '\\-]+), +([0-9]+).*",
"\\1;\\2;\\3;\\4",
playbyplay$etext[c2]),
function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[c2] <- paste(p1[1,], p1[2,])
playbyplay$type[c2] <- p1[3,]
playbyplay$distance[c2] <- as.numeric(p1[4,])
}
# players on ice for goals.
#goal.rec <- array(0, c(dim(goal.players), 6))
goal.evs <- which(playbyplay$etype=="GOAL")
if (length(goal.evs) > 0)
for (ii in 1:dim(goal.players)[1]) {
for (jj in 1:2) {
#home team?
home <- 1*(gsub(" +([A-Z\\.]+).*", "\\1", goal.players[ii,jj])==teams[2])
sq <- unlist(strsplit(goal.players[ii,jj], " "))
#manual fix for space names like DE VRIES.
for (xx in length(sq):2)
if (grepl("[A-Z]+", sq[xx]) & grepl("[A-Z]+", sq[xx-1])) {
sq[xx-1] <- paste(sq[xx-1], sq[xx])
sq <- sq[-xx]
}
nums <- grep("[0-9]+", sq)
playbyplay[goal.evs[ii],5+6*home+1:length(nums)] <- paste(sq[nums], sq[nums+1])
}
}
playbyplay$homezone[playbyplay$homezone == "defensive"] <- "Def"
playbyplay$homezone[playbyplay$homezone == "offensive"] <- "Off"
playbyplay$homezone[playbyplay$homezone == "neutral"] <- "Neu"
#remove text.
playbyplay <- playbyplay[,-5]
#flip zone to home zone referent.
flip.1 <- playbyplay$ev.team == teams[1] & playbyplay$homezone=="Off"
flip.2 <- playbyplay$ev.team == teams[1] & playbyplay$homezone=="Def"
playbyplay$homezone[flip.1] <- "Def"
playbyplay$homezone[flip.2] <- "Off"
#Add this.
playbyplay$homezone[playbyplay$etype %in% c("SHOT", "BLOCK", "GOAL", "MISS") & playbyplay$ev.team == teams[1]] <- "Def"
playbyplay$homezone[playbyplay$etype %in% c("SHOT", "BLOCK", "GOAL", "MISS") & playbyplay$ev.team == teams[2]] <- "Off"
output <- list(playbyplay=playbyplay,
teams=teams,
date=date)
return(output)
}
p.split <- function(bit, pieces) {
out1 <- unlist(strsplit (bit, ";"))
if (length(out1) == 1) out1 <- rep("", pieces)
out1
}
process.pl.new <- function (pl.html) {
#pl.html = game.rec$pl
teams <- pl.html[grep(".*On Ice.*", pl.html)][1:2]
teams <- gsub("<td width=\"10%\" class=\"heading \\+ bborder\" align=\"center\">([A-Z\\.]+) On Ice</td>.*", "\\1", teams)
date <- pl.html[grep("<td align=\"center\" style=\"font-size: 10px;font-weight:bold\">([A-Za-z]+), ([A-Za-z]+) ([0-9]+), ([0-9]+)</td>", pl.html)][1]
date <- gsub("<td align=\"center\" style=\"font-size: 10px;font-weight:bold\">([A-Za-z]+), ([A-Za-z]+) ([0-9]+), ([0-9]+)</td>", "\\1;\\2;\\3;\\4", date)
date <- unlist(strsplit(date, ";"))
pl.main <- {
find.1 <- grep("class=\"[a-z \\+]* bborder\"", pl.html)
find.2 <- grep("<font style=\"cursor:hand;\"", pl.html)
pl.reduced <- pl.html[sort(c(find.1, find.2))]
pl.reduced <- gsub(" ", ", ", pl.reduced)
pl.reduced <- gsub("; ", ", ", pl.reduced)
# p: player id.
pl.reduced.1 <- gsub("<font style=\"cursor:hand;\" title=\"([A-Za-z ]+) - ([A-Za-z' \\.-]+)\">([0-9]+).*", "p;\\1;\\2;\\3", pl.reduced)
# s: time
pl.reduced.1 <- gsub("<td class=\"[a-z \\+]* \\+ bborder\" align=\"center\">([0-9]+):([0-9]+)<br.*", "s;\\1;\\2;", pl.reduced.1)
# q: period number.
pl.reduced.1 <- gsub("<td class=\"[a-z]* \\+ bborder\" align=\"center\">([0-9]+)</td>.*", "q;\\1;;", pl.reduced.1)
# t: event type;
pl.reduced.2 <- gsub("<td class=\"[a-z]* \\+ bborder\">([A-Z]{3,})</td>", "t;\\1;;", pl.reduced.1)
pl.reduced.2 <- gsub("<td class=\"[a-z]* \\+ bborder\" align=\"center\">([A-Z]{3,})</td>", "t;\\1;;", pl.reduced.2)
# x: event text;
#pl.reduced.3 <- gsub("<td class=\"[a-z]* \\+ bborder\">([A-Za-z0-9',# \\(\\)\\.-]{3,})</td>", "x;\\1;;", pl.reduced.2)
#pl.reduced.3 <- gsub("<td class=\"[a-z]* \\+ bborder\" align=\"center\">([A-Za-z0-9',# \\(\\)\\.-]{3,})</td>", "x;\\1;;", pl.reduced.3)
pl.reduced.3 <- gsub("<td class=\"[a-z]* \\+ bborder\">(.{3,})</td>", "x;\\1;;", pl.reduced.2)
pl.reduced.3 <- gsub("<td class=\"[a-z]* \\+ bborder\" align=\"center\">(.{3,})</td>", "x;\\1;;", pl.reduced.3)
# e: event number;
pl.reduced.4 <- gsub("<td align=\"center\" class=\"[a-z \\+]* \\+ bborder\">([0-9]+)</td>", "e;\\1;;", pl.reduced.3)
# b: break;
pl.reduced.5 <- gsub("<td class=\"[a-z \\+]* \\+ bborder\">", "b;;;", pl.reduced.4)
pl.reduced.5[substr(pl.reduced.5, 1, 1) != "<"]
}
#fix one.
pl.main <- gsub("Penalty Shot,", "Penalty Shot", pl.main)
pl.grid <- sapply(pl.main, function(tt) {
tt <- unlist(strsplit(tt, ";"))
while (length(tt) < 4) tt <- c(tt, "")
tt
})
playbyplay <- NULL
new.event <- data.frame(event=0, period=0, seconds=0, etype="", etext="",
a1=NA, a2=NA, a3=NA, a4=NA, a5=NA, a6=NA,
h1=NA, h2=NA, h3=NA, h4=NA, h5=NA, h6=NA)
started <- FALSE; current.event <- NULL
a.h <- rep(0,dim(pl.grid)[2])
sw <- 1
for (kk in 1:length(pl.main)) {
if (pl.grid[1,kk] %in% c("e","b")) sw <- 1-sw
a.h[kk] <- sw
}
pl.grid <- rbind(pl.grid, a.h)
players <- unique(t(pl.grid[2:5, pl.grid[1,]=="p"]))
rownames(players) <- NULL
for (row in 1:length(pl.main)) {
if (pl.grid[1, row] == "e") {
if (started) playbyplay <- rbind(playbyplay, current.event) else started <- TRUE
current.event <- new.event
current.event$event <- as.numeric(pl.grid[2,row])
coln <- 6
ttype <- FALSE
}
if (pl.grid[1, row] == "q") current.event$period <- as.numeric(pl.grid[2,row])
if (pl.grid[1, row] == "s") current.event$seconds <- 60*as.numeric(pl.grid[2,row])+as.numeric(pl.grid[3,row])
if (pl.grid[1, row] == "t") current.event$etype <- pl.grid[2,row]
if (pl.grid[1, row] == "x") current.event$etext <- pl.grid[2,row]
if (pl.grid[1, row] == "p") {
# current.event[1,coln] <- match(paste(pl.grid[3:4,row], collapse=" "),
# paste(players[,2],players[,3]))
current.event[1,coln] <- paste(pl.grid[4:3,row], collapse=" ") #pl.grid[4,row] #
if (coln<17) coln <- coln + 1
}
if (pl.grid[1, row] == "b") coln <- 12 #home team now.
}
playbyplay$ev.team <- substr(playbyplay$etext, 1, 3)
playbyplay$ev.player.1 <- ""
playbyplay$ev.player.2 <- ""
playbyplay$ev.player.3 <- ""
playbyplay$distance <- NA
playbyplay$type <- ""
playbyplay$homezone <- ""
#BLOCK, GOAL, MISS, SHOT FAC, GIVE, TAKE PENL HIT
#STOP PEND
playbyplay <- playbyplay[!(playbyplay$etype %in% c("PSTR","GEND","ICING","OFFSIDE")),]
#SHOT
c1 <- which(playbyplay$etype=="SHOT")
if (length(c1)>0) {
p1 <- sapply(gsub("[A-Z \\.#-]*?([0-9]+) ([A-Z' \\.-]+), ([A-Za-z-]+),[A-Za-z,\\. ]*([0-9]+).*", "\\1;\\2;\\3;\\4", playbyplay$etext[c1]),
function(tt) {
out1 <- unlist(strsplit (tt, ";"))
if (length(out1) != 4) { #No shot type listed.
out1 <- gsub(".*([0-9]+) ([A-Z' \\.-]+),[A-Za-z,\\. ]*([0-9]+).*", "\\1;\\2;\\3", out1)
out1 <- p.split(out1, 3)
out1 <- c(out1[1:2], "", out1[3])
}
out1
})
playbyplay$ev.player.1[playbyplay$etype=="SHOT"] <- paste(p1[1,], p1[2,])
playbyplay$distance[playbyplay$etype=="SHOT"] <- as.numeric(p1[4,])
playbyplay$type[playbyplay$etype=="SHOT"] <- p1[3,]
}
#BLOCK
#num;name;num2;name2;type
c1 <- which(playbyplay$etype=="BLOCK")
if (length(c1)>0) {
p1 <- sapply(gsub(".*#([0-9]+) ([A-Z' \\.-]+) BLOCK[A-Z\\. ]+#([0-9]+) ([A-Z' \\.-]+), ([A-Za-z \\.]+),.*", "\\1;\\2;\\3;\\4;\\5",
playbyplay$etext[c1]),
function(tt) {
out1 <- unlist(strsplit (tt, ";"))
if (length(out1)==1) {
out1 <- gsub(".*#([0-9]+) ([A-Z' \\.-]+) BLOCK[A-Z\\. ]+#([0-9]+) ([A-Z' \\.-]+),.*",
"\\1;\\2;\\3;\\4",
out1)
out1 <- p.split(out1, 4)
out1 <- c(out1, "")
}
out1
})
playbyplay$ev.player.1[playbyplay$etype=="BLOCK"] <- paste(p1[1,], p1[2,])
playbyplay$ev.player.2[playbyplay$etype=="BLOCK"] <- paste(p1[3,], p1[4,])
playbyplay$type[playbyplay$etype=="BLOCK"] <- p1[5,]
}
#MISS
#num;name;type;homezone;distance
#Wide of net; crossbar; post -- ignored for now.
c1 <- which(playbyplay$etype=="MISS")
if (length(c1)>0) {
p1 <- sapply(gsub(".* #([0-9]+) ([A-Z' \\.-]+), ([A-Za-z -]+),[A-Za-z,\\. ]*[rt], ([A-Za-z]+)\\..* ([0-9]+) .*", "\\1;\\2;\\3;\\4;\\5", playbyplay$etext[c1]),
function(tt) {
out1 <- unlist(strsplit (tt, ";"))
if (length(out1)==1) { #assume missing net location.
out1 <- gsub(".* #([0-9]+) ([A-Z' \\.-]+), ([A-Za-z -]+), ([A-Za-z]+)\\..* ([0-9]+) .*",
"\\1;\\2;\\3;\\4;\\5",
out1)
out1 <- p.split(out1, 5)
}
out1
})
playbyplay$ev.player.1[playbyplay$etype=="MISS"] <- paste(p1[1,], p1[2,])
playbyplay$distance[playbyplay$etype=="MISS"] <- as.numeric(p1[5,])
playbyplay$type[playbyplay$etype=="MISS"] <- p1[3,]
}
#GOAL
#num;name;type;distance
#c2 <- which(playbyplay$etype=="GOAL" & grepl("Assists:", playbyplay$etext))
c1 <- which(playbyplay$etype=="GOAL")
if (length(c1)>0) {
p1 <- sapply(gsub(".* #([0-9]+) ([A-Z' \\.-]+)[\\(\\)0-9, ]+([A-Za-z -]+), [A-Za-z]+\\..* ([0-9]+) .*",
"\\1;\\2;\\3;\\4", playbyplay$etext[c1]),
function(tt) {
out1 <- unlist(strsplit (tt, ";"))
if (length(out1)==1) { #assume missing shot type.
out2 <- gsub(".* #([0-9]+) ([A-Z' \\.-]+)[\\(\\)0-9, ]+ [A-Za-z]+\\..* ([0-9]+) .*",
"\\1;\\2;;\\3",
out1)
if (out2 == out1) #missing zone.
out2 <- gsub(".* #([0-9]+) ([A-Z' \\.-]+)[\\(\\)0-9, ]+([A-Za-z -]+), ([0-9]+) .*",
"\\1;\\2;\\3;\\4",
out1)
out1 <- p.split(out2, 4)
}
out1
})
playbyplay$ev.player.1[playbyplay$etype=="GOAL"] <- paste(p1[1,], p1[2,])
playbyplay$distance[playbyplay$etype=="GOAL"] <- as.numeric(p1[4,])
playbyplay$type[playbyplay$etype=="GOAL"] <- p1[3,]
}
c2 <- which(playbyplay$etype=="GOAL" & grepl("Assists:", playbyplay$etext))
if (length(c2) > 0) {
p1 <- sapply(gsub(".*Assists: #([0-9]+) ([A-Z' \\.-]+)[\\(\\)0-9]+, #([0-9]+) ([A-Z' \\.-]+)[\\(\\)0-9, ]+.*", "\\1;\\2;\\3;\\4", playbyplay$etext[c2]),
p.split, 4)
# function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.2[c2] <- paste(p1[1,], p1[2,])
playbyplay$ev.player.3[c2] <- paste(p1[3,], p1[4,])
}
c1 <- which(playbyplay$etype=="GOAL" & grepl("Assist:", playbyplay$etext))
if (length(c1) > 0) {
p1 <- sapply(gsub(".*Assist: #([0-9]+) ([A-Z' \\.-]+).*", "\\1;\\2;\\", playbyplay$etext[c1]),
p.split, 2)
playbyplay$ev.player.2[c1] <- paste(p1[1,], p1[2,])
}
#FAC
#zone;num1;name1;num2;name2
c1 <- which(playbyplay$etype=="FAC")
if (length(c1)>0) {
p1 <- sapply(gsub(".* won ([A-Za-z]+)\\..*#([0-9]+) ([A-Z' \\.-]+) .*#([0-9]+) ([A-Z' \\.-]+).*", "\\1;\\2;\\3;\\4;\\5",
playbyplay$etext[c1]),
p.split, 5)
playbyplay$ev.player.1[c1] <- paste(p1[2,], p1[3,])
playbyplay$ev.player.2[c1] <- paste(p1[4,], p1[5,])
playbyplay$homezone[c1] <- p1[1,]
}
# GIVE
#num;name;zone
c1 <- which(playbyplay$etype=="GIVE")
if (length(c1)>0) {
p1 <- sapply(gsub(".*#([0-9]+) ([A-Z' \\.-]+), ([A-Za-z]+)\\..*", "\\1;\\2;\\3",
playbyplay$etext[playbyplay$etype=="GIVE"]),
p.split, 3)
# function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[playbyplay$etype=="GIVE"] <- paste(p1[1,], p1[2,])
playbyplay$homezone[playbyplay$etype=="GIVE"] <- p1[3,]
}
# TAKE
#num;name;zone
c1 <- which(playbyplay$etype=="TAKE")
if (length(c1)>0) {
p1 <- sapply(gsub(".*#([0-9]+) ([A-Z' \\.-]+), ([A-Za-z]+)\\..*", "\\1;\\2;\\3",
playbyplay$etext[playbyplay$etype=="TAKE"]),
p.split, 3)
playbyplay$ev.player.1[playbyplay$etype=="TAKE"] <- paste(p1[1,], p1[2,])
playbyplay$homezone[playbyplay$etype=="TAKE"] <- p1[3,]
}
# PENL
c1 <- which(playbyplay$etype=="PENL" & grepl("Drawn By:", playbyplay$etext))
if (length(c1) > 0) {
p1 <- sapply(gsub(".*#([0-9]+) ([A-Z' \\.-]+), ([A-Za-z\\(\\)0-9 -]+), ([A-Za-z]+)\\..*#([0-9]+) ([A-Z' \\.-]+).*", "\\1;\\2;\\3;\\4;\\5;\\6",
playbyplay$etext[c1]),
p.split, 6)
playbyplay$ev.player.1[c1] <- paste(p1[1,], p1[2,])
playbyplay$ev.player.2[c1] <- paste(p1[5,], p1[6,])
playbyplay$homezone[c1] <- p1[4,]
playbyplay$type[c1] <- p1[3,]
}
c1 <- which(playbyplay$etype=="PENL" & !grepl("Drawn By:", playbyplay$etext))
if (length(c1) > 0) {
p1 <- sapply(gsub(".*#([0-9]+) ([A-Z' \\.-]+), ([A-Za-z\\(\\)0-9 :#-]+), ([A-Za-z]+)\\..*", "\\1;\\2;\\3;\\4",
playbyplay$etext[c1]),
function(tt) {
out1 <- unlist(strsplit (tt, ";"))
if (length(out1) == 1) out1 <- c("","","Bench minor", "")
out1
})
playbyplay$ev.player.1[c1] <- paste(p1[1,], p1[2,])
playbyplay$homezone[c1] <- p1[4,]
playbyplay$type[c1] <- p1[3,]
}
#HIT
c1 <- which(playbyplay$etype=="HIT")
if (length(c1)>0) {
p1 <- sapply(gsub(".*#([0-9]+) ([A-Z' \\.-]+) HIT.*#([0-9]+) ([A-Z' \\.-]+), ([A-Za-z]+).*", "\\1;\\2;\\3;\\4;\\5",
playbyplay$etext[playbyplay$etype=="HIT"]),
p.split, 5)
# function(tt) unlist(strsplit (tt, ";")))
playbyplay$ev.player.1[playbyplay$etype=="HIT"] <- paste(p1[1,], p1[2,])
playbyplay$ev.player.2[playbyplay$etype=="HIT"] <- paste(p1[3,], p1[4,])
playbyplay$homezone[playbyplay$etype=="HIT"] <- p1[5,]
}
########################################################################
# replace number-last with number-first-last.
#on.ice.players <- NULL
#for (kk in 6:17) on.ice.players <- c(on.ice.players, playbyplay[,kk])
#on.ice.players <- unique(on.ice.players)
# seconds in game.
playbyplay$seconds <- playbyplay$seconds + 1200*(playbyplay$period-1)
# add approximate changes.
orig.rec <- dim(playbyplay)[1]
for (kk in 1:(orig.rec-1)) {
past <- na.omit(unlist(playbyplay[kk,6:17]))
future <- na.omit(unlist(playbyplay[kk+1,6:17]))
if (playbyplay[kk,3] < playbyplay[kk+1,3])
if (length(past) != length(future)) {
new.record <- playbyplay[kk,]
new.record[1,4] <- "CHANGE"
new.record[1,5] <- "CHANGE"
new.record$ev.team <- ""
new.record[1,3] <- mean(c(playbyplay[kk,3], playbyplay[kk+1,3]))
playbyplay <- rbind(playbyplay, new.record)
} else if (any(past != future)) {
new.record <- playbyplay[kk,]
new.record[1,4] <- "CHANGE"
new.record[1,5] <- "CHANGE"
new.record$ev.team <- ""
new.record[1,3] <- mean(c(playbyplay[kk,3], playbyplay[kk+1,3]))
playbyplay <- rbind(playbyplay, new.record)
}
}
playbyplay <- playbyplay[order(playbyplay[,3]),]
playbyplay[,1] <- 1:dim(playbyplay)[1]
#remove text.
playbyplay <- playbyplay[,-5]
#flip zone to home zone referent.
flip.1 <- playbyplay$ev.team == teams[1] & playbyplay$homezone=="Off"
flip.2 <- playbyplay$ev.team == teams[1] & playbyplay$homezone=="Def"
playbyplay$homezone[flip.1] <- "Def"
playbyplay$homezone[flip.2] <- "Off"
playbyplay$homezone[playbyplay$etype=="CHANGE"] <- "Neu"
stops <- which(playbyplay$ev.team != "ICI" & playbyplay$homezone == ""); playbyplay$homezone[stops] <- playbyplay$homezone[stops+1]
playbyplay$homezone[playbyplay$etype %in% c("SHOT", "BLOCK", "GOAL", "MISS") & playbyplay$ev.team == teams[1]] <- "Def"
playbyplay$homezone[playbyplay$etype %in% c("SHOT", "BLOCK", "GOAL", "MISS") & playbyplay$ev.team == teams[2]] <- "Off"
#faceoffs follow stoppages, so cut them.
playbyplay <- playbyplay[playbyplay$etype != "STOP",]
output <- list(playbyplay=playbyplay,
players=players,
teams=teams,
date=date)
return(output)
}
process.es.new <- function(es.html) {
rel.rows.1 <- grep("<tr", es.html)
rel.rows.2 <- sort(c(outer(rel.rows.1, 1:3, "+")))
picks <- es.html[rel.rows.2[grep(" *<td [awc ].*", es.html[rel.rows.2])]]
picks.1 <- gsub(" *<td.*> *([0-9]{1,2})</td>", "n;\\1", picks)
picks.2 <- gsub(" *<td.*>([CLRDG])</td>", "p;\\1", picks.1)
picks.3 <- gsub(" *<td.*>([A-Z'\\., -]{6,})</td>", "m;\\1", picks.2)
picks.3 <- picks.3[substr(picks.3, 1, 1) != "<"]
if (length(picks)>2) {
rk <- substr(picks.3,1,1)
nms <- substr(picks.3,3,nchar(picks.3))
lr <- length(rk)
rk1 <- rbind(rk[1:(lr-2)], rk[2:(lr-1)], rk[3:lr])
sts <- which(apply(rk1 == c("n","p","m"), 2, prod)>0)
division <- grep("TEAM TOTALS", picks.3)[1]
} else {nms <- rep("",3); sts <- NULL; division=3}
names.mat <- data.frame(number=nms[sts],
pos=nms[sts+1],
lastfirst=nms[sts+2],
hometeam=1*(sts>division), stringsAsFactors=FALSE)
names.mat <- names.mat[grepl(",", names.mat$lastfirst),]
names.mat$last <- gsub("(.*), .*", "\\1", names.mat$lastfirst)
names.mat$first <- gsub(".*, (.*)", "\\1", names.mat$lastfirst)
names.mat$numlast <- paste(names.mat$number, names.mat$last)
names.mat$numfirstlast <- paste(names.mat$number, names.mat$first, names.mat$last)
#remove duplicates.
lastfirst <- unique(names.mat$lastfirst)
if (length(lastfirst) != dim(names.mat)[1]) for (mm in lastfirst) {
rws <- rev(which(names.mat$lastfirst == mm))
if (length(rws)>1) names.mat <- names.mat[-rws[2:length(rws)],]
}
names.mat <- names.mat[order(names.mat$hometeam, as.numeric(names.mat$number)),]
return(names.mat)
}
integrate.new.pieces <- function (es.html, pl.html) {
#es.html=game.rec$es; pl.html=game.rec$pl
es.table <- try(process.es.new(es.html), TRUE)
if (class(es.table)=="try-error") es.table <- NULL
pl.table <- try(process.pl.new(pl.html), TRUE)
if (class(pl.table)=="try-error") pl.table <- NULL
# replace with full names.
for (ii in which(colnames(pl.table$playbyplay) %in% c("ev.player.1", "ev.player.2", "ev.player.3")))
pl.table$playbyplay[,ii] <- es.table$numfirstlast[match(pl.table$playbyplay[,ii],
es.table$numlast)]
pl.table$players <- es.table
return(pl.table)
}
integrate.old.pieces <- function (home.image, away.image,
es.html, pl.html, game) {
#home.image=game.rec$imh; away.image=game.rec$imv; es.html=game.rec$es; pl.html=game.rec$pl
es.table <- try(process.es.new(es.html), TRUE)
if (class(es.table)=="try-error") es.table <- NULL
pl.table <- try(process.pl.old(pl.html), TRUE)
if (class(pl.table)=="try-error") pl.table <- NULL
if (length(pl.table) > 0) {
if (substr(game,1,1)=="2") pl.table$playbyplay <- pl.table$playbyplay[pl.table$playbyplay$seconds <= 3900,]
# replace with full names.
for (ii in which(colnames(pl.table$playbyplay) %in% c("ev.player.1", "ev.player.2", "ev.player.3")))
pl.table$playbyplay[,ii] <- es.table$numfirstlast[match(pl.table$playbyplay[,ii],
es.table$numlast)]
for (ii in which(colnames(pl.table$playbyplay) %in% c(paste0("a",1:6), paste0("h",1:6))))
pl.table$playbyplay[pl.table$playbyplay$etype=="GOAL",ii] <-
es.table$numfirstlast[match(pl.table$playbyplay[pl.table$playbyplay$etype=="GOAL",ii],
es.table$numlast)]
es.home <- es.table[es.table$hometeam==1,]; es.home <- rbind(es.home, "")
es.away <- es.table[es.table$hometeam==0,]; es.away <- rbind(es.away, "")
if (length(home.image) == 0) warning ("Home image file does not exist.")
if (length(away.image) == 0) warning ("Away image file does not exist.")
home.players <- try(pick.out.features (pl.table$playbyplay$seconds, home.image), TRUE)
if (class(home.players)=="try-error") home.players <- array(0, c(6, length(pl.table$playbyplay$seconds)))
away.players <- try(pick.out.features (pl.table$playbyplay$seconds, away.image), TRUE)
if (class(away.players)=="try-error") away.players <- array(0, c(6, length(pl.table$playbyplay$seconds)))
home.players[home.players==0] <- nrow(es.home)
away.players[away.players==0] <- nrow(es.away)
# Players on ice.
home.players.names <- t(array(es.home$numfirstlast[home.players], dim(home.players)))
away.players.names <- t(array(es.away$numfirstlast[away.players], dim(away.players)))
pl.table$playbyplay[pl.table$playbyplay$etype!="GOAL",5:10] <- away.players.names[pl.table$playbyplay$etype!="GOAL",]
pl.table$playbyplay[pl.table$playbyplay$etype!="GOAL",6+5:10] <- home.players.names[pl.table$playbyplay$etype!="GOAL",]
#Add changes.
orig.rec <- dim(pl.table$playbyplay)[1]
for (kk in 1:(orig.rec-1)) {
past <- na.omit(unlist(pl.table$playbyplay[kk,6:17-1]))
future <- na.omit(unlist(pl.table$playbyplay[kk+1,6:17-1]))
if (pl.table$playbyplay[kk,3] < pl.table$playbyplay[kk+1,3])
if (length(past) != length(future)) {
new.record <- pl.table$playbyplay[kk,]
new.record[1,4] <- "CHANGE"
new.record$ev.team <- ""
new.record[1,3] <- mean(c(pl.table$playbyplay[kk,3], pl.table$playbyplay[kk+1,3]))
pl.table$playbyplay <- rbind(pl.table$playbyplay, new.record)
} else if (any(sort(past) != sort(future))) {
new.record <- pl.table$playbyplay[kk,]
new.record[1,4] <- "CHANGE"
new.record$ev.team <- ""
new.record[1,3] <- mean(c(pl.table$playbyplay[kk,3], pl.table$playbyplay[kk+1,3]))
pl.table$playbyplay <- rbind(pl.table$playbyplay, new.record)
}
}
pl.table$playbyplay <- pl.table$playbyplay[order(pl.table$playbyplay[,3]),]
pl.table$playbyplay[,1] <- 1:dim(pl.table$playbyplay)[1]
pl.table$playbyplay$homezone[pl.table$playbyplay$etype=="CHANGE"] <- "Neu"
pl.table$players <- es.table
} else {pl.table <- list(players=NULL, playbyplay=NULL, teams=NULL, date=NULL)}
return(pl.table)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.