# Nothing
# e.g.
# getTradeMeCarData("http://www.trademe.co.nz/Trade-Me-Motors/Cars/Subaru/mcat-0001-0268-0332-.htm")
# Fetch and parse a single Trade Me listings page.
#
# url:  URL or local file path of the page to read.
# base: optional parsed URI (as returned by parseURI()) used to turn a
#       server-less `url`, such as the href of a "Next" link, into a full
#       URL. When NULL, `url` is used exactly as given.
#
# Returns a list with two elements:
#   doc  - the parsed HTML document (internal nodes)
#   rows - the node whose children are the listing rows: the first child of
#          the listings table if that child is a <tbody>, otherwise the
#          <table> node itself.
getTradeMeCarPage =
function(url, base = NULL)
{
    tmp = parseURI(url)
    # Only resolve against `base` when one was supplied. Without this guard a
    # plain local file path would be rewritten to "://<path>" and fail to open.
    if(tmp$server == "" && !is.null(base))
        url = paste(base$scheme, "://", base$server, url, sep = "")

    d = htmlTreeParse(url, useInternalNodes = TRUE)

    n = getNodeSet(d, "//table[@class='listings']")
    if(length(n) == 0)
        stop("no <table class='listings'> found in ", url, call. = FALSE)
    n = n[[1]]

    # Some pages wrap the rows in an explicit <tbody>, others do not.
    if(xmlSize(n) > 0 && xmlName(n[[1]]) == "tbody") {
        rows = n[[1]]
    } else
        rows = n

    list(doc = d, rows = rows)
}
# Interactive exploration of the listings table structure. Wrapped in
# if(FALSE) so it never runs when the file is sourced.
if(FALSE) {
    # getTradeMeCarPage() returns list(doc, rows); the row container is `rows`.
    tbody = getTradeMeCarPage("~/TradeMe.cars.subaru.html")$rows
    # the children of tbody are all tr elements
    table(xmlSApply(tbody, xmlName))
    # And the first one has two elements and the remainder are the actual car listings
    xmlSApply(tbody, xmlSize)
}
# Scrape car listings from Trade Me search-result pages into a data frame.
#
# url:      URL (or local file path) of the first results page.
# data:     optional pre-allocated data frame to fill, starting at row 1.
#           When NULL, one is created with the columns description, kms,
#           askingPrice, numBid, auctionExpiration and year.
# numPages: maximum number of result pages to read; later pages are reached
#           by following the page's "Next" link.
#
# Returns the rows of `data` that were filled in, one per listing.
getTradeMeCarData =
function(url = "~/TradeMe.cars.subaru.html",
         data = NULL, numPages = 1)
{
    uri = parseURI(url)
    doc = getTradeMeCarPage(url)
    rows = doc$rows

    if(is.null(data)) {
        # The first row has an entry of the form "1234 listings, showing 1 to 50"
        # so we get this number via regular expressions and gsub on the content
        # of that first cell.
        num = as.integer(gsub("^([0-9]+) .*", "\\1", xmlValue(rows[[1]][[1]])))
        # Fall back to the number of listings on this page when the total is
        # unavailable or only one page is wanted.
        if(is.na(num) || numPages == 1)
            num = xmlSize(rows) - 1

        # type/description, kms, amount, # bids,
        data = data.frame(description = I(character(num)),
                          kms = integer(num),
                          askingPrice = integer(num),
                          numBid = as.integer(rep(NA, num)),
                          auctionExpiration = I(character(num)),
                          year = integer(num)
                          )
    }

    page = 1
    cur = 1
    while(page <= numPages) {
        # Row 1 is the "N listings" header; the listings start at row 2.
        # seq_len()[-1] is empty (rather than 2:1) when only the header exists.
        for(i in seq_len(xmlSize(rows))[-1]) {
            r = rows[[i]]
            data[cur, 1] = xmlValue(r[[2]])
            # The year is the 4-digit number ending the text that precedes the
            # first non-breaking space ("\302\240") of the description.
            data[cur, "year"] = as.integer(gsub(".* ([0-9]{4})", "\\1", strsplit(data[cur, 1], "\302\240")[[1]][1]))
            data[cur, 2] = as.integer(gsub(",", "", xmlValue(r[[3]][[1]])))
            data[cur, 3] = as.integer(gsub("[,$]", "", xmlValue(r[[4]])))
            # "-" means no bids yet; leave numBid as NA in that case.
            bids = xmlValue(r[[6]])
            if(bids != "-")
                data[cur, 4] = as.integer(bids)
            data[cur, 5] = xmlValue(r[[7]])
            cur = cur + 1
        }

        # Do not fetch a further page once the requested number has been read.
        if(page >= numPages)
            break

        nxt = getNodeSet(doc$doc, "//table//a/font/b[text() = 'Next']")
        if(length(nxt) == 0) {
            print("no next page")
            break
        }

        # Get the <a href=> element for the Next
        a = xmlParent(xmlParent(nxt[[1]]))
        doc = getTradeMeCarPage(xmlGetAttr(a, "href"), base = uri)
        # Switch to the rows of the newly fetched page for the next iteration.
        rows = doc$rows
        page = page + 1
    }

    # seq_len() yields zero rows (not rows 1 and 0) when nothing was read.
    data[seq_len(cur - 1), ]
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.