# Report power plants whose coordinates fall outside their stated country.
#
# Queries the SPARQL endpoint for every power plant that has a point and a
# country, looks up which polygon of the EEZ_land_v1 shapefile contains each
# point, and writes a wiki table of the plants whose stated ISO 3166-1 alpha-3
# code differs from the code found in the shapefile.
#
# Args:
#   endpoint: URL of the SPARQL endpoint, passed to SPARQL().
#   bot: passed through unchanged to edit() when the wiki page is written.
#
# Returns:
#   Whatever edit() returns for the page write.
#
# Power plants whose point is not inside any polygon get NA as shapefile code;
# the comparison below is then NA and which() leaves them out of the report.
#
# TODO A buffer can be used to deal with power plants that might be on borders
# or the ocean. This is important as the coordinates for the power plant may be
# more accurate than the borders represented in the shapefile.
# http://www.inside-r.org/packages/cran/rgeos/docs/gBuffer
FindPowerplantsOutsideOfStatedCountries <- function(endpoint, bot) {
  wikiPrefix <- "http://enipedia.tudelft.nl/wiki/"

  # Run one query against the endpoint and return its result table.
  runQuery <- function(queryString) {
    SPARQL(url = endpoint, query = queryString, format = 'csv',
           extra = list(format = 'text/csv'))$results
  }

  # Turn wiki page URLs into [[Page name]] links; other values pass through.
  # Works on character values so that factor columns cannot turn the new
  # link text into NA.
  toWikiLinks <- function(values) {
    values <- as.character(values)
    # fixed = TRUE: the prefix is a literal string, not a regular expression
    isPage <- grepl(wikiPrefix, values, fixed = TRUE)
    pageNames <- gsub(wikiPrefix, "", values[isPage], fixed = TRUE)
    pageNames <- gsub("_", " ", pageNames, fixed = TRUE)
    values[isPage] <- paste0("[[", pageNames, "]]")
    values
  }

  # Shapefile sourced from http://www.marineregions.org/sources.php#eez
  # Make sure that the EEZ_land_v1 folder is in the working directory.
  # NOTE(review): readOGR presumably comes from rgdal, which is no longer
  # maintained on CRAN - confirm and consider a replacement when the
  # dependencies of this file are next revisited.
  borders <- readOGR("EEZ_land_v1", "EEZ_land_v1")

  powerplantQuery <- paste(
    "select * where {",
    "?x rdf:type cat:Powerplant .",
    "?x prop:Point ?point .",
    "?x prop:Country ?country .",
    "?country rdfs:label ?countryName .",
    "?country prop:ISO_3166-1_Alpha-3_code ?isoCountry .",
    "}",
    sep = "\n"
  )
  df <- runQuery(powerplantQuery)

  # The point is stored as "lat,lon"
  coords <- colsplit(df$point, ",", names = c("lat", "lon"))
  df$lat <- coords$lat
  df$lon <- coords$lon
  # Missing coordinates cannot be turned into spatial points, so drop those
  # rows instead of letting one malformed point abort the whole check.
  df <- df[!is.na(df$lat) & !is.na(df$lon), ]

  coordinates(df) <- c("lon", "lat")
  proj4string(df) <- proj4string(borders)

  # Figure out which country each power plant should be in based on the
  # shapefile, then convert back to a plain data frame.
  df@data$country_ISO_Shapefile <- over(df, borders)$ISO_3digit
  df <- as.data.frame(df)

  # Compare the codes as character: either side may be a factor, and factors
  # with different level sets cannot be compared with != .
  df$country_ISO_Shapefile <- as.character(df$country_ISO_Shapefile)
  statedIso <- as.character(df$isoCountry)

  # ok, who's in the wrong country
  mismatched <- which(statedIso != df$country_ISO_Shapefile)
  df <- df[mismatched, ]

  countryQuery <- paste(
    "select * where {",
    "?country rdfs:label ?countryName .",
    "?country prop:ISO_3166-1_Alpha-3_code ?isoCountry .",
    "}",
    sep = "\n"
  )
  countryCodes <- runQuery(countryQuery)

  # Keep only the two columns the report needs, already under their final
  # names. This avoids the .x/.y suffixes merge() would otherwise add and the
  # positional renaming that depended on them. unique() keeps a country with
  # several labels from multiplying the rows of the report.
  countryLookup <- unique(data.frame(
    country_ISO_Shapefile = as.character(countryCodes$isoCountry),
    Country_from_Shapefile = as.character(countryCodes$country),
    stringsAsFactors = FALSE
  ))
  df <- merge(df, countryLookup, by = "country_ISO_Shapefile", all.x = TRUE)

  names(df)[names(df) == "x"] <- "Powerplant"
  names(df)[names(df) == "country"] <- "Stated_Country"

  # If we don't have the name of the country for the ISO code (from the wiki)
  # then just print out the ISO code. Both sides are forced to character so
  # that the code itself is copied, never a factor's integer level.
  df$Country_from_Shapefile <- as.character(df$Country_from_Shapefile)
  noCountryPage <- is.na(df$Country_from_Shapefile)
  df$Country_from_Shapefile[noCountryPage] <-
    as.character(df$country_ISO_Shapefile)[noCountryPage]

  # get this all into a wiki table, and move it to RSemanticMediaWikiBot
  wikiTable <- df[, c("Powerplant", "Stated_Country", "Country_from_Shapefile")]

  # reformat the links to point to wiki pages where possible
  wikiTable[] <- lapply(wikiTable, toWikiLinks)

  wikiTable <- sqldf("select * from wikiTable order by Stated_Country")

  # get the wiki text needed to generate this table
  tableText <- getWikiTableTextForDataFrame(wikiTable)

  pageText <- paste(
    "This page is automatically generated and will likely be overwritten. Any comments should go on the [[{{TALKPAGENAME}}|discussion page]].",
    "The table below is generated by using a [http://www.marineregions.org/sources.php#eez shapefile of Exclusive Economic Zones boundaries] to locate power plants on Enipedia who are stated as being in one country, but their coordinates are within another country. The matching is done based on [http://en.wikipedia.org/wiki/ISO_3166-1_alpha-3 ISO 3166-1 alpha-3 codes], and if there is no corresponding country page on Enipedia, then the country code is displayed.",
    "The source code that generates this page is [https://github.com/cbdavis/EnipediaDataQualityBot/blob/master/R/FindPowerplantsOutsideOfStatedCountries.R here], and will likely be updated to deal with various relationships between countries and their territories.",
    sep = "\n"
  )
  pageText <- paste0(pageText, "\n\n", tableText)

  # write what we found to a wiki page
  edit(title="Powerplant Country and Coordinates Check",
       text=pageText,
       bot,
       summary="updating results of country/coordinate check")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.