# R/FindPowerplantsOutsideOfStatedCountries.R

FindPowerplantsOutsideOfStatedCountries <- function (endpoint, bot) {
  ################ Check if power plants are outside of their stated countries ################
  #
  # Queries the SPARQL endpoint for every power plant with coordinates and a
  # stated country, looks up which polygon of the EEZ_land_v1 shapefile each
  # coordinate falls in, and writes a wiki table of the power plants whose
  # stated ISO 3166-1 alpha-3 code differs from the code found in the shapefile.
  #
  # Args:
  #   endpoint: URL of the SPARQL endpoint; passed as `url` to SPARQL().
  #   bot:      passed through unchanged to edit(); presumably the
  #             RSemanticMediaWikiBot session object -- confirm against caller.
  #
  # Returns:
  #   Whatever the final edit() call returns.
  #
  # Notes:
  #   * Power plants for which the shapefile lookup gives NA (no polygon hit)
  #     are not reported, because which() drops NA comparisons.
  #   * The EEZ_land_v1 folder must be in the working directory.

  # TODO A buffer can be used to deal with power plants that might be on borders or the ocean
  # This is important as the coordinates for the power plant may be more accurate than the borders
  # represented in the shapefile
  # http://www.inside-r.org/packages/cran/rgeos/docs/gBuffer

  # sourced from http://www.marineregions.org/sources.php#eez
  # make sure that the EEZ_land_v1 folder is in the working directory
  borders <- readOGR("EEZ_land_v1", "EEZ_land_v1")

  queryString <- "select * where {
                  ?x rdf:type cat:Powerplant . 
                  ?x prop:Point ?point . 
                  ?x prop:Country ?country . 
                  ?country rdfs:label ?countryName . 
                  ?country prop:ISO_3166-1_Alpha-3_code ?isoCountry .
                }"

  queryResults <- SPARQL(url=endpoint, query=queryString, format='csv', extra=list(format='text/csv'))
  df <- queryResults$results

  # ?point is a single "lat,lon" string; split it into two columns
  coords <- colsplit(df$point, ",", names=c("lat", "lon"))
  df$lat <- coords$lat
  df$lon <- coords$lon

  # promote to a spatial object using the same projection string as the borders
  coordinates(df) <- c("lon", "lat")
  proj4string(df) <- proj4string(borders)

  # figure out which country this power plant should be in based on the shapefile
  df@data$country_ISO_Shapefile <- over(df, borders)$ISO_3digit

  # convert back to data frame
  df <- as.data.frame(df)

  # Compare the codes as plain strings: `!=` on two factors with different
  # level sets is an error, and later assignments from a factor would insert
  # integer level codes instead of the ISO code.
  df$isoCountry <- as.character(df$isoCountry)
  df$country_ISO_Shapefile <- as.character(df$country_ISO_Shapefile)

  # ok, who's in the wrong country (NA comparisons are dropped by which())
  mismatched <- which(df$isoCountry != df$country_ISO_Shapefile)

  queryString <- "select * where {
                  ?country rdfs:label ?countryName . 
                  ?country prop:ISO_3166-1_Alpha-3_code ?isoCountry .
                }"

  queryResults <- SPARQL(url=endpoint, query=queryString, format='csv', extra=list(format='text/csv'))
  countryCodes <- queryResults$results

  df <- df[mismatched, ]

  # Attach the wiki country page for the ISO code found in the shapefile.
  # Both inputs carry `country`/`countryName`, so merge() suffixes them with
  # .x (stated country) and .y (country looked up from the shapefile code).
  df <- merge(df, countryCodes, by.x=c("country_ISO_Shapefile"), by.y=c("isoCountry"), all.x=TRUE)

  # Rename by column name rather than by position so a different variable
  # order in the `select *` results cannot silently mislabel the columns.
  renames <- c(x = "Powerplant",
               country.x = "Stated_Country",
               country.y = "Country_from_Shapefile")
  missingCols <- setdiff(names(renames), colnames(df))
  if (length(missingCols) > 0) {
    stop("Expected column(s) missing after merge: ",
         paste(missingCols, collapse = ", "), call. = FALSE)
  }
  colnames(df)[match(names(renames), colnames(df))] <- unname(renames)

  # If we don't have the name of the country for the ISO code (from the wiki)
  # then just print out the ISO code.  Coerce to character first so the ISO
  # code is stored as-is instead of becoming NA (invalid factor level).
  df$Country_from_Shapefile <- as.character(df$Country_from_Shapefile)
  naLocs <- which(is.na(df$Country_from_Shapefile))
  df$Country_from_Shapefile[naLocs] <- df$country_ISO_Shapefile[naLocs]

  # get this all into a wiki table, and move it to RSemanticMediaWikiBot
  wikiTable <- df[, c("Powerplant", "Stated_Country", "Country_from_Shapefile")]

  # reformat the links to point to wiki pages where possible
  wikiPrefix <- "http://enipedia.tudelft.nl/wiki/"
  for (i in seq_len(ncol(wikiTable))) {
    # work on a character copy so the rewritten values are never rejected as
    # unknown factor levels
    values <- as.character(wikiTable[[i]])
    # literal (not regex) match, so the dots in the URL only match dots
    isWikiLink <- grepl(wikiPrefix, values, fixed = TRUE)
    pageNames <- gsub(wikiPrefix, "", values[isWikiLink], fixed = TRUE)
    pageNames <- gsub("_", " ", pageNames, fixed = TRUE)
    values[isWikiLink] <- paste0("[[", pageNames, "]]")
    wikiTable[[i]] <- values
  }

  wikiTable <- sqldf("select * from wikiTable order by Stated_Country")
  # get the wiki text needed to generate this table
  wikiTable <- getWikiTableTextForDataFrame(wikiTable)

  pageText <- "This page is automatically generated and will likely be overwritten.  Any comments should go on the [[{{TALKPAGENAME}}|discussion page]].

The table below is generated by using a [http://www.marineregions.org/sources.php#eez shapefile of Exclusive Economic Zones boundaries] to locate power plants on Enipedia who are stated as being in one country, but their coordinates are within another country.  The matching is done based on [http://en.wikipedia.org/wiki/ISO_3166-1_alpha-3 ISO 3166-1 alpha-3 codes], and if there is no corresponding country page on Enipedia, then the country code is displayed.

The source code that generates this page is [https://github.com/cbdavis/EnipediaDataQualityBot/blob/master/R/FindPowerplantsOutsideOfStatedCountries.R here], and will likely be updated to deal with various relationships between countries and their territories."

  pageText <- paste0(pageText, "\n\n", wikiTable)

  # write what we found to a wiki page
  edit(title="Powerplant Country and Coordinates Check", 
       text=pageText, 
       bot, 
       summary="updating results of country/coordinate check")

}
# cbdavis/EnipediaDataQualityBot documentation built on May 13, 2019, 1:49 p.m.