R/coord_geocode.R

#' Geocode
#'
#' geocodes an address using Google or Baidu Maps API. Note that in most cases by
#' using this function you are agreeing to the Google Maps API Terms of Service
#' at \url{https://developers.google.com/maps/terms} or the Baidu Maps API Terms
#' of Use at \url{http://developer.baidu.com/map/law.htm}.
#'
#' @param address a character vector specifying a location of interest (e.g.,
#' "Tsinghua Univeristy").
#' @param api use Google or Baidu Maps API. When using Baidu Maps API, the address 
#' must be in Chinese.
#' @param key an api key must be provided when calling baidu maps api.
#' While it's unnecessary for calling google maps api.
#' @param ocs output coordinate systems including WGS-84, GCJ-02 and BD-09, which
#' are the GCSs of Google Earth, Google Map in China and Baidu Map, respectively.
#' For address out of China, ocs is automatically set to 'WGS-84' and other values
#' are igored.
#' @param output lat/lng coordinates or lat/lng coordinates with location type (Goolge Map) | confidence
#' (Baidu Map) or lat/lng coordinates with formated address and address components (only available for
#' Google Map API).
#' @param messaging turn messaging on/off. The default value is FALSE.
#' @param time the time interval to geocode, in seconds. Default value is zero.
#' When you geocode multiple addresses, set a proper time interval to avoid
#' exceeding usage limits. For details see
#' \url{https://developers.google.com/maps/documentation/business/articles/usage_limits}
#' @return a data.frame with variables lat/lng or lat/lng/conf
#' @author Jun Cai (\email{cai-j12@@mails.tsinghua.edu.cn}), PhD student from
#' Center for Earth System Science, Tsinghua University
#' @details note that the google maps api limits to 2500 queries a day.
#' @seealso \code{\link{revgeocode}}, \code{\link{geohost}}.
#'
#' Google Maps API at \url{http://code.google.com/apis/maps/documentation/geocoding/}
#' and Baidu Maps API at \url{http://developer.baidu.com/map/webservice-geocoding.htm}
#' @export
#' @importFrom plyr ldply
#' @examples
#' \dontrun{
#' geocode('Tsinghua University', api = 'google', ocs = 'GCJ-02')
#' geocode('Tsinghua University', api = 'google', ocs = 'WGS-84',
#'         messaging = TRUE)
#' geocode('Beijing railway station', api = 'google', ocs = 'WGS-84',
#'         output = 'latlngc')
#' geocode('Beijing railway station', api = 'google', ocs = 'WGS-84',
#'         output = 'latlnga')
#' geocode(c('Tsinghua University', 'Beijing railway station'), api = 'google',
#'         ocs = 'GCJ-02')
#' geocode(c('Tsinghua University', 'Beijing railway station'), api = 'google',
#'         ocs = 'WGS-84', output = 'latlngc', messaging = TRUE)
#' geocode(c('Tsinghua University', 'Beijing railway station'), api = 'google',
#'         ocs = 'WGS-84', output = 'latlnga', messaging = TRUE)
#' geocode(c('Tsinghua University', 'Beijing railway station'), api = 'google',
#'         ocs = 'WGS-84', output = 'latlngc', messaging = TRUE, time = 2)
#' geocode('Beijing railway station', api = 'baidu', key = 'your baidu maps api key',
#'         ocs = 'BD-09')
#' geocode('Beijing railway station', api = 'baidu', key = 'your baidu maps api key',
#'         ocs = 'GCJ-02', messaging = TRUE)
#' geocode('Beijing railway station', api = 'baidu', key = 'your baidu maps api key',
#'         ocs = 'BD-09', output = 'latlngc')
#' geocode(c('Tsinghua University', 'Beijing railway station'), api = 'baidu',
#'         key = 'your baidu maps api key', ocs = 'BD-09')
#' geocode(c('Tsinghua University', 'Beijing railway station'), api = 'baidu',
#'         key = 'your baidu maps api key', ocs = 'WGS-84', output = 'latlngc')
#' }
geocode <- function(address, api = c('google', 'baidu'), key = '',
                    ocs = c('WGS-84', 'GCJ-02', 'BD-09'),
                    output = c('latlng', 'latlngc', 'latlnga'), messaging = FALSE,
                    time = 0){
    # check parameters
    stopifnot(is.character(address))
    api <- tolower(match.arg(api))
    stopifnot(is.character(key))
    output <- tolower(match.arg(output))
    ocs <- toupper(match.arg(ocs))
    stopifnot(is.logical(messaging))
    stopifnot(is.numeric(time))

    # vectorize for many addresses
    if (length(address) > 1) {
        if (api == 'google') {
            s <- 'google restricts requests to 2500 requests a day.'
            if (length(address) > 2500) stop(s, call. = F)
            if (length(address) > 200 & messaging) message(paste('Reminder', s, sep = ' : '))
        }

        return(ldply(address, function(add){
            Sys.sleep(time)
            geocode(add, api = api, key = key, ocs = ocs, output = output, messaging = messaging)
        }))
    }

    # location encoding
    address <- enc2utf8(address)
    # different google maps api is used based user's location. If user is inside China,
    # ditu.google.cn is used; otherwise maps.google.com is used.
    if (api == 'google') {
        cname <- try(ip.country(), TRUE)
        if (cname == "CN") {
            api_url <- 'http://ditu.google.cn/maps/api/geocode/json'
        } else{
            api_url <- 'http://maps.googleapis.com/maps/api/geocode/json'
        }
    } else{
        api_url <- 'http://api.map.baidu.com/geocoder/v2/'
    }
    # format url
    # https is only supported on Windows, when R is started with the --internet2
    # command line option. without this option, or on Mac, you will get the error
    # "unsupported URL scheme".
    if (api == 'google') {
        # http://maps.googleapis.com/maps/api/geocode/json?address=ADDRESS&sensor
        # =false&key=API_KEY for outside China
        # http://ditu.google.cn/maps/api/geocode/json?address=ADDRESS&sensor
        # =false&key=API_KEY for outside China
        url_string <- paste(api_url, '?address=', address, '&sensor=false', sep = '')
        if (nchar(key) > 0) {
            url_string <- paste(url_string, '&key=', key, sep = '')
        }
    }
    if (api == 'baidu') {
        # http://api.map.baidu.com/geocoder/v2/?address=ADDRESS&output=json&ak=API_KEY
        url_string <- paste(api_url, '?address=', address, '&output=json&ak=', key, sep = '')
    }

    if (messaging) message(paste('calling ', url_string, ' ... ', sep = ''),
                           appendLF = FALSE)

    # geocode
    con <- curl(URLencode(url_string))
    gc <- fromJSON(paste(readLines(con, warn = FALSE), collapse = ''))
    if (messaging) message('done.')
    close(con)

    # geocoding results
    if (api == 'google') {
        # did geocode fail?
        if (gc$status != 'OK') {
            warning(paste('geocode failed with status ', gc$status, ', location = "',
                          address, '"', sep = ''), call. = FALSE)
            return(data.frame(lat = NA, lng = NA))
        }

        # more than one location found?
        if (length(gc$results) > 1 && messaging) {
            Encoding(gc$results[[1]]$formatted_address) <- "UTF-8"
            message(paste('more than one location found for "', address,
                          '", using address\n"',
                          tolower(gc$results[[1]]$formatted_address),
                          '"', sep = ''), appendLF = TRUE)
        }

        gcdf <- with(gc$results[[1]], {
            data.frame(lat = NULLtoNA(geometry$location['lat']),
                       lng = NULLtoNA(geometry$location['lng']),
                       loctype = tolower(NULLtoNA(geometry$location_type)),
                       address = tolower(NULLtoNA(formatted_address)),
                       row.names = NULL)})

        # address components
        attrdf <- ldply(gc$results[[1]]$address_components, function(l){
            l <- lapply(l, function(li) {
                if (length(li) == 0) li <- NA else li
            })
            as.data.frame(l, stringsAsFactors = FALSE)[1, ]
        })
        attrdf <- attrdf[!is.na(attrdf$types), c('types', 'long_name')]
        gcdf <- within(gcdf, {
            poi <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'point_of_interest']))
            street_no <-
                as.numeric(NULLtoNA(attrdf$long_name[attrdf$types == 'street_number']))
            route <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'route']))
            subloc_l3 <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'sublocality_level_3'])) # village
            subloc_l2 <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'sublocality_level_2'])) # town
            subloc_l1 <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'sublocality_level_1'])) # distrcit/county
            locality <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'locality'])) # city
            admin_area_l2 <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'administrative_area_level_2'])) # US county
            admin_area_l1 <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'administrative_area_level_1'])) # prvince or US state
            country <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'country']))
            postal_code <-
                tolower(NULLtoNA(attrdf$long_name[attrdf$types == 'postal_code']))
        })

        # convert coordinates only in China
        if (!isOutOfChina(gcdf[, 'lat'], gcdf[, 'lng'])) {
            gcdf[c('lat', 'lng')] <- convCoord(gcdf[, 'lat'], gcdf[, 'lng'], from = 'GCJ-02', to = ocs)
        } else{
            if (ocs != 'WGS-84') {
                message('wrong usage: for address out of China, ocs can only be set to "WGS-84"',
                        appendLF = TRUE)
                ocs <- 'WGS-84'
            }
        }

        if (output == 'latlng') return(gcdf[c('lat', 'lng')])
        if (output == 'latlngc') return(gcdf[c('lat', 'lng', 'loctype')])
        if (output == 'latlnga') return(gcdf[!names(gcdf) %in% c('loctype')])
    }
    if (api == 'baidu') {
        # did geocode fail?
        if (gc$status != 0) {
            warning(paste('geocode failed with status code ', gc$status, ', location = "',
                          address, '". see more details in the response code table of Baidu Geocoding API',
                          sep = ''), call. = FALSE)
            return(data.frame(lat = NA, lng = NA))
        }

        gcdf <- with(gc$result, {data.frame(lat = NULLtoNA(location['lat']),
                                            lng = NULLtoNA(location['lng']),
                                            conf = NULLtoNA(confidence),
                                            row.names = NULL)})

        # convert coordinates
        gcdf[c('lat', 'lng')] <- convCoord(gcdf[, 'lat'], gcdf[, 'lng'], from = 'BD-09', to = ocs)

        if (output == 'latlnga') {
            message('Baidu map geocoder cannot return address, please try Goolge map geocoder.',
                    appendLF = TRUE)
            output <- 'latlng'
        }
        if (output == 'latlng') return(gcdf[c('lat', 'lng')])
        if (output == 'latlngc') return(gcdf[c('lat', 'lng', 'conf')])
    }
}


#' Reverse geocode
#'
#' reverse geocodes a lat/lng location using Google or Baidu Maps API.  Note that in
#' most cases by using this function you are agreeing to the Google Maps API Terms
#' of Service at \url{https://developers.google.com/maps/terms} or the Baidu Maps
#' API Terms of Use at \url{http://developer.baidu.com/map/law.htm}.
#'
#' @param latlng a location in latitude/longitude format
#' @param ics the coordinate system of inputing location, including WGS-84, GCJ-02
#' and BD-09, which are the GCSs of Google Earth, Google Map in China and Baidu
#' Map, respectively. For location out of China, ics is automatically set to 'WGS-84'
#' and other values are ignored.
#' @param api use google or baidu maps api
#' @param key an api key must be provided when calling baidu maps api.
#' While it's unnecessary for calling google maps api.
#' @param output formatted address or formmatted address with address components
#' @param messaging turn messaging on/off. The default value is FALSE.
#' @param time the time interval to revgeocode, in seconds. Default value is zero.
#' When you revgeocode multiple locations, set a proper time interval to avoid
#' exceeding usage limits. For details see
#' \url{https://developers.google.com/maps/documentation/business/articles/usage_limits}
#' @return a data.frame with variables address or detail address components
#' @author Jun Cai (\email{cai-j12@@mails.tsinghua.edu.cn}), PhD student from
#' Center for Earth System Science, Tsinghua University
#' @details note that the google maps api limits to 2500 queries a day.
#' @seealso \code{\link{geocode}}, \code{\link{geohost}}.
#'
#' Google Maps API at \url{http://code.google.com/apis/maps/documentation/geocoding/}
#' and Baidu Maps API at \url{http://developer.baidu.com/map/webservice-geocoding.htm}
#' @export
#' @examples
#' \dontrun{
#' # reverse geocode Beijing railway station
#' revgeocode(c(39.90105, 116.42079), ics = 'WGS-84', api = 'google',
#'            output = 'address')
#' revgeocode(c(39.90245, 116.42703), ics = 'GCJ-02', api = 'google',
#'            output = 'address', messaging = TRUE)
#' revgeocode(c(39.90851, 116.43351), ics = 'BD-09', api = 'google',
#'            output = 'addressc')
#' revgeocode(c(39.90851, 116.43351), ics = 'BD-09', api = 'baidu',
#'            key = 'your baidu maps api key', output = 'address')
#' revgeocode(c(39.90245, 116.42703), ics = 'GCJ-02', api = 'baidu',
#'            key = 'your baidu maps api key', output = 'address', messaging = TRUE)
#' revgeocode(c(39.90105, 116.42079), ics = 'WGS-84', api = 'baidu',
#'            key = 'your baidu maps api key', output = 'addressc')
#'
#' # reverse geocode multiple locations
#' latlng = data.frame(lat = c(39.99837, 39.98565), lng = c(116.3203, 116.2998))
#' revgeocode(latlng, ics = 'WGS-84', api = 'google', output = 'address')
#' revgeocode(latlng, ics = 'WGS-84', api = 'google', output = 'address', time = 2)
#' }
revgeocode <- function(latlng, ics = c('WGS-84', 'GCJ-02', 'BD-09'),
                       api = c('google', 'baidu'), key = '',
                       output = c('address', 'addressc'), messaging = FALSE,
                       time = 0){
    # check parameters
    stopifnot(class(latlng) %in% c('numeric', 'data.frame'))
    ics <- toupper(match.arg(ics))
    api <- toupper(match.arg(api))
    stopifnot(is.character(key))
    output <- match.arg(output)
    stopifnot(is.logical(messaging))
    stopifnot(is.numeric(time))

    # vectorize for many locations
    if(is.data.frame(latlng)){
        return(ldply(seq_along(latlng), function(i){
            Sys.sleep(time)
            revgeocode(as.numeric(latlng[i, ]), ics = ics, api = api, key = key,
                       output = output, messaging = messaging) }))
    }

    # different google maps api is used based user's location. If user is inside China,
    # ditu.google.cn is used; otherwise maps.google.com is used.
    if(api == 'google'){
        cname <- try(ip.country(), TRUE)
        if(cname == "CN"){
            api_url <- 'http://ditu.google.cn/maps/api/geocode/json'
        } else{
            api_url <- 'http://maps.googleapis.com/maps/api/geocode/json'
        }
    } else{
        api_url <- 'http://api.map.baidu.com/geocoder/v2/'
    }

    # format url
    if(api == 'google'){
        # convert coordinates only in China
        if(!isOutOfChina(latlng[1], latlng[2])){
            latlng <- convCoord(latlng[1], latlng[2], from = ics, to = 'GCJ-02')
        } else{
            if(ics != 'WGS-84'){
                message('wrong usage: for location out of China, ics can only be set to "WGS-84"',
                        appendLF = TRUE)
            }
        }

        # http://maps.googleapis.com/maps/api/geocode/json?latlng=LAT,LNG
        # &sensor=FALSE&key=API_KEY for outside China
        # http://ditu.google.com/maps/api/geocode/json?latlng=LAT,LNG
        # &sensor=FALSE&key=API_KEY for inside China
        url_string <- paste(api_url, '?latlng=', latlng[1], ',', latlng[2],
                            '&sensor=false', sep = '')
        if(nchar(key) > 0){
            url_string <- paste(url_string, '&key=', key, sep = '')
        }
    }
    if(api == 'baidu'){
        # coordinate type lookup table
        code <- c('wgs84ll', 'gcj02ll', 'bd09ll')
        names(code) <- c('WGS-84', 'GCJ-02', 'BD-09')
        coordtype <- code[ics]
        # http://api.map.baidu.com/geocoder/v2/?location=LAT,LNG&coordtype=COORDTYPE
        # &output=json&ak=API_KEY
        url_string <- paste(api_url, '?location=', latlng[1], ',', latlng[2],
                            '&coordtype=', coordtype, '&output=json&ak=', key, sep = '')
    }

    if(messaging) message(paste('calling ', url_string, ' ... ', sep = ''),
                          appendLF = FALSE)

    # reverse gecode
    con <- curl(URLencode(url_string))
    rgc <- fromJSON(paste(readLines(con, warn = FALSE), collapse = ''))
    if(messaging) message('done.')
    close(con)

    # reverse geocoding results
    if(api == 'google'){
        # did reverse geocoding fail?
        if(rgc$status != 'OK'){
            warning(paste('reverse geocode failed with status ', gc$status, ',
                          location = "', latlng[1], ', ', latlng[2], '"', sep = ''),
                    call. = FALSE)
            return(data.frame(address = NA))
        }

        # more than one address found?
        if(length(rgc$results) > 1 && messaging){
            message(paste('more than one address found for "', latlng[1], ', ',
                          latlng[2],  '", reverse geocoding first ... ', sep = ''),
                    apppendLF = TRUE)
        }

        rgcdf <- with(rgc$results[[1]], {data.frame(address = formatted_address,
                                                    row.names = NULL)})
        for(i in seq_along(rgc$results[[1]]$address_components)){
            rgcdf <- cbind(rgcdf, rgc$results[[1]]$address_components[[i]]$long_name)
        }
        names(rgcdf) <- c('address', sapply(rgc$results[[1]]$address_components,
                                            function(l) l$types[1]))
    }
    if(api == 'baidu'){
        # did geocode fail?
        if(rgc$status != 0){
            warning(paste('geocode failed with status code ', rgc$status, ',
                          location = "', latlng[1], ', ', latlng[2],
                          '". see more details in the response code table of Baidu Geocoding API',
                          sep = ''), call. = FALSE)
            return(data.frame(address = NA))
        }

        rgcdf <- with(rgc$result, {
            data.frame(address = formatted_address,
                       street_number = NULLtoNA(addressComponent['street_number']),
                       street = NULLtoNA(addressComponent['street']),
                       district = NULLtoNA(addressComponent['district']),
                       city = NULLtoNA(addressComponent['city']),
                       province = NULLtoNA(addressComponent['province']),
                       row.names = NULL)})
    }

    if (output == 'address') return(rgcdf['address'])
    if (output == 'addressc') return(rgcdf)
}
madlogos/aseshms documentation built on May 21, 2019, 11:03 a.m.