R/GridToMap-package.r

    

library(maptools)
library(rgeos)
library(fields)
library(ggplot2)


#-------------------------------------------------------------------------------------------------
#' Grid to Map
#'
#' This function downloads Census county data and links each county to grid cells. The linking is area weighted to each grid.
#'
#' @param method Method to map counties and grids. `Overlap' (default) calculate the overlap in the area of each spacial polygon. `Points' approximates the calculation by generating random points in each county and linking those to the nearest grid centroid. Points is more robust to invalid spatial polygons and irregularly spaced grid.
#' @param lon A numeric vector of numeric longitudes for the centroids of each gridcell
#' @param lat A numeric vector of numeric lattitudes for the centroids of each gridcell
#' @param drop.states A character vector of two letter state abreviations to be dropped from the analysis. 
#' @param keep.states A list of states to keep.
#' @param path A path to store data.
#' @param res Resolution of the county files.  The choices are `500k' (default), `5m,' and `20m' which translate to 500k = 1:500,000, 5m = 1:5,000,000, and 20m = 1:20,000,000. This controls how precise the counties are defined.
#' @param year The year of the county file.  Default is 2010. They are available for each decenial census and 2013.
#' @param dataframe A logical with TRUE indicating to return a dataframe of shapes and FALSE (default) indicating to return an object of class SpatialPolygons
#' @param proj4string A CRS class.
#' @param grid.tolerace Grid tolerance used to make sure sure the grid is evenly spaced. Default is 10^10. For overlap method only.
#' @param n.points Number of points generated in each county for points method. The large the number the more precise but slower it will be
#' @author Ander Wilson
#' @export
#' @examples
#' data(GridToMapSampleData)
#' lon <- GridToMapSampleData$names$lon
#' lat <- GridToMapSampleData$names$lat
#' ma.overlap<- GridToMap(lon=lon, lat=lat, method="overlap",keep.states=c("MA"))



GridToMap <- function(
method = "overlap",
lon,
lat,
drop.states,
keep.states,
path = "LingGridToCountyCensusData",
res = "500k",
year = 2010,
download=TRUE,
proj4string = CRS("+proj=longlat +datum=wgs84"),
grid.tolerace = 10^(-10),
n.points = 100000
){
    
    #-------------------------------------------------------------------------------------------------
    if(missing(path)) path <- "LingGridToCountyCensusData"
    
    
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    #
    # PART 1
    #
    # Load cencus data (https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html)
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    
    #-------------------------------------------------------------------------------------------------
    # data prep foun county data
    
    dat <- GetCountyData(year=year, res=res, drop.states=drop.states, keep.states=keep.states, path=path, download=download)
    if(is.null(dat)){
      return()
    }else{
      shape.sp <- dat$shape
      shape.ids <- dat$info
    }
    
    
    
    
#     
#     shape.file.name <- paste0("gz_",year,"_us_050_00_",res)
#     
#     #read shape file
#     dir.create("LingGridToCountyCensusData", showWarnings = FALSE)
#     download.file(url=paste0("http://www2.census.gov/geo/tiger/GENZ",year,"/",shape.file.name,".zip"), destfile=paste0(path,"/",shape.file.name,".zip"))
#     unzip(zipfile=paste0(path,"/",shape.file.name,".zip"),exdir=path)
#     
#     shape.spdf <- readShapeSpatial(paste0(path,"/",shape.file.name,".shp"), proj4string=proj4string, IDvar="GEO_ID")
#     
#     
#     #create data frame with county and state ids
#     shape.ids <- data.frame(state=shape.spdf$STATE, county=shape.spdf$COUNTY, name=shape.spdf$NAME, GEO_ID=shape.spdf$GEO_ID, LSAD=shape.spdf$LSAD)
#     
#     #get state names
#     download.file(url="http://quickfacts.census.gov/qfd/download/FIPS_CountyName.txt", destfile=paste0(path,"/FIPS_CountyName.txt"))
#     FIPS_CountyName <- data.frame(input = read.delim(file=paste0(path,"/FIPS_CountyName.txt"), header=FALSE, sep=";")$V1)
#     #remove special characters
#     FIPS_CountyName$input <- gsub("\xb1",'n',FIPS_CountyName$input)
#     FIPS_CountyName$state.name <- FIPS_CountyName$county.name <- ""
#     FIPS_CountyName$state <- substr(FIPS_CountyName$input,1,2)
#     for(i in 1:nrow(FIPS_CountyName)){
#         temp <- gregexpr(",",FIPS_CountyName$input[i])[[1]][1]
#         if(temp>0){
#             FIPS_CountyName$state.name[i] <- substr(FIPS_CountyName$input[i],temp+2,temp+3)
#             FIPS_CountyName$county.name[i] <- substr(FIPS_CountyName$input[i],7,temp-1)
#         }
#     }
#     
#     state_codes <- subset(FIPS_CountyName, state.name!="", c("state","state.name"))
#     state_codes <- state_codes[!duplicated(state_codes[,c("state")]),]
#     
#     #remove unwanted states
#     if(!missing(drop.states)) state_codes <- state_codes[-which(state_codes$state.name%in%c(drop.states)),]
#     if(!missing(keep.states)) state_codes <- state_codes[which(state_codes$state.name%in%c(keep.states)),]
#     #merge and only keep data for states in both files
#     shape.ids <- merge(x=shape.ids, y=state_codes)
#     row.names(shape.ids) <- shape.ids$GEO_ID
#     print(paste0("Data in ", length(unique(state_codes$state.name)), " states: ",paste0(unique(state_codes$state.name), collapse=",")))
#     
#     
#     #conver from SpatialPolygon*DataFrame to SpatialPolygon
#     shape.sp <- SpatialPolygons(  shape.spdf@polygons    ,  proj4string=proj4string  )
#     shape.sp <- shape.sp[shape.ids$GEO_ID]
#     
#     
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    #
    # PART 2
    #
    # Load cliamte data
    #
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    
    
    if(method=="overlap" & var(lon[-1]-lon[-length(lon)])<grid.tolerace & var(lat[-1]-lat[-length(lat)])<grid.tolerace){
        long.steps <- abs((lon[1]-lon[length(lon)])/(length(lon)-1))
        lat.steps <- abs((lat[1]-lat[length(lat)])/(length(lat)-1))
    }else if(method=="overlap"){
        message("Warning: grid values do not appear to be even. Either use random points method or increase grid.tolerace")
        return()
    }else if(method=="points"){
        long.steps <- max(lon[-1]-lon[-length(lon)])
        lat.steps <- max(lat[-1]-lat[-length(lat)])
    }
    
    
    
    range.map <- round(apply(coordinates(shape.sp),2,range)) + matrix(c(-2*max(long.steps),2*max(long.steps),-2*max(lat.steps),2*max(lat.steps)),2,2)
    
    
    #cut the grid to roughtly fit the lower 48 (just makes a smaller file to work with)
    centroids <- as.data.frame(expand.grid(lon,lat[length(lat):1]))
    centroids <- cbind(centroids, as.data.frame(expand.grid(1:length(lon),length(lat):1)))
    colnames(centroids) <- c("lon", "lat", "row","col")
    row.names(centroids) <- paste(paste0("r",centroids$row),paste0("c",centroids$col),sep="_")
    centroids <- subset(centroids, lon < range.map[2,1] + 2 & lon > range.map[1,1] - 2 & lat < range.map[2,2] + 2 & lat > range.map[1,2] - 2)
    
    if(nrow(centroids)==0){
        message("No grid cell cells over map.")
        return()
    }
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    #
    # PART 3a
    #
    # Find overlap area
    # gArea will give a warning "In RGEOSMiscFunc(spgeom, byid, "rgeos_area") :
    #       Spatial object is not projected; GEOS expects planar coordinates." This is okay.
    #
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    
    if(method=="overlap"){
        #expand to grid format (make sure to specify grid size)
        grd <- GridTopology(apply(centroids[,c("lon","lat")],2,min), c(long.steps,lat.steps), c(length(unique(centroids$lon)),length(unique(centroids$lat))))
        gridtop <- as.SpatialPolygons.GridTopology(grd, proj4string=proj4string)
        gtc <- coordinates(gridtop)
        
        qtl <- round(seq(1,length(names(shape.sp)),length=11))[-1]
        weights <- list()
        max.dev1 <- 0
        for(c in names(shape.sp)){
            if(which(names(shape.sp)==c)%in%qtl) message(paste0(10*which(qtl==which(names(shape.sp)==c)),"% of counties complete."))
            
            weights[[c]]$GEO_ID <- c
            weights[[c]]$info <- shape.ids[c,]
            
            for(g in names(which((gtc[,1]-coordinates(shape.sp[c])[1])^2 +  (gtc[,2]-coordinates(shape.sp[c])[2])^2 < sum((c(-1,1)%*%apply(shape.sp[c]@polygons[[1]]@Polygons[[1]]@coords,2,range) + 2*c(max(long.steps),max(lat.steps)))^2) ))){
                inter <- gIntersection(shape.sp[c], gridtop[g])
                if(!is.null(inter)){
                    weights[[c]]$weight <- c(weights[[c]]$weight,suppressWarnings(gArea(inter)/gArea(shape.sp[c])))
                    weights[[c]]$cells <- rbind(weights[[c]]$cells,centroids[as.numeric(substr(g,2,1000000L)),])
                }
            }
            weights[[c]]$cells$weight <- weights[[c]]$weight
            weights[[c]]$weight <- sum(weights[[c]]$weight)
            max.dev1 <- max(max.dev1,abs(weights[[c]]$weight-1))
        }
    }
    
    
    
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    #
    # PART 3b
    #
    # Find overlap area using random points.
    #
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    
    if(method=="points"){
        
        n.points <- as.integer(round(n.points))
        qtl <- round(seq(1,length(names(shape.sp)),length=11))[-1]
        weights <- list()
        max.dev1 <- 0
        for(c in names(shape.sp)){
            
            if(which(names(shape.sp)==c)%in%qtl) message(paste0(10*which(qtl==which(names(shape.sp)==c)),"% of counties complete."))
            limited.centroids <- which((centroids[,1]-coordinates(shape.sp[c])[1])^2 +  (centroids[,2]-coordinates(shape.sp[c])[2])^2 < sum((c(-1,1)%*%apply(shape.sp[c]@polygons[[1]]@Polygons[[1]]@coords,2,range) + 2*c(max(long.steps),max(lat.steps)))^2) )
            
            D <- rdist.earth(centroids[limited.centroids,c("lon","lat")], coordinates(spsample(shape.sp[c], n.points, type = "random")) )
            weights[[c]]$GEO_ID <- c
            weights[[c]]$info <- shape.ids[c,]
            weights[[c]]$weight <- table(apply(D,2,which.min))/n.points
            weights[[c]]$cells <- centroids[limited.centroids[as.numeric(names(weights[[c]]$weight))],]
            weights[[c]]$cells$weight <- weights[[c]]$weight
            weights[[c]]$weight <- sum(weights[[c]]$weight)
            max.dev1 <- max(max.dev1,abs(weights[[c]]$weight-1))
        }
        
    }
    
    
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    #
    # PART 4
    #
    # convert to more efficient format
    #
    #-------------------------------------------------------------------------------------------------
    #-------------------------------------------------------------------------------------------------
    
    
    return(
    list(weights=weights, max.dev1=max.dev1, shapes=shape.sp, shape.info=shape.ids,
    details=
    list(shape.file=dat$shape.file,
    county.info="http://quickfacts.census.gov/qfd/download/FIPS_CountyName.txt",
    lon=lon,
    lat=lat,
    method=method,
    n.points=n.points,
    proj4string=proj4string
    ))
    )
    
}





#-------------------------------------------------------------------------------------------------
#' Plot Grid to Map Link
#'
#' This function plots a county and the monitors grid centroids it is linked to. Each circle is a grid centroid assigned non-zero weight. The amont of the circle filled in is proportional to the weight with a full circle having 100% of the weight.
#'
#' @param link A linking from the function GridToMap
#' @param count The name of a county
#' @export
#' @examples
#' data(GridToMapSampleData)
#' lon <- GridToMapSampleData$names$lon
#' lat <- GridToMapSampleData$names$lat
#' ma.overlap<- GridToMap(lon=lon, lat=lat, method="overlap",keep.states=c("MA"))
#' plot.link(ma.overlap,1)

plot.link <- function(link,county){
    if(length(county)>1) county <- county[1]
    shape.temp <- link$shapes[county]
    shape.temp@bbox["x","min"] <- min(shape.temp@bbox["x","min"],link$weights[[county]]$cells$lon)
    shape.temp@bbox["x","max"] <- max(shape.temp@bbox["x","max"],link$weights[[county]]$cells$lon)
    shape.temp@bbox["y","min"] <- min(shape.temp@bbox["y","min"],link$weights[[county]]$cells$lat)
    shape.temp@bbox["y","max"] <- max(shape.temp@bbox["y","max"],link$weights[[county]]$cells$lat)
    plot(shape.temp)
    points(link$weights[[county]]$cells$lat~link$weights[[county]]$cells$lon, pch=1, cex=2)
    points(link$weights[[county]]$cells$lat~link$weights[[county]]$cells$lon, pch=16, cex=2*link$weights[[county]]$cells$weight)
}





#-------------------------------------------------------------------------------------------------
#' Make Time Series
#'
#' This function creates a time series data for eah county.
#'
#' @param link A linking from the function GridToMap
#' @param data A 3D array of data where the dimensions are lon, lat, and time.
#' @param names A list of the dimnames for data. The list should have elements names lon, lat, and time. Lon and lat are numeric vectors indicating the longitude and latitude for the rows. Time is vector of dates or times that the data correspond to. This function will not work with they are not the propoer lengths and named lon, lat, and time.
#' @author Ander Wilson
#' @export
#' @examples
#' data(GridToMapSampleData)
#' lon <- GridToMapSampleData$names$lon
#' lat <- GridToMapSampleData$names$lat
#' ma.overlap<- GridToMap(lon=lon, lat=lat, method="overlap",keep.states=c("MA"))
#' names <- GridToMapSampleData$names
#' data <- GridToMapSampleData$data
#' ts.data <- MakeTimeSeries(link = ma.overlap, data=data, names=names)
#' head(ts.data)



MakeTimeSeries <- function(
link,
data,
names
){
    
    if(length(dim(data))!=3){
        message("Data should be dimension 3.")
        return()
    }
    
    if(any(dim(data)!=unlist(lapply(names,length)))){
        message("names must be the length of the dimension of data and in the same order.")
        return()
    }
    
    if(!is.numeric(names$lat) | !is.numeric(names$lon)){
        message("lat and lon must be numberic vectors.")
        return()
    }
    if(any(link$details$lon!=names$lon) | any(link$details$lat!=names$lat)){
        message("Link lon and lat do not match the names lon and lat.")
        return()
    }
    
    #check order of data
    data <- aperm(data, perm = c(which(names(names)=="lon"), which(names(names)=="lat"), which(names(names)=="time")))
    names <- names[c("lon","lat","time")]
    
    
    series <- data.frame(row.names=names$time) 
    for(c in names(link$weights)){
        temp <- NULL
        if(!is.null(nrow(link$weights[[c]]$cells))){
            for(i in 1:nrow(link$weights[[c]]$cells)){
                temp <- cbind(temp,data[link$weights[[c]]$cells$row[i],link$weights[[c]]$cells$col[i],])
            }
            series[,c] <- temp%*%link$weights[[c]]$cells$weight
        }else{
            series[,c] <- NA
        }
    }
    
    
    return(series)
}



#' Simulated Climate Data
#'
#' A dataset white noise over a lon-lat grid. This is a list with the following objects
#'
#' \itemize{
#' \item data. An array of data
#' \item names. Names of the dimensions including lon, lat, and time.
#' }
#'
#' @docType data
#' @keywords datasets
#' @name GridToMapSampleData
#' @usage data(GridToMapSampleData)
#' @format A list
NULL





#-------------------------------------------------------------------------------------------------
#' Get County Data
#'
#' This function downloads the county data or loads county data from a file.
#'
#' @param res Resolution of the county files.  The choices are `500k' (default), `5m,' and `20m' which translate to 500k = 1:500,000, 5m = 1:5,000,000, and 20m = 1:20,000,000. This controls how precise the counties are defined.
#' @param year The year of the county file.  Default is 2010. They are available for each decenial census and 2013.
#' @param drop.states A character vector of two letter state abreviations to be dropped from the analysis. 
#' @param keep.states A list of states to keep.
#' @param download A logical with TRUE (default) indicates to download data from census.gov and FALSE uses data from path.
#' @param dataframe A logical with TRUE indicating to return a dataframe of shapes and FALSE (default) indicating to return an object of class SpatialPolygons
#' @param path An optional path to store downloaded data or to open data from.
#' @param proj4string = CRS("+proj=longlat +datum=wgs84"),
#' @author Ander Wilson
#' @export
#' @examples
#' dat <- GetCountyData(year=2010, res="20m")


GetCountyData <- function(
  year=2010,
  res="20m",
  drop.states,
  keep.states,
  download=TRUE,
  dataframe=FALSE,
  path,
  proj4string = CRS("+proj=longlat +datum=wgs84")
){
  
  
  if(missing(path)) path <- "LingGridToCountyCensusData"
  shape.file.name <- paste0("gz_",year,"_us_050_00_",res)
  
  #read shape file
  if(download){
    #shape file
    dir.create("LingGridToCountyCensusData", showWarnings = FALSE)
    download.file(url=paste0("http://www2.census.gov/geo/tiger/GENZ",year,"/",shape.file.name,".zip"), destfile=paste0(path,"/",shape.file.name,".zip"))
    unzip(zipfile=paste0(path,"/",shape.file.name,".zip"),exdir=path)
    
    #other county info
    download.file(url="http://quickfacts.census.gov/qfd/download/FIPS_CountyName.txt", destfile=paste0(path,"/FIPS_CountyName.txt"))
  }
  
  #make sure files are there.
  if(length(Sys.glob(paste0(path,"/FIPS_CountyName.txt")))==0){
    message("No file FIPS_CountyName.txt found in path.")
    return()
  }
  if(length(Sys.glob(paste0(path,"/",shape.file.name,".shp")))==0){
    message("No shape file found in path.")
    return()
  }
  
  
  #read data
  shape.spdf <- readShapeSpatial(paste0(path,"/",shape.file.name,".shp"), proj4string=proj4string, IDvar="GEO_ID")
  FIPS_CountyName <- data.frame(input = read.delim(file=paste0(path,"/FIPS_CountyName.txt"), header=FALSE, sep=";")$V1)
  
  
  #remove special characters
  FIPS_CountyName$input <- gsub("\xb1",'n',FIPS_CountyName$input)
  FIPS_CountyName$state.name <- FIPS_CountyName$county.name <- ""
  FIPS_CountyName$state <- substr(FIPS_CountyName$input,1,2)
  for(i in 1:nrow(FIPS_CountyName)){
    temp <- gregexpr(",",FIPS_CountyName$input[i])[[1]][1]
    if(temp>0){
      FIPS_CountyName$state.name[i] <- substr(FIPS_CountyName$input[i],temp+2,temp+3)
      FIPS_CountyName$county.name[i] <- substr(FIPS_CountyName$input[i],7,temp-1)
    }
  }
  
  state_codes <- subset(FIPS_CountyName, state.name!="", c("state","state.name"))
  state_codes <- state_codes[!duplicated(state_codes[,c("state")]),]
  state_codes <- state_codes[which(state_codes$state.name!="PR"),]
  
  #remove unwanted states
  if(!missing(drop.states)) state_codes <- state_codes[-which(state_codes$state.name%in%c(drop.states)),]
  if(!missing(keep.states)) state_codes <- state_codes[which(state_codes$state.name%in%c(keep.states)),]
  
  print(paste0("Data in ", length(unique(state_codes$state.name)), " states: ",paste0(unique(state_codes$state.name), collapse=",")))
  
  
  #create data frame with county and state ids
  shape.ids <- data.frame(state=shape.spdf$STATE, county=shape.spdf$COUNTY, name=shape.spdf$NAME, GEO_ID=shape.spdf$GEO_ID, LSAD=shape.spdf$LSAD)
  shape.ids <- merge(x=shape.ids, y=state_codes)
  row.names(shape.ids) <- shape.ids$GEO_ID

   
  #conver from SpatialPolygon*DataFrame to SpatialPolygon
  shape.sp <- SpatialPolygons(  shape.spdf@polygons    ,  proj4string=proj4string  )
  shape.sp <- shape.sp[shape.ids$GEO_ID]
  
  shape.sp <- shape.sp[names(shape.sp)[which(names(shape.sp)%in%row.names(shape.ids))]]
  shape.ids <- shape.ids[which(row.names(shape.ids)%in%names(shape.sp)),]
  
  if(dataframe){
    shape.sp <- fortify(shape.sp)
    colnames(shape.sp)[which(colnames(shape.sp)=="id")] <- "GEO_ID"
  }
  
  return(list(shape=shape.sp, info=shape.ids, shape.file=shape.file.name))
}
AnderWilson/GridToMaps documentation built on May 5, 2019, 4:56 a.m.