ccafs introduction

ccafs provides access to Climate Change, Agriculture, and Food Security (CCAFS) General Circulation Models (GCM) data. The data is stored in Amazon S3, and this package provides functions to search for and fetch it.

Potential users range from scientists asking questions about climate change to scientists or companies projecting future crop yields.

Install

CRAN version

install.packages("ccafs")

Dev version

remotes::install_github("ropensci/ccafs")
library("ccafs")

Search

Search CCAFS

You can search by the numbers representing each possible value for each parameter. See ?'ccafs-search' for help on that.

(res <- cc_search(file_set = 4, scenario = 6, model = 2, extent = "global",
  format = "ascii", period = 5, variable = 2, resolution = 3))
#> [1] "http://gisweb.ciat.cgiar.org/ccafs_climate/files/data/ipcc_4ar_ciat/sres_b1/2040s/bccr_bcm2_0/5min/bccr_bcm2_0_sres_b1_2040s_prec_5min_no_tile_asc.zip"

Some searches, like the one above, result in one URL, while others, like the one below, result in many URLs.

res <- cc_search(file_set = 12, extent = "global", format = "ascii",
  period = 4, variable = 1, resolution = 4)
length(res)
#> [1] 110
res[1:5]
#> [1] "http://datacgiar.s3.amazonaws.com/ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/rcp2_6/2030s/bcc_csm1_1/10min/bcc_csm1_1_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc.zip"      
#> [2] "http://datacgiar.s3.amazonaws.com/ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/rcp2_6/2030s/bcc_csm1_1_m/10min/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc.zip"  
#> [3] "http://datacgiar.s3.amazonaws.com/ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/rcp2_6/2030s/bnu_esm/10min/bnu_esm_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc.zip"            
#> [4] "http://datacgiar.s3.amazonaws.com/ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/rcp2_6/2030s/cccma_canesm2/10min/cccma_canesm2_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc.zip"
#> [5] "http://datacgiar.s3.amazonaws.com/ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/rcp2_6/2030s/cesm1_cam5/10min/cesm1_cam5_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc.zip"

Once we have some URLs, we can fetch the files at those URLs

One at a time

cc_data_fetch(res[1], progress = FALSE)
#> 
#> <CCAFS GCM files>
#>    19 files
#>    Base dir: /bcc_csm1_1_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc
#>    File types (count): 
#>      - .asc: 19

Or many at once

lapply(res[1:3], cc_data_fetch, progress = FALSE)
#> [[1]]
#> 
#> <CCAFS GCM files>
#>    19 files
#>    Base dir: /bcc_csm1_1_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc
#>    File types (count): 
#>      - .asc: 19
#> 
#> [[2]]
#> 
#> <CCAFS GCM files>
#>    19 files
#>    Base dir: /bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc
#>    File types (count): 
#>      - .asc: 19
#> 
#> [[3]]
#> 
#> <CCAFS GCM files>
#>    19 files
#>    Base dir: /bnu_esm_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc
#>    File types (count): 
#>      - .asc: 19

Alternatively, you can use the helper list where you can reference options by name; the downside is that this leads to very verbose code.

(res <- cc_search(file_set = cc_params$file_set$`Delta method IPCC AR4`,
                  scenario = cc_params$scenario$`SRES B1`,
                  model = cc_params$model$bccr_bcm2_0,
                  extent = cc_params$extent$global,
                  format = cc_params$format$ascii,
                  period = cc_params$period$`2040s`,
                  variable = cc_params$variable$Precipitation,
                  resolution = cc_params$resolution$`5 minutes`))
#> [1] "http://gisweb.ciat.cgiar.org/ccafs_climate/files/data/ipcc_4ar_ciat/sres_b1/2040s/bccr_bcm2_0/5min/bccr_bcm2_0_sres_b1_2040s_prec_5min_no_tile_asc.zip"

List keys

Keys are basically paths to files on Amazon S3's store of CCAFS files

cc_list_keys()
#> # A tibble: 1,000 x 7
#>    Key       LastModified  ETag   Size  Owner.ID   Owner.DisplayNa… StorageClass
#>    <chr>     <chr>         <chr>  <chr> <chr>      <chr>            <chr>       
#>  1 ccafs/    2014-02-28T1… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  2 ccafs/20… 2014-07-01T0… "\"d7… 1171  b1e110da4… amazonec2public… STANDARD    
#>  3 ccafs/am… 2014-02-28T1… "\"1d… 1108… b1e110da4… amazonec2public… STANDARD    
#>  4 ccafs/cc… 2014-07-15T1… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  5 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  6 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  7 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  8 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  9 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#> 10 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#> # … with 990 more rows

Max keys

cc_list_keys(max = 3)
#> # A tibble: 3 x 7
#>   Key      LastModified  ETag    Size  Owner.ID    Owner.DisplayNa… StorageClass
#>   <chr>    <chr>         <chr>   <chr> <chr>       <chr>            <chr>       
#> 1 ccafs/   2014-02-28T1… "\"d41… 0     b1e110da4d… amazonec2public… STANDARD    
#> 2 ccafs/2… 2014-07-01T0… "\"d72… 1171  b1e110da4d… amazonec2public… STANDARD    
#> 3 ccafs/a… 2014-02-28T1… "\"1db… 1108… b1e110da4d… amazonec2public… STANDARD

Key prefix

cc_list_keys(prefix = "ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/")
#> # A tibble: 1,000 x 7
#>    Key       LastModified  ETag   Size  Owner.ID   Owner.DisplayNa… StorageClass
#>    <chr>     <chr>         <chr>  <chr> <chr>      <chr>            <chr>       
#>  1 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  2 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  3 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  4 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  5 ccafs/cc… 2014-11-10T2… "\"d4… 0     b1e110da4… amazonec2public… STANDARD    
#>  6 ccafs/cc… 2014-11-10T2… "\"6d… 1113… b1e110da4… amazonec2public… STANDARD    
#>  7 ccafs/cc… 2014-11-10T2… "\"cd… 9684… b1e110da4… amazonec2public… STANDARD    
#>  8 ccafs/cc… 2014-11-10T2… "\"d2… 1125… b1e110da4… amazonec2public… STANDARD    
#>  9 ccafs/cc… 2014-11-10T2… "\"8a… 90200 b1e110da4… amazonec2public… STANDARD    
#> 10 ccafs/cc… 2014-11-10T2… "\"e3… 5411… b1e110da4… amazonec2public… STANDARD    
#> # … with 990 more rows

You can find certain files easily. Here, we'll find zip files

res <- cc_list_keys()
zips <- grep("\\.zip", res$Key, value = TRUE)
zips[1:5]
#> [1] "ccafs/ccafs-climate/data/eta/eta_south_america/baseline/1970s/hadcm_cntrl/20min/hadcm_cntrl_baseline_1970s_prec_20min_sa_eta_asc.zip" 
#> [2] "ccafs/ccafs-climate/data/eta/eta_south_america/baseline/1970s/hadcm_cntrl/20min/hadcm_cntrl_baseline_1970s_tmax_20min_sa_eta_asc.zip" 
#> [3] "ccafs/ccafs-climate/data/eta/eta_south_america/baseline/1970s/hadcm_cntrl/20min/hadcm_cntrl_baseline_1970s_tmean_20min_sa_eta_asc.zip"
#> [4] "ccafs/ccafs-climate/data/eta/eta_south_america/baseline/1970s/hadcm_cntrl/20min/hadcm_cntrl_baseline_1970s_tmin_20min_sa_eta_asc.zip" 
#> [5] "ccafs/ccafs-climate/data/eta/eta_south_america/baseline/1970s/hadcm_high/20min/hadcm_high_baseline_1970s_prec_20min_sa_eta_asc.zip"

You can then use these keys to fetch data, as shown below.

Fetch some data

key <- "ccafs/ccafs-climate/data/ipcc_5ar_ciat_downscaled/rcp2_6/
  2030s/bcc_csm1_1_m/10min/
  bcc_csm1_1_m_rcp2_6_2030s_prec_10min_r1i1p1_no_tile_asc.zip"
#> 
#> <CCAFS GCM files>
#>    12 files
#>    Base dir: /bcc_csm1_1_m_rcp2_6_2030s_prec_10min_r1i1p1_no_tile_asc
#>    File types (count): 
#>      - .asc: 12
(res <- cc_data_fetch(key = key, progress = FALSE))

Using the above set of zips, fetch some data:

x <- cc_data_fetch(zips[1], progress = FALSE)
cc_data_read(x[1])
#> class      : RasterLayer 
#> dimensions : 151, 141, 21291  (nrow, ncol, ncell)
#> resolution : 0.400002, 0.400002  (x, y)
#> extent     : -81.99999, -25.59971, -49.20001, 11.2003  (xmin, xmax, ymin, ymax)
#> crs        : NA 
#> source     : /Users/sckott/Library/Caches/ccafs/hadcm_cntrl_baseline_1970s_prec_20min_sa_eta_asc/prec_1.asc 
#> names      : prec_1

Caching

When requesting data, we first check if you already have that data cached. You'll know when this happens as the request will finish much faster when you have data cached already.

You can list cached files

cc_cache_list()[1:10]
#>  [1] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc"           
#>  [2] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc.zip"       
#>  [3] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_1.asc" 
#>  [4] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_10.asc"
#>  [5] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_11.asc"
#>  [6] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_12.asc"
#>  [7] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_13.asc"
#>  [8] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_14.asc"
#>  [9] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_15.asc"
#> [10] "/Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_bio_10min_r1i1p1_no_tile_asc/bio_16.asc"

See ?cc_cache for all the cache management functions.

Read data

After fetching data, you need to read the data into a raster object: a RasterLayer for a single file, or a RasterStack for multiple files

You can read a single file

cc_data_read(res[1])
#> class      : RasterLayer 
#> dimensions : 900, 2160, 1944000  (nrow, ncol, ncell)
#> resolution : 0.1666667, 0.1666667  (x, y)
#> extent     : -180, 180, -60, 90  (xmin, xmax, ymin, ymax)
#> crs        : NA 
#> source     : /Users/sckott/Library/Caches/ccafs/bcc_csm1_1_m_rcp2_6_2030s_prec_10min_r1i1p1_no_tile_asc/prec_1.asc 
#> names      : prec_1 
#> values     : -2147483648, 2147483647  (min, max)

many files

cc_data_read(res[1:2])
#> class      : RasterStack 
#> dimensions : 900, 2160, 1944000, 2  (nrow, ncol, ncell, nlayers)
#> resolution : 0.1666667, 0.1666667  (x, y)
#> extent     : -180, 180, -60, 90  (xmin, xmax, ymin, ymax)
#> crs        : NA 
#> names      :      prec_1,     prec_10 
#> min values : -2147483648, -2147483648 
#> max values :  2147483647,  2147483647

or all files

cc_data_read(res)
#> class      : RasterStack 
#> dimensions : 900, 2160, 1944000, 12  (nrow, ncol, ncell, nlayers)
#> resolution : 0.1666667, 0.1666667  (x, y)
#> extent     : -180, 180, -60, 90  (xmin, xmax, ymin, ymax)
#> crs        : NA 
#> names      :      prec_1,     prec_10,     prec_11,     prec_12,      prec_2,      prec_3,      prec_4,      prec_5,      prec_6,      prec_7,      prec_8,      prec_9 
#> min values : -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648 
#> max values :  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647,  2147483647

Visualize

library("raster")
plot(cc_data_read(res[1:6]))

plot of chunk unnamed-chunk-23

For more control over visualizations of raster data, check out the rasterVis package.



Try the ccafs package in your browser

Any scripts or data that you put into this service are public.

ccafs documentation built on Aug. 26, 2020, 5:15 p.m.