Interpolating

Packages

You'll need several packages from the tidyverse in addition to weathercan to complete the following analysis.

library(weathercan)
library(ggplot2)
library(dplyr)

General usage

You can merge weather data with other data frames by linearly interpolating between points.

For example, here we have a dataset of weather data from Kamloops

glimpse(kamloops)
## Rows: 4,368
## Columns: 37
## $ station_name     <chr> "KAMLOOPS A", "KAMLOOPS A", "KAMLOOPS A", "KAMLOOPS A", "KAMLOOPS A", "KAMLOOPS A", "KAMLO…
## $ station_id       <dbl> 51423, 51423, 51423, 51423, 51423, 51423, 51423, 51423, 51423, 51423, 51423, 51423, 51423,…
## $ station_operator <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ prov             <chr> "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", "BC", …
## $ lat              <dbl> 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, 50.7, …
## $ lon              <dbl> -120.45, -120.45, -120.45, -120.45, -120.45, -120.45, -120.45, -120.45, -120.45, -120.45, …
## $ elev             <dbl> 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3, 345.3,…
## $ climate_id       <chr> "1163781", "1163781", "1163781", "1163781", "1163781", "1163781", "1163781", "1163781", "1…
## $ WMO_id           <chr> "71887", "71887", "71887", "71887", "71887", "71887", "71887", "71887", "71887", "71887", …
## $ TC_id            <chr> "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA", "YKA",…
## $ date             <date> 2016-01-01, 2016-01-01, 2016-01-01, 2016-01-01, 2016-01-01, 2016-01-01, 2016-01-01, 2016-…
## $ time             <dttm> 2016-01-01 00:00:00, 2016-01-01 01:00:00, 2016-01-01 02:00:00, 2016-01-01 03:00:00, 2016-…
## $ year             <chr> "2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", "2…
## $ month            <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", …
## $ day              <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", …
## $ hour             <chr> "00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", "07:00", "08:00", "09:00", …
## $ weather          <chr> NA, "Mostly Cloudy", NA, NA, "Cloudy", NA, NA, "Cloudy", NA, "Snow", "Snow", "Snow", "Snow…
## $ hmdx             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ hmdx_flag        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ precip_amt       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ precip_amt_flag  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ pressure         <dbl> 99.95, 99.93, 99.92, 99.90, 99.86, 99.82, 99.80, 99.78, 99.77, 99.78, 99.79, 99.74, 99.69,…
## $ pressure_flag    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ rel_hum          <dbl> 74, 76, 74, 73, 70, 71, 69, 69, 71, 71, 71, 70, 69, 70, 68, 68, 70, 74, 73, 74, 74, 74, 77…
## $ rel_hum_flag     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ temp             <dbl> -9.1, -9.6, -9.9, -9.5, -9.4, -9.8, -10.0, -10.2, -10.1, -9.7, -9.4, -9.0, -8.6, -8.2, -8.…
## $ temp_dew         <dbl> -12.9, -13.1, -13.7, -13.5, -13.9, -14.1, -14.7, -14.9, -14.4, -14.0, -13.7, -13.5, -13.3,…
## $ temp_dew_flag    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ temp_flag        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ visib            <dbl> 64.4, 64.4, 64.4, 64.4, 64.4, 64.4, 64.4, 64.4, 48.3, 48.3, 48.3, 48.3, 48.3, 48.3, 48.3, …
## $ visib_flag       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_chill       <dbl> -17, -17, -18, -17, -17, -17, -18, -17, -17, -16, -15, -14, -14, -13, -13, -13, -13, -14, …
## $ wind_chill_flag  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_dir         <dbl> 13, 11, 11, 11, 11, 10, 9, 7, 7, 10, 11, 10, 10, 13, 11, 10, 10, 9, 12, 10, 13, 12, 10, 12…
## $ wind_dir_flag    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wind_spd         <dbl> 19, 20, 20, 18, 18, 16, 23, 15, 14, 15, 12, 11, 12, 9, 10, 12, 11, 12, 10, 11, 11, 6, 6, 4…
## $ wind_spd_flag    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

As well as a data set of finch visits to an RFID feeder

glimpse(finches)
## Rows: 16,886
## Columns: 10
## $ animal_id <fct> 041868FF93, 041868FF93, 041868FF93, 06200003BB, 06200003BB, 06200003BB, 06200003BB, 06200003BB, 0…
## $ date      <date> 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, …
## $ time      <dttm> 2016-03-01 06:57:42, 2016-03-01 06:58:41, 2016-03-01 07:07:21, 2016-03-01 07:32:34, 2016-03-01 0…
## $ logger_id <fct> 2300, 2300, 2300, 2400, 2400, 2400, 2400, 2400, 2300, 2300, 2300, 2300, 2300, 2400, 2300, 2400, 2…
## $ species   <chr> "Mountain Chickadee", "Mountain Chickadee", "Mountain Chickadee", "House Finch", "House Finch", "…
## $ age       <chr> "AHY", "AHY", "AHY", "SY", "SY", "SY", "SY", "SY", "AHY", "AHY", "AHY", "AHY", "AHY", "SY", "AHY"…
## $ sex       <chr> "U", "U", "U", "M", "M", "M", "M", "M", "F", "F", "F", "F", "F", "M", "F", "M", "M", "M", "M", "M…
## $ site_name <chr> "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "…
## $ lon       <dbl> -120.3622, -120.3622, -120.3622, -120.3635, -120.3635, -120.3635, -120.3635, -120.3635, -120.3622…
## $ lat       <dbl> 50.66967, 50.66967, 50.66967, 50.66938, 50.66938, 50.66938, 50.66938, 50.66938, 50.66967, 50.6696…

Although the times in the weather data do not exactly match those in the finch data, we can merge them together through linear interpolation. This function uses the approx function from the stats package under the hood.

Here we specify that we only want the temperature (temp) column:

finches_temperature <- weather_interp(data = finches, weather = kamloops, cols = "temp")
## temp is missing 4 out of 4368 data, interpolation may be less accurate as a result.
summary(finches_temperature)
##       animal_id         date                 time                     logger_id     species         
##  0620000513:7624   Min.   :2016-03-01   Min.   :2016-03-01 06:57:42   1500:6370   Length:16886      
##  041868D861:2767   1st Qu.:2016-03-05   1st Qu.:2016-03-05 13:54:13   2100: 968   Class :character  
##  0620000514:1844   Median :2016-03-09   Median :2016-03-09 16:54:47   2200:2266   Mode  :character  
##  06200004F8:1386   Mean   :2016-03-08   Mean   :2016-03-09 07:45:58   2300:3531                     
##  041868BED6: 944   3rd Qu.:2016-03-13   3rd Qu.:2016-03-13 08:24:58   2400:1477                     
##  06200003BB: 708   Max.   :2016-03-16   Max.   :2016-03-16 16:39:30   2700:2274                     
##  (Other)   :1613                                                                                    
##      age                sex             site_name              lon              lat             temp        
##  Length:16886       Length:16886       Length:16886       Min.   :-120.4   Min.   :50.67   Min.   :-0.2317  
##  Class :character   Class :character   Class :character   1st Qu.:-120.4   1st Qu.:50.67   1st Qu.: 5.0561  
##  Mode  :character   Mode  :character   Mode  :character   Median :-120.4   Median :50.67   Median : 7.1651  
##                                                           Mean   :-120.4   Mean   :50.67   Mean   : 7.4349  
##                                                           3rd Qu.:-120.4   3rd Qu.:50.67   3rd Qu.: 9.3319  
##                                                           Max.   :-120.4   Max.   :50.67   Max.   :16.3712  
## 
glimpse(finches_temperature)
## Rows: 16,886
## Columns: 11
## $ animal_id <fct> 041868FF93, 041868FF93, 041868FF93, 06200003BB, 06200003BB, 06200003BB, 06200003BB, 06200003BB, 0…
## $ date      <date> 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, …
## $ time      <dttm> 2016-03-01 06:57:42, 2016-03-01 06:58:41, 2016-03-01 07:07:21, 2016-03-01 07:32:34, 2016-03-01 0…
## $ logger_id <fct> 2300, 2300, 2300, 2400, 2400, 2400, 2400, 2400, 2300, 2300, 2300, 2300, 2300, 2400, 2300, 2400, 2…
## $ species   <chr> "Mountain Chickadee", "Mountain Chickadee", "Mountain Chickadee", "House Finch", "House Finch", "…
## $ age       <chr> "AHY", "AHY", "AHY", "SY", "SY", "SY", "SY", "SY", "AHY", "AHY", "AHY", "AHY", "AHY", "SY", "AHY"…
## $ sex       <chr> "U", "U", "U", "M", "M", "M", "M", "M", "F", "F", "F", "F", "F", "M", "F", "M", "M", "M", "M", "M…
## $ site_name <chr> "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "…
## $ lon       <dbl> -120.3622, -120.3622, -120.3622, -120.3635, -120.3635, -120.3635, -120.3635, -120.3635, -120.3622…
## $ lat       <dbl> 50.66967, 50.66967, 50.66967, 50.66938, 50.66938, 50.66938, 50.66938, 50.66938, 50.66967, 50.6696…
## $ temp      <dbl> 3.984667, 3.991222, 4.036750, 4.162833, 4.162917, 4.163000, 4.163083, 4.163167, 4.180417, 4.18075…
ggplot(data = finches_temperature, aes(x = temp, fill = animal_id)) +
  theme_bw() +
  theme(legend.position = "none") +
  geom_histogram(binwidth = 1) +
  labs(x = "Temperature (C)", y = "Activity Count", fill = "Finch ID")

plot of chunk unnamed-chunk-4

Or summarized:

finches_temperature <- finches_temperature %>%
  group_by(date) %>%
  summarize(n = length(time),
            temp = mean(temp))

ggplot(data = finches_temperature, aes(x = date, y = n)) +
  theme_bw() +
  theme(legend.position = "top") +
  geom_point(aes(shape = "Activity")) +
  geom_line(aes(y = temp * 100, colour = "Temperature")) +
  scale_colour_discrete(name = "") +
  scale_shape_discrete(name = "") +
  scale_y_continuous(name = "Activity", sec.axis = sec_axis(~. / 100, name = "Temperature (C)"))

plot of chunk unnamed-chunk-5

Data gaps

By default, gaps of 2 hours (or 2 days, with a daily scale) will be interpolated over (i.e. they will be filled with values interpolated from either side of the gap), but longer gaps will be skipped and filled with NAs. You can adjust this behaviour with na_gap. Note that as Environment and Climate Change Canada data is downloaded on an hourly scale, it makes no sense to apply na_gap values of less than 1.

In this example, note the larger number of NAs in temp and how it corresponds to the missing variables in the weather dataset:

finches_temperature <- weather_interp(data = finches, weather = kamloops,
                                      cols = "temp", na_gap = 1)
## temp is missing 4 out of 4368 data, interpolation may be less accurate as a result.
summary(finches_temperature)
##       animal_id         date                 time                     logger_id     species         
##  0620000513:7624   Min.   :2016-03-01   Min.   :2016-03-01 06:57:42   1500:6370   Length:16886      
##  041868D861:2767   1st Qu.:2016-03-05   1st Qu.:2016-03-05 13:54:13   2100: 968   Class :character  
##  0620000514:1844   Median :2016-03-09   Median :2016-03-09 16:54:47   2200:2266   Mode  :character  
##  06200004F8:1386   Mean   :2016-03-08   Mean   :2016-03-09 07:45:58   2300:3531                     
##  041868BED6: 944   3rd Qu.:2016-03-13   3rd Qu.:2016-03-13 08:24:58   2400:1477                     
##  06200003BB: 708   Max.   :2016-03-16   Max.   :2016-03-16 16:39:30   2700:2274                     
##  (Other)   :1613                                                                                    
##      age                sex             site_name              lon              lat             temp        
##  Length:16886       Length:16886       Length:16886       Min.   :-120.4   Min.   :50.67   Min.   :-0.2317  
##  Class :character   Class :character   Class :character   1st Qu.:-120.4   1st Qu.:50.67   1st Qu.: 5.0746  
##  Mode  :character   Mode  :character   Mode  :character   Median :-120.4   Median :50.67   Median : 7.1668  
##                                                           Mean   :-120.4   Mean   :50.67   Mean   : 7.4433  
##                                                           3rd Qu.:-120.4   3rd Qu.:50.67   3rd Qu.: 9.3458  
##                                                           Max.   :-120.4   Max.   :50.67   Max.   :16.3712  
##                                                                                            NA's   :84
finches_temperature %>%
  select(date, time, temp) %>%
  filter(is.na(temp))
## # A tibble: 84 × 3
##    date       time                 temp
##    <date>     <dttm>              <dbl>
##  1 2016-03-10 2016-03-10 16:00:12    NA
##  2 2016-03-10 2016-03-10 16:00:33    NA
##  3 2016-03-10 2016-03-10 16:00:36    NA
##  4 2016-03-10 2016-03-10 16:00:39    NA
##  5 2016-03-10 2016-03-10 16:00:42    NA
##  6 2016-03-10 2016-03-10 16:00:45    NA
##  7 2016-03-10 2016-03-10 16:00:48    NA
##  8 2016-03-10 2016-03-10 16:00:51    NA
##  9 2016-03-10 2016-03-10 16:00:54    NA
## 10 2016-03-10 2016-03-10 16:00:57    NA
## # … with 74 more rows
kamloops %>%
  select(time, temp) %>%
  filter(is.na(temp))
## # A tibble: 4 × 2
##   time                 temp
##   <dttm>              <dbl>
## 1 2016-02-11 19:00:00    NA
## 2 2016-03-08 13:00:00    NA
## 3 2016-03-11 01:00:00    NA
## 4 2016-04-09 00:00:00    NA

Multiple weather columns

We could also add in more than one column at a time:

finches_weather <- weather_interp(data = finches, weather = kamloops,
                                  cols = c("temp", "wind_spd"))
## temp is missing 4 out of 4368 data, interpolation may be less accurate as a result.
## wind_spd is missing 4 out of 4368 data, interpolation may be less accurate as a result.
summary(finches_weather)
##       animal_id         date                 time                     logger_id     species         
##  0620000513:7624   Min.   :2016-03-01   Min.   :2016-03-01 06:57:42   1500:6370   Length:16886      
##  041868D861:2767   1st Qu.:2016-03-05   1st Qu.:2016-03-05 13:54:13   2100: 968   Class :character  
##  0620000514:1844   Median :2016-03-09   Median :2016-03-09 16:54:47   2200:2266   Mode  :character  
##  06200004F8:1386   Mean   :2016-03-08   Mean   :2016-03-09 07:45:58   2300:3531                     
##  041868BED6: 944   3rd Qu.:2016-03-13   3rd Qu.:2016-03-13 08:24:58   2400:1477                     
##  06200003BB: 708   Max.   :2016-03-16   Max.   :2016-03-16 16:39:30   2700:2274                     
##  (Other)   :1613                                                                                    
##      age                sex             site_name              lon              lat             temp        
##  Length:16886       Length:16886       Length:16886       Min.   :-120.4   Min.   :50.67   Min.   :-0.2317  
##  Class :character   Class :character   Class :character   1st Qu.:-120.4   1st Qu.:50.67   1st Qu.: 5.0561  
##  Mode  :character   Mode  :character   Mode  :character   Median :-120.4   Median :50.67   Median : 7.1651  
##                                                           Mean   :-120.4   Mean   :50.67   Mean   : 7.4349  
##                                                           3rd Qu.:-120.4   3rd Qu.:50.67   3rd Qu.: 9.3319  
##                                                           Max.   :-120.4   Max.   :50.67   Max.   :16.3712  
##                                                                                                             
##     wind_spd     
##  Min.   : 1.000  
##  1st Qu.: 7.634  
##  Median :13.738  
##  Mean   :14.443  
##  3rd Qu.:19.907  
##  Max.   :44.939  
## 
glimpse(finches_weather)
## Rows: 16,886
## Columns: 12
## $ animal_id <fct> 041868FF93, 041868FF93, 041868FF93, 06200003BB, 06200003BB, 06200003BB, 06200003BB, 06200003BB, 0…
## $ date      <date> 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, 2016-03-01, …
## $ time      <dttm> 2016-03-01 06:57:42, 2016-03-01 06:58:41, 2016-03-01 07:07:21, 2016-03-01 07:32:34, 2016-03-01 0…
## $ logger_id <fct> 2300, 2300, 2300, 2400, 2400, 2400, 2400, 2400, 2300, 2300, 2300, 2300, 2300, 2400, 2300, 2400, 2…
## $ species   <chr> "Mountain Chickadee", "Mountain Chickadee", "Mountain Chickadee", "House Finch", "House Finch", "…
## $ age       <chr> "AHY", "AHY", "AHY", "SY", "SY", "SY", "SY", "SY", "AHY", "AHY", "AHY", "AHY", "AHY", "SY", "AHY"…
## $ sex       <chr> "U", "U", "U", "M", "M", "M", "M", "M", "F", "F", "F", "F", "F", "M", "F", "M", "M", "M", "M", "M…
## $ site_name <chr> "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "Kamloops, BC", "…
## $ lon       <dbl> -120.3622, -120.3622, -120.3622, -120.3635, -120.3635, -120.3635, -120.3635, -120.3635, -120.3622…
## $ lat       <dbl> 50.66967, 50.66967, 50.66967, 50.66938, 50.66938, 50.66938, 50.66938, 50.66938, 50.66967, 50.6696…
## $ temp      <dbl> 3.984667, 3.991222, 4.036750, 4.162833, 4.162917, 4.163000, 4.163083, 4.163167, 4.180417, 4.18075…
## $ wind_spd  <dbl> 22.88500, 22.93417, 22.26500, 19.74333, 19.74167, 19.74000, 19.73833, 19.73667, 19.39167, 19.3850…
finches_weather <- finches_weather %>%
  group_by(date) %>%
  summarize(n = length(time),
            temp = mean(temp),
            wind_spd = mean(wind_spd))

ggplot(data = finches_weather, aes(x = date, y = n)) +
  theme_bw() +
  theme(legend.position = "top") +
  geom_bar(stat = "identity") +
  geom_line(aes(y = temp * 50, colour = "Temperature"), size = 2) +
  geom_line(aes(y = wind_spd * 50, colour = "Wind Speed"), size = 2) +
  scale_colour_discrete(name = "") +
  scale_y_continuous(
    name = "Activity Counts",
    sec.axis = sec_axis(~. / 50, name = "Temperature (C) / Wind Speed (km/h)"))

plot of chunk unnamed-chunk-7



Try the weathercan package in your browser

Any scripts or data that you put into this service are public.

weathercan documentation built on Dec. 11, 2021, 10 a.m.