inst/doc/VIII_data_cleaning.R

## ----setup, include=FALSE-----------------------------------------------------
# Make knitr echo chunk source by default.
knitr::opts_chunk$set(echo = TRUE)
# Attach rnpn; the npn_download_* calls in the chunks below presumably come
# from this package.
library(rnpn)

## ----eval=FALSE---------------------------------------------------------------
# # Download status records for 2015-2021 and request the optional
# # conflict-flag field so conflicting observations can be examined below.
# # The argument list ends without a trailing comma, so no empty argument is
# # passed to the function.
# df <- npn_download_status_data(
#   request_source = "Your Name Here",
#   years = 2015:2021,
#   species_ids = 82,
#   additional_fields = "Observed_Status_Conflict_Flag"
# )

## ----eval=FALSE---------------------------------------------------------------
# library(dplyr)
# # Tally records per site and conflict flag, shorten the two conflict labels,
# # and express each tally as a fraction of that site's records.
# conflict_summary <- df %>%
#   count(site_id, observed_status_conflict_flag) %>%
#   group_by(site_id) %>%
#   mutate(
#     observed_status_conflict_flag = recode(
#       observed_status_conflict_flag,
#       "MultiObserver-StatusConflict" = "Multi",
#       "OneObserver-StatusConflict" = "One"
#     ),
#     # sum(n) is evaluated within each site because of the grouping above.
#     Percent_Conflict = n / sum(n)
#   ) %>%
#   # Drop the site grouping so later verbs operate on the whole table.
#   ungroup()

## ----eval=FALSE---------------------------------------------------------------
# library(dplyr)
# # Keep only rows whose flag is not "-9999" (presumably the no-conflict
# # marker -- confirm against the NPN field documentation).
# conflicts <- conflict_summary %>%
#   filter(observed_status_conflict_flag != "-9999")
#
# library(ggplot2)
# # One panel per site. The facet formula names the column directly so that
# # ggplot2 looks it up in the plot data instead of in an outside vector.
# p <- ggplot(conflicts, aes(observed_status_conflict_flag, Percent_Conflict)) +
#   facet_wrap(~site_id) +
#   geom_col() +
#   # The data are summarised and faceted by site, so the title says so.
#   labs(title = "Percent Multi and One Observer Status Conflict by Site") +
#   scale_y_continuous(labels = scales::percent_format(scale = 100))
# plot(p)

## ----echo=FALSE, out.width = "75%", fig.pos="h"-------------------------------
# Embed the stored figure file for the conflicts-by-site plot.
knitr::include_graphics(
  path = "figures/conflicts-by-site.png",
  dpi = NULL,
  error = getOption("knitr.graphics.error", TRUE)
)

## ----eval=FALSE---------------------------------------------------------------
# # Site/flag rows whose conflict fraction exceeds 0.05.
# df_hi_conflict_sites <- subset(conflicts, Percent_Conflict > 0.05)
# # Keep observations from every site that is not in the high-conflict set.
# # The lookup uses df_hi_conflict_sites, the object created on the line above.
# df_low_conflict_sites <- subset(
#   df,
#   !site_id %in% df_hi_conflict_sites$site_id
# )

## ----eval=FALSE---------------------------------------------------------------
# # Retain only the records whose conflict flag is "-9999".
# df_no_conflicts <- subset(
#   df,
#   observed_status_conflict_flag %in% "-9999"
# )

## ----eval=FALSE---------------------------------------------------------------
# # Download individual phenometrics for species 12 in 2021.
# df <- npn_download_individual_phenometrics(
#   request_source = "Your Name Here",
#   years = 2021,
#   species_ids = 12
# )

## ----eval=FALSE---------------------------------------------------------------
# library(dplyr)
# # Convert the -9999 placeholder to NA. The comparison value is numeric to
# # match the column (hist() below needs it numeric); na_if() in
# # dplyr >= 1.1.0 refuses to compare a numeric column with the string "-9999".
# df <- df %>%
#   mutate(numdays_since_prior_no = na_if(numdays_since_prior_no, -9999))
#
# # Distribution of the gap, in uneven bins that are finer at the low end.
# hist(
#   df$numdays_since_prior_no,
#   breaks = c(0, 3, 7, 14, 21, 28, 35, 42, 100, 250),
#   main = "Histogram of Number of Days Since Prior No"
# )
#
# # Selected percentiles of the gap, ignoring the NA values created above.
# quantile(
#   x = df$numdays_since_prior_no,
#   probs = c(0.01, 0.05, 0.25, 0.75, 0.95, 0.99),
#   na.rm = TRUE
# )
#

## ----echo=FALSE, out.width = "75%", fig.pos="h"-------------------------------
# Embed the stored histogram figure file.
knitr::include_graphics(
  path = "figures/HistogramPriorNo.png",
  dpi = NULL,
  error = getOption("knitr.graphics.error", TRUE)
)

## ----eval=FALSE---------------------------------------------------------------
# library(dplyr)
# # Keep the rows whose numdays_since_prior_no is below 21.
# df_21d_prior_no <- filter(df, numdays_since_prior_no < 21)

## ----eval=FALSE---------------------------------------------------------------
# # Same 2021 request for species 12, this time also asking for the optional
# # multiple_firsty field.
# df <- npn_download_individual_phenometrics(
#   request_source = "Your Name Here",
#   years = 2021,
#   species_ids = 12,
#   additional_fields = "multiple_firsty"
# )

## ----eval=FALSE---------------------------------------------------------------
# library(dplyr)
# # Tally records per phenophase and multiple_firsty value, then express each
# # tally as a fraction of that phenophase's records.
# mfy_summary <- df %>%
#   count(phenophase_description, multiple_firsty) %>%
#   group_by(phenophase_description) %>%
#   mutate(Percent_MFY = n / sum(n)) %>%
#   # Drop the grouping so later verbs operate on the whole table.
#   ungroup()

## ----eval=FALSE---------------------------------------------------------------
# library(dplyr)
# # Keep the rows where multiple_firsty equals 1. The column is named directly:
# # a full-length `mfy_summary$multiple_firsty` vector does not match the
# # per-group row counts dplyr checks when the input is still grouped.
# mfy <- mfy_summary %>%
#   filter(multiple_firsty == 1)
#
# library(ggplot2)
# # One panel per phenophase. The facet formula names the column directly so
# # that ggplot2 looks it up in the plot data.
# p <- ggplot(mfy, aes(multiple_firsty, Percent_MFY)) +
#   facet_wrap(~phenophase_description) +
#   geom_col() +
#   labs(
#     title = "Frequency of Multiple First Yeses by Phenophase (Flowering Dogwood, 2021)"
#   ) +
#   scale_y_continuous(labels = scales::percent_format(scale = 100))
# plot(p)

## ----echo=FALSE, out.width = "75%", fig.pos="h"-------------------------------
# Embed the stored multiple-first-yes figure file.
knitr::include_graphics(
  path = "figures/MFY-byPhenophase.png",
  dpi = NULL,
  error = getOption("knitr.graphics.error", TRUE)
)

## ----eval=FALSE---------------------------------------------------------------
# # Add the mean first-yes day of year per phenophase and individual, then keep
# # one row per phenophase_id/individual_id pair (all other columns come from
# # the first row of each pair).
# df_one_firsty <- df %>%
#   group_by(phenophase_description, individual_id) %>%
#   mutate(mean_firsty = mean(first_yes_doy)) %>%
#   distinct(phenophase_id, individual_id, .keep_all = TRUE) %>%
#   # Drop the grouping so the result behaves like an ordinary table.
#   ungroup()

## ----eval=FALSE---------------------------------------------------------------
# # Download individual phenometrics for species 102, phenophase 371, over
# # 2009-2020.
# df <- npn_download_individual_phenometrics(
#   request_source = "Your Name Here",
#   years = 2009:2020,
#   species_ids = 102,
#   phenophase_ids = 371
# )

## ----eval=FALSE---------------------------------------------------------------
# # Scatter of first-yes day of year against year for all downloaded records.
# plot(
#   first_yes_doy ~ first_yes_year,
#   data = df,
#   xlab = "Year", ylab = "Day of Year", ylim = c(1, 350),
#   pch = 21, cex = 2, cex.axis = 1.5, cex.lab = 1.5
# )

## ----echo=FALSE, out.width = "75%", fig.pos="h"-------------------------------
# Embed the stored figure file for the unfiltered scatter plot.
knitr::include_graphics(
  path = "figures/RedOak1.png",
  dpi = NULL,
  error = getOption("knitr.graphics.error", TRUE)
)

## ----eval=FALSE---------------------------------------------------------------
# # Keep individuals that have more than seven distinct first_yes_year values.
# # Grouping uses the bare column name: `group_by(df$individual_id)` would add
# # a new grouping column literally named "df$individual_id".
# df_8Y <- df %>%
#   group_by(individual_id) %>%
#   filter(n_distinct(first_yes_year) > 7) %>%
#   # Drop the grouping so later steps start from an ungrouped table.
#   ungroup()

## ----eval=FALSE---------------------------------------------------------------
# # Per-individual first and third quartiles and interquartile range of the
# # first-yes day of year, returned as a plain data frame.
# quantiles <- df_8Y %>%
#   group_by(individual_id) %>%
#   summarize(
#     Q1 = quantile(first_yes_doy, 0.25),
#     Q3 = quantile(first_yes_doy, 0.75),
#     IQR = IQR(first_yes_doy)
#   ) %>%
#   as.data.frame()

## ----eval=FALSE---------------------------------------------------------------
# # Attach each individual's Q1, Q3 and IQR to its observation rows.
# df_8Y_Q <- right_join(df_8Y, quantiles, by = "individual_id")

## ----eval=FALSE---------------------------------------------------------------
# # Keep observations strictly between Q1 - 1.5 * IQR and Q3 + 1.5 * IQR.
# # subset() resolves first_yes_doy, Q1, Q3 and IQR as columns of df_8Y_Q.
# df_8Y_clean <- subset(
#   df_8Y_Q,
#   first_yes_doy > Q1 - 1.5 * IQR & first_yes_doy < Q3 + 1.5 * IQR
# )

## ----eval=FALSE---------------------------------------------------------------
# # Same scatter as before, drawn from the outlier-filtered table.
# plot(
#   first_yes_doy ~ first_yes_year,
#   data = df_8Y_clean,
#   xlab = "Year", ylab = "Day of Year", ylim = c(1, 350),
#   pch = 21, cex = 2, cex.axis = 1.5, cex.lab = 1.5
# )

## ----echo=FALSE, out.width = "75%", fig.pos="h"-------------------------------
# Embed the stored figure file for the outlier-filtered scatter plot.
knitr::include_graphics(
  path = "figures/RedOak3.png",
  dpi = NULL,
  error = getOption("knitr.graphics.error", TRUE)
)

Try the rnpn package in your browser

Any scripts or data that you put into this service are public.

rnpn documentation built on Aug. 25, 2025, 9:52 a.m.