library(ecosscraper)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(knitr)
library(lubridate)
library(plotly)
library(stringr)
library(viridis)
data("recovery_plan_table")
data("species_info_table")
data("ecos_all_spp")

Abstract

Recovery planning is an essential part of implementing the U.S. Endangered Species Act (ESA) because explicit planning helps conservation partners identify and address the key threats to listed species. Despite the importance of recovery planning, it is generally acknowledged that reaching a final plan takes far longer than the goal of 2-1/2 years from the time of listing. But the extent of planning (number of species with recovery plans) and time required for planning has not been evaluated for nearly two decades. Using data from all domestic ESA-listed species, we quantify basic characteristics of ESA recovery planning. We show that nearly 1/4 of listed species lack recovery plans; the average recovery plan has taken >5 years to be developed after listing; and the recovery plans that have been written are nearly 20 years old on average. These results are not unexpected for agencies that have seen dwindling budgets, but highlight the need to improve recovery planning so that ESA-listed species can benefit from advance planning.

Introduction

The U.S. Endangered Species Act (ESA) requires the Fish and Wildlife Service (FWS) and National Marine Fisheries Service (NMFS; collectively, the Services) to develop plans to guide the recovery of listed species, unless doing so is not warranted (e.g., for foreign listed species). Recovery plans have evoloved significantly over the years, as has recovery planning guidance the Services use. Since 1994, the Services' guidance has stated that recovery plans would be completed within 2-1/2 years. But we know a persistent problem is that recovery planning often takes far longer than that. While this problem is a "generally accepted" fact, there is very little quantitative information that can adequately describe the scope of the challenge.

Using the data scraped from FWS's ECOS website, we sought to answer answer several outstanding questions about ESA recovery planning. How many species have final recovery plans, and how has that changed over the last 35 years? What is the average or median time from listing to a final recovery plan? What proportion of species have plans completed within the 2.5-year time-frame? How has the time required for a recovery plan changed over the past >40 years? How old are recovery plans as of 2016? Is there variation in time-to-recovery plan among FWS regions or between the Services? We show that both the extent of recovery plan coverage and the time required for recovery plan development, finalization, and revision are falling short of expectations. This analysis should provide a solid reason for the Services, and FWS in particular, to critically evaluate how recovery planning is done. (NMFS recently undertook a public review of their recovery program and is implementing changes.)

combo <- dplyr::left_join(recovery_plan_table,
                          species_info_table,
                          by = "Species")
names(combo)[1] <- "Rec_Plan_Date"
combo$Rec_Plan_Date <- as_date(combo$Rec_Plan_Date)
names(combo)[4] <- "Plan_Status"
names(combo)[8] <- "Date_Cur_Status"
combo$Date_Cur_Status <- as_date(combo$Date_Cur_Status)
combo$Cur_Status_To_Plan <- as.numeric(combo$Rec_Plan_Date - combo$Date_Cur_Status)
combo <- distinct(combo, Species, `Where Listed`, .keep_all = TRUE)
combo$Status <- sub(combo$Status, 
                    pattern = 'displayListingStatus\\(\\"', 
                    replacement = '')
combo$Status <- sub(combo$Status, 
                    pattern = '\\"\\)', 
                    replacement = '')
combo <- filter(combo, Status == "Threatened" | Status == "Endangered")

# Next, the join with the 'big' ECOS species table to get original listing dates
recplan_spp <- left_join(combo, ecos_all_spp, by = c("Species" = "Scientific_Name"))
recplan_spp <- filter(recplan_spp, US_Foreign %in% c("US", "US/Foreign"))
recplan_spp <- filter(recplan_spp, 
                      Federal_Listing_Status %in% c("Endangered", "Threatened"))
recplan_spp <- select(recplan_spp, Rec_Plan_Date, Title, Plan_Status, 
                      Species, Status, Date_Cur_Status, Lead_Region, 
                      Cur_Status_To_Plan, First_Listed, Species_Group,
                      US_Foreign, Where_Listed)
recplan_spp <- distinct(recplan_spp, Species, Where_Listed, .keep_all = TRUE)
recplan_spp$Elapsed <- as.numeric(recplan_spp$Rec_Plan_Date - recplan_spp$First_Listed)
recplan_spp$Elapsed_Years <- recplan_spp$Elapsed / 365

# Looks like I'm missing records for ~50 species compared to what the BoxScore
# provides (http://ecos.fws.gov/ecp0/reports/box-score-report)...wait, no, the
# footnote on that page says "active plans," so both draft and final.
final_plans <- filter(recplan_spp, recplan_spp$Plan_Status == "Final")
with_plan <- filter(recplan_spp, 
                    recplan_spp$Plan_Status != "Outline",
                    recplan_spp$Plan_Status != "Conservation Strategy",
                    recplan_spp$Plan_Status != "Exempt")

###############
# Also need the complement of the previous, a data.frame of listed species,
# with NAs for listed species lacking plans. Note that the 1593 I get here is 
# the same as what BoxScore indicates today.
listed <- filter(ecos_all_spp, 
                 Federal_Listing_Status %in% c("Endangered", "Threatened"))
listed <- filter(listed, US_Foreign %in% c("US", "US/Foreign"))

listed <- left_join(listed, combo, by = c("Scientific_Name" = "Species"))
listed <- distinct(listed, Scientific_Name, Where_Listed, .keep_all = TRUE)
listed <- select(listed, -`Plan Action Status`, -Doc_Link, -Status, -`Lead Region`,
                  -`Where Listed`)
listed$Year_Listed <- year(listed$First_Listed)
listed$Year_Plan <- year(listed$Rec_Plan_Date)

no_plan <- filter(listed, is.na(listed$Year_Plan))

Analyses

The raw data scraped from ECOS cannot be used directly, so we undertook several data cleaning and management steps to prepare it for analysis (Appendix A). In addition to the Appendix, the raw Rmarkdown file that generated this working paper can be reviewed for the entire analysis.

Species with and without plans

The first question we need to address is, How many listed species have plans, and how has that number changed through time? The answer to this question is important to interpreting time-to-plan statistics and figures because those analyses only include species with final recovery plans.1

# Make a data.frame of numbers listed, with and without plans, through time.
years <- seq(min(listed$Year_Listed, na.rm = TRUE),
             max(listed$Year_Listed, na.rm = TRUE))
n_listed <- unlist(lapply(years, 
                          FUN = function(x) sum(listed$Year_Listed <= x, 
                                                na.rm = TRUE)))
n_plans <- unlist(lapply(years, 
                         FUN = function(x) sum(listed$Year_Plan <= x & 
                                                 (listed$Plan_Status == "Final" |
                                                  grepl(listed$Plan_Status, 
                                                        pattern = "Revision")), 
                                               na.rm = TRUE)))
n_list_df <- data.frame(years, n_listed, n_plans)
n_list_df$n_no_plan <- n_list_df$n_listed - n_list_df$n_plans

# And now the plot
ggplot(n_list_df, aes(y = n_listed, x = years)) +
  geom_line(colour = "gray70") +
  geom_line(aes(y = n_no_plan, x = years),
            size = 2) +
  geom_line(aes(y = n_plans, x = years), 
            colour = "gray70",
            linetype = "dashed") +
  ggtitle("Number of species without a final recovery plan (black)", 
          subtitle = "# listed species solid gray, # with plans dashed gray") +
  labs(x = "",
       y = "Count") +
  theme_hc() + 
  theme(text = element_text(family = "Open Sans"))

This pattern highlights the challenge of interpreting the time-to-plan figures because of the right censored data. The r n_list_df$n_no_plan[length(n_list_df$n_no_plan)] species without final or revised plans today effectively have a time-to-plan of infinity, so estimates of the mean / median will be biased low. Because of the stationarity problem, there's no reliable way to correct the bias.

We can also view this as the proportion of species listed in each year that have no recovery plan as of September, 2016.

n_listed <- unlist(lapply(years, 
                          function(x) sum(listed$Year_Listed == x, na.rm = TRUE)))
n_w_plans <- unlist(lapply(years, 
                         function(x) {
                           sub <- filter(listed, Year_Listed == x)
                           sum(!is.na(sub$Year_Plan))
                         }))
n_species <- unlist(lapply(years, 
                         function(x) {
                           sub <- filter(listed, Year_Plan == x)
                           length(unique(sub$Scientific_Name))
                         }))
n_plans <- unlist(lapply(years, 
                         function(x) {
                           sub <- filter(listed, Year_Plan == x)
                           length(unique(sub$Title))
                         }))
n_list_df <- data.frame(years, n_listed, n_w_plans, n_species, n_plans)
n_list_df$pct_w_plan <- n_list_df$n_w_plans / n_list_df$n_listed

ggplot(n_list_df, aes(y = pct_w_plan, x = years)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(level = 0.9, alpha = 0, colour = "black") +
  scale_y_continuous(limits = c(0,1)) +
  ggtitle("Proportion of species with recovery plans", 
          subtitle = "By year of listing") +
  labs(x = "Year Listed",
       y = "Proportion with plans") +
  theme_hc() + 
  theme(text = element_text(family = "Open Sans"))

The majority of species listed before 1995 or so have recovery plans, even though some long-listed species still lack plans. The proportion with plans drops precipitously after 1995; the high rates in the 2000-2010 window were possible because so few species (n = 60, or six per year) were listed during this time.

Time-to-plan (final)

As noted in the Introduction, the time to develop and finalize recovery plans is known to be "too long." What does the data say about how long recovery planning takes?

qplot(final_plans$Elapsed_Years, geom = "histogram") +
      labs(x = "Years between date listed and recovery plan date",
           y = "Number of species") +
      ggtitle("Time to final recovery plan", 
              subtitle = "By species") +
      theme_hc() +
      theme(plot.title=element_text(hjust = 0, size = 18))

We see that the bulk of plans are finished in the 5-10 year time span, with a significant number of higher values. There are also 53 species for which the recovery plan was finalized at the time of listing or earlier. These aren't mistakes: the species were "rolled into" existing multi-species plans, e.g., for Hawaiian plants. How the recovery criteria were determined before listing is an outstanding question that deserves attention. Some summary statistics:

delta_tmp <- c(n = length(final_plans$Elapsed_Years),
               min = min(final_plans$Elapsed_Years, na.rm = TRUE),
               median = median(final_plans$Elapsed_Years, na.rm = TRUE),
               mean = mean(final_plans$Elapsed_Years, na.rm = TRUE),
               max = max(final_plans$Elapsed_Years, na.rm = TRUE))
listd_tmp <- c(n = as.character(length(final_plans$First_Listed )),
               min = as.character(min(final_plans$First_Listed, na.rm = TRUE)),
               median = as.character(median(final_plans$First_Listed, na.rm = TRUE)),
               mean = as.character(mean(final_plans$First_Listed, na.rm = TRUE)),
               max = as.character(max(final_plans$First_Listed, na.rm = TRUE)))
pland_tmp <- c(n = as.character(length(final_plans$Rec_Plan_Date)),
               min = as.character(min(final_plans$Rec_Plan_Date, na.rm = TRUE)),
               median = as.character(median(final_plans$Rec_Plan_Date, na.rm = TRUE)),
               mean = as.character(mean(final_plans$Rec_Plan_Date, na.rm = TRUE)),
               max = as.character(max(final_plans$Rec_Plan_Date, na.rm = TRUE)))
to_show_df <- data.frame(`Date Listed` = as.vector(listd_tmp),
                         `Plan Date` = as.vector(pland_tmp),
                         `Years Elapsed` = as.vector(delta_tmp))
row.names(to_show_df) <- c("N", "min", "median", "mean", "max")
kable(to_show_df,
      caption = "Summary statistics of time between listing and final recovery plan. Min, median, mean, and max are given in days.",
      digits = 1,
      align = "c")

Another question about time-to-plans is, What percentage of recovery plans are completed in X years? For example, how often do the Services meet their goal of 2.5 years from listing to final recovery plan?

duration <- sort(final_plans$Elapsed_Years, decreasing = FALSE)
order <- seq(1:length(duration))
percentile <- order / max(order)
pctile <- data.frame(duration, order, percentile, stringsAsFactors = FALSE)

gg <- ggplot(pctile, aes(y = duration, x = percentile)) +
        geom_line() +
        ggtitle("Time to recovery plan", 
                subtitle = "As a function of quantiles") +
        labs(x = "Percentile",
             y = "Time-to-recovery plan (years)\n") +
        theme_hc() + 
        theme(text = element_text(family = "Open Sans"))

ggplotly(gg) %>% layout(font = list(family = "Open Sans"))

With this we can quickly say that about 19% are completed within the 2.5 years the Services allot themselves, 50% of plans are finalized within 4.7 years of the species' listing, and 95% are finalized within 20 years.

Time-to-plan over the years

Bearing in mind the issue of right-censored data, we can now ask, How has the time it takes to write and finalize a recovery plan changed over the years?

tmp <- final_plans
tmp$Year_Listed <- year(tmp$First_Listed)
tmp$Year_Plan <- year(tmp$Rec_Plan_Date)
years <- seq(min(tmp$Year_Listed, na.rm = TRUE),
             max(tmp$Year_Listed, na.rm = TRUE))

cur_fx <- function(df, x, fn) {
  sub_df <- filter(df, Year_Listed == x)
  if(dim(sub_df)[1] == 0) return(NA)
  switch(fn,
         mean = mean(sub_df$Elapsed_Years, na.rm = TRUE),
         min = min(sub_df$Elapsed_Years, na.rm = TRUE),
         max = max(sub_df$Elapsed_Years, na.rm = TRUE)
  )
}

n_plans <- unlist(lapply(years,  function(x) length(filter(tmp, Year_Plan == x)[[1]])))
tm_mean <- unlist(lapply(years, cur_fx, df = tmp, fn = "mean"))
tm_min <- unlist(lapply(years, cur_fx, df = tmp, fn = "min"))
tm_max <- unlist(lapply(years, cur_fx, df = tmp, fn = "max"))
dat <- data.frame(years, tm_mean, tm_min, tm_max, n_plans)
names(dat) <- c("Year", "Mean", "Min", "Max", "N_plans")

ggplot(dat, aes(y = Mean, x = Year)) +
  geom_line() +
  geom_line(aes(y = Min, x = Year), colour = "gray70") +
  geom_line(aes(y = Max, x = Year), colour = "gray70") +
  ggtitle("Time to final recovery plan through the years", 
          subtitle = "Min and max in gray") +
  labs(x = "Year of Listing",
       y = "Time-to-plan (years)\n") +
  theme_hc() + 
  theme(text = element_text(family = "Open Sans"))

This shows a steady decline in the time between listing and a final recovery plan. Recovery plans weren't required until the 1978 amendments to the ESA, and the three requirements for recovery plans (site-specific actions, objective criteria, and cost estimates) weren't put in place until the 1988 amendments. These aspects of the ESA explain some of the long times to a plan for early species, but the shorter times after 2000 are rare exceptions because many of these species - listed for up to 15 years - still have no plan. We could substitute a "minimum time-to-plan" for those species that lack a plan (e.g., a species listed in 2000 but with no )

Plan ages

Another challenge of recovery planning is the difficulty of updating plans: revisions require extensive work by planning teams and Federal Register notices and potential revisions to a revision draft. But what we (collectively) know about a species changes rapidly, from basic biological research, to the types of management that can help or hinder recovery, and many topics in-between. This begs the question, What is the distribution of recovery plan ages?

with_plan$Current_Age <- as.numeric(Sys.Date() - with_plan$Rec_Plan_Date) / 365

qplot(with_plan$Current_Age, geom = "histogram") +
      labs(x = "Plan age (years)",
           y = "Number of species") +
      ggtitle("Age of recovery plans", 
              subtitle = "Count by species") +
      theme_hc() +
      theme(plot.title=element_text(hjust = 0, size = 18))

With a median age of r median(with_plan$Current_Age, na.rm = TRUE), most recovery plans are quite old - if they were people, they could vote. The distribution reflects the pattern of plans-through-time even without considering the 135 plans that have been or are being revised. As with the time-to-plan, we can use a dynamic percentile plot to quickly find the proportion of plans that are age X or lower:

tmp <- distinct(with_plan, Title, .keep_all = TRUE)
age <- sort(tmp$Current_Age, decreasing = FALSE)
order <- seq(1:length(age))
percentile <- order / max(order)
pctile <- data.frame(age, order, percentile, stringsAsFactors = FALSE)

gg <- ggplot(pctile, aes(y = age, x = percentile)) +
        geom_line() +
        ggtitle("Ages of recovery plans", 
                subtitle = "As percentiles") +
        labs(x = "Percentile",
             y = "Time-to-recovery plan (years)\n") +
        theme_hc() + 
        theme(text = element_text(family = "Open Sans"))

ggplotly(gg) %>% layout(font = list(family = "Open Sans"))

Only 15% of species have plans that are >10 years old; 10% of species have plans that are >30 years old. These numbers highlight a dire need for some way to improve how recovery plans are updated.

Number of plans through time

Given the variation of time-to-plan over the decades, it would be interesting to know when species got their plans and when plans - which may be multi-species - were written.

ggplot(n_list_df, aes(y = n_plans, x = years)) +
  geom_line(colour = "gray70") +
  geom_line(aes(y = n_species, x = years)) +
  ggtitle("Number of plans and species covered by year",
          subtitle = "# spp. in black, # plans in gray") +
  labs(x = "Year",
       y = "Count") +
  theme_hc() +
  theme(text = element_text(family = "Open Sans"))

In the mid- to late-1990s a lot of species got recovery plans, in large part because of the increase in multi-species plans, which have costs (specificity) and benefits (some planning). We also know that many plans from this time are insufficient by current standards. Plans since the Society for Conservation Biology recovery planning review that concluded in 2002, recover plans have become much more thorough, which may contribute to the lower rate of plans in the past 15 years.

By region and agency

Because of the high degree of independence among FWS regions and differences between FWS and NMFS, it may be instructive to break down recovery planning along those lines. Which regions / agency have finalized the highest numbers and proportions of plans?

mat <- as.matrix(table(listed$Lead_Region, is.na(listed$Year_Plan)))
res <- data.frame(Region_Agency = row.names(mat),
                  With_Plans = mat[, 1],
                  Without_Plans = mat[, 2],
                  Prop_Plans = mat[, 1] / (mat[,1] + mat[,2]))
row.names(res) <- NULL
kable(res, 
      caption = "Distribution of plans among FWS regions and NMFS.", 
      align = "c",
      digits = 3)

How does the variation in time-to-plan break down by region/agency?

ggplot(final_plans, aes(y = Elapsed_Years, x = Lead_Region)) +
  geom_violin(fill = "gray", alpha = 0.5, colour = NA) +
  geom_boxplot(colour = "gray20", fill = NA) +
  labs(x = "FWS Region / NMFS", y = "Time-to-Plan (years)") +
  ggtitle("Time to recovery plan by region / agency",
          subtitle = "Final plans only") +
  theme_hc()

Time for NMFS is both high (median) and highly variable relative to FWS regions. Regions 4 and 8, which encompass many species, do not take as long recovery planning as I would have thought (though I don't know if they have significantly more species without plans). Last but not least, a quick summary.

Discussion

TODO:

1: This is "right censored" data, akin to the challenge of estimating the effects of a treatment on survival when individuals are still alive at the end of the experiment. While there are ways to estimate an "expected" values for right-censored data, those methods require making the questionable assumption that the same underlying process generates the data (stationarity). We should suspect that variation in presidential administrations, congresses, and career staff at national, regional, and local levels have significant effects on the process that generates final recovery plans within some time-frame. Rather than make the almost certainly invalid assumption, we will instead look at how the number of species without plans has changed through time. Return

Appendix A. Data cleaning

While the code for all of the plots and tables can be easily viewed by examining the Rmarkdown document for this vignette, we provide the data cleaning (commented out) here for ease of viewing from the HTML document.

# combo <- dplyr::left_join(recovery_plan_table,
#                           species_info_table,
#                           by = "Species")
# names(combo)[1] <- "Rec_Plan_Date"
# combo$Rec_Plan_Date <- as_date(combo$Rec_Plan_Date)
# names(combo)[4] <- "Plan_Status"
# names(combo)[8] <- "Date_Cur_Status"
# combo$Date_Cur_Status <- as_date(combo$Date_Cur_Status)
# combo$Cur_Status_To_Plan <- as.numeric(combo$Rec_Plan_Date - combo$Date_Cur_Status)
# combo <- distinct(combo, Species, `Where Listed`, .keep_all = TRUE)
# combo$Status <- sub(combo$Status, 
#                     pattern = 'displayListingStatus\\(\\"', 
#                     replacement = '')
# combo$Status <- sub(combo$Status, 
#                     pattern = '\\"\\)', 
#                     replacement = '')
# combo <- filter(combo, Status == "Threatened" | Status == "Endangered")
# 
# # Next, the join with the 'big' ECOS species table to get original listing dates
# recplan_spp <- left_join(combo, ecos_all_spp, by = c("Species" = "Scientific_Name"))
# recplan_spp <- filter(recplan_spp, US_Foreign %in% c("US", "US/Foreign"))
# recplan_spp <- filter(recplan_spp, 
#                       Federal_Listing_Status %in% c("Endangered", "Threatened"))
# recplan_spp <- select(recplan_spp, Rec_Plan_Date, Title, Plan_Status, 
#                       Species, Status, Date_Cur_Status, Lead_Region, 
#                       Cur_Status_To_Plan, First_Listed, Species_Group,
#                       US_Foreign, Where_Listed)
# recplan_spp <- distinct(recplan_spp, Species, Where_Listed, .keep_all = TRUE)
# recplan_spp$Elapsed <- as.numeric(recplan_spp$Rec_Plan_Date - recplan_spp$First_Listed)
# recplan_spp$Elapsed_Years <- recplan_spp$Elapsed / 365
# 
# # Looks like I'm missing records for ~50 species compared to what the BoxScore
# # provides (http://ecos.fws.gov/ecp0/reports/box-score-report)...wait, no, the
# # footnote on that page says "active plans," so both draft and final.
# final_plans <- filter(recplan_spp, recplan_spp$Plan_Status == "Final")
# with_plan <- filter(recplan_spp, 
#                     recplan_spp$Plan_Status != "Outline",
#                     recplan_spp$Plan_Status != "Conservation Strategy",
#                     recplan_spp$Plan_Status != "Exempt")
# 
# ###############
# # Also need the complement of the previous, a data.frame of listed species,
# # with NAs for listed species lacking plans. Note that the 1593 I get here is 
# # the same as what BoxScore indicates today.
# listed <- filter(ecos_all_spp, 
#                  Federal_Listing_Status %in% c("Endangered", "Threatened"))
# listed <- filter(listed, US_Foreign %in% c("US", "US/Foreign"))
# 
# listed <- left_join(listed, combo, by = c("Scientific_Name" = "Species"))
# listed <- distinct(listed, Scientific_Name, Where_Listed, .keep_all = TRUE)
# listed <- select(listed, -`Plan Action Status`, -Doc_Link, -Status, -`Lead Region`,
#                   -`Where Listed`)
# listed$Year_Listed <- year(listed$First_Listed)
# listed$Year_Plan <- year(listed$Rec_Plan_Date)
# 
# no_plan <- filter(listed, is.na(listed$Year_Plan))


jacob-ogre/recovery.plan.overview documentation built on May 18, 2019, 8:01 a.m.