# css: custom.css
  # rmdformats::material:
library(ecosscraper)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(knitr)
library(lubridate)
library(plotly)
library(recovery.plan.overview)
library(stringr)
library(viridis)
data("recovery_plan_table")
data("species_info_table")
data("ecos_all_spp")
data("nmfs_man")
###########################################################################
# load and prep the data
#
# First, fix some errors from ECOS
recovery_plan_table$Date <- ifelse(recovery_plan_table$Title == "Final Recovery Plan for Johnson's Seagrass",
                                   "2002-09-15",
                                   recovery_plan_table$Date)
recovery_plan_table$Date <- ifelse(recovery_plan_table$Title == "White Abalone Recovery Plan",
                                   "2008-10-10",
                                   recovery_plan_table$Date)
recovery_plan_table$Date <- as_date(recovery_plan_table$Date)

# Add manually curated data from NMFS
# nmfs_man <- readr::read_tsv("../data-raw/NMFS_recplan_manual.tsv")
combo <- rbind(recovery_plan_table, nmfs_man)

# # Now do table joins and filtering
# combo <- dplyr::left_join(with_new,
#                           species_info_table,
#                           by = "Species")
# names(combo)[1] <- "Rec_Plan_Date"
# names(combo)[4] <- "Plan_Status"
# names(combo)[8] <- "Date_Cur_Status"
# combo$Date_Cur_Status <- as_date(combo$Date_Cur_Status)
# combo$Cur_Status_To_Plan <- as.numeric(combo$Rec_Plan_Date - combo$Date_Cur_Status)
# combo <- distinct(combo, Title, Species, `Where Listed`, .keep_all = TRUE)
# combo$Status <- sub(combo$Status, 
#                     pattern = 'displayListingStatus\\(\\"', 
#                     replacement = '')
# combo$Status <- sub(combo$Status, 
#                     pattern = '\\"\\)', 
#                     replacement = '')
# combo <- filter(combo, Status == "Threatened" | Status == "Endangered")

# Next, the join with the 'big' ECOS species table to get original listing dates
names(combo)[1] <- "Rec_Plan_Date"
names(combo)[4] <- "Plan_Status"
recplan_spp <- left_join(combo, ecos_all_spp, by = c("Species" = "Scientific_Name"))
recplan_spp <- filter(recplan_spp, US_Foreign %in% c("US", "US/Foreign"))
recplan_spp <- filter(recplan_spp, 
                      Federal_Listing_Status %in% c("Endangered", "Threatened"))
recplan_spp <- select(recplan_spp, Rec_Plan_Date, Title, Plan_Status, 
                      Species, Common_Name, Federal_Listing_Status, Lead_Region, 
                      First_Listed, Species_Group, US_Foreign, Where_Listed)
recplan_spp <- distinct(recplan_spp, Species, Where_Listed, .keep_all = TRUE)
recplan_spp$Elapsed <- as.numeric(recplan_spp$Rec_Plan_Date - recplan_spp$First_Listed)
recplan_spp$Elapsed_Years <- recplan_spp$Elapsed / 365

# Looks like I'm missing records for ~50 species compared to what the BoxScore
# provides (http://ecos.fws.gov/ecp0/reports/box-score-report)...wait, no, the
# footnote on that page says "active plans," so both draft and final.
final_plans <- filter(recplan_spp, Plan_Status == "Final")
with_plan <- filter(recplan_spp, 
                    recplan_spp$Plan_Status != "Outline",
                    recplan_spp$Plan_Status != "Conservation Strategy",
                    recplan_spp$Plan_Status != "Exempt")

###############
# Also need the complement of the previous, a data.frame of listed species,
# with NAs for listed species lacking plans. Note that the 1593 I get here is 
# the same as what BoxScore indicates today.
listed <- filter(ecos_all_spp, 
                 Federal_Listing_Status %in% c("Endangered", "Threatened"))
listed <- filter(listed, US_Foreign %in% c("US", "US/Foreign"))

listed <- left_join(listed, combo, by = c("Scientific_Name" = "Species"))
listed <- distinct(listed, Scientific_Name, Where_Listed, .keep_all = TRUE)
# listed <- select(listed, -`Plan Action Status`, -Doc_Link, -Status, -`Lead Region`,
#                   -`Where Listed`)
listed$Year_Listed <- year(listed$First_Listed)
listed$Year_Plan <- year(listed$Rec_Plan_Date)

no_plan <- filter(listed, is.na(listed$Year_Plan))



Abstract

Recovery planning is an essential part of implementing the U.S. Endangered Species Act (ESA) because explicit planning helps identify and address the key threats to listed species. Despite the importance of recovery planning, it is generally acknowledged that reaching a "final" plan takes far longer than the goal of 2-1/2 years from the time of listing. But the extent of planning (number of species with recovery plans) and time required for planning has not been evaluated for over a decade. Using data from all domestic ESA-listed species, we quantify basic characteristics of ESA recovery planning. We show that 1/4 of listed species lack recovery plans; the average recovery plan has taken >5 years to be developed after listing; and the recovery plans that have been written are nearly 20 years old on average. These results are not unexpected for agencies that have seen dwindling budgets and more species to protect, but they highlight the need to improve recovery planning so that ESA-listed species can benefit. Defenders proposes that these challenges can be addressed, in part, by moving to web-based, dynamic recovery plans that can improve efficiency in plan development and improve effectiveness by providing timely information.

Introduction

The U.S. Endangered Species Act (ESA) requires the Fish and Wildlife Service (FWS) and National Marine Fisheries Service (NMFS; collectively, the Services) to develop plans to guide the recovery of listed species, unless doing so is not warranted (e.g., for foreign listed species). Recovery plans have evoloved significantly over the years, as has recovery planning guidance the Services use. The Services' 1994 recovery planning guidance stated that "the Services will...develop recovery plans within 2-1/2 years after final listing." But we know a persistent problem is that recovery planning often takes far longer than that. While this problem is a "generally accepted" fact, there is very little quantitative information that can adequately describe the scope of the challenge.

Using the data scraped from FWS's ECOS website1, we sought to answer answer several outstanding questions about ESA recovery planning. How many species have final recovery plans, and how has that changed over the last 35 years?2 What is the average or median time from listing to an original final recovery plan?3 What proportion of species have plans completed within the 2.5-year time-frame? How has the time required for a recovery plan changed over the past >40 years? How old are recovery plans as of 2016?4 Is there variation in time-to-recovery plan among FWS regions or between the Services? We show that both the extent of recovery plan coverage and the time required for recovery plan development, finalization, and revision are falling short of expectations. This analysis should provide a solid reason for the Services, and FWS in particular, to critically evaluate how recovery planning is done. (NMFS recently undertook a public review of their recovery program and is implementing changes.)

1: The raw data scraped from ECOS cannot be used directly, so we undertook several data cleaning and management steps to prepare it for analysis (Appendix A). In addition to the Appendix, the raw Rmarkdown file that generated this working paper can be reviewed for the entire analysis.

2: We focus on domestic and transboundary species because foreign listed species will rarely if ever have recovery plans.

3: We only include species with recovery plans that are the original final plan in the current analysis of time-to-plan: including species with revisions (which may come many years later) in these calculations would artificially inflate the time-to-plan. Unfortunately, ECOS rarely includes the original plan or the date of the original final plan, so we can't yet estimate the time-to-plan for species that have revisions.

4: In contrast to the time-to-plan estimates, we do include all species with either original or revised plans for estimating plan age as of 2016 because the most-recent time is known and the age estimate is unbiased.

Species with and without plans

The first question we need to address is, How many domestic listed species have plans, and how has that number changed through time? The answer to this question is important to interpreting time-to-plan statistics and figures because those analyses only include species with final recovery plans.5

# Make a data.frame of numbers listed, with and without plans, through time.
years <- seq(min(listed$Year_Listed, na.rm = TRUE),
             max(listed$Year_Listed, na.rm = TRUE))
n_listed <- unlist(lapply(years, 
                          FUN = function(x) sum(listed$Year_Listed <= x, 
                                                na.rm = TRUE)))
n_plans <- unlist(lapply(years, 
                         FUN = function(x) sum(listed$Year_Plan <= x & 
                                                 (listed$Plan_Status == "Final" |
                                                  grepl(listed$Plan_Status, 
                                                        pattern = "Revision")), 
                                               na.rm = TRUE)))
n_list_df <- data.frame(years, n_listed, n_plans)
n_list_df$n_no_plan <- n_list_df$n_listed - n_list_df$n_plans

# And now the plot
ggplot(n_list_df, aes(y = n_listed, x = years)) +
  geom_line(colour = "gray70") +
  geom_line(aes(y = n_no_plan, x = years),
            size = 2) +
  geom_line(aes(y = n_plans, x = years), 
            colour = "gray70",
            linetype = "dashed") +
  ggtitle("Number of species without a final recovery plan (black)", 
          subtitle = "# listed species solid gray, # with plans dashed gray") +
  labs(x = "",
       y = "Count") +
  theme_hc() + 
  theme(text = element_text(family = "Open Sans"))

This pattern highlights the challenge of interpreting the time-to-plan figures because of the right censored data. The r n_list_df$n_no_plan[length(n_list_df$n_no_plan)] species without final or revised plans today effectively have a time-to-plan of infinity, so estimates of the mean / median will be biased low. Because of the stationarity problem, there's no reliable way to correct the bias.

We can also view this as the proportion of species listed in each year that have no recovery plan as of September, 2016.

n_listed <- unlist(lapply(years, 
                          function(x) sum(listed$Year_Listed == x, na.rm = TRUE)))
n_w_plans <- unlist(lapply(years, 
                         function(x) {
                           sub <- filter(listed, Year_Listed == x)
                           sum(!is.na(sub$Year_Plan))
                         }))
n_species <- unlist(lapply(years, 
                         function(x) {
                           sub <- filter(listed, Year_Plan == x)
                           length(unique(sub$Scientific_Name))
                         }))
n_plans <- unlist(lapply(years, 
                         function(x) {
                           sub <- filter(listed, Year_Plan == x)
                           length(unique(sub$Title))
                         }))
n_list_df <- data.frame(years, n_listed, n_w_plans, n_species, n_plans)
n_list_df$pct_w_plan <- n_list_df$n_w_plans / n_list_df$n_listed

ggplot(n_list_df, aes(y = pct_w_plan, x = years)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(level = 0.9, alpha = 0, colour = "black") +
  scale_y_continuous(limits = c(0,1)) +
  ggtitle("Proportion of species with recovery plans", 
          subtitle = "By year of listing") +
  labs(x = "Year Listed",
       y = "Proportion with plans") +
  theme_hc() + 
  theme(text = element_text(family = "Open Sans"))

The majority of species listed before 1995 or so have recovery plans, even though some long-listed species still lack plans. The proportion with plans drops precipitously after 1995; the high rates in the 2000-2010 window were possible because so few species (n = 60, or six per year) were listed during this time.

Number of plans through time

Given the variation of time-to-plan over the decades, it would be interesting to know when species got their plans and when plans - which may be multi-species - were written.

ggplot(n_list_df, aes(y = n_plans, x = years)) +
  geom_line(colour = "gray70") +
  geom_line(aes(y = n_species, x = years)) +
  ggtitle("Number of plans and species covered by year",
          subtitle = "# spp. in black, # plans in gray") +
  labs(x = "Year",
       y = "Count") +
  theme_hc() +
  theme(text = element_text(family = "Open Sans"))

In the mid- to late-1990s a lot of species got recovery plans, in large part because of the increase in multi-species plans, which have costs (specificity) and benefits (some planning). We also know that many plans from this time are insufficient by current standards. Since the Society for Conservation Biology recovery planning review that concluded in 2002, recovery plans have become much more thorough, which may contribute to the lower rate of plan completion in the past 15 years.


5: This is "right censored" data, akin to the challenge of estimating the effects of a treatment on survival when individuals are still alive at the end of the experiment. While there are ways to estimate an "expected" values for right-censored data, those methods require making the questionable assumption that the same underlying process generates the data (stationarity). We should suspect that variation in presidential administrations, congresses, and career staff at national, regional, and local levels have significant effects on the process that generates final recovery plans within some time-frame. Rather than make the almost certainly invalid assumption, we will instead look at how the number of species without plans has changed through time.

Time-to-plan

As noted in the Introduction, the time to develop and finalize recovery plans is known to be "too long." What does the data say about how long recovery planning takes? (As discussed in footnote 3 of the Introduction, these analyses use only species that have a final plan, but not those with a revised plan.)

qplot(final_plans$Elapsed_Years, geom = "histogram") +
      labs(x = "Years between date listed and recovery plan date",
           y = "Number of species") +
      ggtitle("Time to final recovery plan", 
              subtitle = "By species") +
      theme_hc() +
      theme(plot.title=element_text(hjust = 0, size = 18))

We see that the bulk of plans are finished in the 5-10 year time span, with a significant number of higher values. There are also 53 species for which the recovery plan was finalized at the time of listing or earlier. These aren't mistakes: the species were "rolled into" existing multi-species plans, e.g., for Hawaiian plants, that had already identified the species. How the recovery criteria were determined before listing is an outstanding question that deserves attention. Some summary statistics:

delta_tmp <- c(n = length(final_plans$Elapsed_Years),
               min = round(min(final_plans$Elapsed_Years, na.rm = TRUE), 1),
               median = round(median(final_plans$Elapsed_Years, na.rm = TRUE), 1),
               mean = round(mean(final_plans$Elapsed_Years, na.rm = TRUE), 1),
               max = round(max(final_plans$Elapsed_Years, na.rm = TRUE)))
listd_tmp <- c(n = length(final_plans$First_Listed ),
               min = as.character(min(final_plans$First_Listed, na.rm = TRUE)),
               median = as.character(median(final_plans$First_Listed, na.rm = TRUE)),
               mean = as.character(mean(final_plans$First_Listed, na.rm = TRUE)),
               max = as.character(max(final_plans$First_Listed, na.rm = TRUE)))
pland_tmp <- c(n = length(final_plans$Rec_Plan_Date),
               min = as.character(min(final_plans$Rec_Plan_Date, na.rm = TRUE)),
               median = as.character(median(final_plans$Rec_Plan_Date, na.rm = TRUE)),
               mean = as.character(mean(final_plans$Rec_Plan_Date, na.rm = TRUE)),
               max = as.character(max(final_plans$Rec_Plan_Date, na.rm = TRUE)))
to_show_df <- rbind(listd_tmp, pland_tmp, delta_tmp)
row.names(to_show_df) <- c("Listed Date", "Plan Date", "Years Elapsed")
kable(to_show_df,
      caption = "Summary statistics of time between listing and final recovery plan. Min, median, mean, and max are given in days.",
      digits = 1,
      align = "c")

Another question about time-to-plans is, What percentage of recovery plans are completed in X years? For example, how often do the Services meet their goal of 2.5 years from listing to final recovery plan?

duration <- sort(final_plans$Elapsed_Years, decreasing = FALSE)
order <- seq(1:length(duration))
percentile <- order / max(order)
pctile <- data.frame(duration, order, percentile, stringsAsFactors = FALSE)

gg <- ggplot(pctile, aes(y = duration, x = percentile)) +
        geom_line() +
        ggtitle("Time to recovery plan", 
                subtitle = "As a function of quantiles") +
        labs(x = "Percentile",
             y = "Time-to-recovery plan (years)\n") +
        theme_hc() + 
        theme(text = element_text(family = "Open Sans"))

ggplotly(gg) %>% layout(font = list(family = "Open Sans"))

With this we can quickly say that about 19% are completed within the 2.5 years the Services allot themselves, 50% of plans are finalized within 4.7 years of the species' listing, and 95% are finalized within 20 years.

Time-to-plan over the years

Bearing in mind the issue of right-censored data, we can now ask, How has the time it takes to write and finalize a recovery plan changed over the years?

tmp <- final_plans
tmp$Year_Listed <- year(tmp$First_Listed)
tmp$Year_Plan <- year(tmp$Rec_Plan_Date)
years <- seq(min(tmp$Year_Listed, na.rm = TRUE),
             max(tmp$Year_Listed, na.rm = TRUE))

cur_fx <- function(df, x, fn) {
  sub_df <- filter(df, Year_Listed == x)
  if(dim(sub_df)[1] == 0) return(NA)
  switch(fn,
         mean = mean(sub_df$Elapsed_Years, na.rm = TRUE),
         min = min(sub_df$Elapsed_Years, na.rm = TRUE),
         max = max(sub_df$Elapsed_Years, na.rm = TRUE)
  )
}

n_plans <- unlist(lapply(years,  function(x) length(filter(tmp, Year_Plan == x)[[1]])))
tm_mean <- unlist(lapply(years, cur_fx, df = tmp, fn = "mean"))
tm_min <- unlist(lapply(years, cur_fx, df = tmp, fn = "min"))
tm_max <- unlist(lapply(years, cur_fx, df = tmp, fn = "max"))
dat <- data.frame(years, tm_mean, tm_min, tm_max, n_plans)
names(dat) <- c("Year", "Mean", "Min", "Max", "N_plans")

ggplot(dat, aes(y = Mean, x = Year)) +
  geom_line() +
  geom_line(aes(y = Min, x = Year), colour = "gray70") +
  geom_line(aes(y = Max, x = Year), colour = "gray70") +
  ggtitle("Time to final recovery plan through the years", 
          subtitle = "Min and max in gray") +
  labs(x = "Year of Listing",
       y = "Time-to-plan (years)\n") +
  theme_hc() + 
  theme(text = element_text(family = "Open Sans"))

This shows a steady decline in the time between listing and a final recovery plan. Recovery plans weren't required until the 1978 amendments to the ESA, and the three requirements for recovery plans (site-specific actions, objective criteria, and cost estimates) weren't put in place until the 1988 amendments. These aspects of the ESA explain some of the long times to a plan for early species, but the shorter times after 2000 are rare exceptions because many of these species - listed for up to 15 years - still have no plan. We could substitute a "minimum time-to-plan" for those species that lack a plan (e.g., a species listed in 2000 but with no final recovery plan would have a minimum time-to-plan of 16 years), but those minima are not particularly helpful.

Plan ages

Another challenge of recovery planning is the difficulty of updating plans: revisions require extensive work by planning teams and Federal Register notices and potential revisions to a revision draft. But what we (collectively) know about a species changes rapidly, from basic biological research, to the types of management that can help or hinder recovery, and many topics in-between. This begs the question, What is the distribution of recovery plan ages? (As described in footnote 4 of the Introduction, these analyses include all species with final plans, whether original or revised.)

with_plan$Current_Age <- as.numeric(Sys.Date() - with_plan$Rec_Plan_Date) / 365

qplot(with_plan$Current_Age, geom = "histogram") +
      labs(x = "Plan age (years)",
           y = "Number of species") +
      ggtitle("Age of recovery plans", 
              subtitle = "Count by species") +
      theme_hc() +
      theme(plot.title=element_text(hjust = 0, size = 18))

With a median age of r round(median(with_plan$Current_Age, na.rm = TRUE), 2) years, most recovery plans are quite old - if plans were people, they could vote. The distribution reflects the pattern of plans-through-time even without considering the 135 plans that have been or are being revised. As with the time-to-plan, we can use a dynamic percentile plot to quickly find out the proportion of species' plans that are less than X years old:

age <- sort(with_plan$Current_Age, decreasing = FALSE)
order <- seq(1:length(age))
percentile <- order / max(order)
pctile <- data.frame(age, order, percentile, stringsAsFactors = FALSE)

gg <- ggplot(pctile, aes(y = age, x = percentile)) +
        geom_line() +
        ggtitle("Age of species' plans", 
                subtitle = "As percentiles") +
        labs(x = "Percentile",
             y = "Plan age (years)\n") +
        theme_hc() + 
        theme(text = element_text(family = "Open Sans"))

ggplotly(gg) %>% layout(font = list(family = "Open Sans"))

Only 16% of species have plans that are < 10 years old; 10% of species have plans that are >30 years old. These numbers highlight a dire need for some way to improve how recovery plans are updated. Many species are part of multi-species plans, and so have the same age for the individual plans. We can plot the ages of plans, rather than of species' plan ages:

tmp <- distinct(with_plan, Title, .keep_all = TRUE)
age <- sort(tmp$Current_Age, decreasing = FALSE)
order <- seq(1:length(age))
percentile <- order / max(order)
pctile <- data.frame(age, order, percentile, stringsAsFactors = FALSE)

gg <- ggplot(pctile, aes(y = age, x = percentile)) +
        geom_line() +
        ggtitle("Ages of recovery plans", 
                subtitle = "As percentiles") +
        labs(x = "Percentile",
             y = "Plan age (years)\n") +
        theme_hc() + 
        theme(text = element_text(family = "Open Sans"))

ggplotly(gg) %>% layout(font = list(family = "Open Sans"))

This view of the data suggests a slightly worse picture. The median age of plans is 22 years and 17% of plans are > 30 years old; the difference on the high end is because more older plans are single-species than the average-age plan.

Plans by region and agency

Because of the high degree of independence among FWS regions and differences between FWS and NMFS, it may be instructive to break down recovery planning along those lines. Which regions / agency have finalized the highest numbers and proportions of plans?

mat <- as.matrix(table(listed$Lead_Region, is.na(listed$Year_Plan)))
res <- data.frame(Region_Agency = row.names(mat),
                  With_Plans = mat[, 1],
                  Without_Plans = mat[, 2],
                  Prop_Plans = mat[, 1] / (mat[,1] + mat[,2]))
row.names(res) <- NULL
kable(res, 
      caption = "Distribution of plans among FWS regions and NMFS.", 
      align = "c",
      digits = 3)

How does the variation in time-to-plan break down by region/agency?

ggplot(final_plans, aes(y = Elapsed_Years, x = Lead_Region)) +
  geom_violin(fill = "gray", alpha = 0.5, colour = NA) +
  geom_boxplot(colour = "gray20", fill = NA) +
  labs(x = "FWS Region / NMFS", y = "Time-to-Plan (years)") +
  ggtitle("Time to recovery plan by region / agency",
          subtitle = "Final plans only") +
  theme_hc()

Time for NMFS is both high (median) and highly variable relative to FWS regions. Regions 4 and 8, which encompass many species, do not take as long recovery planning as I would have thought.

Discussion

Recovery planning was incorporated in the 1978 amendments to the ESA because of the realization that planning should improve conservation outcomes. Significant progress has been made improving the quality of recovery plans: contemporary plans are far more detailed and science-based than many older plans. But the handful of species that benefit from "modern" recovery plans are overshadowed by the shortcomings identified here.

First, 1/4 of ESA-listed species currently lack a recovery plan, even a draft that is publicly available. How can anyone other than specialists know what to do to help - or at least avoid harming - these listed species? If someone discovers an important tool for the conservation of one of these species, how can they know whether anyone else knows? Recovery outlines are a good starting point, but the data indicate only 64 of the 428 species lacking recovery plans have a recovery outline. Draft recovery plans that are readily available are the next step up, but there are only 34 of those on ECOS. Filling the gap with at least some well-informed guidance for the ~350 species lacking any type of plan should be a high priority.

Second, and as recognized by the review panel for NMFS's recovery program review, the time between listing an providing a plan is too long. While the median time is "only" 5.3 years, that number is pulled down by two factors. Nearly 120 species have plans that were finalized before those species were listed; removing those raises the median time to 5.9 years. More drastically, those 450 species that currently lack a recovery plan are excluded from the analysis and their addition even today would certainly increase the median time-to-plan. The NMFS review panel report provides a substantial number of recommendations that can likely reduce the time to first recovery planning documents and "final" plans. Building on the preceding paragraph, we would encourage the Services to, at a minimum, get recovery outlines in place for all listed species. Doing so will at least jump-start recovery planning for the overlooked species, and we expect will lead to reducing the time-to-plan in the coming years.

Third, at a median of 19 years and 10% of plans 30 years or older, many completed plans are showing their age. Not only is it likely that science has learned more about these species and their conservation, the threats to species may have changed significantly over these extended timeframes. For example, the indigo snake recovery plan was finalized in 1982; poaching may have been a significant threat at that time, but habitat destruction is clearly the leading threat today. Recognizing that formal recovery plan updates, as traditionally practiced, are time-consuming and expensive (e.g., just in Federal Register notices), the Services need to transition to a new and more dynamic recovery planning framework.

We recognize that the Services understand through experience some of the challenges that this analysis has shown. For example, the Service has been developing their Recovery Enhancement Vision (REV) for a number of years, which should help improve several aspects of recovery planning. (And NMFS expressed their interest in the REV model in their response to the recent review.) We think that the dynamic, web-based recovery plan frameworks that we are developing can be particularly useful in implementing REVs and similar updates to recovery planning. For example, recovery plans for the 450 species lacking plans - and hundreds of species that will be listed in the coming years - can start as recovery outlines in the web-based framework. Through regular updates the outlines can be fleshed out as draft plans, and after public review, "finalized." These online plans would be continually updated rather than being treated as fixed documents. Rather than a future analysis finding the average recovery plan hasn't been updated in 19 years, we think it is completely feasible that the dynamic plans could have an average "last update" of 19 days or weeks instead. Such a future can only help improve conservation outcomes of imperiled species.

Appendix A Data cleaning

While the code for all of the plots and tables can be easily viewed by examining the Rmarkdown document for this vignette, we provide the data cleaning (commented out) here for ease of viewing from the HTML document.

# combo <- dplyr::left_join(recovery_plan_table,
#                           species_info_table,
#                           by = "Species")
# names(combo)[1] <- "Rec_Plan_Date"
# combo$Rec_Plan_Date <- as_date(combo$Rec_Plan_Date)
# names(combo)[4] <- "Plan_Status"
# names(combo)[8] <- "Date_Cur_Status"
# combo$Date_Cur_Status <- as_date(combo$Date_Cur_Status)
# combo$Cur_Status_To_Plan <- as.numeric(combo$Rec_Plan_Date - combo$Date_Cur_Status)
# combo <- distinct(combo, Species, `Where Listed`, .keep_all = TRUE)
# combo$Status <- sub(combo$Status, 
#                     pattern = 'displayListingStatus\\(\\"', 
#                     replacement = '')
# combo$Status <- sub(combo$Status, 
#                     pattern = '\\"\\)', 
#                     replacement = '')
# combo <- filter(combo, Status == "Threatened" | Status == "Endangered")
# 
# # Next, the join with the 'big' ECOS species table to get original listing dates
# recplan_spp <- left_join(combo, ecos_all_spp, by = c("Species" = "Scientific_Name"))
# recplan_spp <- filter(recplan_spp, US_Foreign %in% c("US", "US/Foreign"))
# recplan_spp <- filter(recplan_spp, 
#                       Federal_Listing_Status %in% c("Endangered", "Threatened"))
# recplan_spp <- select(recplan_spp, Rec_Plan_Date, Title, Plan_Status, 
#                       Species, Status, Date_Cur_Status, Lead_Region, 
#                       Cur_Status_To_Plan, First_Listed, Species_Group,
#                       US_Foreign, Where_Listed)
# recplan_spp <- distinct(recplan_spp, Species, Where_Listed, .keep_all = TRUE)
# recplan_spp$Elapsed <- as.numeric(recplan_spp$Rec_Plan_Date - recplan_spp$First_Listed)
# recplan_spp$Elapsed_Years <- recplan_spp$Elapsed / 365
# 
# # Looks like I'm missing records for ~50 species compared to what the BoxScore
# # provides (http://ecos.fws.gov/ecp0/reports/box-score-report)...wait, no, the
# # footnote on that page says "active plans," so both draft and final.
# final_plans <- filter(recplan_spp, recplan_spp$Plan_Status == "Final")
# with_plan <- filter(recplan_spp, 
#                     recplan_spp$Plan_Status != "Outline",
#                     recplan_spp$Plan_Status != "Conservation Strategy",
#                     recplan_spp$Plan_Status != "Exempt")
# 
# ###############
# # Also need the complement of the previous, a data.frame of listed species,
# # with NAs for listed species lacking plans. Note that the 1593 I get here is 
# # the same as what BoxScore indicates today.
# listed <- filter(ecos_all_spp, 
#                  Federal_Listing_Status %in% c("Endangered", "Threatened"))
# listed <- filter(listed, US_Foreign %in% c("US", "US/Foreign"))
# 
# listed <- left_join(listed, combo, by = c("Scientific_Name" = "Species"))
# listed <- distinct(listed, Scientific_Name, Where_Listed, .keep_all = TRUE)
# listed <- select(listed, -`Plan Action Status`, -Doc_Link, -Status, -`Lead Region`,
#                   -`Where Listed`)
# listed$Year_Listed <- year(listed$First_Listed)
# listed$Year_Plan <- year(listed$Rec_Plan_Date)
# 
# no_plan <- filter(listed, is.na(listed$Year_Plan))


Defenders-ESC/recovery.plan.overview documentation built on May 6, 2019, 1:58 p.m.