R/additionalStats.R

Defines functions additionalStats

Documented in additionalStats

#' Generate miscellaneous statistics for species range shifts
#'
#' This function generates graphs showing the changes in range size and position between
#' the data a species distribution model was trained on and future or past projections
#' of species ranges.
#'
#' NOTE: this function is dependent on the outputs generated by the \code{projectSuit}
#' function, in particular the "Results.csv" file, which includes information on
#' changes in range size and position across the projected time periods and scenarios.
#'
#' @param result_dir the directory where the ensembled and binary maps are placed in
#' addition to the "Results.csv" file. If \code{projectSuit} was used to make
#' these maps, this should be the same as the \code{output} argument in that function.
#' @param time_periods a vector of the years in which the projection will occur, with the
#' first element as the year the model will be trained on (usually the current data).If no
#' precise years are available (e.g., using data from the Last Glacial Maximum), order
#' time periods from current to least current and give character strings for the years (e.g., "LGM").
#' @param scenarios a vector of character strings detailing the different climate models
#' used in the forecasted/hindcasted species distribution models. In no projection is
#' needed, set to NA (defualt).
#' @param ncores the number of computer cores to parallelize the background point
#' generation on. Default is 1; Using one fewer core than the computer has is usually
#' optimal.
#' @param dispersal (logical \code{TRUE} or \code{FALSE}) Should these statistics be
#' calculated for the dispersal-constrained distribution maps? If dispersal rate
#' analysis are not needed, or if the \code{megaSDM::dispersalRate} function has yet
#' to be run, this should be set to \code{FALSE} (the default).
#' @param contiguous (logical \code{TRUE} or \code{FALSE}) If \code{dispersal == TRUE},
#' do the statistics that need to be calculated come from the analysis that 
#' constrained the suitable habitat at the first time step to the existing occurrences? 
#' See the \code{contiguous} argument in the \code{dispersalRate()} function. 
#' Default is \code{FALSE}
#' @param dispersaldata A dataframe or the full file path to a .csv file with two columns:
#'   1. Species.
#'   2. Dispersal Rate in km/yr.
#'   See the function \code{megaSDM::dispersalRate} for more details.
#' @export
#' @return creates .pdf files of graphs showing changes in range size and distribution
#' across multiple time periods and scenarios:
#' 1. The overall modelled range size across all time periods and scenarios.
#' 2. The percent change from the current range size.
#' 3. Average range size for each year given multiple climate scenarios.
#' 4. (If \code{dispersal = TRUE}) the difference in range size between dispersal constrained
#'    and non-dispersal constrained species ranges.
#'

additionalStats <- function(result_dir, time_periods, scenarios,
                            dispersal = FALSE, contiguous = FALSE,
                            dispersaldata = NA, ncores = 1) {

  spp.list <- list.dirs(result_dir, full.names = FALSE, recursive = FALSE)
  if (length(spp.list) == 0) {
    stop(paste0("No projected models found in 'result_dir': Ensure that 'result_dir' provides a path to the proper location"))
  }
  ListSpp <- c()
  #Generates the species list for parallelization
  if (dispersal == TRUE) {

    #Reads in dispersal data
    if (class(dispersaldata) == "character") {
      dispersaldata <- utils::read.csv(dispersaldata, stringsAsFactors = FALSE)
      dispersaldata[, 1] <- gsub("_", " ", dispersaldata[, 1])
    } else {
      dispersaldata[, 1] <- gsub("_", " ", dispersaldata[, 1])
    }

    for (i in 1:length(spp.list)) {
      curspecies <- spp.list[i]
      if (file.exists(paste0(result_dir, "/", curspecies, "/Results_Dispersal.csv"))) {
        ListSpp <- c(ListSpp, spp.list[i])
      }
    }
    for (w in 1:length(spp.list)) {
      FocSpec <- gsub("_", " ",spp.list[w])
      DispSpec <- grep(paste0("^", FocSpec, "$"), dispersaldata[, 1])
      if (length(DispSpec) == 0) {
        message(paste0("No dispersal rate values found for ", FocSpec, ": skipping dispersal rate analysis"))
      }
    }
  } else {
    ListSpp <- spp.list
  }
  if (length(ListSpp) < ncores) {
    ncores <- length(ListSpp)
  }
  ListSpp <- matrix(ListSpp, ncol = ncores)

  #Get the nubmer of years and scenarios
  numYear <- length(time_periods)
  numScenario <- length(scenarios[!is.na(scenarios)])

  #Graphical parameters for the barplots
  colScenario <- grDevices::colorRampPalette(c("blue", "darkred"))(max(numScenario, 1))  #COLOR SCHEME
  colYear <- grDevices::colorRampPalette(c("darkgreen", "brown"))(numYear)
  col13 <- grDevices::colorRampPalette(c("yellow", "darkgrey"))(max(numScenario, 1) * (numYear - 1) + 1)

  #Functions----------------------------

  #Creates a bar-graph of the number of cells at all time periods for all scenarios
  getCellsGraph <- function(spp, stats, dispersalApplied, contig) {

    #If dispersal has been applied, prints out a new graph
    if (dispersalApplied == TRUE) {
      if(contig == TRUE) {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/NumCells_Contig.pdf"))
      } else {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/NumCells.pdf"))
      }
    } else {
      grDevices::pdf(file = paste0(result_dir, "/", spp, "/Additional Stats/NumCells.pdf"))
    }

    #Graphical parameters and barplot
    graphics::par(mar = c(8, 4, 4, 2), mfrow = c(1, 1), las = 2)
    ticks <- signif(seq(from = 0, to = max(stats$NumberCells), length.out = 10), digits = 3)
    graphics::par(yaxp = c(0, max(stats$NumberCells), 20))

    graphics::barplot(stats$NumberCells,
            ylab = "Number of Cells",
            axes = FALSE,
            main = spp,
            col = col13,
            names.arg = stats$Projection,
            cex.names = 0.7,
            cex.lab = 0.7,
            beside = TRUE)
    graphics::axis(2, ticks, cex.axis = 0.6)
    grDevices::dev.off()
  }

  #Creates multiple bar-graphs showing number of cells for each time period (one for each scenario)
  getDiffScenariosGraph <- function(spp, stats, dispersalApplied, contig) {

    #If dispersal has been applied, prints out a new graph
    if (dispersalApplied == "TRUE") {
      if(contig == TRUE) {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/NumCells_Contig_", numScenario, "Graphs.pdf"))
      } else {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/NumCells", numScenario, "Graphs.pdf"))
      }
    } else {
      grDevices::pdf(file = paste0(result_dir, "/", spp, "/Additional Stats/NumCells", numScenario, "Graphs.pdf"))
    }

    #graphical parameters and bar plot (for each climate scenario)
    old.par <- graphics::par(mfrow = c(2, ceiling((max(numScenario, 1) / 2))), las = 2, mar = c(8, 4, 4, 2))
    for (ScenIndex in 1:numScenario) {
      ticks <- signif(seq(from = 0, to = max(stats$NumberCells), length.out = 10), digits = 3)
      graphics::barplot(stats$NumberCells[c(1, (2 + ((ScenIndex - 1) * (numYear - 1))):((2 + ((ScenIndex - 1) * (numYear - 1))) + numYear - 2))],
              ylab = "Number of Cells",
              axes = FALSE,
              main = paste0(spp, "_", scenarios[ScenIndex]),
              names.arg = time_periods,
              cex.names = 0.7,
              cex.axis = 0.5,
              cex.lab = 0.7,
              beside = TRUE,
              col = colYear)
      graphics::axis(2, ticks, cex.axis = 0.6)
    }
    grDevices::dev.off()
  }

  #Creates a bar-graph of the change in cells (percent of original) from original
  getpercentDiffGraphs <- function(spp, stats, dispersalApplied, contig) {
    nstats <- nrow(stats)
    PercentChange <- c()
    origcells <- stats$NumberCells[1]

    #Calculates percent change between time period range and original range
    for (n in 1:nstats) {
      nchange <- stats$CellChange[n]
      if (origcells > 0) {
        perchange <- (nchange / origcells * 100)
      } else {
        perchange <- 0
      }
      PercentChange <- c(PercentChange, perchange)
    }

    PercentChange <- matrix(PercentChange)
    rownames(PercentChange) <- stats$Projection

    #If dispersal has been applied, prints out a new graph
    if (dispersalApplied == "TRUE") {
      if(contig == TRUE) {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/CellChange_Contig.pdf"))
      } else {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/CellChange.pdf"))
      }
     
    } else {
      grDevices::pdf(file = paste0(result_dir, "/", spp, "/Additional Stats/CellChange.pdf"))
    }

    #barplot
    CellChangePlot <- graphics::barplot(PercentChange[, 1],
                              ylab = paste0("Percent Change from ", time_periods[1]),
                              main = spp,
                              cex.names = 0.7,
                              cex.axis = 0.7,
                              cex.lab = 0.7,
                              col = col13)
    grDevices::dev.off()
  }

  getMinMaxGraphs <- function(spp, stats, dispersalApplied, contig) {
    numlist <- c(stats$NumberCells[1])

    #Calculates average number of cells per time period across scenarios
    for (i in 2:numYear) {
      #Sums all of the cell numbers from each scenario within a time period
      for (j in 0:(max(numScenario, 1) - 1)) {
        if (j == 0) {
          num <- stats$NumberCells[j * (numYear - 1) + (i - 1) + 1]
        } else {
          num <- num + stats$NumberCells[j * (numYear - 1) + (i - 1) + 1]
        }
      }
      num <- num / max(numScenario, 1)
      numlist <- c(numlist, num)
    }

    #Calculates maximum range size per time period
    maxlist <- c(0)
    for (i in 2:numYear) {
      tempList <- c()
      for (j in 0:(max(numScenario, 1) - 1)) {
        tempList <- c(tempList, stats$NumberCells[j * (numYear - 1) + (i - 1) + 1])
      }
      maxlist <- c(maxlist, max(tempList))
    }

    #Calculates minimum range size per time period
    minlist <- c(0)
    for (i in 2:numYear) {
      tempList <- c()
      for (j in 0:(max(numScenario, 1) - 1)) {
        tempList <- c(tempList, stats$NumberCells[j * (numYear - 1) + (i - 1) + 1])
      }
      minlist <- c(minlist, min(tempList))
    }

    #If dispersal has been applied, prints out a new graph
    if (dispersalApplied == TRUE) {
      if(contig == TRUE) {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/AvgNumberCells_Contig.pdf"))
      } else {
        grDevices::pdf(file = paste0(result_dir, "/", spp, "/Dispersal Applied Additional Stats/AvgNumberCells.pdf"))
      }
      
    } else {
      grDevices::pdf(file = paste0(result_dir, "/", spp, "/Additional Stats/AvgNumberCells.pdf"))
    }

    #graphical parameters and barplot
    ticks <- signif(seq(from = 0, to = max(numlist), length.out = 10), digits = 2)

    avg <- graphics::barplot(numlist,
                   axes = FALSE,
                   ylab = "Average # of Cells per Decade",
                   main = spp,
                   names.arg = time_periods,
                   cex.names = 0.9,
                   cex.axis = 0.5,
                   cex.lab = 0.9,
                   beside=TRUE,
                   col=colYear)
    graphics::axis(2, ticks, cex.axis = 0.6)
    graphics::segments(x0 = avg, y0 = minlist, x1 = avg, y1 = maxlist)
    grDevices::dev.off()
  }

  DispersalCompare <- function(spp, stats, contig) {
    spp.name <- spp

    #Reads in the results data frame (not dispersal-constrained)
    statsNoDisp <- utils::read.csv(file.path(result_dir, spp.name, "Results.csv"))
    statsNoDisp <- statsNoDisp
    nstatsNoDisp <- nrow(statsNoDisp)

    for(scen in 1:numScenario) {
      DispComp <- matrix(rep(NA, 2 * (length(time_periods) - 1)), nrow = 2, ncol = length(time_periods) - 1)

      #Fill in "DispComp" matrix with area of suitable habitat (both dispersal and regular)
      for(y in 2:length(time_periods)) {
        CurYear <- time_periods[y]
        DispCells <- stats[grep(paste0(scenarios[scen], "_", time_periods[y], "$"), stats$Projection), "NumberCells"]
        NoDispCells <- statsNoDisp[grep(paste0(scenarios[scen], "_", time_periods[y], "$"), statsNoDisp$Projection), "NumberCells"]
        DispComp[1, (y - 1)] <- NoDispCells
        DispComp[2, (y - 1)] <- DispCells
      }

      #Name the output pdf
      rownames(DispComp) <- c("No Dispersal", "Dispersal")
      if(contig == TRUE) {
        grDevices::pdf(file = file.path(result_dir, spp, "Dispersal Applied Additional Stats",
                                        paste0(scenarios[scen], "_DispersalCompare_Contig.pdf")))
      } else {
        grDevices::pdf(file = file.path(result_dir, spp, "Dispersal Applied Additional Stats",
                                        paste0(scenarios[scen], "_DispersalCompare.pdf")))
      }
      

      #graphical parameters and barplot
      graphics::par(mfrow = c(1, 1), mar = c(5, 5, 4, 8))
      graphics::barplot(DispComp,
              main = c(spp, paste0("Dispersal Rate Constrained vs. Unconstrained: ", scenarios[scen])),
              ylab = "Number of Cells",
              names.arg = c(time_periods[2:length(time_periods)]),
              col = c("darkblue", "red"),
              legend = c("Unconstrained", "Constrained"),
              args.legend = list(x = "bottomright", bty = "n",inset = c(-0.35, 0)),
              beside = TRUE)
      grDevices::dev.off()
    }
  }

  run <- function(CurSpp) {
    spp.name <- CurSpp

    if (dispersal == TRUE) {
      dir.create(file.path(result_dir, spp.name, "Dispersal Applied Additional Stats"))
      if (contiguous == TRUE) {
        stats <- utils::read.csv(file.path(result_dir, spp.name, "Results_Dispersal.csv"))
      } else {
        stats <- utils::read.csv(file.path(result_dir, spp.name, "Results_Dispersal_Contig.csv"))
      }
      stats <- stats
      nstats <- nrow(stats)
    } else {
      dir.create(file.path(result_dir, spp.name, "Additional Stats"))
      stats <- utils::read.csv(file.path(result_dir, spp.name, "Results.csv"))
      stats <- stats
      nstats <- nrow(stats)
    }

    #construct graphs of area changes
    getCellsGraph(spp.name, stats, dispersal, contiguous)
    getDiffScenariosGraph(spp.name, stats, dispersal, contiguous)
    getpercentDiffGraphs(spp.name, stats, dispersal, contiguous)

    if (numScenario > 0) {
      getMinMaxGraphs(spp.name, stats, dispersal, contiguous)
    }

    if (dispersal == "TRUE") {
      DispersalCompare(spp.name, stats, contiguous)
    }
    grDevices::graphics.off()
  }

  if (ncores == 1) {
    ListSpp <- as.vector(ListSpp)
    out <- sapply(ListSpp, function(x) run(x))
  } else {
    clus <- parallel::makeCluster(ncores, setup_timeout = 0.5)
    
    parallel::clusterExport(clus, varlist = c("colScenario", "colYear", "col13", "result_dir",
                                              "numYear", "numScenario", "dispersal",
                                              "dispersaldata", "DispersalCompare", "time_periods","scenarios",
                                              "getCellsGraph", "getDiffScenariosGraph", "getpercentDiffGraphs",
                                              "getMinMaxGraphs", "run", "contiguous"), envir = environment())
    
    parallel::clusterEvalQ(clus, library(graphics))
    
    for (i in 1:nrow(ListSpp)) {
      out <- parallel::parLapply(clus, ListSpp[i, ], function(x) run(x))
    }
    parallel::stopCluster(clus)
  }
}
brshipley/megaSDM documentation built on Nov. 26, 2024, 6:08 a.m.