FILL IN PLEASE!!! Note to self: Probably will want to split into three vignettes from an organization standpoint
1) Simulated data analysis to test analysis tools
2) Proof-of-concept analysis to get things working - simplified inputs - illustrates approach a) Species Distribution Model b) SpatialDemography Model c) Comparison
3) Main analysis a) Run Species Distribution Model b) Run SpatialDemography Model c) Compare statistical model with process model
# Set up common information for all data runs spath = "C:/docs/beplants/Scripts/" in.path = "C:/docs/beplants/drafts/BE_data_paper/R_inputs/" out.path = "C:/docs/beplants/drafts/BE_data_paper/R_outputs/" DispPath = sprintf("%sdispersal_tables/", spath) # Path to dispersal tables #install.packages(be_hlpr) #Supporting package needs to be installed in order to run the below code # Load packages library(spatialdemography) setwd(spath) if (dev.mode == 0){ library(be_hlpr) }else{ source("be_hlpr/R/be_hlpr.r") source("be_hlpr/R/be_sdm_hlpr.r") source("be_hlpr/R/be_spdem_hlpr.r") library(myr) #Make this a package dependency? }
Here, we examine a 2 x 2 landscape (see Keyel et al. in review) to examine the analysis tools used here. We assumed that the initial deterministic spatialdemography model run defined reality.
We then evaluated the ability of the species distribution model (SDM) to identify meaningful response traits. We then re-ran the simulation model using the estimated response traits (from the SDM), and compared this to the "reality" established by the initial model run.
Concerns with this approach: The SDM will have an incredibly small sample size to work from, given a 2 x 2 landscape Possible solutions: - can increase landscape size if necessary The metacommunity model will have an advantage, as "reality" was derived from a model run. - can do an example the other way - have the species distribution model define "reality" and then examine model performance.
But let's see how it performs first, before I over-analyze things. Also, the main purpose of this simulation is to check that the code works, not to produce biologically interesting results. And neither of the above should interfere with checking the technical implementation of the script!
sim.path = sprintf("%ssimulation/", in.path) ipath = sprintf("%sinputs/", sim.path) lpath = sprintf("%slandscape/", sim.path) opath = sprintf("%ssimulation/", out.path) if (!file.exists(opath)){ dir.create(opath)} scale.vec = "landscape" # Set scale of evaluation sp.locs = sprintf("%s/inputs/locations_spdem_ex1.csv", sim.path) ## Generate "Reality" run.name = "simulation" run.path = opath #**# see if this fixes my problem (even if the results won't be in sd_runs) file.ending = "spdem_ex1" # Set up species file and initial locations of species spfile = sprintf("%s/Species/Species_spdem_ex1.csv", sim.path) spdem = do.spatial.demography(run.name, run.path, spfile, sp.locs, ipath, file.ending, lpath, DisPath, opath, scale.vec) ## Run species distribution model # Set up basic settings sdm.type = 1 # 1 = simple, 2 = with biomod2, 3 = with previous data # set up species data: extract SDM input from metacommunity output sp.data = spdem.extract(sp.locs) # Extract land use data lu.path = "C:/docs/beplants/drafts/BE_data_paper/R_inputs/simulation/landscape/landcover_scn1.csv" #NOTE: this won't capture the response to the herbicide. And is currently based on the first timestep. Needs fixing if more than one timestep is desired (which at the moment is not the case!). Also note that the landuse file uses changesteps instead of timesteps, so a more complicated mapping would be needed. I leave that to another programmer, or another day. #herb.path = etwas predictors.vec = c("landuse-sim") # ,"herbicide-sim" predictors.paths = c(lu.path) # , herb.path out.info = setup.predictors(sp.data, predictors.vec, predictors.paths) my.data = out.info[[1]] my.index = out.info[[2]] # Only select the last land use column #**# Automated solution would be helpful here - this is done manually using knowledge. col.index = rep(0,ncol(my.data)) col.index[ncol(my.data)] = 1 # Do basic descriptive analysis of data #**# Think critically about what descriptive info you want and the implications for your analysis #do.descriptives(my.data, my.index) #**# NEEDS CODING # Run SDM analysis sdm.out = do.sdm(my.data, my.index, col.index, sdm.type) validation.diagnostics.lst = sdm.out[[1]] thresholds = sdm.out[[2]] #**# Need a way to make this more general if we start comparing linear models too. #**# Think about saving thresholds to file, to make it easier to save them! # Visually examine quality of sdm results for all species check.sdm.quality(validation.diagnostics.lst, "Land Use") # Visualize SDM analysis #visualize.sdm.main(sdm.out) # Create table output of results for further analysis model.type = "SDM-simulation-threshold" model.name = "landuse" results.table = sprintf("%sResults.csv", opath) sp.names = names(sp.data[2:length(sp.data)]) # Drop plotyear column, but retain all other species names. my.sdm.results = output.results.to.table(model.type, model.name, results.table, sp.names, validation.diagnostics.lst, do.append = FALSE) ## Run SpatialDemography model # NOTE: Some inputs used below were defined above. run.path = sprintf("%s/spdem_sdm/", opath) run.name = "spdem_sdm" file.ending = "spdem_sdm" inputs.ending = "spdem_ex1" # Set up species file & initial locations outside of function, then provide as input spbase = sprintf("%s/Species/Species_base.csv", sim.path) spfile = sprintf("%s/Species/Species_sdm.csv", sim.path) prepare.spfile(spbase, spfile, thresholds) spdem.out = do.spatial.demography(run.name, run.path, spfile, sp.locs, ipath, file.ending, lpath, DispPath, opath, scale.vec, inputs.ending) # Data from last model run (#**# improve acquisition of paths) v.path = "C:/docs/beplants/drafts/BE_data_paper/R_outputs/simulation/spdem_sdm/Example1/" m.path = "C:/docs/beplants/drafts/BE_data_paper/R_outputs/simulation/Example1/" validation.file = sprintf("%sSpeciesData.csv", v.path) model.file = sprintf("%sSpeciesData.csv", m.path) validation.timestep = 12 model.timestep = 12 n.sp = 3 # Compare model-produced results to observed data validation.diagnostics.lst = evaluate.spatialdemography(validation.file, model.file, validation.timestep, model.timestep, n.sp) # Write results to table model.type = "metacommunity" model.name = "landuse" sp.names = c("sp1","sp2","sp3") my.spdem.results = output.results.to.table(model.type, model.name, results.table, sp.names, validation.diagnostics.lst) # Examine results table combined.results = read.csv(results.table) combined.results
dpath = "C:/docs/beplants/datasets/" opath = sprintf("%s/proof_of_concept/", out.path) ### Run Species Distribution model # Set up basic settings #out.pdf = "C:/docs/beplants/Scripts/be_sdm/y2009.pdf" sdm.type = 1 # 1 = simple, 2 = with biomod2, 3 = with previous data #drop.rare = 0 # Set up species data sp.file = sprintf("%s/combined/SpeciesDataBE.csv", dpath) sp.lookup.file = sprintf("%s/combined/species_lookup.csv", dpath) sp.data = read.sp.data(sp.file, sp.lookup.file) #head(sp.data) # comes out organized with a column for every species, with a single plotyear column as ID. # Drop many species to speed up analysis (can optimize later) sp.data = sp.data[ ,1:5] # Set paths to predictor variables # Potential predictor variables: c("climate", "landuse", "biotic", "isolation") lu.path = sprintf("%sBExIS/landuse/16029.csv", dpath) # Run analysis just for land use predictors.vec = c("landuse") predictors.paths = c(lu.path) out.info = setup.predictors(sp.data, predictors.vec, predictors.paths) my.data = out.info[[1]] my.index = out.info[[2]] #col.index = out.info[[3]] #**# THis has yet to be coded col.index = rep(0,ncol(my.data)) col.index[(ncol(my.data) - 3):ncol(my.data)] = 1 # Try to just select the last 4 land use columns. #**# This is a hacky patch & requires a much better coding solution # Do basic descriptive analysis of data #**# Think critically about what descriptive info you want and the implications for your analysis #do.descriptives(my.data, my.index) # Drop rare species (if desirable) #if (drop.rare == 1){ # stop("This has not been retested since last modification") # out.info = drop.rare.species(my.data, my.index, col.index) # my.data = out.info[[1]] # my.index = out.info[[2]] # #col.index = out.info[[3]] #**# this has yet to be coded # } # Run SDM analysis sdm.out = do.sdm(my.data, my.index, col.index, sdm.type) validation.diagnostics.lst = sdm.out[[1]] thresholds = sdm.out[[2]] #**# Need a way to make this more general if we start comparing linear models too. # Visually examine quality of sdm results for all species check.sdm.quality(validation.diagnostics.lst, "Land Use") # Visualize SDM analysis #visualize.sdm.main(sdm.out) # Create table output of results for further analysis model.type = "SDM-threshold" model.name = "landuse" results.table = sprintf("%sResults.csv", opath) sp.names = names(sp.data[2:length(sp.data)]) # Drop plotyear column, but retain all other species names. my.sdm.results = output.results.to.table(model.type, model.name, results.table, sp.names, validation.diagnostics.lst) ### Run SpatialDemography #Set up paths for SpatialDemography template.dir = sprintf("%sinput_templates/", in.path) # Path to templates to provide the basis for the analyses lpath = "C:/docs/beplants/drafts/BE_data_paper/R_inputs/input_templates/landscape/" file.ending = "BE" # Set up species file sp.base = sprintf("%s/Species/spfile_base.csv", template.dir) spfile = sprintf("%s/Species/spfile_spdem.csv", template.dir) prepare.spfile(sp.base, spfile, thresholds) # Set up basic parameters for simulation start.year = 2009 #**# Arbitrarily chosen starting year pool.type = "plot" # region & all are other anticipated options landscape.extent = 5 #**# can try different landscape sizes n.sim = 1 n.plots = 150 n.sp = ncol(sp.dat) - 1 # Remove plotyear from count # Restrict data set to starting year #**# Note: species pools should be set up prior to this step spdem.out = spdem.prep(my.data, start.year, col.index) # Initialize a new data object (should I just use my.data?) sp.dat = spdem.out[[1]] land.dat = spdem.out[[2]] # get number of cells in landscape n.cells = landscape.extent ^ 2 # get position of focal cell focal.cell.position = floor(median(seq(1,n.cells))) # median of the sequence gets the middle number. Floor ensures that it is a whole number. # Set up model validation data #**# Non-optimal approach - convert my.data to a SpeciesData.csv format to allow me to use an existing function. model.timestep = 2010 actual.presences.lst = convert.sp.data(sp.data, model.timestep) # Create the model object based on sp.data # Create a list to hold results from ALL simulations sim.lst = list() # loop through simulations (note: for pool.type = plot, it should be fairly deterministic?) for (k in 1:n.sim){ predicted.presences.lst = rep(list(NA), n.sp) # Loop through plots in the exploratories for (p in 1:n.plots){ # Restrict data set to a specific plot if (nrow(sp.dat) != n.plots) { stop("Species dataset does not match plot dataset. Something is wrong.")} if (nrow(land.dat) != n.plots) {stop("Landscape data dataset does not match plot dataset. Something is wrong.")} plot = sp.dat$plots[p] sp.plot.dat = sp.dat[sp.dat$plots == plot, ] land.plot.dat = land.dat[land.dat$plots == plot, ] # Set up landscape configuration (hacky way for now, will come back to this) # Currently using fixed values which may or may not correpsond to the focal cell. This stupid, because we need the value for the focal cell. In addition to species pool, we need an environmental pool!). Maybe I should just use LUI and not worry about mowing, grazing, and fertilization to start with?? # customize to focal plot fert.plot = land.plot.dat[[2]] mowing.plot = land.plot.dat[[3]] grazing.plot = land.plot.dat[[4]] lui.plot = land.plot.dat[[5]] lui = rep(lui.plot, n.cells) fert = rep(fert.plot, n.cells) mowing = rep(mowing.plot, n.cells) grazing = rep(grazing.plot, n.cells) landscape = list(lui, fert, mowing, grazing) #**# make this correspond to an input for spatialdemography # Create landscape layer files (can you just input landscape objects to spatialdemography? I think so???) lpath = landscape # Hidden functionality that needs better documentation: this can also be the landscape itself, if properly formatted! # Set up environmental layers #**# NEED TO FIX ENVIRONMENTAL LAYERS TO CORRESPOND TO THOSE THAT HAVE BEEN ASSESSED FOR THRESHOLDS #prepare.env.lyrs(STUFF) #**# Needs scripting #Run name & run path run.path = sprintf("%splot_%s_sim_%s", opath, p, k) run.name = sprintf("plot_%s_sim_%s", p, k) # Set up locations file sp.locs = sprintf("%s/locations_%s_%s.csv", template.dir, pool.type, Digit(n.sim, 4)) prepare.sp.locations(sp.locs, sp.plot.dat, pool.type, n.cells) # Create sp.locs file scale.vec = "landscape" #c("landscape", "focal cell") #**# Figure out how to add your focal cell to this! # Run main model spdem.out = do.spatial.demography(run.name, run.path, spfile, sp.locs,template.dir, file.ending, lpath, DisPath, opath, scale.vec) # Extract result from focal cell & append to list output.file = sprintf("%sLanduse/SpeciesData.csv", run.path) validation.timestep = 2 #**# Don't run it for 6 timesteps then! predicted.presences.lst = extract.cell.values(predicted.presences.lst, output.file, focal.cell.position, validation.timestep) } # Compare model-produced results to observed data validation.diagnostics.lst = evaluate.spatialdemography.v2(predicted.presences.lst, actual.presences.lst, n.sp) # Add this runs accuracy stats to the running total. sim.lst = append(sim.lst, list(validation.diagnostics.lst)) } # Convert validation.diagnostics from multiple simulations into an average result & output the average result validation.diagnostics.lst = summarize.validation(sim.lst, n.sp) # Write average accuracy to file model.type = "metacommunity-agg-mean" model.name = "landuse" sp.names = names(sp.data)[2:ncol(sp.data)] # Exclude plotyear column my.spdem.results = output.results.to.table(model.type, model.name, results.table, sp.names, validation.diagnostics.lst) # Examine results table combined.results = read.csv(results.table) combined.results
Copy, paste, adapt from above
#**# NEEDS SCRIPTING
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.