knitr::opts_chunk$set(fig.width=6.5, fig.height=3.5,fig.pos='center',echo=TRUE,comment='>',dpi=800) library(ggplot2) library(biosplit) figList <- list() source(paste(realWd,"docs/rmdFunctions.R",sep='/')) theme_set(getNiceTheme()) meteoLoc <- "C:/Users/Siddarta.Jairam/Desktop/sid/MeteoInfo1.2" goldLoc <- "C:/Program Files/Golden Software/Surfer 12" RLoc <- "C:/Users/Siddarta.Jairam/Documents/R/R-3.2.0"
When the package is installed there is a blank "config.txt" file created in the package directory. This holds all the information about the run and the file folders. Ultimately, most of the file folders are optional but the parameters are vital to how the biological side of the program runs. One file folder that is necessary is the location of the HYSPLIT working directory and the installed "execs" file. This program assumes that the HYSPLIT is installed.
cfg <- loadConfig() tail(names(cfg))
Make the grids of the different manipulation steps:
boBox <- raster::extent(cfg$xmin, cfg$xmax, cfg$ymin, cfg$ymax) cropGrid <- raster::raster(boBox, crs = cfg$cropProj,resolution = cfg$spc) niceGrid <- raster::projectExtent(cropGrid,cfg$niceProj)
Download the files directly from NASS cropscape
NASS2TIFs(cfg$CropFold[1], cfg$year, cropGrid)
convert the downloaded Tiff 30m blocks to 40 km blocks and get rid of projection
finCrop <- paste(cfg$CropFold[2],paste0("aggCrop_", cfg$year, ".nc"), sep='/') rawCrop2nicenc(cfg$CropFold[1], finCrop, cropGrid)
add in Canada
addCanada <- "C:/Users/Siddarta.Jairam/Documents/Documentation/Untitled Polygon.kml" finAddCrop <- gsub("[.]nc", "add.nc", finCrop) addCropArea(finCrop, addCanada, cropAmt = 1, pathOut = finAddCrop)
Go through the ARL data Require MeteoInfo
runMeteo <- makeRunFun(meteoLoc,"py") runMeteo("ARL2GRD.py", c(cfg$MetARLFold, cfg$proARLDir, cfg$wantedMetVars, recursive = TRUE), TRUE)
Get all the variables cleaned up unprojected and netCDFs.
#get the units for the variables (comes from readme of the met data used unitsDict <- list( T02M = 'K', V10M = 'm/s North', TPP3 = 'm', SOLT = 'K') vapply(cfg$wantedMetVars, function(var){ namu <- paste0(var, "Fold") rawMet2nicenc(dirTreeIn = cfg[[namu]], projKey = cfg$MetMappingLoc, unit = unitsDict[[var]], niceGrid = niceGrid) TRUE},TRUE)
Transform predictive data into the derived variables used by the program
collectAprioriVars(cfg)
Call the model
runBiosplit(cfg)
Grab the trap data and get a list of all the trap locations and their captures
pathTrapXlsx <- "C:/Users/Siddarta.Jairam/Documents/Documentation/PestWatchDump.xlsx" pathScrubTrap <- "C:/Users/Siddarta.Jairam/Documents/Documentation/scrubTrap.csv" trapRec <- scrubTrap(pathTrapXlsx, cfg$year)
Go through the haplotype data and relate it back to the trap data for a smaller subset.
pathHapXlsx <- "C:/Users/Siddarta.Jairam/Documents/Documentation/hapDat.xlsx" pathTrapDict <- "C:/Users/Siddarta.Jairam/Documents/Documentation/County2TrapLoc.csv" csvTab <- scrubHaplo(pathHapXlsx, pathTrapDict) csvTab <- scrubHaplo(pathHapXlsx, pathTrapDict, pathCsvOut = gsub("[.]xlsx", paste0(cfg$year, ".csv"), pathHapXlsx), onlyYr = cfg$year)
Summarize the validation on a per year basis in the form of a single column csv file. After this, one can further summarize the data using area of interests that either calculate the first occurrence in that area using the minimum date of arrival or the average mixing ratio that takes the sum of all the moths in that entered that area as a population.
pathHapSum <- "C:/Users/Siddarta.Jairam/Documents/Documentation/sumHapTrap.csv" sumTab <- summarizeValid(trapRec, cfg$year, hapIn = csvTab) valAreaStats(pathHapSum,"C:/Users/Siddarta.Jairam/Documents/Documentation/Central PA.kml", niceGrid)
Make the final csv tables comparing all the different validation and simulation data.
ncdf2trapdata(cfg$SimOutFold, cfg$TrapLoc[paste0(cfg$year)], shDoSum = TRUE) runs <- c("runInterYear","runAbsFlightLimit","runFlightAndNight","runNightDurDel") pushLoc <- paste('X:/2 WESTBROOK/Sid/Hysplit Out Moth table',cfg$year, sep='/') vapply(runs, function(x){ direc <- gsub(cfg$runName, x, cfg$SimOutFold) direc[2] <- paste(pushLoc, x, sep='/') ncdf2trapdata(direc[1], cfg$TrapLoc[paste0(cfg$year)]) pathCSV <- paste0(direc, "/TrapFinal") vapply(c('H.csv','V.csv'), function(x){ file.copy(paste0(pathCSV[1],x), paste0(pathCSV[2],x), overwrite = TRUE) },TRUE) },rep(TRUE,2))
You can also do the regional statistics on the simulation data using a similar scheme.
simAreaStats(cfg$SimOutFold, "C:/Users/Siddarta.Jairam/Documents/Documentation/Central PA.kml")
Spatially average the output over the whole grid. This compares the trap data on the same scale as the simulation data.
predObv <- ncdf2trapgrid(cfg$SimOutFold, "C:/Users/Siddarta.Jairam/Documents/Documentation/Result Files/firstOccTrap.grd", paste(cfg$SimOutFold, "Sim-TrapFirstOcc.nc",sep='/')) #The spatial average distance between the weeks mean(abs(predObv[,,]),na.rm=TRUE)
Along with the general comparison of the rasters, one can look at the difference with the first occurrence and the mixing ratios in both cases spatially averaged to the base grid size. This now is the third way of calculating these values for three different spatial scales (point and time specific, regional over the year, spatially averaged per point over the year).
ncdf2trapgrid(cfg$SimOutFold, pathOut = paste(cfg$SimOutFold, "SimFirstOcc.nc",sep='/'), shUseSum = TRUE) ncdf2trapgrid(cfg$SimOutFold, pathOut = paste(cfg$SimOutFold, "SimMixRatio.nc",sep='/'), shDoMix = TRUE, shUseSum = TRUE)
There are 3 different maps used in this analysis
These are done using a provided script running function to call scripter as follows:
runScripter <- makeRunFun(goldLoc, "BAS")
The contour maps are the first thing to look at with a given simulation. You can see the progress through the year and have a good idea about mixing areas and population densities. Mixing areas are hard to ascertain but first occurrence is mapped precisely.
runScripter("Mothtxt2Contour.BAS",cfg$SimOutFold)
The contours are made for almost thumbnail- like size for use in the autoreport. This means that areas have been smoothed and complexities have been eliminated. To get a clearer look at what is happening in each grid cell the class post maps are used. All the complexities are shown in gory details including individual populations in a specific grid point. The downside is that these are overlayed on top of each other which makes the graph extremely messy after June or so. These are not ready for publication and should be used as raw verification outputs.
runScripter("Mothtxt2ClassPost.BAS",cfg$SimOutFold)
After the post-processing is done the last graph is used. Without too much manipulation, these graphs were arrived upon for showing the data as they are. It takes an netCDF output of one of the statistics used, mixing ratio or first occurrence as well as a table of scrubbed validation data. It has flags to manipulate the number of bins and if color is used.
pathMix <- paste(cfg$SimOutFold, "SimMixRatio.nc",sep='/') pathFirst <- paste(cfg$SimOutFold, "SimFirstOcc.nc",sep='/') pathHap <- "C:/Users/Siddarta.Jairam/Documents/Documentation/hapDat2013.csv" pathSum <- "C:/Users/Siddarta.Jairam/Documents/Documentation/sumTrapHap2013.csv" runScripter("ncdf2contour.BAS", c(pathMap, 3, "bw", pathHap, 3, 2, 8, 1)) runScripter("ncdf2contour.BAS", c(pathFirst, 4, "bw", pathSum, 3, 2, 4, 0, "First Occ (PestWatch wk)"))
The order of the flags (the character vector given after the script name) are:
For the pre-processing, this document should suffice as the individual steps may change with the specific project. As with all the functions, roxygen comments are used to produce the final man pages for more detailed documentation.
An attempt to summarize the biophysical models that go into the model can be found in "docs/BiologicalEqs.rmd". An explanation of the HYSPLIT model can be located at the NOAA website.
batchRun.R in the scripts folder details the modeling steps while running a multiple runs in a single source call.
The most complete collection of the post processing steps and scientific underpinnings can be found in "docs/AutoReport.rmd". This is a parameterized report that compares two or more runs with each other and largely runs through the post-processing steps. It has a required parameter other than the specific runs so the call in the final output grid as follows:
rmarkdown::render(system.file("docs", "AutoReport.rmd", package = "biosplit"), params = list( runs = rep("runWithCanada",4), years = c(2011, 2012, 2013, 2014), niceGrid = niceGrid ) )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.