R/simulacro.R

Defines functions simulacro

Documented in simulacro

#' Builds a simulated biological invasion dataset.
#'
#' This function builds point time seris within geographic borders based on an iterative process that simulates biological invasions. Points of natural and anthropic origin can be generated with different processes. Points of natural origin are generated by a stepwise process where new locatins are chosen on the basis of a user-definable dispersal kernel. Points of antrhopic origin are sampled randomly. All generated points can be filtered through probability maps.
#'
#' @import raster
#' @importFrom sp point.in.polygon
#' @importFrom stats na.omit
#' @importFrom utils read.table
#'
#' @param INIDIST data frame of initial distribution. Columns must be: 'year' (year of first sighting); 'y' (latitude on a projected coordinate system with meters as distance unit of measure); 'x' (longitude on a projected coordinate system with meters as distance unit of measure); 'species' (the name of the species for that sighting location); 'Pnat' ([0;1], probability for that sighting location of being of natural origin (as computed by function EM())), 'Dist' (dispersal distance of that sighting location as computed by function EM()).
#' @param YEARS vector of unique year values for which the simulacro function will generate data (they will be written in the output file).
#' @param BOUNDARY object of class sp::SpatialPolygons or raster:RasterLayer (0: inside, >= 1: outside) used as geographi boundary for function RPG(). These are the geographic limits within which points or anthropic origin can be generated.
#' @param NANTH either a vector of numbers of anthropic points to be generated every year or an integer number of points to be generated per time-step.
#' @param NNAT either a vector of numbers of natural points to be generated every year  or an integer number of points to be generated per time-step.
#' @param FACANTH Only if HSM is a probability raster. Factor multiplying NANTH before the filtering. This value should be high if HSM has a low proportion of suitable cells.
#' @param FACNAT Only if HSM is a probability raster. Factor multiplying NNAT before the filtering
#' @param A Alpha values for the one dimensional dispersal kernel. Each Alpha value will be used to generate a single data frame.
#' @param C C values for the one dimensional dispersal kernel. C=2: normal kernel; C=1: negative exponenetial kernel; C<1: fat-tailed kernel.
#' @param HSM either a Habitat Suitability Map ([0;1], object of class raster::RasterLayer, probability raster giving value of likelyhood of a viable population establishing each cell) or the geographic boundaries (object of class sp::SpatialPolygons) within which to generate points of natural or anthropic origin.
#' @param X set of possible interpopulation distances [km].
#' @param ITERATIONS number of replicate datasets generated with the same Alpha and C values.
#' @param DIR directory where to write the simulated datasets. if FALSE they will be saved as data frames in a list object.
#' @param TRUEANTH If FALSE will use HSM to simulate anthropogenic dispersal. If TRUE will use TRUEDB
#' @param TRUEDB data frame containing points of anthropic origin (must be in the same format as INIDIST, rows where TRUEDB$Pnat>PROB will be ignored). Ignored if TRUEANTH= FALSE, but HSM must then be specified.
#' @param PROB threshold over which Pnat is considered natural.
#'
#' @return list of data frames. Every data frame represents a simulated biological invasion. if DIR=TRUE one folder will be created for each Alpha and C combination containig all the replicates datasets set in ITERATION. if DIR=FALSE (default) the order in the list will follow the order of the C and Alpha values respectively as set in C and A.
#'
#' @author Luca Butikofer
#'
#' @export
#'
#' @examples
#' data('frogsEM')  #see example in ?EM().
#' data('nzp')
#'
#' idst<- frogsEM[1:10,]
#' Cr<- frogsEM[-(1:10),]
#' yr<- unique(Cr$year)
#'
#' nNoYear<- rep(NA,length(unique(Cr$year)))
#' hNoYear<- rep(NA,length(unique(Cr$year)))
#'
#' for(i in 1:length(unique(Cr$year))){
#'  CrYear<- Cr[Cr$year==unique(Cr$year)[i],]  #Cr for that year
#'  nNoYear[i]<- nrow(CrYear[CrYear$Pnat>=.5,])  #natural points for that year
#'  hNoYear[i]<- nrow(CrYear[CrYear$Pnat<.5,])  #human points for that year
#' }
#'
#' AV<- c(2,3,4.5,7.5,11,15,20,25)  #alpha values
#'
#' \dontrun{
#' frogsLacro<- simulacro(INIDIST=idst,YEARS=yr,
#'  BOUNDARY=nzp,NNAT=nNoYear,NANTH=hNoYear,
#'  FACNAT=10,
#'  A=AV,X=seq(.1,30,.1),
#'  TRUEANTH=TRUE,TRUEDB=Cr,PROB=.5,
#'  ITERATIONS=10,HSM=nzp)
#'  }


simulacro<- function(INIDIST, YEARS, BOUNDARY, NNAT, NANTH, A, C=2, X,
                     HSM=FALSE, FACANTH=1, FACNAT=1, ITERATIONS=1,
                     DIR=F, TRUEANTH=c(FALSE,TRUE), TRUEDB,
                     PROB=.5){


  if(TRUEANTH==FALSE){
    NYEARS<-length(unique(YEARS))

    ed<- lapply (HSM@polygons, function(y) {y@Polygons[[1]]@coords})
    HSM2<- lapply (ed, function(q) {q<- q[nrow(q):2,]})
    HSM2=do.call('rbind',HSM2)


    antRel<-RPG(rpopn=10000, boundary=BOUNDARY, year="anyYear",SP=INIDIST$species[1])    #  Release sites for anthropic dispersal
    if(class(HSM)=='RasterLayer') {antRel<-spatFilter(Nclass=20, points=antRel,MAP=HSM)
    } else if (class(HSM)=='SpatialPolygons'){ins<- point.in.polygon(antRel$x,antRel$y,HSM2[,1],HSM2[,2])}  #  samples points within shoreline
    tot<-length(A)*length(C)*ITERATIONS*NYEARS    #  used in printing the "progress bar"
    position<-1:tot    #  used in printing the "progress bar"

    alpha<-NULL; ind<-1  #  ind is for saving lists

    for(alphavalue in A){
      for(cvalue in C){

        if(DIR!=F){dir.create(paste(DIR,"/A",alphavalue,"C",cvalue,sep=""),recursive=T)}
        Y<-fx(X,alphavalue,cvalue)
        deltas<-SDS(DIST=X,probDist=Y)

        replica<-NULL

        for(ITERATION in 1:ITERATIONS){    #  iteration START
          presloc<-INIDIST
          for(a in 1:NYEARS){    #  point generation START
            if(length(NNAT)>1){nnat=NNAT[a]}else{nnat=NNAT}
            if(length(NANTH)>1){nanth=NANTH[a]}else{nanth=NANTH}
            natloc<-NPG(PresLoc=presloc,N=nnat*FACNAT,Deltas=deltas,year=YEARS[a])  #  generates natural points
            if(class(HSM)=='RasterLayer'){  #  if you need spatial filtering
              samp<-sample(rownames(antRel),nanth*FACANTH)    #  samples an excess of anthropogenically dispersed points
              anthloc<-antRel[samp,]
              anthloc$year<-rep(YEARS[a],nrow(anthloc))
              if(length(samp)>0) antRel<-antRel[-as.numeric(samp),]  #  removes already sampled locations form the pool of possible anthopogenic release sites (antRel) to avoid duplicates in the next years
              natloc<-spatFilter(Nclass=20, points=natloc,MAP=HSM)  #  filters natural points
              newOnes<-na.omit(rbind(natloc,anthloc))
              if(nrow(newOnes[newOnes$Pnat==1,])<nnat){print("FACNAT too small")}    #  warning if available points are not sufficient for the HSMfilter
              if(nrow(newOnes[newOnes$Pnat==0,])<nanth){print("FACANTH too small")}
              newSel<-rbind(newOnes[sample(rownames(newOnes[newOnes$Pnat==1,]),nnat),],newOnes[sample(rownames(newOnes[newOnes$Pnat==0,]),nanth),])    #  samples the exact number of natural and anthropogenic points
              presloc<-na.omit(rbind(presloc,newSel))
            }else if (class(HSM)=='SpatialPolygons'){  #  this is for when you want points inside a polygon
              samp<-sample(row.names(antRel[ins==1,]),nanth); anthloc<-antRel[samp,]
              anthloc$year<-rep(YEARS[a],nrow(anthloc))
              natloc<-NPG(PresLoc=presloc,N=nnat*FACNAT,Deltas=deltas,year=YEARS[a])
              natloc<- natloc[(point.in.polygon(natloc$x,natloc$y,HSM2[,1],HSM2[,2]))==1,]  #samples points within shoreline
              natloc<- natloc[sample(nrow(natloc), nnat),]  #samples exact number of natural points
              presloc<-na.omit(rbind(presloc,natloc,anthloc))
            }else{  #  this is for when you don't need spatial filtering
              samp<-sample(row.names(antRel),nanth); anthloc<-antRel[samp,]
              anthloc$year<-rep(YEARS[a],nrow(anthloc))
              #  antRel<-antRel[-as.numeric(samp),]    #  removes already sampled locations form the pool of possible anthopogenic release sites (antRel) [to avoid duplicates in the next years]
              presloc<-na.omit(rbind(presloc,natloc,anthloc))
            }
            flush.console()
            print(paste(round(100*position[1]/tot,2),"%"," ~ ","ALPHA=",alphavalue," / C=",cvalue," ~ HSM=",names(HSM)," ~ END OF YEAR ",YEARS[a]," ~ ITERATION ",ITERATION,sep=""))
            position<-position[2:length(position)]
          }    #  point generation END
          if(DIR!=F){write.table(presloc,paste(DIR,"/A",alphavalue,"C",cvalue,"/dataset",ITERATION,".txt",sep=""),row.names=F)
          }else{replica[[ITERATION]]<- presloc}
        }    #  iteration END
      }
      alpha[[ind]]<-replica; ind=ind+1
    }
    if(DIR!=F){
      fileNames<-list.files(DIR,recursive=T)
      simul<-list()
      for(i in 1:length(fileNames)){
        simul[[i]]<-read.table(paste(DIR,"/",fileNames[i],sep=""),header=T)
      }
      return(simul)} else {return(alpha)}


  }else{

    NYEARS<-length(unique(YEARS))
    tot<-length(A)*length(C)*ITERATIONS*NYEARS    #  used in printing the "progress bar"
    position<-1:tot    #  used in printing the "progress bar"

    alpha<-NULL; ind<-1  #  ind is for saving lists

    for(alphavalue in A){
      for(cvalue in C){

        if(DIR!=F){dir.create(paste(DIR,"/A",alphavalue,"C",cvalue,sep=""),recursive=T)}
        Y<-fx(X,alphavalue,cvalue)
        deltas<-SDS(DIST=X,probDist=Y)


        replica<- NULL

        for(ITERATION in 1:ITERATIONS){    #  iteration START
          presloc<-INIDIST
          for(a in 1:NYEARS){    #  point generation START
            if(length(NNAT)>1){nnat=NNAT[a]}else{nnat=NNAT}
            if(length(NANTH)>1){nanth=NANTH[a]}else{nanth=NANTH}
            anthloc<-subset(TRUEDB,TRUEDB$year==YEARS[a]&TRUEDB$Pnat<=PROB)    #  copies anthropogenic dispersed points
            anthloc$year<-rep(YEARS[a],nrow(anthloc))

            if(class(HSM)=='RasterLayer'){  #  if you need spatial filtering
              natloc<-NPG(PresLoc=presloc,N=nnat*FACNAT,Deltas=deltas,year=YEARS[a])    #  generates naturally dispersed points
              newSel<-spatFilter(Nclass=20, points=natloc,MAP=HSM)    #  filters naturaly generated points with HSM
              if(nrow(natloc)<nnat){print("FACNAT too small")}    #  warns if available points are not sufficient for the HSMfilter
              newSel<-newSel[sample(rownames(newSel),nnat),]   #  samples the exact number of natural points
              presloc<-na.omit(rbind(presloc,newSel,anthloc))
            }else if (class(HSM)=='SpatialPolygons'){  #  this is for when you want points inside a polygon
              ed<- lapply (HSM@polygons, function(y) {y@Polygons[[1]]@coords})
              HSM2<- lapply (ed, function(q) {q<- q[nrow(q):2,]})
              HSM2=do.call('rbind',HSM2)
              natloc<-NPG(PresLoc=presloc,N=nnat*FACNAT,Deltas=deltas,year=YEARS[a])
              natloc<- natloc[(point.in.polygon(natloc$x,natloc$y,HSM2[,1],HSM2[,2]))==1,]  #samples points within shoreline
              natloc<- natloc[sample(nrow(natloc), nnat),]  #samples exact number of natural points
              presloc<-na.omit(rbind(presloc,natloc,anthloc))
            }else if (class(HSM)=='logical'){  #  this is for when you don't need spatial filtering
              natloc<-NPG(PresLoc=presloc,N=nnat*FACNAT,Deltas=deltas,year=YEARS[a])
              presloc<-na.omit(rbind(presloc,natloc,anthloc))
            }

            flush.console()
            print(paste(round(100*position[1]/tot,2),"%"," ~ ","ALPHA=",alphavalue," / C=",cvalue," ~ TRUENAT=True Values"," ~ END OF YEAR ",YEARS[a]," ~ ITeRATION ",ITERATION,sep=""))
            position<-position[2:length(position)]
          }    #  point generation END
          if(DIR!=F){write.table(presloc,paste(DIR,"/A",alphavalue,"C",cvalue,"/dataset",ITERATION,".txt",sep=""),row.names=F)
          }else{replica[[ITERATION]]<- presloc}
        }    #  iteration END
      }
      alpha[[ind]]<-replica; ind=ind+1
    }
    if(DIR!=F){
      fileNames<-list.files(DIR,recursive=T)
      simul<-list()
      for(i in 1:length(fileNames)){
        simul[[i]]<-read.table(paste(DIR,"/",fileNames[i],sep=""),header=T)
      }
      return(simul)} else {return(alpha)}
  }
}

Try the Biolinv package in your browser

Any scripts or data that you put into this service are public.

Biolinv documentation built on March 30, 2021, 5:13 p.m.