R/sp_aggregate_functions.R

###############################################################################
##################generate hexagon polygons for dataset########################
###############################################################################
hexagonize<-function(dat,
                     resolution)
{
  require(dggridR)
  # make grid
  dggs_grid <- dgconstruct(res = resolution, metric = T)
  area_tab <- dggetres(dggs_grid)
  area_km = area_tab[match(resolution, area_tab$res), "area_km"]
  # find every observation in grid and write down the cell (=seqnum)

  dat$cell <-dgGEO_to_SEQNUM(dggs_grid, dat$Longitude, dat$Latitude)$seqnum
  agg_unit = sort(unique(dat$cell))
  coordinates(dat) <- cbind(dat$Longitude , dat$Latitude)
  cellcenters <-dgSEQNUM_to_GEO(dggs_grid, agg_unit)
  variables <- data.frame(
    agg_unit = agg_unit,
    resolution = resolution,
    area_km = area_km,
    Longitude = cellcenters[[1]],
    Latitude = cellcenters[[2]]
  )
  rownames(variables)<-variables$agg_unit
  #make polygons
  grid <- dgcellstogrid(dggs_grid,
                        dat$cell,
                        frame = F,
                        wrapcells = T)

  #SpatialPolygonsDataFrame
  cells <- SpatialPolygonsDataFrame(Sr = grid, data = variables)
  return(cells)
}

####################################################################################
##################### make rarefy function based on iNExt  ####################################
####################################################################################
# m : site-by-species
rare_ext<-function(m, base="size", size=min(rowSums(m)), q=0, coverage= c(0.5,0.75 )) {
  require(data.table)
  require(iNEXT)
  require(reshape2)
  if(is.null(row.names(m))) rownames(m)<-paste("site", 1:dim(m)[1], sep="_")

  if(base=="size"){
    a<-iNEXT(t(m),size= size,se = F,q = q, knots=NULL)[[2]]
    a<-rbindlist(a,idcol = "site")
    a<-unique(a[a$m %in% size,])
    S_n<-acast(a,   site~m, value.var = "qD")
    colnames(S_n)<-paste("S",colnames(S_n),sep="_")
    S_n<-S_n[match(row.names(m), rownames(S_n)),,drop=F]
    return(S_n)
  }
  if (base=="coverage" & !is.null(coverage)){


    a<-lapply(seq_len(nrow(m)), function(i) as.numeric(m[i,m[i,]!=0]))

    names(a)<-rownames(m)

    S_sc<-lapply(coverage, function(j) {
      b<-subset(estimateD(a,base="coverage",level = j), order==0)
      b$SC<-j
      return(b)
    })
    S_sc<-rbindlist(S_sc)

    S_sc<-acast(S_sc,   site~SC, value.var = "qD")
    colnames(S_sc)<-paste("S",colnames(S_sc),sep="_")
    S_sc<-S_sc[match(row.names(m), rownames(S_sc)),,drop=F]
    return(S_sc)
  }
  return(rep(NA,length.out=dim(m)[1]))
}


# test<-rare_ext(m,size = sample)



#  BCI<-BCI[c(5:1,6:50),]
#  rare_ext(BCI, size=c(100))
# vegan::rarefy(m,sample =c(100,200))


################################################################
############ Rarefy for different sampling effort###############
################################################################
rarefy_effort <- function(m,
                          effort,
                          stand_effort = min(effort, na.rm = T),
                          rep = 1000,
                          estimate_only=F) {
  if (anyNA(m)) {
    warning("Community matrix m contains missing values. Replacing NA's by 0's.")
    m[is.na(m)] <- 0
  }
  N <- rowSums(m)
  N_stand <- N / effort
  stand_N <- N_stand * stand_effort
  S_observed <- apply(m, 1, function(x)
    length(x[x > 0]))

  pool <- apply(m, 1, function(x)
    return(rep(colnames(m), x)))

  s_rep <- sapply(1:rep, function(x) {
    a <-
      mapply(function(n, k)
        return(ifelse(length(n) >= k, length(
          unique(sample(n, k, replace = F))
        ), NA)),
        pool, stand_N)
    return(a)
  })
  S_stand <-
    t(apply(s_rep, 1, function(x)
      quantile(
        x, probs = c(0.025, 0.5, 0.975), na.rm = T
      )))
  result <- data.frame(
    site = rownames(S_stand),
    N = N,
    S_observed = S_observed,
    effort = effort,
    N_stand = N_stand,
    stand_effort = stand_effort,
    stand_N = stand_N,
    S_estimated = S_stand[, 2],
    S_low = S_stand[, 1],
    S_upp = S_stand[, 3]
  )
  if(estimate_only){
    estimates<-result$S_estimated
    names(estimates)<-result$site
    return(estimates)
  }else{
    return(result)
  }
}

#test<-rarefy_effort(m,effort)

######################################################################
##########Apply the rarefy_effort function for a range of efforts#####
######################################################################

rarefy_along <- function(m,
                         effort,
                         stand_effort = seq(0, max(effort), 1),
                         rep = 1000,
                         plot = T) {
  out <-
    lapply(stand_effort, function(x)
      rarefy_effort(
        stand_effort = x,
        m = m,
        effort = effort,
        rep = rep
      ))
  out <- as.data.frame(do.call(rbind, out))

  return(out)

}

####################################################################
#######Plot the result of rarefy_along as a rarefaction curve#######
####################################################################

plot.curve <- function(dat) {
  library(ggplot2)
  graph <-
    ggplot(data = na.omit(dat), aes(stand_effort, S_estimated, color = site)) +
    geom_line(size = 1) +
    geom_ribbon(aes(
      ymax = S_upp,
      ymin = S_low,
      fill = site,
      color = NULL
    ), alpha = 0.3) +
    xlab("effort")
  graph

}


####################################################################################
##################### dissect species richness  ####################################
####################################################################################

dissectS<-function(m,sample=min(rowSums(m)),coverage=c(0.5,0.75), effort=NULL, q=0, ...){
  require(reshape2)
  require(mobr)
  require(vegan)
  # abundance
  abundance <- rowSums(m, na.rm = T)
  # individual based rarefaction
  S_n<-rare_ext(m,size=sample,q=q)
  # coverage based rarefaction
  S_sc<-rare_ext(m, base="coverage", coverage=coverage,q=q)
  #Chao
  Chao<-calc_chao1(m)
  #observed richness
  S_obs<-specnumber(m)
  #PIE
  PIE <-calc_PIE(m,ENS=F)
  #ENS
  ENS <-calc_PIE(m,ENS=T)
  # actual SC
  SC<-coverage(m)
  # effort rarefaction

  if(is.null(effort)==F)    {
    if(length(effort)!=dim(m)[1])stop("Length of 'effort' argument must be the same as the number of sites in m.")
    S_est<-rarefy_effort(m,effort,...)
    S_est<-S_est[,c("effort","stand_effort","N_stand","S_estimated")]
    S_obs<-cbind(S_obs,S_est)
  }

  site<-rownames(m)
  variables<-cbind(abundance,S_obs,SC,Chao,PIE,ENS,S_n,S_sc)

  return(variables)
}


# data(BCI)
# test<-dissectS(BCI,coverage = c(0.25,0.75),sample = c(10,100,200))

####################################################################################
##################### aggregating data by polygon ##################################
####################################################################################

sp_aggregate <-
  function(dat,
           abundance.col="Abundance",
           taxon.col = "standard_genus",
           group = "hexagon",
           site.col=NULL,
           effort.col=NULL,
           stand_effort=NULL,
           raster = NULL,
           is.temp=F,
           sample=NULL,
           coverage=c(0.75),
           resolution=7,
           inext=F,
           ...) {
    library(vegan)
    library(raster)
    library(sp)
    library(rgeos)
    mode <- ifelse(
      class(group) == "SpatialPolygonsDataFrame",
      "polygon",
      ifelse(
        group=="hexagon",
        "hexagon",
        ifelse(group %in% colnames(dat), "point",
               stop(
                 "group must be 'hexagon', a column name in dat specifying the Site variable or a SpatialPolygonsDataFrame for user specified aggregation of locations"
               ))
      )
    )
    if(class(raster)=="RasterStack" & mode!= "point"){
      stop("RasterStacks are not allowed for aggregation by hexagon or polygon.
           Define 'raster' as a single rasterlayer.")
    }
    if(class(raster)=="RasterStack" & is.temp==T) {
      warning("Setting is.temp FALSE. Argument cannot be TRUE for RasterStacks. Boltzmann's temperature won't be calculated.")
      is.temp<-F
    }
    if(is.null(effort.col)==F) if( effort.col %in% colnames(dat)==F) stop("'effort.col' must be NULL or a column name in dat." )

    # For mode "hexagon" define group as hexagon polygons
    if (mode == "hexagon") {
      print("Making hexagons")
      group<-hexagonize(dat=dat, resolution=resolution)
    }

    coordinates(dat) <- cbind(dat$Longitude , dat$Latitude)

    if (mode == "point") {
      dat@data$cell <- dat@data[, paste(group)]
    } else{
      proj4string(dat) <- proj4string(group)
      dat@data$cell <- over(dat, group)$agg_unit
      if(!is.null(site.col)){
        n_sites<-aggregate(dat@data[,paste(site.col)], by=list(agg_unit=dat@data$cell),
                           FUN=function(x)length(unique(x)))
      }
    }


    # Agregate the abundances by cell and taxon
    long_data   <-
      aggregate(dat@data[,paste(abundance.col)] ,
                by = list(agg_unit = dat@data$cell, Taxon = dat@data[, paste(taxon.col)]),
                sum)
    long_data$x<-as.integer(long_data$x)

    library(reshape2)
    library(iNEXT)

    # convert to community matrix
    m <- acast(long_data,   agg_unit~Taxon, value.var = "x")
    m[is.na(m)] <- 0
    #m<-as.data.frame(m)

    if(is.null(effort.col)){
      effort=NULL
    }else{
      if(mode=="point"){
        effort<-dat@data[,c(group,effort.col)]
        effort<-effort[match(rownames(m),effort[,group] ),2]
      }else{stop("effort rarefaction only works in point mode")}
    }


    #iNext output

    if(inext){
      print("Running iNext. May take a while.")
      iNEXT_out <- iNEXT(x = t(m), ...)
    }else{
      iNEXT_out=NULL
    }

    #dissect richness

    variables<-as.data.frame(dissectS(m,sample = sample, coverage = coverage, effort = effort, stand_effort=stand_effort))

    variables$agg_unit<- rownames(variables)
    if (mode == "point") {
      cells <- dat
      cells <-
        aggregate(
          cells@data[, c("Latitude", "Longitude")],
          by = list(agg_unit = cells@data$cell),
          sample,
          size = 1
        )

      coordinates(cells)<-cbind(cells$Longitude , cells$Latitude)
    }else{
      cells <- group
      if(!is.null(site.col)){
        cells@data$sites_aggregated<-n_sites$x[match(cells$agg_unit, n_sites$agg_unit)]
      }
    }
    cells <- merge(cells, variables)


    if (is.null(raster) == F) {
      print("Extracting raster values. May take a while.")

      #Crop raster to extent of spatial object
      raster <- crop(raster, extent(cells))
      if (mode == "point") {
        raster_value <- extract(raster, cells)
        cells@data<-cbind(cells@data,raster_value)
      } else{
        library(parallel)
        cl <- makeCluster(3)
        clusterExport(cl, c("cells", "raster"),envir=environment())
        clusterEvalQ(cl, library(raster, sp))
        i <- nrow(cells)
        library(pbapply)
        extracted <- pbsapply(cl = cl, X = 1:i, function(x) {
          #
          single <- cells[x, ]
          clip <- crop(raster, extent(single))
          clip2 <- rasterize(single, clip, mask = TRUE, fun=mean, na.rm=T)
          NAs <- summary(rasterize(single, clip, T))[6]
          values <- getValues(clip2)
          mean<-mean(values, na.rm = T)
          cover<-1-(table(is.na(values))[2]-NAs)/length(values)
          return(c(mean,cover))
        })
        stopCluster(cl)
        cells@data$raster_value <- extracted[1,]
        cells@data$cover<-extracted[2,]
      }
      if (is.temp==T){
        cells@data$temp<-cells@data$raster_value/10
        cells@data$t_boltz <-
          ifelse(is.nan(cells@data$temp), NA, 1 / ((cells@data$temp + 273.15) * 8.62e-5))
      }
    }
    return(list(
      agg_units = cells,
      abundance = dat,
      iNext = iNEXT_out
    ))
    }
####

# library(vegan)
# data("BCI")
# BCI<-as.data.frame(t(BCI))
# set.seed(7)
# BCI_effort<-sample(10:50,50,T)
# BCI_richness<-dissectS(BCI)
# BCI_richness<-rarefy_effort(BCI,BCI_effort)
T-Engel/hammeRs documentation built on May 22, 2019, 2:16 p.m.