R/aggregate.R

Defines functions make.gmap pophist.aggregate

Documented in make.gmap pophist.aggregate

#' Aggregate a pophist object into genetic populations
#'
#' Combines grid cells from the forward-time demographic simulation into a smaller number of cells for coalescent simulation.
#'
#' @param ph the output from forward time sim, includes a pophist object (output by \code{getpophist2.cells()})
#' @param gmap data frame that maps forward time populations to genetic populations (output by \code{make.gmap()})
#'
#' @details 
#' This function takes a fine-grained spatio-temporal history of a species generated by holoSimCells forward-time simulator and aggregates demographic populations into genetic populations that can be simulated more tractably.  The colonization history of component cells in the coarse grid is used to model proportional colonization from multiple source cells. 
#'
#' @return 
#' \itemize{
#' Returned object includes the following named components:
#' \item{\code{pophist}} {a data frame with population ID, cell row, cell column, timing of colonization, and source population associated with colonists.}
#' \item{\code{Nvecs}} {a data frame with number of generations columns and number of (aggregated) populations rows that provides population size through time from the forward demographic simulation.}
#' \item{\code{tmat}} {a matrix of pairwise migration rates between (aggregated) populations, used in subsequent coalescent simulations.}
#' \item{\code{struct}} {a vector summarizing the structure of the simulated landscape from the forward simulation, x and y dimensions of the grid, number of generations, disperal parameters, etc.}
#' \item{\code{hab_suit}} {a matrix with number of generations rows and number of (aggregated) populations columns that provides habitat suitability through time.}
#' \item{\code{coalhist}} {a data frame specifying historical events for the coalescent simulation, includes time of event (time), source (src) and sink (snk) IDs, and fractional colonization (prop).}
#' \item{\code{popslst}} {a list object with one element per grid cell from the forward-time demographic simulation.  Each element includes information on location on the landscape, colonization time, source populations, and population size through time.}
#' \item{\code{old_landscape}} {original landscape from the forward-time demographic simulation.}
#' \item{\code{old_tmat}} {original migration matrix from the forward-time demographic simulation.}
#' \item{\code{gmap}} {gmap object that maps cells from the demographic simulation to a coarser grid for coalescent simulations.}
#' }
#'
#' @examples
#' library(holoSimCell)
#' 
#' parms <- drawParms(control = system.file("extdata/ashpaper","Ash_priors.csv",package="holoSimCell"))
#' load(file=paste0(system.file(package="holoSimCell"),"/extdata/landscapes/",pollenPulls[[1]]$file))
#' refpops <- pollenPulls[[1]]$refs
#' avgCellsz <- mean(c(res(landscape$sumrast)))
#'
#' ph = getpophist2.cells(h = landscape$details$ncells, xdim = landscape$details$x.dim, ydim = landscape$details$y.dim,
#'                        landscape=landscape,
#'                        refs=refpops,  
#'                        refsz=parms$ref_Ne,
#'                        lambda=parms$lambda,
#'                        mix=parms$mix,  
#'                        shortscale=parms$shortscale*avgCellsz,  
#'                        shortshape=parms$shortshape, 
#'                        longmean=parms$longmean*avgCellsz,  
#'                        ysz=res(landscape$sumrast)[2], 
#'                        xsz=res(landscape$sumrast)[1], 
#'                        K = parms$Ne) 
#' 
#' gmap=make.gmap(ph$pophist,
#'                xnum=2, 
#'                ynum=2) 
#' 
#' ph2 <- pophist.aggregate(ph,gmap=gmap)
#'
#' @seealso \code{\link{make.gmap}}, \code{\link{doesGmapCombine}}, \code{\link{getpophist2.cells}}, \code{\link{run_FSC_step_agg3}}, \url{http://cmpg.unibe.ch/software/fastsimcoal26/man/fastsimcoal26.pdf}
#' @export

pophist.aggregate <- function(ph, gmap=NULL)
{
    if (is.null(ph)) stop("must include output from getpophist")
    if (is.null(ph$pophist)) stop("there needs to be a correct pophist in the list 'ph'")
    if (is.null(gmap))
    {
        gh = ph$coalhist
    }    else   { 

        refs <- ph$coalhist[ph$coalhist$time==0,]
        refs$snk <- NA
        refs$prop=1.0

        refs <- merge(refs,gmap,by.x="src",by.y="pop",all.x=T)
        refs <- unique(refs[,c("time","gpop","snk","prop")])
        names(refs)[which(names(refs)=="gpop")] <- "src"
        
        gh <- merge(ph$coalhist,gmap,by.x="src",by.y="pop",all=T)[,1:4]
        names(gh)[4] <- "gsrc"
        gh <- merge(gh,gmap,by.x="snk",by.y="pop")[,1:5]
        names(gh)[5] <- "gsnk"
        gh <- gh[order(gh$time,gh$snk,gh$src),]
        gh$srcsz <- apply(gh[,c("src","time")],1,function(x){ph$Nvec[x[1],paste0("gen",x[2])]})

        agg <- with(gh,aggregate(cbind(mvsrcsz=srcsz),list(time=time,gsnk=gsnk,gsrc=gsrc),sum))
        
        nvt <- aggregate(cbind(ph$Nvecs),list(gpop=gmap$gpop),sum) 
        rownames(nvt) <- as.character(nvt$gpop)
        nvt <- nvt[,-1]
        agg$gsrcsz <- apply(agg[,c("gsrc","time")],1,function(x){nvt[x[1],paste0("gen",x[2])]})
        agg$prop <- round(agg$mvsrcsz/agg$gsrcsz,6)
        gh <- agg[agg$gsnk!=agg$gsrc,c("time","gsrc","gsnk","prop")]
        names(gh) <- c("time","src","snk","prop")
        
        gh <- rbind(refs,gh[order(gh$time,gh$snk,gh$src),])
        if (!testCoal(gh))
        {
            gh <- gh[order(gh$src,gh$time,gh$snk),]
            gh <- do.call(rbind,lapply(unique(gh$src),function(src)
            {
                tmp=gh[gh$src==src,]  
                if (min(tmp$time)>0)
                {
                    mt = which(tmp$time==min(tmp$time))
                    tmp[mt,"prop"] <- tmp[mt,"prop"]/cumsum(tmp[mt,"prop"])
                }
                tmp
            }))
        }
    }
    ph$coalhist <- gh
    
    poph <- gmap[,-1]
    names(poph)[1] <- "pop"
    poph <- with(poph,aggregate(cbind(row=row,col=col),list(pop=pop),mean))
    pophist <- merge(ph$pophist,gmap)
    gpophist <- do.call(rbind,lapply(unique(pophist$gpop),function(gp)
    {
        p <- pophist[pophist$gpop==gp,]
        if (nrow(p)>0)
        {
            ret <- NULL  
            for (a in unique(p$arrive))
                for (s in unique(p$source))
                {
                    if (is.na(s)) src <- NA else src <- gmap[gmap$pop==s,"gpop"]

                    if (!is.na(src)[1])
                        if (src!=gp)
                        {
                            ret <- rbind(ret,
                                         data.frame(pop=gp,arrive=a,source=src))
                        }
                }
            ret
        } else {NULL}
        
    }))
    rc <- with(gmap,aggregate(cbind(row,col),list(pop=gpop),mean))
    gpophist <- merge(gpophist,rc)[,c("pop","col","row","arrive","source")]

    nv <- aggregate(ph$Nvecs,list(gpop=gmap$gpop),sum)
    rownames(nv) <- as.character(nv$gpop)
    nv <- nv[,-1]
    ph$Nvecs <- nv

    ph$old_landscape <- ph$hab_suit
    ph$hab_suit <- t(aggregate(t(ph$hab_suit$hab_suit),list(gpop=gmap$gpop),mean))

    ynum <- length(unique(ph$pophist[ph$pophist$pop %in% gmap[gmap$gpop==1,"pop"],"row"]))
    xnum <- length(unique(ph$pophist[ph$pophist$pop %in% gmap[gmap$gpop==1,"pop"],"col"]))
    ysz <- ph$struct["ysz"]*ynum
    xsz <- ph$struct["xsz"]*xnum
    landx=ceiling(max(gmap$col)/xnum)
    landy=ceiling(max(gmap$row)/ynum)
    tmat <- integratedMigMat(landx=landx,landy=landy,
                             xnum=5,ynum=5,ysz=ysz,xsz=xsz,
                             sshp=ph$struct["shortshape"],ssc=ph$struct["shortscale"],mix=ph$struct["mix"],
                             nmean=ph$struct["longmean"],nvar=ph$struct["longmean"]^2)
    print(dim(tmat))
    ph$old_tmat <- ph$tmat
    ph$tmat <- tmat
    ph$gmap <- gmap
    ph$pophist <- gpophist
    
    ph
}





#' Make a genetic subset map for rectangular landscape
#'
#' Function to generate a map for aggregating populations from a fine-resolution forward-simulation for coalescent simulations.
#'
#' @param pophist a data frame describing the population history (source of colonists, time of colonization, etc.; output from \code{getpophist2.cells()}).
#' @param xnum number of columns to aggregate
#' @param ynum number of rows to aggregate
#'
#' @details Allows the user to specify a scheme for aggregation (e.g., 2x2 blocks of cells are combined if xnum and ynum = 2).  If the size of the landscape in pophist is not evenly divisible by xnum and ynum, "remainder" cells on the top and righthand sides of the landscape will be included in genetic clusters.  As a result the numbers of cells per cluster will vary
#'
#' @return a gmap object used for pophist.aggregate
#'
#' @examples
#' library(holoSimCell)
#' 
#' parms <- drawParms(control = system.file("extdata/ashpaper","Ash_priors.csv",package="holoSimCell"))
#' load(file=paste0(system.file(package="holoSimCell"),"/extdata/landscapes/",pollenPulls[[1]]$file))
#' refpops <- pollenPulls[[1]]$refs
#' avgCellsz <- mean(c(res(landscape$sumrast)))
#'
#' ph = getpophist2.cells(h = landscape$details$ncells, xdim = landscape$details$x.dim, ydim = landscape$details$y.dim,
#'                        landscape=landscape,
#'                        refs=refpops,  #set at cell 540 right now 
#'                        refsz=parms$ref_Ne,
#'                        lambda=parms$lambda,
#'                        mix=parms$mix,  #note how small.
#'                        shortscale=parms$shortscale*avgCellsz,  # scale parameter of weibull with shape below
#'                        shortshape=parms$shortshape, #weibull shape
#'                        longmean=parms$longmean*avgCellsz,  # mean of normal with sd = longmean
#'                        ysz=res(landscape$sumrast)[2], #height of cell in raster (same units as longmean and shortscale)
#'                        xsz=res(landscape$sumrast)[1], #width of cell in raster
#'                        K = parms$Ne) #maximum population size in a grid cell, scaled with hab_suit from landscape object
#' 
#' gmap=make.gmap(ph$pophist,
#'                xnum=2, #number of cells to aggregate in x-direction
#'                ynum=2) #number of aggregate in the y-direction
#' 
#' ph2 <- pophist.aggregate(ph,gmap=gmap)
#'
#' @seealso \code{\link{pophist.aggregate}}, \code{\link{getpophist2.cells}}, \code{\link{run_FSC_step_agg3}}, \code{\link{doesGmapCombine}}
#' @export

make.gmap <- function(pophist,xnum=2,ynum=2)
{

    gm=expand.grid(startx=seq(1,max(pophist$col),by=xnum),
                   starty=seq(1,max(pophist$row),by=ynum))
    gm$stopx=lag(gm$startx)+xnum-1
    gm$stopy=lag(gm$starty)+ynum-1
    gm$pop=1:dim(gm)[1]
    gpops=do.call(rbind,lapply(1:dim(gm)[1],function(i) 
    {
        tmp = with(pophist,pophist[(col<=gm$stopx[i])&
                                   (col>=gm$startx[i])&
                                   (row<=gm$stopy[i])&
                                   (row>=gm$starty[i]),])
        tmp$gpop=gm$pop[i]
        tmp
    }))
    
    unique(gpops[order(gpops$pop),c("pop","gpop","col","row")])
}
stranda/holoSimCell documentation built on Aug. 4, 2023, 1:12 p.m.