R/rase_source.r

###############
# Basic Brownian motion tree likelihood for any number of independent dimensions

bm_loglik_tree = function(tree, values, par, dimen) {
  
  A = par[1:dimen]  	
  Sig = par[(dimen + 1):(dimen*2)] 	
  if (any(Sig < 0)) return(-Inf)
  
  n = length(tree$tip.label)
  V = vcv(tree)  	    	
  
  res = mapply(function(values, A, Sig) dmvnorm(t(as.matrix(values)), mean = rep(A, n), sigma = V*Sig, 1), values, A, Sig)
  
  return(sum(res))
}


#--------------
# Optimize
# 'values' has to be a list, with each element being one-dimensional values

point.like.bm = function(tree, values, start_values = NA, dimen = NA) {
	
	if (is.na(dimen)) dimen = length(values)
	if (is.na(start_values)) start_values = unlist(c(lapply(values, mean), lapply(values, sd)))	
	
	opt.res = nlm(function(p) -1*bm_loglik_tree(tree, values, p, dimen), p = start_values)	
	
	return(list(mrcas = opt.res$estimate[1:dimen],
		 rates = opt.res$estimate[(dimen + 1):(dimen*2)],
		 nlm.details = opt.res))
}


###############
# Rectangle constrained Brownian motion tree likelihood for any number of independent dimensions

bm_loglik_tree_constrained = function(tree, lower_bounds, upper_bounds, par, dimen) { 
		
  	A = par[1:dimen]  	
  	Sig = par[(dimen + 1):(dimen*2)]  
  	if (any(Sig < 0)) {
      return(-1e30)   		  	
    }
  	
  	n = length(tree$tip.label)
  	V = vcv(tree)	
	
	  res = mapply(function(lower_bounds, upper_bounds, A, Sig) pmvnorm(lower_bounds, upper_bounds, mean = rep(A, n), sigma = V*Sig), lower_bounds, upper_bounds, A, Sig)  	  	
  	
  	area_cor = mapply(function(upper_bounds,lower_bounds) sum(log(upper_bounds - lower_bounds)), upper_bounds,lower_bounds)	

    v = sum(log(as.numeric(res))) - sum(area_cor)
    if(is.nan(v) || abs(v)==Inf) v = -1e30
  	
  	return(v)
}

#--------------
# Optimize
# 'lower/upper_bounds' have to be 2 separate lists, with each list element being one-dimensional values

ranges.like.bm = function(tree, lower_bounds, upper_bounds, start_values = NA, dimen = NA) {
	
	if (is.na(dimen)) dimen = length(lower_bounds)
	if (is.na(start_values)) { start_values = c((unlist(lapply(lower_bounds, mean)) + 
		unlist(lapply(upper_bounds, mean)))/2,
		(unlist(lapply(lower_bounds, sd)) + 
		unlist(lapply(upper_bounds, sd)))/2) }
	
	opt.res = nlm(function(p) -1*bm_loglik_tree_constrained(tree,lower_bounds, 
				upper_bounds, p, dimen), p = start_values)	
	
	return(list(mrcas = opt.res$estimate[1:dimen],
		          rates = opt.res$estimate[(dimen + 1):(dimen*2)],
		          nlm.details = opt.res))
}
  

###############
# Functions for internal use
###############
# Basic BM for ancestors

bm_loglik_ancestors = function(tree, values, params) {	 				

	ntaxa = length(tree$tip.label)
	nnode = tree$Nnode

	ax = params[1:nnode]
	ay = params[(nnode+1):(2*nnode)]
  	sigma2x = params[2*nnode+1]
  	sigma2y = params[2*nnode+2]
  	if (sigma2x<0 || sigma2y<0) return(-Inf)

	tips = tree$edge[,2] <= ntaxa

	ax1 = ax[tree$edge[,1] - ntaxa]
  	ax2 = rep(NA, length(ax1))
  	ax2[tips]  = values$x[tree$edge[tips,2]]
  	ax2[!tips] = ax[tree$edge[!tips,2]-ntaxa]

  	ay1 = ay[tree$edge[,1] - ntaxa]
  	ay2 = rep(NA, length(ay1))
  	ay2[tips]  = values$y[tree$edge[tips,2]]
  	ay2[!tips] = ay[tree$edge[!tips,2]-ntaxa]

  	lx = -1*sum((ax1 - ax2)^2/tree$edge.length)/(2 * sigma2x) - nnode * log(sigma2x)
  	ly = -1*sum((ay1 - ay2)^2/tree$edge.length)/(2 * sigma2y) - nnode * log(sigma2y)
  	
  	return(lx + ly)
}

######################################
# utility function to estimate sigma2x and sigma2y 
# by maximum likelihood and compute the 
# standard errors of these estimates.  
# This is only used to generate a proposal distribution 
# for sigma2 in rase.  
make_bm_est_sigma2 = function(ntaxa, nnode, wtips, paren, child, trlen) {

  xsub   = seq_len(nnode)
  ysub   = (nnode+1):(2*nnode)
  tredmt = paren - ntaxa
  nedges = length(tredmt) 
  nedNA  = rep.int(NA, nedges)

  parenmt = paren - ntaxa
  wchtips = child[wtips]
  wnottip = child[!wtips] - ntaxa

  function(values, params) {
    
    ax = .subset(params, xsub)
    ay = .subset(params, ysub)
    
    ax1 = .subset(ax, tredmt)
    ax2 = nedNA


    ax2[wtips]  = .subset2(values,'x')[wchtips]
    ax2[!wtips] = .subset(ax, wnottip)
    
    ay1 = .subset(ay, parenmt)
    ay2 = nedNA

    ay2[wtips]  = .subset2(values,'y')[wchtips]
    ay2[!wtips] = .subset(ay, wnottip)

    sigma2xhat = mean( (ax1 - ax2)^2 / trlen)
    sigma2xhat_sd = sqrt( -1/sum(1/(2*sigma2xhat^2) - 
                    (ax1-ax2)^2/(sigma2xhat^3 * trlen)) ) 
    
    sigma2yhat = mean( (ay1 - ay2)^2 / trlen )
    sigma2yhat_sd = sqrt( -1/sum(1/(2*sigma2yhat^2) - 
                    (ay1-ay2)^2/(sigma2yhat^3 * trlen)) ) 
    
    return(list(sigma2xhat=sigma2xhat, 
                sigma2xhat_sd=sigma2xhat_sd, 
                sigma2yhat=sigma2yhat, 
                sigma2yhat_sd=sigma2yhat_sd))
  }
}



#########################
# Loglik for ancestors using polygons


make_bm_loglik_ancestors_poly = function(polygons, ntaxa, nnode, 
                                         tredge, trlen, nGQ) {
  
  xsub   = seq_len(nnode)
  ysub   = (nnode+1):(2*nnode)
  nnt2p1 = 2*nnode+1
  nnt2p2 = 2*nnode+2

  dat = cbind(tredge, trlen)

 function(params) {          
    
    ax = .subset(params, xsub)
    ay = .subset(params, ysub)

    sigma2x = .subset(params, nnt2p1)
    sigma2y = .subset(params, nnt2p2)
    
    if(sigma2x<0 || sigma2y<0) return(-Inf)
    
    logliks = apply(dat, 1, function(r) {
      ax1 = ax[r[1] - ntaxa]
      ay1 = ay[r[1] - ntaxa]

      if (r[2]<= ntaxa) {
        v = polyCub.SV(polygons[[r[2]]], bigauss_pdf, 
                       mx = ax1, my = ay1, 
                       sx = sqrt(sigma2x*r[3]), sy = sqrt(sigma2y*r[3]), 
                       rho = 0, nGQ = nGQ)    
        logv = ifelse(is.nan(v) || !is.finite(v), -1e30, log(v)) 
        loglik = logv - log(area(.subset2(polygons,r[2])))
        if(is.nan(loglik)) loglik = -1e30
      } else {
        loglik = sum(dnorm(c(ax[r[1]-ntaxa], ay[r[1]-ntaxa]), c(ax1, ay1), 
                           sd=sqrt(r[3]*c(sigma2x,sigma2y)), log=TRUE))
      }
      return(loglik)
    })
    
    return(sum(logliks))
  }

}



#########################
# propose ancestral values (x,y) at an internal node 
# the ancestral values is a=(ax,ay)
# the daughter values are d1=(d1x,d1y) and d2=(d2x,d2y) 
# s is the ancestral branch length
# t1,t2 are the daughter branch lengths

bm_propose_trio = function(a, d1, d2, s,t1, t2, sigma2x, sigma2y) {

	if (is(d1, 'owin')) { 
    	d1 = poly_center(d1)
  	}

  	if (is(d2, 'owin')) { 
    	d2 = poly_center(d2)
  	}

	ax  = a[1L];  ay  = a[2L]
	d1x = d1[1L]; d1y = d1[2L]
	d2x = d2[1L]; d2y = d2[2L]

	xm1 = (d1x*t2 + d2x*t1)/(t1+t2)
	xv1 = t1*t2*sigma2x/(t1+t2)

	ym1 = (d1y*t2 + d2y*t1)/(t1+t2)
	yv1 = t1*t2*sigma2y/(t1+t2)

	if (anyNA(c(ax,ay))) {

 		x = rnorm(1L, mean=xm1, sd=sqrt(xv1))
  	y = rnorm(1L, mean=ym1, sd=sqrt(yv1))
   	logfwdprob = dnorm(x, mean=xm1, sd=sqrt(xv1), log=TRUE)
    logbwdprob = dnorm(y, mean=ym1, sd=sqrt(yv1), log=TRUE)

	} else {

 		xm2 = (xm1*s*sigma2x + ax*xv1)/(xv1 + s*sigma2x)
 		xv2 = xv1*s*sigma2x/(xv1 + s*sigma2x)
  	x = rnorm(1L, mean=xm2, sd=sqrt(xv2))
  	ym2 = (ym1*s*sigma2y + ay*yv1)/(yv1 + s*sigma2y)
  	yv2 = yv1*s*sigma2y/(yv1 + s*sigma2y)
  	y = rnorm(1L, mean=ym2, sd=sqrt(yv2))
   	logfwdprob = dnorm(x, mean=xm1, sd=sqrt(xv1), log=TRUE)
    logbwdprob = dnorm(y, mean=ym1, sd=sqrt(yv1), log=TRUE)
	}

  # return value, logfwdprob, logbwdprob
	return(list(value=c(x,y), 
              logfwdprob=logfwdprob, 
              logbwdprob=logbwdprob)
  )
}

#########################
# compute the log-likelihood of a trio
# using fast integration
# d1 and/or d2 can be shapes OR points

bm_loglik_trio = function(a, v, d1, d2, s, t1, t2, sigma2x, sigma2y, areas, daughter_ids, nGQ) {
  
  if (is.na(a[1])) {
    la = 0
  } else {
    la = sum(dnorm(v, a, sd=sqrt(s*c(sigma2x, sigma2y)), log=TRUE))
  }
  
  if (!is(d1, 'owin')) { # d1 is an internal node
    l1 = sum(dnorm(d1, v, sd = sqrt(t1*c(sigma2x, sigma2y)), log=TRUE))
  } else { # d1 is a tip
    l1 = log(as.numeric(polyCub.SV(d1, bigauss_pdf, mx = v[1], my = v[2], sx = sqrt(sigma2x*t1), sy = sqrt(sigma2y*t1), rho = 0, nGQ = nGQ))) - 
      log(areas[daughter_ids[1]])
    if (is.nan(l1) | !is.finite(l1)) l1 = -1e30
    #if (l1==0) l1 = -1e30
  }
  
  if (!is(d2, 'owin')) { # d2 is an internal node
    l2 = sum(dnorm(d2,v,sd=sqrt(t2*c(sigma2x,sigma2y)), log=TRUE))
  } else { # d2 is a tip
    l2 = log(as.numeric(polyCub.SV(d2, bigauss_pdf, mx = v[1], my = v[2], sx = sqrt(sigma2x*t2), sy = sqrt(sigma2y*t2), rho = 0, nGQ=nGQ))) - log(areas[daughter_ids[2]])
    if(is.nan(l2)| !is.finite(l2)) l2 = -1e30
    #if (l2==0) l2 = -1e30
  }
  
  return(la + l1 + l2)

}




#########################
# gibbs sampling for ancestors in 2-dimensional point bm

bm_ase = function(tree, values, niter=1e3, logevery=10, sigma2_scale=0.05, screenlog=TRUE, params0 = NA) {

    if (!is(tree, "phylo")) {
        stop('tree should be of class phylo')
    }
    
    ntaxa = length(tree$tip.label)
    nnode = tree$Nnode   
       
    ax = array(NA, dim=c(niter,nnode))
    sigma2x = rep(NA, niter)
    ay = array(NA, dim=c(niter,nnode))
    sigma2y = rep(NA, niter)
     
    if (any(is.na(params0))) {
        ace_resx = ace(values$x, tree, method = 'ML')
        ace_resy = ace(values$y, tree, method = 'ML')
        ax[1, (1:nnode)] = ace_resx$ace[1:nnode]
        sigma2x[1] = ace_resx$sigma[1]
        ay[1, (1:nnode)] = ace_resy$ace[1:nnode]
        sigma2y[1] = ace_resy$sigma[1]
    } else {
        if (length(params0) != 2*nnode+2) stop("starting values not of correct length")
        ax[1,] = params0[1:nnode]
        sigma2x[1] = params0[2*nnode+1]
        ay[1,] = params0[(nnode+1):(2*nnode)]
        sigma2y[1] = params0[2*nnode+2]
    }
    
    tredge = .subset2(tree,'edge')
    trlen  = .subset2(tree,'edge.length')
    wtips  = tredge[,2] <= ntaxa 
    bm_est_sigma2 = make_bm_est_sigma2(ntaxa, nnode, wtips, tredge[,1L], tredge[,2L], trlen)

    for (iter in 2:niter) {
                
        # random permutation of internal nodes
    	nodelist = ntaxa + sample(1:nnode, nnode, replace=FALSE)
        
        # populate current node values
    	ax[iter,] = ax[iter-1,]
    	ay[iter,] = ay[iter-1,]
    	sigma2x[iter] = sigma2x[iter-1]
    	sigma2y[iter] = sigma2y[iter-1]
        
        for (node in nodelist) {
            
            # daughters
            daughter_ids = tree$edge[tree$edge[,1]==node,2]
            t = c(tree$edge.length[tree$edge[,2]==daughter_ids[1]],
                tree$edge.length[tree$edge[,2]==daughter_ids[2]])
            
            if (daughter_ids[1]<= ntaxa) {
                d1_value = c(values$x[daughter_ids[1]], values$y[daughter_ids[1]])
            } else {
                d1_value = c(ax[iter,daughter_ids[1]-ntaxa], ay[iter,daughter_ids[1]-ntaxa])
            }
            
            if (daughter_ids[2]<=ntaxa) {
    	    	d2_value = c(values$x[daughter_ids[2]], values$y[daughter_ids[2]])
            } else {
    	    	d2_value = c(ax[iter,daughter_ids[2]-ntaxa], ay[iter,daughter_ids[2]-ntaxa])
            }
            
            # ancestor
            a_id = tree$edge[tree$edge[,2]==node,1]
            if (length(a_id)==0) {
                a_value = c(NA,NA)
                s = NA
            } else {
                a_value = c(ax[iter,a_id-ntaxa], ay[iter,a_id-ntaxa])
    	    	s = tree$edge.length[a_id-ntaxa]
            }	
            
            xy = bm_propose_trio(a_value, d1_value, d2_value, s, t[1],t[2], sigma2x, sigma2y)
            ax[iter,node-ntaxa] = xy$value[1]
            ay[iter,node-ntaxa] = xy$value[2]
            
    	}
                
        obj = bm_est_sigma2(list(x=values$x, y=values$y), c(ax[iter,], ay[iter,]))

      	s2x_cur = obj$sigma2xhat
      	s2x_sd = sigma2_scale*obj$sigma2xhat_sd
      	s2y_cur = obj$sigma2yhat
      	s2y_sd = sigma2_scale*obj$sigma2yhat_sd

        repeat {
            sigma2x_prop = rnorm(1, mean=s2x_cur, sd=s2x_sd)
            if(sigma2x_prop>0) break
        }
    	  logprobratiox = dnorm(s2x_cur, sigma2x_prop, sd=s2x_sd, log=TRUE) - dnorm(sigma2x_prop, s2x_cur, sd=s2x_sd, log=TRUE)
        
        repeat {
            sigma2y_prop = rnorm(1, mean=s2y_cur, sd=s2y_sd)
            if(sigma2y_prop>0) break
        }
    	  logprobratioy = dnorm(s2y_cur, sigma2y_prop, sd=s2y_sd, log=TRUE) - dnorm(sigma2y_prop, s2y_cur, sd=s2y_sd, log=TRUE)
        
      
       # flat prior
        loglik_cur = bm_loglik_ancestors(tree, values, c(ax[iter,], ay[iter,], sigma2x[iter], sigma2y[iter]))
        loglik_prop = bm_loglik_ancestors(tree, values, c(ax[iter,], ay[iter,], sigma2x_prop, sigma2y_prop))
	
        logratio = loglik_prop - loglik_cur + logprobratiox + logprobratioy
        
         if (log(runif(1)) < logratio) {
            sigma2x[iter] = sigma2x_prop
            sigma2y[iter] = sigma2y_prop
        }
  	
  	    if (screenlog && iter%%logevery==0) {
                
                if (iter == logevery) {
                    cat('Iteration sigma2x sigma2y\n')
                    cat(iter, sigma2x[iter], sigma2y[iter],'\n')
                } else {
                    cat(iter, sigma2x[iter], sigma2y[iter],'\n')
                }

            } 
    }
    
    colnames(ax) = paste('n', names(branching.times(tree)), '_x', sep = '')
    colnames(ay) = paste('n', names(branching.times(tree)), '_y', sep = '')
    
    return(cbind(ax, ay, sigma2x, sigma2y))  
}

   

############################
# Almost-Gibbs sampling of ancestors with polygons at tips
# using polyCub for the terminal branch likelihoods

# Bivariate gaussian pdf
bigauss_pdf = function(s, mx, my, sx, sy, rho) {
  x     = s[,1L]
  y     = s[,2L]
  rho2  = rho*rho
  rho1m = 1 - rho2
  tr1   = 1 / (2 * pi * sx * sy * sqrt(rho1m))
  tr2   = (x - mx)^2 / sx^2
  tr3   = -(2 * rho * (x - mx)*(y - my))/(sx * sy)
  tr4   = (y - my)^2 / sy^2
  z     = tr2 + tr3 + tr4
  tr5   = tr1 * exp((-z / (2 * (rho1m))))
  return (tr5)
}



#rase = range ancestral state estimation
rase = function(tree, 
                polygons, 
                niter        = 1e3L,
                logevery     = 10L,
                sigma2_scale = 0.05,
                screenlog    = TRUE,
                params0      = NA,
                nGQ          = 20) {
  
  if (!is(tree, "phylo")) {
    stop('tree should be of class phylo')
  }

  if (!any(sapply(polygons, is.owin))) {
    stop('one or more polygons are not in \'owin\' format')
  }
  
  if (any(sapply(polygons, is.empty))) {
    stop('one or more polygons are empty')
  }  

  tl = .subset2(tree,'tip.label')

  if (any(is.na(match(tl, names(polygons))))) {
    stop('tip labels and polygon names do not match')
  }
  
  ntaxa   = length(tl)
  nnode   = .subset2(tree,'Nnode')
  seqnode = seq_len(nnode)

  # initialize 
  areas   = mapply(area, polygons)    
  xy.tips = t(mapply(poly_center, polygons))
  
  ax      = array(NA, dim=c(niter,nnode))
  ay      = array(NA, dim=c(niter,nnode))

  sigma2x = rep.int(NA, niter)
  sigma2y = rep.int(NA, niter)
  
  if (anyNA(params0)) {
    ace_resx = ace(xy.tips[,1L], tree, method = 'ML')
    ace_resy = ace(xy.tips[,2L], tree, method = 'ML')
    ax[1L, (seqnode)] = .subset2(ace_resx,'ace')[seqnode]
    sigma2x[1L]       = .subset2(ace_resx,'sigma2')[1L]
    ay[1L, (seqnode)] = .subset2(ace_resy,'ace')[seqnode]
    sigma2y[1L]       = .subset2(ace_resy,'sigma2')[1L]
  } else {
    if (length(params0) != 2L*nnode + 2L) 
      stop("starting values not of correct length")
    ax[1L,] = params0[seqnode]
    sigma2x[1L] = params0[2L * nnode+1L]
    ay[1L,] = params0[(nnode+1L):(2L * nnode)]
    sigma2y[1L] = params0[2L * nnode+2]
  }

  tredge = .subset2(tree,'edge')
  paren  = tredge[,1L]
  child  = tredge[,2L]
  trlen = .subset2(tree,'edge.length')

  # more efficient to simulate uniforms before hand
  lUs = log(runif(niter))

  # make sigma functions before hand
  wtips = child <= ntaxa
  bm_est_sigma2 = make_bm_est_sigma2(ntaxa, nnode, wtips, paren, child, trlen)
  bm_loglik_ancestors_poly = make_bm_loglik_ancestors_poly(polygons, 
                                ntaxa, nnode, tredge, trlen, nGQ)

  for (it in 2L:niter) {
    
    # random permutation of internal nodes
    nodelist = ntaxa + sample.int(nnode)

    itm1 = it - 1L
    
    # populate current node values
    ax[it,]     = ax[itm1,]
    ay[it,]     = ay[itm1,]
    sigma2x[it] = .subset(sigma2x,itm1)
    sigma2y[it] = .subset(sigma2y,itm1)

    for (node in nodelist) {
      
      approx = 0L
      
      # daughters
      daughter_ids = .subset(child, paren == node)
      daughter_id1 = daughter_ids[1L]
      daughter_id2 = daughter_ids[2L]

      t1 = .subset(trlen, child  == daughter_id1)
      t2 = .subset(trlen, child  == daughter_id2)
     
      # 1st daughter value
      if (daughter_id1 <= ntaxa) { # if tip
        d1_value = .subset2(polygons, daughter_id1)
        approx   = 1L
      } else { # if internal node
        d1mt = daughter_id1 - ntaxa
        d1_value = c(.subset2(ax, it, d1mt), 
                     .subset2(ay, it, d1mt))
      }

      # 2nd daughter value     
      if (daughter_id2 <= ntaxa) { # if tip
        d2_value = .subset2(polygons, daughter_id2)
        approx   = 1L
      } else { # if internal node
        d2mt = daughter_id2 - ntaxa
        d2_value = c(.subset2(ax,it, d2mt), 
                     .subset2(ay,it, d2mt))
      }

      # ancestor value
      a_id = .subset(paren, child == node)

      if (length(a_id) == 0) {
        a_value = c(NA,NA)
        s       = NA
      } else {
        amt     = a_id - ntaxa
        a_value = c(.subset2(ax, it, amt), 
                    .subset2(ay, it, amt))
        s       = .subset(trlen, amt)
      }

      # proposal 
      xy_prop = bm_propose_trio(a_value, d1_value, d2_value, 
                                s, t1, t2, 
                                sigma2x[it], sigma2y[it])

      nmt = node - ntaxa

      if (approx) {
        loglik_prop = bm_loglik_trio(a_value, 
                                     .subset2(xy_prop,'value'), 
                                     d1_value, d2_value, 
                                     s, t1, t2, 
                                     sigma2x[it], sigma2y[it], 
                                     areas, daughter_ids, nGQ)
        loglik_cur = bm_loglik_trio(a_value, 
                                    c(.subset(ax, it, nmt), 
                                      .subset(ay, it ,nmt)), 
                                    d1_value, d2_value, 
                                    s, t1, t2, 
                                    sigma2x[it], sigma2y[it], 
                                    areas, daughter_ids, nGQ)
        
        logratio = loglik_prop - loglik_cur + 
                   .subset2(xy_prop,'logbwdprob') - 
                   .subset2(xy_prop,'logfwdprob')
        
        if (log(runif(1L)) < logratio) {
          subxy = .subset2(xy_prop,'value')
          ax[it,nmt] = subxy[1L]
          ay[it,nmt] = subxy[2L]
        }

      } else {
        subxy = .subset2(xy_prop,'value')
        ax[it, nmt] = subxy[1L]
        ay[it, nmt] = subxy[2L]
      }
    }
    
    # sample sigma2x and sigma2y
    
    # new function estimates sigma2x and sigma2y, 
    # and their standard errors.
    # these are used in a normal approximation proposal below

    obj = bm_est_sigma2(list(x=xy.tips[,1], 
                             y=xy.tips[,2]), 
                        c(ax[it,], ay[it,])
                        )

    s2x_cur = .subset2(obj,'sigma2xhat')
    s2x_sd  = sigma2_scale * .subset2(obj,'sigma2xhat_sd')
    s2y_cur = .subset2(obj,'sigma2yhat')
    s2y_sd  = sigma2_scale * .subset2(obj,'sigma2yhat_sd')
    
    repeat {
      sigma2x_prop = rnorm(1L, mean=s2x_cur, sd=s2x_sd)
      if (sigma2x_prop > 0) break
    }
    logprobratiox = dnorm(s2x_cur, sigma2x_prop, sd=s2x_sd, log=TRUE) - 
                    dnorm(sigma2x_prop, s2x_cur, sd=s2x_sd, log=TRUE)
    
    repeat {
      sigma2y_prop = rnorm(1L, mean=s2y_cur, sd=s2y_sd)
      if(sigma2y_prop>0) break
    }
    logprobratioy = dnorm(s2y_cur, sigma2y_prop, sd=s2y_sd, log=TRUE) - 
                    dnorm(sigma2y_prop, s2y_cur, sd=s2y_sd, log=TRUE)
    
    
    # 1/sigma2 prior
    loglik_cur  = bm_loglik_ancestors_poly(c(ax[it,],ay[it,],sigma2x[it],sigma2y[it])) - 
                  (log(sigma2x[it])+log(sigma2y[it]))
    loglik_prop = bm_loglik_ancestors_poly(c(ax[it,],ay[it,],sigma2x_prop,sigma2y_prop)) - 
                  (log(sigma2x_prop)+log(sigma2y_prop))
    
    logratio = loglik_prop - loglik_cur + logprobratiox + logprobratioy
    
    if (lUs[it] < logratio) {
      sigma2x[it] = sigma2x_prop
      sigma2y[it] = sigma2y_prop
    }
    
    if (screenlog && it%%logevery==0) {
      
      if (it == logevery) {
        cat('Iteration sigma2x sigma2y\n')
        cat(it, sigma2x[it], sigma2y[it],'\n')
      } else {
        cat(it, sigma2x[it], sigma2y[it],'\n')
      }
      
    } 
  }
  
  colnames(ax) = paste('n', names(branching.times(tree)), '_x', sep = '')
  colnames(ay) = paste('n', names(branching.times(tree)), '_y', sep = '')
  
  return(cbind(ax, ay, sigma2x, sigma2y))  	
}




#########################
# polygon center

poly_center = function(poly) {
	
  xy = .subset2(poly,'bdry')
	
  centr = c()
	for (j in seq_along(xy)) {
		
    subx = .subset2(.subset2(xy,j),'x')
    suby = .subset2(.subset2(xy,j),'y')

    x = c(subx, subx[1L])
		y = c(suby, suby[1L])
		
	  	n = length(x)
	  	if (length(y) != n) stop("length of x and y must be equal")

	  	Cx = 0; Cy = 0; A = 0
	  	for (i in seq_len(n-1L)) {
	    	Cx = Cx + (x[i] + x[i+1L])*(x[i]*y[i+1L] - x[i+1L]*y[i]) 
	    	Cy = Cy + (y[i] + y[i+1L])*(x[i]*y[i+1L] - x[i+1L]*y[i]) 
	    	A = A + x[i]*y[i+1L] - x[i+1L]*y[i]
	  	}

	  	A  = A/2
      A6 = 6*A
	  	Cx = Cx/A6
	  	Cy = Cy/A6
		
  		centr = rbind(centr,c(Cx,Cy, area(poly)))
	}
	return(c(weighted.mean(centr[,1L], centr[,3L]),
		       weighted.mean(centr[,2L], centr[,3L]))
        )
}



###################
# Locations at time slices
###################

###################
# takes a phylogenetic tree and a time slice
# and returns the pair of nodes whose branch 
# exists at that time

tree.slice = function(tree, slice) {

	if (!is(tree, 'phylo')) {
        stop('tree should be of class phylo')
    }
    if (!is.binary.tree(tree)) {
    	stop("tree should be fully dichotomous")
    }
	nnode = tree$Nnode
	ntips = min(tree$edge[,1]) - 1
	b.t = branching.times(tree)

	rage = cbind(tree$edge, NA, NA)

	rage[match(1:ntips, rage[,2]), 4] = 0
	rage = rage[order(rage[,1]),]
	rage[,3] = rep(b.t, each = 2)
	rage = rage[order(rage[,2]),]
	rage[which(rage[,2]>ntips),4] = b.t[2:(ntips - 1)]


	c1 = rage[which(rage[, 3] > slice),]
	c2 = c1[which(c1[, 4] < slice),]

	if (nrow(c2) == 0) cat('no branches at that time slice') else return(c2)
		
}


################
# MCMC of species ranges at time slice t


#########################
# compute the log-likelihood of the location of v
# at time slice u, given ancestor a, descendant d, 
# separated by time t

bm_loglik_duo = function(a, v, d, u, t, sx, sy, nGQ) {

   	la = sum(dnorm(v, a, sd=sqrt(u*c(sx, sy)), log=TRUE))

  	if (!is(d, 'owin')) { 
    	ld = sum(dnorm(d, v, sd = sqrt((t-u)*c(sx, sy)), log=TRUE))
  	} else { # d1 is a tip
    	ld =  log(as.numeric(polyCub.SV(d, bigauss_pdf, mx = v[1], my = v[2], sx = sqrt(sx*(t-u)), sy = sqrt(sy*(t-u)), rho = 0, nGQ = nGQ))) - 
        log(area(d))
    	if (is.nan(ld) | !is.finite(ld)) ld = -1e30
  	}

	return(la + ld)
}




#########################
# propose v values (x,y) at time slice u 
# the ancestral values is a=(ax,ay)
# the daughter values are d=(dx,dy) 
# u is the ancestral branch length
# t is the branch length

bm_propose_duo = function(a, d, u, t, sx, sy) {

	if (is(d, 'owin')) { 
    	d = poly_center(d)
  	}

	ax = a[1]; ay = a[2]
  	dx = d[1]; dy = d[2]

	xm1 = (dx*u + ax*(t-u))/t 
	xv1 = (t-u)*u*sx/t
	
	ym1 = (dy*u + ay*(t-u))/t 
	yv1 = (t-u)*u*sy/t
	
    x = rnorm(1, mean=xm1, sd=sqrt(xv1))
	y = rnorm(1, mean=ym1, sd=sqrt(yv1))

    logfwdprob = dnorm(x, mean=xm1, sd=sqrt(xv1), log=TRUE)
    logbwdprob = dnorm(y, mean=ym1, sd=sqrt(yv1), log=TRUE)

    # return value, logfwdprob, logbwdprob
  	return(list(value=c(x,y), logfwdprob=logfwdprob, logbwdprob=logbwdprob))
}



#################
# MCMC for locations at a given 
# slice of time, with the results
# from a rase run 

rase.slice = function(tree, slice, res, polygons, params0 = NA, niter=1e3, logevery=10, nGQ = 20) {

    if (!is(tree, "phylo")) {
        stop('tree should be of class phylo')
    }
    
    if (any(is.na(match(tree$tip.label, names(polygons))))) {
        stop('tip labels and polygon names do not match')
    }
    
                                        # Slice the tree with separate function
    a_d = tree.slice(tree, slice)
    
    ntaxa = length(tree$tip.label) # number of taxa
    nbranch = nrow(a_d) # number of branchs
    
    anc_x = res[, a_d[,1]-ntaxa] 				# all the ancestors posterior distributions from rase results in _x 
    anc_y = res[, tree$Nnode + a_d[,1]-ntaxa] 	# and _y
    
                                        # if no starting parameter values are provided,	
    if (is.na(params0)) { 	# use time weighted mean as starting values
	
        for (b in 1:nbranch) { # for each branch		
            if (a_d[b,4] == 0) { # if daughter is a tip, use the polygon centroid to calculate the mean
                pxy = poly_center(polygons[[b]])
                
                params0[b] = ((mean(anc_x[,b])*(slice - a_d[b, 4])) + 
                              (pxy[1]*(a_d[b, 3] - slice)))/(a_d[b, 3] - a_d[b, 4])
                
                params0[b+nbranch] = ((mean(anc_y[,b])*(slice - a_d[b, 4])) + 
                                      (pxy[2]*(a_d[b, 3] - slice)))/(a_d[b, 3] - a_d[b, 4])
                
            } else { # if daughter is not a tip
                params0[b] = ((mean(anc_x[,b])*(slice - a_d[b, 4])) + 
                              (mean(res[, a_d[b,2]-ntaxa])*(a_d[b, 3] - slice)))/(a_d[b, 3] - a_d[b, 4])
                
                              
                params0[b+nbranch] = ((mean(anc_y[,b])*(slice - a_d[b, 4])) + 
                                      (mean(res[, tree$Nnode + a_d[b,2] - ntaxa])*(a_d[b, 3] - slice)))/(a_d[b, 3] - a_d[b, 4])

            }		
        }
        
		cat("Using time weighted mean as starting parameter values \n")
	
    } else {		
                                        # Check for length of input initial parameter values
        if (length(params0) != (nrow(a_d)*2)) stop("starting values not of correct length")
    }
    
	
	bx = array(NA, dim=c(niter,nbranch))		# array of branch slice x's and y's  	
  	bx[1,] = params0[1:nbranch]					# populate starting nodes with starting values
  	by = array(NA, dim=c(niter,nbranch))
  	by[1,] = params0[(nbranch+1):(2*nbranch)]
  	
  	# use the posterior distribution given by rase of sigma x and y 
	sigma2x = res[,ncol(res)-1]
	sigma2y = res[,ncol(res)]

	for (iter in 2:niter) {
	
		if (logevery && iter%%logevery==0) cat("iter =", iter, "\n")
    	
    	# sample one single joint sampler
    	samp = sample(1:length(sigma2x),1)
    	
    	# sigma2x and sigma2y from this sample (sx & sy)
    	    	
    	sx = sigma2x[samp]
    	sy = sigma2y[samp]
    	
    	# populate current node values
    	bx[iter,] = bx[iter-1,]
    	by[iter,] = by[iter-1,]

		for (i in 1:nbranch) { # each i is a branch in the tree, a row in the output of tree.slice
			
			approx = 0
			# sample the ancestor of the branch from rase posterior distribution (a_value)
			a_value = c(anc_x[samp,i], anc_y[samp,i])
			
			# get time t - time from the ancestor to daughter
			t = a_d[i, 3] - a_d[i, 4]
			# get time u (slice since the ancestor)
    		u = a_d[i, 3] - slice
			
			daughter_id = a_d[i, 2]
    		       
			if (daughter_id <= ntaxa) { # if daughter is a tip
				d_value = polygons[[daughter_id]] # get the polygon as d_value
      			approx = 1
    		} else { # if not a tip, sample from the posterior distribution given by rase
    			d_value = c(res[samp, a_d[i,2]-ntaxa], res[samp, tree$Nnode + a_d[i,2]-ntaxa])			
			}

		    # proposal duo (x & y value)
 			xy_prop = bm_propose_duo(a_value, d_value, u, t, sx, sy)

			if (approx) {
				
				# likelihood of proposal
				loglik_prop = bm_loglik_duo(a_value, xy_prop$value, d_value, 
                                       u, t, sx, sy, nGQ)
          		
          		# likelihood of current node
				loglik_cur = bm_loglik_duo(a_value, c(bx[iter,i], by[iter,i]), 
                                      d_value, u, t, sx, sy, nGQ)

				#logratio
				logratio = loglik_prop - loglik_cur + xy_prop$logbwdprob - xy_prop$logfwdprob

				if (log(runif(1)) < logratio) {
    				bx[iter, i] = xy_prop$value[1]
    	    		by[iter, i] = xy_prop$value[2]
    			}
			} else {
  				bx[iter, i] = xy_prop$value[1]
        		by[iter, i] = xy_prop$value[2]
    		}
		
		}


  	}

	colnames(bx) = paste('b', 1:nbranch, '_x', sep = '')
	colnames(by) = paste('b', 1:nbranch, '_y', sep = '')

	return(cbind(bx, by))

}




################
# Data Handling
################


################
# Transform shapefiles from 'sp' package for use in rase

shape.to.rase = function(shape_poly) {

  if (!is(shape_poly, 'SpatialPolygonsDataFrame')) {
    stop('Error: object is not of class SpatialPolygonsDataFrame')
  }

  pols = list()
  for (i in 1:length(shape_poly)) {   
    
    fp1 = shape_poly[i,]
    
    fp2 = slot(slot(fp1, 'polygons')[[1]], 'Polygons')

    o_p = lapply(fp2, function(p) {
                         if (p@hole) {
                            return(list(x = p@coords[,1], 
                                        y = p@coords[,2]))
                         } else {
                            return(list(x = rev(p@coords[,1]), 
                                        y = rev(p@coords[,2])))
                         }})
    owin_p = owin(poly = o_p, check = TRUE)

    pols[[i]] = owin_p
  }

  return(pols)  
}




################
# Name polygons, order them as the tree tips

name.poly = function(polygons, tree, poly.names = NA) {
	
	if (!is(tree, "phylo")) {
        stop("Error: tree is not of class phylo")
    }

	tips = tree$tip.label
	
	if (any(is.na(poly.names))) {
		nam = names(polygons)
	} else {
		nam = poly.names
		names(polygons) = nam
	}
	if (any(is.na(match(tips, nam)))) stop('tip labels and polygon names do not match')
			
	if (all(match(tips, nam) == 1:length(polygons))) {
		cat('tip labels and polygon names match and are in the same order \n') 
	} else {	
		polygons = polygons[match(tips, nam)]	
	}
		
	return(polygons)
}



#########################
# Visualization functions with rgl
#########################

#########################
# Function to transform data structure for 3D plotting

data.for.3d = function(res, tree, polygons) {  
		
	if (is.null(tree$node.label)) {  	
       	tree$node.label <- paste("n", names(branching.times(tree)), sep="")
    }
      
    xy.tips = t(mapply(poly_center, polygons)) 
    xy.nodes = matrix(colMeans(res)[1:(2*tree$Nnode)], nrow = tree$Nnode, ncol = 2)
    xy.all = data.frame(rbind(xy.tips, as.data.frame(xy.nodes)))

    z = c(rep(0, times=length(tree$tip.label)), branching.times(tree))

    names(xy.all) = c("x", "y")
    label = c(tree$tip.label, tree$node.label)
    return(list(xyz = data.frame(xy.all, z, label), edge = tree$edge, pol = polygons))
}



#########################
# Function that opens the 'rgl' 3d device and plots the phylogenetic tree 
# to the 3d setting
# Note: each node is placed to its estimated geographic position
  
phylo.3d = function(df3, z.scale = 1, pts = TRUE, ...) {
    
    edg = df3$edge
 
    if (pts == TRUE) points3d(df3$xyz$x, df3$xyz$y, z.scale*df3$xyz$z)
        
	for (i in 1:nrow(edg)) {
		x = df3$xyz$x[edg[i,]]
      	y = df3$xyz$y[edg[i,]]
      	z = df3$xyz$z[edg[i,]]
  
      	lines3d(x, y, z*z.scale, ...)
    }
}



#########################
# Function that adds the terminal polygons to the opened 'rgl' device

add.polygons = function(df3, axes = 2, ...) {
    
	for (j in 1:length(df3$pol)) {
    	poli = df3$pol[[j]]

      	polygon3d(x=c(poli$bdry[[1]]$x, poli$bdry[[1]]$x[1]), 
      			y=c(poli$bdry[[1]]$y, poli$bdry[[1]]$y[1]), 
                z=rep(0, times=length(poli$bdry[[1]]$x) + 1), 
                ...)
    }
	if (axes == 0) print('No axes displayed')
	if (axes == 1) {
		axes3d(edges = c('x'), xlab = 'Longitude')
		title3d('','','Longitude')
	} 
	if (axes == 2) {
		axes3d(edges = c('x', 'y'), xlab = 'Longitude', ylab = 'Latitude')
		title3d('','','Longitude','Latitude')
	}
	if (axes == 3) {
		axes3d(edges = c('x', 'y', 'z'), xlab = 'Longitude', ylab = 'Latitude', zlab = 'Time')
		title3d('','','Longitude','Latitude','Time')
	}
}


#########################
# Function that adds the ancestral posterior densities from the Gibbs sampling 
# to the opened 'rgl' device

add.dens = function(df3, res,
                    nlevels = 20, 
                    z.scale = 1, 
                    col = c(1:nnode), ...) {
    
    sxyz      = .subset2(df3,'xyz')
    node.ages = sxyz[sxyz[, 'z'] != 0, 3L] 
    nnode = length(node.ages)
   
    for (i in 1:nnode){
    	densy = sm::sm.density(res[,c(i, i + nnode)], display = 'none')		
	    cont  = contourLines(densy$eval.points[,1], 
                           densy$eval.points[,2], 
                           densy$estimate, 
                           nlevels = nlevels)
		  polygon3d(x = cont[[1]]$x, y = cont[[1]]$y, 
		            z = 0.02+(z.scale*rep.int(node.ages[i],length(cont[[1]]$x))), 
                col = col[i], ...)
    }
}

Try the rase package in your browser

Any scripts or data that you put into this service are public.

rase documentation built on May 2, 2019, 12:46 p.m.