R/visualization.tools.R

Defines functions multiplot PlotModel update.priors get.prior.table plot.priors plot.sim.obs

Documented in get.prior.table multiplot PlotModel plot.priors plot.sim.obs update.priors

#' Plot simulated data against observed
#' @description This function plots an histogram of the simulated summary statistics with the corresponding observed value as a red line for comparison.
#' @param sim A data frame with simulated data.
#' @param obs A vector of observed summary stats corresponding to the sim object.
#' @return Graphic
#' @author Marcelo Gehara
#' @export
plot.sim.obs <- function(sim, obs)
  {
    mylabels <- colnames(sim)
    for (i in 1:ncol(sim)) {
      hist(sim[, i], breaks = 20, xlab = mylabels[i], main = "",
           xlim = c(min(c(na.omit(sim[, i]),obs[i])), max(c(na.omit(sim[, i]),obs[i]))))
      abline(v = obs[i], col = 2)
    }
  }

#' Plot prior distribution
#' @description This function plots a density of the simulated prior.
#' @param model A model object.
#' @param nsamples Number of samples to draw from each prior distribution.
#' @param mu.rates
#' @return Graphic
#' @author Marcelo Gehara
#' @export
plot.priors <- function(model, nsamples=1000, mu.rates=NULL){

  param = NULL
  pb = txtProgressBar(min = 1, max = nsamples, initial = 1)
  for(j in 1:nsamples){
    param.samples <- as.numeric(PipeMaster:::msABC.commander(model, use.alpha=F, arg=1)[[2]][2,])
    if(!(is.null(mu.rates))){
      rates <- do.call(mu.rates[[1]],args=c(1,mu.rates[2:length(mu.rates)]))
      param <- rbind(param,c(param.samples, rates))
      } else {
      rates <- PipeMaster:::sample.mu.rates(model)[[2]]
      param <- rbind(param,c(param.samples, rates))
    }
    setTxtProgressBar(pb,j)
  }
  if(!(is.null(mu.rates))){
    colnames(param) <- c(PipeMaster:::msABC.commander(model, use.alpha=F, arg=1)[[2]][1,],"mu")
  } else {
    colnames(param) <- c(PipeMaster:::msABC.commander(model, use.alpha=F, arg=1)[[2]][1,],"mean.mu", "sd.mu")
  }

  par(mfrow=c(3,3))
  for(i in 1:ncol(param)){
    plot(density(param[,i]), main=colnames(param)[i], col=2)
  }
}

#' Get a table with prior distributions.
#' @description This function makes a table with the parameters of the model and respective prior distribution.
#' @param model A model object.
#' @return A data frame with 4 columns: parameter name, first parameter of prior distribution, secound parameter of prior distribution, prior distribution.
#' @author Marcelo Gehara
#' @export
get.prior.table <- function(model){

  flags <- model$flags

  for(i in 1:length(flags)){
    if(is.null(nrow(flags[[i]]))){
      flags[[i]] <- do.call(rbind,flags[[i]])
    }
  }
    flags <- do.call(rbind,flags)
    flags <- as.data.frame(flags[,c(1,4,5,6)])
    colnames(flags) <- c("Parameter","prior.1","prior.2","distribution")
    return(flags)
}

#' Update priors using a prior table
#' @description This function updates the prior of your models using a prior table. see get.prior.table function
#' @param tab A prior table generated by the get.prior.table function.
#' @param model A model object.
#' @return A data frame with 4 columns: parameter name, first parameter of prior distribution, second parameter of prior distribution, prior distribution.
#' @author Marcelo Gehara
#' @export
update.priors<-function(tab, model){

  for(i in 1:nrow(tab)){

    x <- model$flags[[grep(tab[i,1], model$flags)]]

    if(is.null(nrow(x))){
      y <- which(mapply(grep,paste0("^",tab[i,1],"$"),x)==1)
      x <- x[y]
      x <- x[[1]]
      x[grep(tab[i,1], x),4:6] <- as.matrix(tab[i, 2:4])
      model$flags[[grep(tab[i,1], model$flags)]][[y]]<-x
    } else {
      x[grep(tab[i,1], x),4:6] <- as.matrix(tab[i, 2:4])
      model$flags[[grep(tab[i,1], model$flags)]] <- x
    }

  }
  return(model)
}

#' Plot model
#' @description This function plots a graphical representation of your model.
#' @param model A model object generated in the main.menu.
#' @param use.alpha Logical. If TRUE the most recent population size change will be exponential. If FALSE sudden demographic changes. Default is FALSE.
#'                  This argument changes ONLY the MOST RECENT demographich change. You should indicate the population numbers for the exponential change
#'                  toguether with the logical argument in a vector. Ex.c(T,1,2)
#' @param average.of.priors logical. if TRUE parameters for the plot will be equal to the average of prior values.
#' @param axes logical. if TRUE plot axes with models. Default is TRUE.
#' @return Graphic
#' @author Marcelo Gehara. This function is a wrapper of the PlotMS function of the POPDemog package.
#' @export
PlotModel<-function(model, use.alpha=F, average.of.priors=F,
                    xlab = "Population",
                    ylab = "Time before present (generations)",
                    axes = T){

  model$loci <- t(data.frame(c("rate1" ,"1000", "1",  "1e-08", "1e-08", "runif")))

  model$I <- t(data.frame(c(model$I[1,1:3],rep(10,as.numeric(model$I[1,3])))))

  if(average.of.priors==T){
    x <- PipeMaster:::ms.commander.forplot(model, use.alpha = use.alpha)
    POPdemog::PlotMS(x[[1]], type="ms", col.pop = c(3:(as.numeric(model$I[1,3])+2)),
                     lwd.arrow = 2, size.scale = "log",log.base = 10, time.scale = "generation",
                     N4=4000000, axes = axes, xlab = xlab, ylab = ylab)
  } else { x <- PipeMaster:::ms.commander2(model, use.alpha = use.alpha)
            POPdemog::PlotMS(x[[1]], type="ms", col.pop = c(3:(as.numeric(model$I[1,3])+2)),
                   lwd.arrow = 2, size.scale = "log", log.base = 10,time.scale = "generation",
                   N4=as.numeric(x[[2]][length(x[[2]])]), axes = axes, xlab = xlab, ylab = ylab)
}

  }




#' Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
#' @export
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }

  if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}


#' Principal Component Analysis plot
#' @description plot first 10 PCs of simulated data against observed.
#' @param models A data.frame object with combined summary statistics.
#' @param data A character vector indexing the models object.
#' @param observed Observed summary statistics. Should be the same as in models.
#' @param subsample A number between 0-1 indicating a fraction of rows in the models object to be included in the PCA calculation.
#' @return graphic
#' @author Marcelo Gehara
#' @export
plotPCs <- function (models, index, observed, subsample) {

  labels <- unique(index)
  labels <- sort(c(labels,"observed"))
  sizes <- rep(2,length(labels))
  sizes[which(labels=="observed")]<-10

  shapes <- rep(16,length(labels))
  shapes[which(labels=="observed")]<-8

  # exclude missing data for pca plot
  data.PCA <- index[complete.cases(models)]
  models.PCA <- models[complete.cases(models),]
  # subsample for PCA
  x <- sample(1:length(data.PCA),length(data.PCA)*subsample)
  data.PCA <- data.PCA[x]
  models.PCA <- models.PCA[x,]
  # run PCA
  PCA <- prcomp(rbind(models.PCA, observed), center = T, scale. = T, retx=T)
  # get scores
  scores <- data.frame(PCA$x[,1:ncol(PCA$x)])
  PC <- colnames(scores)[1:10]
  plotPCA<-function(PCS){
    PCS <- rlang::sym(PCS)
    p <- ggplot2::ggplot(scores, ggplot2::aes(x = PC1, y = !! PCS )) +
      ggplot2::theme(legend.position = "none")+
      ggplot2::geom_point(ggplot2::aes(colour=c(data.PCA,"observed"), size=c(data.PCA,"observed"),
                                       shape=c(data.PCA,"observed")))+
      ggplot2::scale_shape_manual(values=shapes)+
      ggplot2::scale_size_manual(values=sizes)+
      ggplot2::scale_color_brewer(palette="Dark2")+
      if(PCS=="PC2") ggplot2::theme(legend.position="top", legend.direction="horizontal", legend.title = ggplot2::element_blank())
    return(p)
  }
  P<-NULL
  for(i in 2:10){
    P[[i]]  <- plotPCA(PC[i])
  }
  gridExtra::grid.arrange(P[[2]], P[[3]], P[[4]], P[[5]], P[[6]], P[[7]], P[[8]], P[[9]], P[[10]], nrow=3)
}
gehara/PipeMaster documentation built on April 19, 2024, 8:14 a.m.