R/sfreemap.plot.R

Defines functions plot_comparison_for_q parse plot_boxplot plot_speed_up plot_comparison

# plot growth rate
# x is a list containing tree vectors of equal site:
#    files: the path for the files generated by sfreemap.test.perf
#    types: the column name to be considered in the corresponding file
#    legend: the value to be added as a legend
#    mode: serial or parallel
#    nsim: number of simulations
#    q: estimated or fixed Q matrix
plot_comparison <- function(x, xlabel, trans="identity", limit=NULL, output=NULL
                            , no.plot=FALSE, lang='pt_BR') {

    print(c('plot_comparison',trans))
    if (!trans %in% c('identity', 'log10')) {
        stop('trans should be "identity" or "log10"')
    }

    files <- x$files
    types <- x$types
    legend <- x$legend
    mode <- x$mode
    nsim <- x$nsim
    q <- x$q
    omp <- x$omp
    cores <- x$cores

    if (is.null(mode)) {
        mode <- rep('serial', length(files))
    }
    if (is.null(nsim)) {
        nsim <- rep(1, length(files))
    }
    if (is.null(q)) {
        q <- rep('estimated', length(files))
    }
    if (is.null(omp)) {
        omp <- rep(1, length(files))
    }
    if (is.null(cores)) {
        cores <- rep(NULL, length(files))
    }

    if (!all.equal(length(files), length(types), length(legend))) {
        stop ("files, types and legend must have the same length")
    }

    # FIXME: this is ugly... there is no need to do the first step outside the main loop
    data <- parse(files[1], types[1], legend[1], limit, mode[1], nsim[1], q[1], omp[1], cores[1])
    first <- data
    speed_up <- list()
    if (length(files) > 1) {
        for (i in 2:length(files)) {
            tmp <- parse(files[i], types[i], legend[i], limit, mode[i], nsim[i], q[i], omp[i], cores[i])
            data <- rbind(data, tmp)
            speed_up[[legend[i]]] <- summary_speed_up(tmp$time, first$time)
        }
    }

    if (isTRUE(no.plot)) {
        return(list(data=data, speed_up=speed_up))
    }

    if (!is.null(output)) {
        png(output, width=1024, height=768)
    }

    if (lang == 'pt_BR') {
        ylabel <- "Tempo decorrido (segundos)"
    } else {
        ylabel <- "Elapsed time (seconds)"
    }

    if (trans == 'log10') {
        breaks=log_breaks(10,10)
    } else {
        breaks=pretty_breaks(n=10)
    }

    fl <- factor(data$legend)
    p <- ggplot(data, aes(x=value, y=time, group=legend, colour=fl, shape=fl)) +
            geom_smooth(method='loess', fullrange=TRUE, se=FALSE) +
            geom_point(size=3) +
            theme_bw(base_size=26) +
            scale_y_continuous(trans=trans, breaks=breaks) +
            scale_x_continuous(breaks=pretty_breaks(n=10)) +
            scale_shape_discrete(name = "") +
            scale_colour_brewer(palette="Set1", name = "") +
            theme(legend.position="top", axis.text.y=element_text(hjust=1.0)) +
            xlab(xlabel) +
            ylab(ylabel)
    print(p)

    if (!is.null(output)) {
        dev.off()
    }

    return(list(data=data, speed_up=speed_up))
}

# plot growth rate
# x is a list containing tree vectors of equal site:
#    files: the path for the files generated by sfreemap.test.perf
#    types: the column name to be considered in the corresponding file
#    legend: the value to be added as a legend
plot_speed_up <- function(x, limit=NULL, output=NULL, print.ideal=TRUE) {

    xlabel <- 'Número de núcleos de processamento'
    ylabel <- 'Speed up'

    data <- plot_comparison(x, NULL, limit=limit, no.plot=TRUE)
    data <- data$data

    legend <- unique(x$legend)
    for (leg in legend) {
        select <- data$legend==leg
        time <- data[select,'time']
        data[select,'speedup'] <- time[1]/time
    }

    if (isTRUE(print.ideal)) {
        nrows <- nrow(data)/length(legend)
        fill <- rep(0, nrows)
        ideal_leg <- rep('Speed up ideal', nrows)
        value <- unique(data$value)
        ideal <- data.frame(time=fill, value=value, legend=ideal_leg, speedup=value)
        colnames(ideal) <- colnames(data)
        data <- rbind(data, ideal)
    }

    if (!is.null(output)) {
        png(output, width=1024, height=768)
    }

    fl <- factor(data$legend)

    p <- ggplot(data, aes(x=value, y=speedup, group=legend, colour=fl, shape=fl)) +
            stat_smooth(method='loess', fullrange=TRUE, se=FALSE) +
            geom_point(size=3) +
            theme_bw(base_size=26) +
            scale_x_continuous(breaks=pretty_breaks(n=10)) +
            scale_y_continuous(breaks=pretty_breaks(n=10)) +
            scale_shape_discrete(name = "") +
            scale_colour_brewer(palette="Set1", name = "") +
            theme(legend.position="top", axis.text.y=element_text(hjust=1.3)) +
            xlab(xlabel) +
            ylab(ylabel)

    print(p)

    if (!is.null(output)) {
        dev.off()
    }

    return(data)
}

# plot a boxplot
# this function is used by sfreemap.test.boxplot
plot_boxplot <- function(out_dir, out_file, data, y, xlabel, ylabel, sfreemap_mean) {

    data <- data.frame(data)

    simmap_mean <- mean(data[[y]])

    output <- paste(out_dir, out_file, sep='/')

    png(output, width=1024, height=768)
    p <- ggplot(data, aes_string(x='factor(generation)', y=y)) +
            theme_bw(base_size=26) +
            geom_boxplot() +
            xlab(xlabel) +
            ylab(ylabel) +
            theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
            theme(axis.title.y = element_text(vjust=1.8)) +
            geom_hline(yintercept=sfreemap_mean, color='red', show.legend=TRUE) +
            geom_hline(yintercept=simmap_mean, color='blue', show.legend=TRUE)
    print(p)
    dev.off()

}

parse <- function(file, type, legend, limit=NULL, mode=NULL, nsim=NULL, q=NULL, omp=NULL, cores=NULL) {
    col.names <- c('tree', 'taxa', 'state', 'time', 'nsim', 'mode', 'q', 'omp', 'cores')
    data <- read.table(file, row.names=NULL, col.names=col.names)
    # filter by mode (parallel/serial)
    if (!is.null(mode)) {
        data <- data[data$mode==mode,]
    }
    # filter by number of simulations (useful for simmap)
    if (!is.null(nsim)) {
        data <- data[data$nsim==nsim,]
    }
    if (!is.null(q)) {
        data <- data[data$q==q,]
    }
    if (!is.null(omp) && type!='omp') {
        data <- data[data$omp==omp,]
    }
    if (!is.null(cores) && type!='cores') {
        data <- data[data$cores==cores,]
    }

    # projection by execution time and the parameter state/taxa/trees/...
    data <- data[,c('time', type)]
    # add legend
    data$legend <- legend
    # remove rows, if necessary
    if (!is.null(limit)) {
        data <- head(data, limit)
    }

    # give names to resulting data.frame
    colnames(data) <- c('time', 'value', 'legend')
    return(data)
}


plot_comparison_for_q <- function(x, xlabel, trans='identity', output=NULL) {

    print(c('plot_comparison_for_q',trans))

    if (!trans %in% c('identity', 'log10')) {
        stop('trans should be "identity" or "log10"')
    }

    files <- x$files
    types <- x$types
    legend <- x$legend
    time <- x$time # time_to_map, time_to_estimate or total

    if (is.null(types)) {
        mode <- rep('state', length(files))
    }
    if (is.null(time)) {
        time <- rep('total', length(files))
    }

    if (!all.equal(length(files), length(legend))) {
        stop ("files, types and legend must have the same length")
    }

    col.names <- c('tree', 'taxa', 'state', 'time', 'nsim', 'mode', 'q', 'omp', 'cores')

    final_data <- NULL
    final_plot <- NULL
    for (i in 1:length(files)) {
        f <- files[i]
        type <- types[i]
        leg <- legend[i]
        timing_type <- time[i]

        data <- read.table(f, row.names=NULL, col.names=col.names)

        # filter
        data <- data[data$nsim==1,]
        data <- data[data$mode=='serial',]

        # compare estimated with fixed q
        fixed <- data[data$q=='fixed',]
        estimated <- data[data$q=='estimated',]
        data <- cbind(fixed[[type]], fixed$time, estimated$time)
        colnames(data) <- c('value', 'time_to_map', 'total')
        data <- data.frame(data)

        # time to estimate Q matrix
        time_to_estimate <- data$total - data$time_to_map
        proportion <- time_to_estimate*100/data$total

        leg <- rep(leg, length(data$value))
        data <- cbind(data, time_to_estimate, proportion, legend=leg)

        projection <- c('value', timing_type, 'legend')
        to_plot <- data[,projection]
        colnames(to_plot) <- c('value', 'time', 'legend')

        final_plot <- rbind(final_plot, to_plot)
        final_data <- rbind(final_data, data)
    }

    final_data <- data.frame(final_data, stringsAsFactors=FALSE)
    final_plot <- data.frame(final_plot, stringsAsFactors=FALSE)

    if (!is.null(output)) {
        png(output, width=1024, height=768)
    }

    fl <- factor(final_plot$legend)

    if (trans == 'log10') {
        breaks=log_breaks(10,10)
    } else {
        breaks=pretty_breaks(n=10)
    }

    p <- ggplot(final_plot, aes(x=value, y=time, group=legend, colour=fl, shape=fl)) +
            geom_smooth(method='loess', fullrange=TRUE, se=FALSE) +
            geom_point(size=3) +
            theme_bw(base_size=26) +
            scale_y_continuous(trans=trans, breaks=breaks) +
            scale_x_continuous(breaks=pretty_breaks(n=10)) +
            scale_colour_brewer(palette="Set1", name = "") +
            scale_shape_discrete(name = "") +
            theme(legend.position="top") +
            xlab(xlabel) +
            ylab("Tempo decorrido (segundos)")
    print(p)

    if (!is.null(output)) {
        dev.off()
    }

    return(final_data)
}
dpasqualin/sfreemap.tests documentation built on May 13, 2017, 4:39 a.m.