# R/gui.R

#' Menu-based interface for \code{provenance}
#'
#' For those less familiar with the syntax of the \code{R} programming
#' language, the \code{provenance()} function provides a user-friendly
#' way to access the most important functionality in the form of a
#' menu-based query interface. Further details and examples are
#' provided on \url{https://www.ucl.ac.uk/~ucfbpve/provenance/}.
#' @author Pieter Vermeesch
#' @references Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R
#'     package for statistical provenance analysis. Sedimentary
#'     Geology, doi:10.1016/j.sedgeo.2016.01.009.
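#' @examples
#' \dontrun{
#' # usage sketch: provenance() takes no arguments and is driven entirely
#' # by interactive menu prompts
#' provenance()
#' }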
#' @export
provenance <- function(){
    version <- as.character(utils::packageVersion('provenance'))
    message("This is provenance version ", version)
    RS <- RStudio()
    while (TRUE){
        message("Pick an option:\n",
                "1 - sample size calculation\n",
                "2 - plot a single dataset\n",
                "3 - plot multiple datasets\n",
                "4 - Minsorting\n",
                "5 - MDS/PCA/CA\n",
                "6 - Procrustes analysis\n",
                "7 - 3-way MDS\n",
                ifelse(RS,"8 - ","8 - save plots (.pdf)\n9 - "),
                "help\n",
                "q - quit")
        response <- readline()
        if (response == '1'){
            gui.samplesize()
        } else if (response == '2'){
            gui.plot.single()
        } else if (response == '3'){
            gui.plot.multiple()
        } else if (response == '4'){
            gui.minsorting()
        } else if (response == '5'){
            gui.mds()
        } else if (response == '6'){
            gui.3way(TRUE)
        } else if (response == '7'){
            gui.3way(FALSE)
        } else if (!RS & response == '8'){
            gui.save.plots()
        } else if (response == ifelse(RS,'8','9')){
            gui.help()
        } else if (response == 'q'){
            break
        }
    }
}

# sigdig = number of significant digits
gui.samplesize <- function(sigdig=3){
    message("Sample size calculations:\n",
            "1 - Probability of missing a fraction of given size\n",
            "2 - Largest fraction not to be missed with a given confidence\n",
            "3 - Number of grains needed to sample all fractions of a given size")
    response <- readline()
    if (response == '1'){
        n <- readline("enter the number of grains [e.g., 117]: ")
        f <- readline(paste0("enter a number between 0 and 100% indicating\n",
                             "the size of the smallest fraction of interest [e.g., 5]: "))
        p <- get.p(as.numeric(n),as.numeric(f)/100)
        message("\nResult: the likelihood that no fractions greater than ", f , "%\n",
                "of the population were missed in a sample with ", n ," grains is ",
                "p = ",signif(100*p,sigdig) , "%\n")
    } else if (response == '2'){
        n <- readline("enter the number of grains [e.g., 117]: ")
        p <- readline(paste0("enter a number between 0 and 100% indicating\n",
                             "the acceptable risk of having missed a fraction [e.g., 5]: "))
        f <- get.f(as.numeric(n),as.numeric(p)/100)
        message("\nResult: the largest fraction that a ", n , "-grain sample\n",
                "is ", 100-as.numeric(p) ,
                "% certain not to have missed is f = ", signif(100*f,sigdig),"%\n")
    } else if (response == '3'){
        f <- readline(paste0("enter a number between 0 and 100% indicating\n",
                             "the size of the smallest fraction of interest [e.g., 5]: "))
        p <- readline(paste0("enter a number between 0 and 100% indicating\n",
                             "the acceptable risk of having missed such a fraction [e.g., 5]: "))
        n <- get.n(as.numeric(p)/100,as.numeric(f)/100)
        message("\nResult: the minimum sample size that guarantees with ",
                100-as.numeric(p),"% certainty not\n",
                "to have missed any fraction greater than ",f,
                "% of the population is n = ",n,"\n")
    } else {
        message('Incorrect input.')
    }
}
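# Non-interactive sketch of the three calculations above, using the same
# get.p/get.f/get.n helpers (argument order mirrors the calls in this
# function; the numbers are the example values quoted in the prompts):
#   get.p(117, 0.05)   # option 1: probability of having missed a 5% fraction
#   get.f(117, 0.05)   # option 2: largest fraction not missed at 95% confidence
#   get.n(0.05, 0.05)  # option 3: grains needed for f = 5% at 95% certainty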

gui.plot.single <- function(){
    grDevices::graphics.off()
    message("Plot a single dataset:\n",
            "1 - Ternary diagram\n",
            "2 - Pie charts\n",
            "3 - Radial plot\n",
            "4 - Cumulative Age Distributions\n",
            "5 - Kernel Density Estimates")
    response1 <- readline()
    if (response1 == '1'){
        showpath <- FALSE
        dat <- gui.get.dataset(includedistributional=FALSE)
        message("Plot background lines?\n",
                "1 - basic grid [default]\n",
                "2 - descriptive QFL diagram\n",
                "3 - Folk's classification\n",
                "4 - Dickinson's QFL diagram\n",
                "5 - no lines")
        response2 <- readline()
        if (response2 == '2'){
            type <- "QFL.descriptive"
        } else if (response2 == '3'){
            type <- "QFL.folk"
        } else if (response2 == '4'){
            type <- "QFL.dickinson"
        } else if (response2 == '5'){
            type <- "empty"
        } else {
            type <- "grid"
        }
        if (methods::is(dat,"SRDcorrected")){
            response3 <- readline("Show SRD correction [Y or n]? ")
            if (response3 %in% c('n','N')) showpath <- FALSE
            else showpath <- TRUE
        }
        message("1 - Add an error ellipse the entire population\n",
                "2 - Add an error ellipse for the average composition\n",
                "3 - No ellipse [default]")
        response3 <- readline()
        if (response3 %in% c('1','2')){
            alpha <- as.numeric(readline("Confidence level [default=0.05]? "))
            if (is.na(alpha)) alpha <- 0.05
        }
        graphics::plot(ternary(dat),type=type,showpath=showpath)
        if (response3 == '1'){ 
            ternary.ellipse(ternary(dat),alpha=alpha,population=TRUE)
        } else if (response3 == '2'){
            ternary.ellipse(ternary(dat),alpha=alpha,population=FALSE)
        }
    } else if (response1 == '2'){
        dat <- gui.get.dataset(includedistributional=FALSE)
        numcol <- as.numeric(readline("Number of columns in the plot? "))
        if (is.na(numcol)) numcol <- 1
        summaryplot(dat,ncol=numcol)
    } else if (response1 == '3'){
        dat <- gui.open.compositional(counts=TRUE)
        complist <- paste(colnames(dat$x),collapse=',')
        message("Select two components from the following ",
                "list to form the numerator and denominator of ",
                "the ratios to be displayed:\n", complist ,"\n",
                "Enter as a comma separated pair of labels:")
        response <- readline()
        numden <- strsplit(response,split=',')[[1]]
        numden <- gsub(" ","",numden) # get rid of spaces
        radialplot(dat,num=numden[1],den=numden[2])
    } else if (response1 == '4'){
        dat <- gui.open.distributional()
        graphics::plot(dat,CAD=TRUE)
    } else if (response1 == '5'){
        dat <- gui.open.distributional()
        kdes <- gui.get.kdes(dat)
        if (methods::is(kdes,"KDE")){
            graphics::plot(kdes)
        } else { # is(kdes,'KDEs')
            numcol <- as.numeric(readline("Number of columns in the plot? "))
            if (is.na(numcol)) numcol <- 1
            summaryplot(kdes,ncol=numcol)
        }
    } else {
        message('Incorrect input.')
    }
}
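# Non-interactive sketch of the plots generated above ('pc' and 'dist' are
# illustrative names for a point-counting and a distributional dataset):
#   plot(ternary(pc), type="QFL.folk")   # ternary diagram with Folk's fields
#   summaryplot(pc, ncol=2)              # pie charts
#   radialplot(pc, num="ep", den="gt")   # radial plot of the ep/gt ratio
#   plot(dist, CAD=TRUE)                 # cumulative age distribution
#   plot(KDE(dist$x[[1]]))               # kernel density estimate of one sample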

gui.plot.multiple <- function(){
    command <- "summaryplot("
    datasets <- gui.get.datasets(kdes=TRUE,includevarietal=FALSE)
    dnames <- names(datasets)
    for (dname in dnames){
        command <- paste0(command,"datasets[[\'",dname,"\']],")
    }
    numcol <- as.numeric(readline("Number of columns? "))
    if (is.na(numcol)) numcol <- 1
    command <- paste0(command,"ncol=",numcol,")")
    eval(parse(text=command))
}
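# The string assembled above evaluates to a call of the form (sketch, with
# placeholder dataset names):
#   summaryplot(datasets[['A']], datasets[['B']], ncol=2)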

gui.minsorting <- function(){
    phi <- 2
    sigmaphi <- 1
    medium <- "seawater"
    fromphi <- -2.25
    tophi <- 5.5
    byphi <- 0.05
    endcomp <- provenance::endmembers
    mydens <- provenance::densities
    while (TRUE){
        message("Change default:\n",
                "1 - bulk composition [default=tectonic endmembers]\n",
                "2 - average grain size in phi units [default=",phi,"]\n",
                "3 - grain size standard deviation [defaults=",sigmaphi,"]\n",
                "4 - transport medium [default=",medium,"]\n",
                "5 - plot resolution [default: from ",fromphi," to ", tophi," by ",byphi,"]\n",
                "6 - selected minerals [default: all]\n",
                "c - continue")
        response1 <- readline()
        if (response1 == "1") {
            endcomp <- gui.get.dataset(includedistributional=FALSE)
        } else if (response1 == "2"){
            mydens <- gui.load.densities()
        } else if (response1 == "3"){
            phi <- as.numeric(readline("Enter the average grain size (in phi units): "))
        } else if (response1 == "3"){
            message("Enter the standard deviation of the grain size (in phi units): ")
            sigmaphi <- as.numeric(readline())
        } else if (response1 == "4"){
            message("Choose one of the following options:\n",
                    "1 - seawater\n",
                    "2 - freshwater\n",
                    "3 - air")
            response2 <- readline()
            if (response2 == "2"){
                medium <- "freshwater"
            } else if (response2 == "3"){
                medium <- "air"
            } else {
                medium <- "seawater"
            }
        } else if (response1 == "5"){
            message("Change:\n",
                    "1 - minimum [default = ",minphi,"]\n",
                    "2 - maximum [default = ",maxphi,"]\n",
                    "3 - step size [default = ",byphi,"]")
            response2 <- readline()
            if (response2 == "1"){
                minphi <- as.numeric(response2)
            } else if (response2 == "2"){
                maxphi <- as.numeric(response2)
            } else if (response2 == "3"){
                byphi <- as.numeric(response2)
            } else {
                message("Invalid input")
            }
        }  else if (response1 == "6"){
            endcomp <- gui.subset.components(endcomp)
        } else {
            break
        }
    }
    while (TRUE){
        sname <- NULL
        smp <- ""
        if (length(names(endcomp)) > 1){
            samplist <- paste(names(endcomp),collapse=',')
            message("Select one of the following samples to plot:\n",
                    samplist,
                    "\nor press [Return] to exit")
            smp <- readline()
            if (smp != "") sname <- smp
        }
        grDevices::graphics.off()
        graphics::plot(minsorting(endcomp,mydens,sname=sname,
                                  phi,sigmaphi,medium,fromphi,tophi,byphi))
        if (length(names(endcomp)) == 1 | smp == "") break
    }
}
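# Direct equivalent of the call issued in the loop above, with the default
# settings spelled out (positional arguments mirror the plot() call above;
# sname=NULL corresponds to pressing [Return] at the sample prompt):
#   plot(minsorting(provenance::endmembers, provenance::densities, sname=NULL,
#                   2, 1, "seawater", -2.25, 5.5, 0.05))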

gui.mds <- function(){
    grDevices::graphics.off()
    dat <- gui.get.dataset()
    redo <- FALSE
    classical <- FALSE
    method <- dat$method
    if (method=="aitchison"){
        message("1 - Use Aitchison distance (PCA, default)")
        message("2 - Use Aitchison distance (MDS)")
        message("3 - Use Bray-Curtis dissimilarity (MDS)")
        response <- readline()
        if (response == "2"){
            # do nothing
        } else if (response == "3"){
            method <- "bray"
        } else {
            graphics::plot(PCA(dat))
            return()
        }
    }
    if (method=="chisq"){
        message("1 - Use Chi-square distance (CA, default)")
        message("2 - Use Chi-square distance (MDS)")
        message("3 - Use Bray-Curtis dissimilarity (MDS)")
        response <- readline()
        if (response == "2"){
            # do nothing
        } else if (response == "3"){
            method <- "bray"
        } else {
            graphics::plot(CA(dat))
            return()
        }
    }
    if (methods::is(dat,"distributional") & length(dat$err)>0){
        message("Choose:\n",
                "1 - Kolmogorov-Smirnov distance\n",
                "2 - Kuiper distance\n",
                "3 - Sircombe-Hazelton distance [default]")
        response <- readline()
        if (response == "1") method <- "KS"
        else if (response == "2") method <- "Kuiper"
        else method <- "SH"
    }
    if ( (methods::is(dat,"distributional") & length(dat$err)==0) |
         methods::is(dat,"varietal") ){
        message("Choose:\n",
                "1 - Kolmogorov-Smirnov distance [default]\n",
                "2 - Kuiper distance")
        response <- readline()
        if (response == "2") method <- "Kuiper"
        else if (response == "3") method <- "SH"
        else method <- "KS"            

    }
    mymds <- MDS(dat,classical,method=method)
    if (mymds$stress < 0.05){
        message("Warning: extremely low stress (=",mymds$stress,").\n",
                "Looks like you\'re overfitting your data!")
    }
    if (!method %in% c("bray","SH")){
        if (mymds$stress < 0.05){
            message("Do you want to switch to classical scaling?")
            response <- readline("[Y or n]: ")
            if (response %in% c("n","N")) classical <- FALSE
            else classical <- TRUE
        }
        if (classical){
            mymds <- MDS(dat,classical,method=method)
        }
    }
    thennlines <- FALSE
    thepch <- NA
    thecex <- 1
    thepos <- NULL
    while (TRUE){
        message("Options:\n",
                "1 - Add nearest neighbour lines\n",
                "2 - Change plot character\n",
                "3 - Change size of plot character\n",
                "4 - Change position of text label relative to plot character\n",
                "c - Continue")
        response1 <- readline()
        if (response1 == "1"){
            thennlines <- TRUE
        } else if (response1 == "2"){
            thepch <- readline(
                "Specify a plot character [e.g. *, ., o, O, +, NA or 1-25]: ")
            if (grepl("[[:digit:]]",thepch)) thepch <- as.numeric(thepch)
        } else if (response1 == "3"){
            thecex <- as.numeric(readline(
                "Magnification of the default plot character [1 = normal]: "))
        } else if (response1 == "4"){
            thepos <- as.numeric(readline(
                "Position of the text label [1 = below, 2 = left, 3 = above, 4 = right]: "))
        } else {
            break
        }
    }
    graphics::plot(mymds,nnlines=thennlines,pch=thepch,pos=thepos,cex=thecex)
}
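# Non-interactive sketch of the choices offered above ('comp' and 'dist' are
# illustrative names; argument order mirrors the MDS() calls in this function):
#   plot(PCA(comp))                            # Aitchison distance via PCA
#   plot(MDS(comp, FALSE, method="bray"))      # non-metric MDS, Bray-Curtis
#   plot(MDS(dist, FALSE, method="KS"), nnlines=TRUE)  # KS distance + neighbour lines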

gui.3way <- function(doprocrustes=FALSE){
    grDevices::graphics.off()
    if (doprocrustes) command <- "procrustes("
    else command <- "indscal("
    datasets <- gui.get.datasets()
    dnames <- names(datasets)
    for (dname in dnames){
        command <- paste0(command,dname,"=","datasets[[\'",dname,"\']],")
    }
    substr(command,start=nchar(command),stop=nchar(command)) <- ")"
    proc <- eval(parse(text=command))
    graphics::plot(proc)    
}
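# The command string built above evaluates to a call of the form (sketch,
# with placeholder dataset names):
#   procrustes(A=datasets[['A']], B=datasets[['B']], C=datasets[['C']])
# or, for 3-way MDS:
#   indscal(A=datasets[['A']], B=datasets[['B']], C=datasets[['C']])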

gui.save.plots <- function(){
    devices <- grDevices::dev.list()
    for (device in devices){
        fname <- readline(paste0("Enter a name for plot ",device, ": "))
        grDevices::dev.set(device)
        grDevices::dev.copy2pdf(file=paste0(fname,".pdf"))
    }
}

gui.help <- function(){
    while (TRUE){
    message("Choose a help topic:\n",
            " 1 - compositional data\n",
            " 2 - point-counting data\n",
            " 3 - distributional data\n",
            " 4 - varietal data\n",
            " 5 - mineral densities\n",
            " 6 - sample size calculation\n",
            " 7 - cumulative age distributions\n",
            " 8 - kernel density estimation\n",
            " 9 - radial plots\n",
            "10 - Minsorting and SRD correction\n",
            "11 - multidimensional scaling\n",
            "12 - Procrustes analysis and 3-way MDS\n",
            " c - continue")
        response <- readline()
        if (response == "1"){
            message("Formatting requirements of compositional data files:\n",
                    "---------------------------------------------------\n",
                    "A compositional data file consists of a comma separated table\n",
                    "in which the rows correspond to samples and the columns to categories.\n",
                    "For example:\n",
                    "sample,SiO2,Al2O3,Fe2O3,MgO,CaO,Na2O,K2O,TiO2,P2O5,MnO\n",
                    "N1,82.54,6.14,3.18,1.65,2.24,1.16,1.35,0.44,0.11,0.06\n",
                    "N2,83.60,6.42,2.55,1.25,1.83,1.21,1.48,0.36,0.08,0.04\n",
                    "N3,79.07,7.26,4.39,1.95,2.54,1.23,1.42,0.63,0.09,0.08\n",
                    "N4,89.02,5.27,1.09,0.33,0.48,0.90,1.61,0.17,0.04,0.02\n",
                    "N5,88.46,5.36,1.39,0.29,0.52,0.84,1.64,0.27,0.03,0.02\n",
                    "N6,83.61,6.83,2.19,0.87,1.45,1.15,1.87,0.32,0.05,0.03\n",
                    "N7,85.37,6.10,2.10,0.86,1.29,1.05,1.63,0.29,0.06,0.03\n",
                    "N8,65.16,6.74,15.67,2.22,3.08,0.78,1.11,3.46,0.04,0.24\n",
                    "N9,78.19,7.19,6.90,0.72,1.35,1.11,1.90,1.47,0.02,0.11\n",
                    "N10,80.65,7.86,3.16,1.09,1.64,1.39,2.42,0.54,0.04,0.06\n",
                    "N11,82.27,6.55,3.53,1.48,2.02,1.15,1.61,0.52,0.06,0.07\n",
                    "N12,74.30,7.72,3.42,2.35,5.46,1.48,1.76,0.49,0.11,0.06\n",
                    "T8,85.70,5.89,1.82,0.81,1.44,1.12,1.67,0.23,0.07,0.03\n",
                    "T13,82.54,6.02,2.30,1.42,3.07,1.19,1.46,0.34,0.11,0.04\n",
                    "N13,79.96,6.41,3.19,1.31,2.10,1.09,1.62,0.51,0.06,0.05\n",
                    "N14,73.62,9.96,4.07,2.01,3.45,1.86,2.29,0.44,0.10,0.07\n")
        } else if (response == "2"){
            message("Formatting requirements of point-counting data files:\n",
                    "---------------------------------------------------\n",
                    "A compositional data file consists of a comma separated table\n",
                    "in which the rows correspond to samples and the columns to categories.\n",
                    "For example:\n",
                    "sample,zr,tm,rt,TiOx,sph,ap,ep,gt,st,and,ky,sil,amp,cpx,opx\n",
                    "N1,1,1,0,0,0,1,10,24,2,0,0,0,1,163,1\n",
                    "N2,0,0,1,0,3,1,11,18,1,0,0,1,3,170,1\n",
                    "N3,1,0,0,1,1,0,9,21,1,0,0,0,10,157,1\n",
                    "N4,0,2,4,1,4,0,59,54,0,1,0,0,19,54,2\n",
                    "N5,1,2,2,5,12,2,91,43,5,2,0,0,32,10,1\n",
                    "N6,3,1,2,1,2,0,11,12,1,0,0,0,31,139,3\n",
                    "N7,4,0,1,0,0,0,12,19,0,1,0,0,10,155,1\n",
                    "N8,5,0,0,0,2,0,23,46,1,0,0,1,11,113,0\n",
                    "N9,10,2,0,0,9,0,26,80,2,0,0,0,18,56,0\n",
                    "N10,2,0,0,0,4,0,16,28,2,0,1,0,34,126,0\n",
                    "If you want to use the Minsorting function and SRD correction,\n",
                    "it is important to make sure that the component labels are consistent with\n",
                    "the densities table.\n")
        } else if (response == "3"){
            message("Formatting requirements for distributional data files:\n",
                    "-----------------------------------------------------\n",
                    "A distributional data file consists of a comma separated table\n",
                    "in which the columns correspond to different samples, each of which\n",
                    "may contain a different number of single grain analyses.\n",
                    "For example:\n",
                    "N1,N2,N3,N4,N5,N6,N7,N8,N9,N10\n",
                    "645.4,2764.6,418.6,1228.4,1227,605.1,931.2,2440.3,2117.6,2727.2\n",
                    "496.9,1998.6,1036.1,1445.7,1269.7,1462.6,980,1246.4,1919.1,768\n",
                    "1000.3,997.7,1118.4,1400.5,2127.1,1240.1,1383.9,1131.9,640.9,1294.2\n",
                    "1168.5,1105.5,1221.4,1053.9,1185.1,316.5,2017.4,1116.2,679.7,1462.1\n",
                    "2263.5,620.6,944.7,2073.4,362.6,1524.5,1353.8,1985.7,732.3,1924\n",
                    "1878.6,1133.7,937.8,1107.1,1114.6,1141.3,948.2,1055.6,1190.3,1154.3\n",
                    "769.8,583.6,982,1386.1,701.2,1521.5,618.5,296.2,2005.4,1350.8\n",
                    "1161.7,1216.6,752.6,1221.8,1064.5,1166.7,2104.7,556.1,529.7,538.5\n",
                    "519.4,1181.8,1240.3,871,1133.7,660.1,307.2,815.9,2088.8,1412.2\n",
                    "1213.3,1203.7,1278.2,1418,938.7,675.5,2052.9,2266.4,1584.3,1577.6\n",
                    "271.3,1251.2,1088.2,1853.9,991.8,1211.4,1977.3,1094.6,995.4,2809.5\n",
                    "1065.4,1129.1,2041,2905.9,2039.6,1391.2,1064.2,634.9,1069,659.3\n",
                    "1114.6,,577.8,1928.9,1064.5,2076.7,703,981.3,1858.4,1302.3\n",
                    "998.7,,976.9,728.5,698.1,,1137.6,1088,1157.4,1135.8\n",
                    "603.3,,1411.5,1371.7,1507.9,,1325,670.6,215.9,973.3\n",
                    "465.9,,547.5,814.7,668,,583.8,663.5,,1194.1\n",
                    "489.4,,791.5,,1279.3,,1120.2,1767.5,,1157.1\n",
                    "744.5,,2160.2,,1386.9,,1989.2,,,811.5\n",
                    "1287.4,,1043.6,,1210.8,,1163.4,,,1554.9\n",
                    "It is also possible (though optional) to specify the analytical\n",
                    "uncertainties of these measurements. They are stored in a separate\n",
                    ".csv file with exactly the same format as the measurement as shown above.\n")
        } else if (response == "4"){
            message("Formatting requirements for varietal data files:\n",
                    "------------------------------------------------\n",
                    "A varietal data file looks like a compositional data table, \n",
                    "with the main difference being that the rownames do not correspond \n",
                    "to samples but to individual grains. The prefixes of these grain \n",
                    "names must contain the sample names. For example:\n",
                    "grain,Pb,Th,U,La,Ce,Pr,Nd,Sr,Y,Lu\n",
                    "ACA_1,16.262,90.9,76.3,615,1972,281.6,1359,319.6,1188,16.87\n",
                    "ACA_2,8.195,58.7,25.52,647,1489,165.2,703,253.9,339.6,3.39\n",
                    "ACA_3,6.114,0.058,2.762,12.84,56.2,11.73,89.6,65.3,2019,16.69\n",
                    "ACA_4,10.085,1.93,3.14,36.6,137.4,24.8,138,617,125.2,2.06\n",
                    "ACA_5,14.351,99.4,39.5,582,1871,267,1250,280.6,1095,15.18\n",
                    "ACA_6,16.902,146.3,70.6,680,1886,249.9,1176,429.2,834,11.49\n",
                    "ACA_7,5.473,1.23,3.104,170.4,785,149.5,930,370.6,1127,12.91\n",
                    "ACA_8,8.306,4.17,29.34,19.02,145.9,46.4,450,508,2639,22.85\n",
                    "ACA_9,9.409,17.62,14.92,390,1225,166.2,820,242.8,481,5.26\n",
                    "ACA_10,11.383,29.8,51.9,214,956,180.7,1090,260.4,1413,18.22\n",
                    "ACA_11,17.56,73.7,32.2,695,2110,302,1467,367,1004,11.51\n",
                    "ACA_12,11.044,25.57,54.3,71.8,288.4,66.8,548,298.3,3237,39.01\n",
                    "BUR_1,10.79,0.167,3.54,88.3,308,56.1,338,467.1,306,3.3\n",
                    "BUR_2,22.44,115.6,82,882,1743,189.6,824,378,610,9.33\n",
                    "BUR_3,5.645,12.62,3.56,0.39,1.28,0.266,3.35,447,44.4,3.4\n",
                    "BUR_4,29.2,208.8,92.8,1193,2998,359.5,1623,374.4,1469,26.07\n",
                    "BUR_5,6.689,0.0043,1.473,22.28,131.1,31.8,220.3,419.7,154.1,1.152\n",
                    "BUR_6,10.622,85.2,29.6,729,1700,189,780,301.8,326,3.85\n",
                    "BUR_7,10.245,0.0038,0.601,2.17,9.62,2.04,17.3,394,44.3,0.894\n",
                    "BUR_8,15.12,0.015,0.0429,3.39,8.46,2.09,13.92,577,76.5,1.864\n",
                    "BUR_9,7.732,12.92,5.67,281.5,718,94.2,438,306.4,385,6.29\n",
                    "BUR_10,10.84,48.6,34.5,503.9,791,71,262.3,270.3,160.6,3.32\n",
                    "and so forth. ")
        } else if (response == "5"){
            message("Formatting requirements for mineral density files:\n",
                    "-------------------------------------------------\n",
                    "The Minsorting function and SRD correction require a table of\n",
                    "mineral densities. The provenance package comes with a default table\n",
                    "but it is also possible to specify a different set of densities. It\n",
                    "is important that the labels used for the different components in this\n",
                    "table are consistent with the compositional data files used elsewhere.\n ",
                    "For example:\n",
                    "Q,F,Lv,Ls,Lm,mica,opaques,zr,tm,rt,sph,ap,\n",
                    "mon,oth,ep,gt,ctd,st,and,ky,sil,amp,px,ol\n",
                    "2.65,2.61,2.6,2.65,2.75,2.4,5,4.65,3.15,4.25,3.5,3.2,\n",
                    "5.15,3.5,3.45,4,3.6,3.75,3.15,3.6,3,3.2,3.3,3.35\n")
        } else if (response == "6"){
            message("Sample size calculations:\n",
                    "------------------------\n",
                    "On the most basic level, provenance analysis requires the geologist to\n",
                    "identify certain properties in a representative number of grains from\n",
                    "each sample. The question then arises how many grains constitute a\n",
                    "'representative' number of grains. The answer to this question depends\n",
                    "on the geological problem of interest. The sample size calculators implemented\n",
                    "in provenance() assume that we are interested in collecting at least one grain\n",
                    "from every fraction greater than f=1/N, where N is an integer denoting the\n",
                    "number of fractions. Using simple combinatorics, it is possible to calculate:\n",
                    "(a) the number of grains n required to ensure that all N fractions were sampled\n",
                    "at least once with p% certainty.\n",
                    "(b) the largest fraction f of which it is p% certain that all were sampled\n",
                    "at least one out of n grains.\n",
                    "(c) the likelihood that a sample of size n has missed at least one\n",
                    "fraction of size f.\n",
                    "\nCiteable references:\n",
                    "Vermeesch, P., 2004. How many grains are needed for a provenance study?\n",
                    "Earth and Planetary Science Letters 224, 441-451.\n",
                    "Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R package for\n",
                    "statistical provenance analysis. Sedimentary Geology,\n",
                    "doi:10.1016/j.sedgeo.2016.01.009.\n")
        } else if (response == "7"){
            message("Cumulative Age Distributions (CADs):\n",
                    "-----------------------------------\n",
                    "The probability distribution of detrital zircon U-Pb ages or any other\n",
                    "distributional dataset may be estimated using histograms or kernel density\n",
                    "estimates (KDEs). For a sample of limited size, these estimates never exactly\n",
                    "agree with the true age distribution, but are smooth approximation thereof.\n",
                    "In contrast, the Cumulative Age Distribution (also known as the 'Empirical\n",
                    "Cumulative Distribution Function) is a method to visualise distributional\n",
                    "datasets without the need for any smoothing.\n",
                    "\nCiteable references:\n",
                    "Vermeesch, P., 2007. Quantitative geomorphology of the White Mountains\n",
                    "(California) using detrital apatite fission track thermochronology.\n",
                    "Journal of Geophysical Research (Earth Surface) 112, 3004. doi:10.1029/2006JF000671.\n",
                    "Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R package for\n",
                    "statistical provenance analysis. Sedimentary Geology,\n",
                    "doi:10.1016/j.sedgeo.2016.01.009.\n")
        } else if (response == "8"){
            message("Kernel Density Estimation (KDE):\n",
                    "-------------------------------\n",
                    "A large body of statistical literature has been written on the subject\n",
                    "of probability density estimation. The most widely used tools are the\n",
                    "traditional histogram, which is discrete and discontinuous, and the Kernel\n",
                    "Density Estimator (KDE), which is a smooth and continuous alternative.\n",
                    "A KDE is produced by arranging the measurements along a line and stacking\n",
                    "a so-called 'kernel' (typically a Gaussian bell curve) of a certain\n",
                    "width (the 'bandwidth') on top of each of them. Multiple samples can be\n",
                    "plotted together, either using automatically selected bandwidths using the\n",
                    "algorithm of Botev et al. (2010), or any other value\n",
                    "\nCiteable references:\n",
                    "Botev, Z. I., Grotowski, J. F., Kroese, D. P., 2010. Kernel density\n",
                    "estimation via diffusion. Annals of Statistics 38, 2916-2957.\n",
                    "Vermeesch, P., 2012. On the visualisation of detrital age distributions.\n",
                    "Chemical Geology 312-313, 190-194. doi:10.1016/j.chemgeo.2012.04.021.\n",
                    "Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R package for\n",
                    "statistical provenance analysis. Sedimentary Geology,\n",
                    "doi:10.1016/j.sedgeo.2016.01.009.\n")
        } else if (response == "9"){
            message("Radial Plots:\n",
                    "-------------------------------\n",
                    "Galbraith (1988)'s radial plot is a graphical means of visualising\n",
                    "'heteroscedastic' data. Originally developed for fission track data,\n",
                    "radial plots can also be used to display point-counting ratios.\n",
                    "They allow a visual assessment of the degree to which counting\n",
                    "uncertainties can explain the observed scatter between multiple ratio\n",
                    "estimates. Radial plots are bivariate scatter diagrams that set out the\n",
                    "normalised and arcsin-transformed ratios against their precision.\n",
                    "The measured ratio values can be read by projecting their respective\n",
                    "data points onto a circular scale on the right hand side of the diagram.\n",
                    "The analytical uncertainty of the individual ratio estimates is obtained\n",
                    "by projecting the top and bottom of a 2-sigma error bar onto that same scale.\n",
                    "Thus, high ratios plot at high (positive) angles to the origin; low ratios\n",
                    "plot at low (negative) angles to the origin; and large (and therefore\n",
                    "precise) samples plot to the right of small (and therefore imprecise)\n",
                    "samples. If binomial counting uncertainty is the only source of scatter,\n",
                    "then approximately 95% of the data should plot within a 2-sigma confidence\n",
                    "band around the origin. If significantly more than 5% of the data plot\n",
                    "outside this confidence band, then the data are said to be overdispersed\n",
                    "with respect to the counting uncertainties. In this case, the package\n",
                    "estimates the 'central' ratio and the overdispersion using a logistic normal assumption.\n",
                    "\nCiteable references:\n",
                    "Galbraith, R., 1988, Graphical display of estimates having differing standard\n",
                    "errors. Technometrics, 30(3): 271-281.\n",
                    "Vermeesch, P., 2018, Statistical models for point-counting data.\n",
                    "Earth and Planetary Science Letters, 501: 1-7\n")
        } else if (response == "10"){
            message("Minsorting and SRD correction:\n",
                    "-----------------------------\n",
                    "To facilitate the comparison of detrital modes for provenance analysis\n",
                    "or stratigraphic correlation, we need to first remove the often significant\n",
                    "compositional differences among sediment samples that are caused by\n",
                    "hydrodynamic processes in the depositional environment. Intersample modal\n",
                    "variability can be corrected for by a simple principle. In the absence of\n",
                    "provenance changes and environmental bias, the weighted average\n",
                    "Source Rock Density (SRD) of terrigenous grains should be equal,\n",
                    "for each sample and each grain-size class of each sample, to the\n",
                    "weighted average density of source rocks. By correcting relative\n",
                    "abundances of detrital minerals in proportion to their densities,\n",
                    "we can restore the appropriate SRD index for any provenance and\n",
                    "subprovenance type in each sample or grain-size class (Garzanti et al., 2009).\n",
                    "Within a sample of a certain average grain size, the denser minerals will\n",
                    "tend to be enriched in the finer fraction, and the lighter minerals in the\n",
                    "coarser fraction. This so-called principle of 'settling equivalence'\n",
                    "(Garzanti et al., 2008), is implemented in the Minsorting function, which is\n",
                    "based on the Excel spreadsheet of Resentini et al. (2013).\n",
                    "\nCiteable references:\n",
                    "Garzanti, E., Ando, S., Vezzoli, G., 2009. Grain-size dependence of\n",
                    "sediment composition andenvironmental bias in provenance studies.\n",
                    "Earth and Planetary Science Letters 277, 422-432.\n",
                    "Garzanti, E, Ando, S and Vezzoli, G. Settling equivalence of detrital\n",
                    "minerals and grain-size dependence of sediment composition.\n",
                    "Earth and Planetary Science Letters 273.1 (2008): 138-151.\n",
                    "Resentini, A., Malusa, M.G., Garzanti, E., 2013. MinSORTING: An Excel\n",
                    "worksheet for modelling mineral grain-size distribution in sediments,\n",
                    "with application to detrital geochronology and provenance studies.\n",
                    "Computers & Geosciences 59, 90-97.\n",
                    "Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R package for\n",
                    "statistical provenance analysis. Sedimentary Geology,\n",
                    "doi:10.1016/j.sedgeo.2016.01.009.\n")
        } else if (response == "11"){
            message("Multidimensional Scaling:\n",
                    "------------------------\n",
                    "An increasing number of provenance studies are based on not just a few\n",
                    "but many samples. The large datasets resulting from such studies call\n",
                    "for a dimension-reducing technique such as Multi-Dimensional Scaling (MDS).\n",
                    "Given a dissimilarity matrix (i.e., a table of pairwise distances),\n",
                    "MDS constructs a 'map' on which 'similar' samples cluster closely\n",
                    "together and 'dissimilar' samples plot far apart. provenance() implements\n",
                    "the following dissimilarity measures:\n",
                    "Compositional data:\n",
                    "(a) the Aitchison distance (all components are strictly positive)\n",
                    "(b) the Bray-Curtis distance (some zero components)\n",
                    "Distributional data:\n",
                    "(c) the Kolmogorov-Smirnov distance\n",
                    "(d) the Sircombe-Hazelton distance\n",
                    "All these dissimilarities can be fitted using a 'non-metric' MDS\n",
                    "algorithm, which aims to preserve the rank order of the relative distances.\n",
                    "For dissimilarities (a) and (c), it is also possible to perform\n",
                    "'classical MDS', which aims to represent the actual values of the dissimilarities\n",
                    "Finally, for the Aitchison distance (a) which turns out to be a Euclidean\n",
                    "distance, it can be shown that MDS is equivalent to Principal Component Analysis\n",
                    "(PCA), in which the configuration can be plotted as a 'compositional biplot',\n",
                    "providing insights into the genetic origin of the observed intersample variability\n",
                    "\nCiteable references:\n",
                    "Vermeesch, P., 2013. Multi-sample comparison of detrital age distributions.\n",
                    "Chemical Geology 341, 140-146.\n",
                    "Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R package for\n",
                    "statistical provenance analysis. Sedimentary Geology,\n",
                    "doi:10.1016/j.sedgeo.2016.01.009.\n")
        } else if (response == "12"){
            message("Procrustes analysis and 3-way MDS:\n",
                    "---------------------------------\n",
                    "Procrustes analysis and 3-way MDS are simple yet powerful tools\n",
                    "to extract geological insights from large, multi-method provenance datasets.\n",
                    "Procrustes analysis is the process by which a combination of\n",
                    "shape-preserving transformations is used to match the shape of\n",
                    "one object with that of another. Generalised Procrustes Analysis\n",
                    "(GPA) is a generalisation of this procedure to multiple objects.\n",
                    "In a provenance context, GPA extracts a single 'consensus' view from\n",
                    "a collection of MDS configurations, by rotating, reflecting and scaling\n",
                    "them to minimise a least squares criterion.\n",
                    "As the name suggests, 3-way MDS is a generalisation of the MDS method from\n",
                    "two-to three-dimensional dissimilarity matrices. provenance() implements\n",
                    "the INdividual Differences SCALing (INDCAL) algorithm, which is the oldest\n",
                    "and still most widely used 3-way MDS algorithm (de Leeuw and Mair, 2009).\n",
                    "In contrast with 2-way MDS and GPA, INDSCAL produces not one but two\n",
                    "pieces of graphical output: the 'group configuration' and the 'source weights'.\n",
                    "\nCiteable references:\n",
                    "de Leeuw, J., Mair, P., 2009. Multidimensional scaling using majorization:\n",
                    "The R package smacof. Journal of Statistical Software 31, 1-30.\n",
                    "URL: https://www.jstatsoft.org/v31/i03/\n",
                    "Vermeesch, P., Garzanti, E., 2015. Making geological sense of 'Big Data'\n",
                    "in sedimentary provenance analysis. Chemical Geology 409, 20-27.\n",
                    "Vermeesch, P., Resentini, A. and Garzanti, E., 2016. An R package for\n",
                    "statistical provenance analysis. Sedimentary Geology,\n",
                    "doi:10.1016/j.sedgeo.2016.01.009.\n"
                    )
        } else {
            break
        }
    }
}

gui.open.compositional <- function(counts=FALSE){
    if (counts) message('Open a point-counting dataset:')
    else message('Open a compositional dataset:')
    fname <- file.choose()
    if (counts) dat <- read.counts(fname,check.names=FALSE)
    else dat <- read.compositional(fname,check.names=FALSE)
    while (TRUE){
        message("1 - Apply SRD correction\n",
                "2 - Subset components\n",
                "3 - Amalgamate components\n",
                "4 - Subset samples\n",
                "c - Continue")
        response <- readline()
        if (response == "1"){
            dat <- gui.SRD(dat)
        } else if (response == "2"){
            dat <- gui.subset.components(dat)
        } else if (response == "3"){
            dat <- gui.amalgamate.components(dat)
        } else if (response == "4"){
            dat <- gui.subset.samples(dat)
        } else {
            break
        }
    }
    dat
}
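# Non-interactive sketch of the same workflow (file name, component labels
# and the 2.71 g/cm3 target density are illustrative):
#   comp <- read.compositional('compositions.csv', check.names=FALSE)
#   comp <- subset(comp, components=c('Q','F','Lv','Ls'))
#   comp <- restore(comp, provenance::densities, target=2.71)  # SRD correction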

gui.open.distributional <- function(){
    message('Open a distributional dataset:')
    fname <- file.choose()
    errorfile <- NA
    dat <- read.distributional(fname,errorfile,check.names=FALSE)
    while (TRUE){
        message("Options:\n",
                "1 - Subset samples\n",
                "2 - Combine samples\n",
                "3 - Load analytical uncertainties\n",
                "c - Continue")
        response <- readline()
        if (response == "1"){
            dat <- gui.subset.samples(dat)
        } else if (response == "2"){
            dat <- gui.combine.samples(dat)
        } else if (response == "3"){
            errorfile <- file.choose()
            dat <- read.distributional(fname,errorfile,check.names=FALSE)
        } else {
            return(dat)
        }
    }
}
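# Non-interactive sketch (file names are illustrative; the second argument
# holds the optional table of analytical uncertainties):
#   dist <- read.distributional('ages.csv', 'age_errors.csv', check.names=FALSE)
#   dist <- subset(dist, select=c('N1','N2','N3'))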

gui.open.varietal <- function(){
    message('Open a varietal dataset:')
    fname <- file.choose()
    message("Options:\n",
            "1 - Manually enter the sample names\n",
            "2 - First n characters of the row names\n",
            "3 - Automatically guess the sample names")
    response <- readline()
    if (response == "1"){
        message("Enter the sample names as a comma-separated list.")
        response <- readline()
        nospace <- gsub(" ","",response) # get rid of spaces
        snames <- paste0("c('",gsub(",","','",nospace),"')")  # add apostrophes
        eval(parse(text=paste0("dat <- read.varietal(fname,snames=",snames,")")))
    } else if (response == '2'){
        message("How many characters long is the sample name prefix?")
        response <- readline()
        dat <- read.varietal(fname,snames=as.numeric(response))
    } else {
        dat <- read.varietal(fname)
    }
    while (TRUE){
        message("1 - Subset components\n",
                "2 - Amalgamate components\n",
                "3 - Subset samples\n",
                "c - Continue [default]")
        response <- readline()
        if (response == "1"){
            dat <- gui.subset.components(dat)
        } else if (response == "2"){
            dat <- gui.amalgamate.components(dat)
        } else if (response == "3"){
            dat <- gui.subset.samples(dat)
        } else {
            break
        }
    }
    return(dat)
}
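# Non-interactive sketch (file name illustrative): with grain labels such as
# 'ACA_1' and 'BUR_2' as in help topic 4, the sample names can be supplied
# explicitly or taken from a fixed-length prefix, e.g.
#   var <- read.varietal('zircon_chemistry.csv', snames=c('ACA','BUR'))
#   var <- read.varietal('zircon_chemistry.csv', snames=3)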

gui.SRD <- function(dat){
    mydens <- gui.load.densities()
    response <- readline("Enter target density in g/cm3: ")
    restore(dat,mydens,target=as.numeric(response))
}

gui.subset.components <- function(dat){
    complist <- paste(colnames(dat$x),collapse=',')
    message("Select a subset group of components from ",
            "the following list:\n", complist ,"\n",
            "Enter as a comma separated list of labels:")
    response <- readline()
    subcomp <- gsub(" ","",response) # get rid of spaces
    subcomp <- paste0("c('",gsub(",","','",subcomp),"')")  # add apostrophes
    eval(parse(text=paste0("subset(dat,components=",subcomp,")")))
}

gui.combine.samples <- function(dat){
    samples <- names(dat$x)
    combinations <- ""
    while (TRUE){
        samplist <- paste(samples,collapse=',')
        message("Select a group of samples from ",
                "the following list:\n", samplist ,"\n",
                "Enter as a comma separated list of labels ",
                "or click [Return] to exit:")
        response <- readline()
        if (response == ""){
            break
        } else {
            subsamples <- strsplit(response,',')[[1]]
            subsamplist <- gsub(" ","",response) # get rid of spaces
            subsamplist <- paste0("c('",gsub(",","','",subsamplist),"')")  # add apostrophes
            sampname <- readline("Name of the combined sample? ")
            combinations <- paste0(combinations,sampname,"=",subsamplist,",")
            samples <- c(sampname,samples[which(!samples %in% subsamples)])
        }
    }
    leftovers <- samples[which(samples %in% names(dat$x))]
    for (leftover in leftovers){
        combinations <- paste0(combinations,leftover,"=c('",leftover,"'),")
    }
    # replace last comma with bracket
    substr(combinations,start=nchar(combinations),stop=nchar(combinations)) <- ")"
    eval(parse(text=paste0("combine(dat,",combinations)))
}
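# The string assembled above evaluates to a call of the form (sketch, with
# placeholder sample names):
#   combine(dat, AB=c('A','B'), C=c('C'))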

gui.amalgamate.components <- function(dat){
    components <- colnames(dat$x)
    command <- "amalgamate(dat,"
    while (TRUE){
        complist <- paste(components,collapse=',')
        message("Select a group of components from ",
                "the following list:\n", complist ,"\n",
                "Enter as a comma separated list of labels ",
                "or click [Return] to exit:")
        response <- readline()
        if (response == ""){
            break
        } else {
            subcomponents <- strsplit(response,',')[[1]]
            subcomplist <- gsub(" ","",response) # get rid of spaces
            subcomplist <- paste0("c('",gsub(",","','",subcomplist),"')")  # add apostrophes
            compname <- readline(paste0("Name of the amalgamated component? "))
            command <- paste0(command,compname,"=",subcomplist,",")
            components <- c(compname,components[which(!components %in% subcomponents)])
        }
    }
    leftovers <- components[which(components %in% colnames(dat$x))]
    for (leftover in leftovers){
        command <- paste0(command,leftover,"=c('",leftover,"'),")
    }
    substr(command,start=nchar(command),stop=nchar(command)) <- ")" # replace last comma with bracket
    eval(parse(text=command))
}
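# The command string assembled above evaluates to a call of the form (sketch,
# with placeholder component names):
#   amalgamate(dat, L=c('Lv','Ls','Lm'), Q=c('Q'), F=c('F'))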

gui.subset.samples <- function(dat){
    samplist <- paste(names(dat),collapse=',')
    message("Select a subset group of samples from ",
            "the following list:\n", samplist ,"\n",
            "Enter as a comma separated list of labels:")
    response <- readline()
    subsamp <- gsub(" ","",response) # get rid of spaces
    subsamp <- paste0("c('",gsub(",","','",subsamp),"')")  # add apostrophes
    eval(parse(text=paste0("subset(dat,select=",subsamp,")")))
}

gui.get.kdes <- function(dat){
    from <- NA
    to <- NA
    bw <- NA
    adaptive <- TRUE
    logscale <- FALSE
    normalise <- FALSE
    samebandwidth <- FALSE
    message1 <- paste0("Options:\n",
                       "1 - Set minimum age\n",
                       "2 - Set maximum age\n",
                       "3 - Turn off adaptive density estimation\n",
                       "4 - Plot on a log scale\n",
                       "5 - Set bandwidth\n")
    message2 <- paste0("6 - Use the same bandwidth for all samples\n",
                       "7 - Normalise area under the KDEs\n")
    message3 <- "c - Continue"
    while (TRUE){
        if (length(names(dat))>1){
            message(message1,message2,message3)
        } else {
            message(message1,message3)
        }
        response <- readline()
        if (response == "1"){
            from <- as.numeric(readline("Enter a minimum age limit for the time axis: "))
        } else if (response == "2"){
            to <- as.numeric(readline("Enter a maximum age limit for the time axis: "))
        } else if (response == "3"){
            adaptive <- FALSE
        } else if (response == "4"){
            logscale <- TRUE
        } else if (response == "5"){
            bw <- as.numeric(readline("Enter a custom value for the bandwidth: "))
        } else if (response == "6"){
            samebandwidth <- TRUE
        } else if (response == "7"){
            normalise <- TRUE
        } else {
            break
        }
    }
    if (length(names(dat))>1){ # more than one sample?
        out <- KDEs(dat,from=from,to=to,bw=bw,adaptive=adaptive,
                    normalise=normalise,log=logscale,samebandwidth=samebandwidth)
    } else {
        out <- KDE(dat$x[[1]],from=from,to=to,bw=bw,adaptive=adaptive,log=logscale)
    }
    out
}

gui.load.densities <- function(){
    response <- readline("Open a density file [y] or use default values [N]? ")
    if (response %in% c('Y','y')){
        fname <- file.choose()
        out <- read.densities(fname,check.names=FALSE)
    } else {
        out <- provenance::densities
    }
    out
}

gui.get.datasets <- function(multiple=TRUE,kdes=FALSE,
                             includedistributional=TRUE,
                             includevarietal=includedistributional){
    datasets <- list()
    while (TRUE){
        if (multiple){
            tekst <- c("1 - Add a compositional dataset\n",
                       "2 - Add a point-counting dataset")
            if (includedistributional){
                tekst <- c(tekst,"\n","3 - Add a distributional dataset")
            }
            if (includevarietal){
                tekst <- c(tekst,"\n","4 - Add a varietal dataset")
            }
        } else {
            tekst <- c("1 - Load a compositional dataset\n",
                       "2 - Load a point-counting dataset")
            if (includedistributional){
                tekst <- c(tekst,"\n","3 - Load a distributional dataset")
            }
            if (includevarietal){
                tekst <- c(tekst,"\n","4 - Load a varietal dataset")
            }
        }
        tekst <- c(tekst,"\n","c - Continue")
        message(tekst)
        response <- readline()
        if (response == '1'){
            dat <- gui.open.compositional(counts=FALSE)
        } else if (response == '2'){
            dat <- gui.open.compositional(counts=TRUE)
        } else if (response == '3'){
            dat <- gui.open.distributional()
            if (kdes) dat <- gui.get.kdes(dat)
        } else if (response == '4'){
            dat <- gui.open.varietal()
        } else {
            break
        }
        if (multiple) datasets[[dat$name]] <- dat
        else return(dat)
    }
    datasets
}

gui.get.dataset <- function(includedistributional=TRUE){
    gui.get.datasets(multiple=FALSE,includedistributional=includedistributional)
}
