R/make_table.R

Defines functions make.table

make.table <-
  function(
    d,
    formula,
    table.type = 'Bootstrap',
    names = NA,
    value = "value",
    se = "se",
    out.file = "out.tex",
    stars = c(1, 1, 1),
    round.digits = 2,
    h.0 = 0,
    align.cols = NA,
    align.nests = c(NA),
    hline = F,
    remove.duplicates = T,
    empty.fill = '$-$',
    digits = 2,
    nsmall = 2,
    signif.boolean = FALSE,
    sed.files = NA,
    left.column.label = '',
    include.col.names = TRUE,
    vertical.space = TRUE
  ){
    #d: melted data set with value and se rows for regression table or just value row for bootstrap, summary tables
    #table.type: has three values: Bootsrap, Regression and Summary
    #formula: specifices the nesting structure: row1+row2~col1+col2
    #names: is a text file for varaible names in the latex syntax with three columns: variable, old, latex seperated by tab. should be placed inside Datasource/tables
    #value: is the value row for all table types
    #se: is the standard errors for Regression table or it's generated by the bootsrap samples which should be indexed by samp
    #align.cols: character for the latex alignment of the columns, like 'ccc' or 'lgg' for 3 columns
    #align.nests: vector of characters for the latex alignment of the \multicolumn nests above the entries. If there is 3 column varaibles then we should specify like c('c','c','c')
    #hline: if hlines should be added to the table
    #empty.fill: The default value for missing table cells
    #nsmall: Minimum number of decimal places to print
    #digits: Number of significant digits in decimal
    #signif.boolean: Boolean for if we use sigfig function below, rather than conventional formatting
    #sed.files: sed files to run on output table, to change it further
    #left.column.label is a label for the leftmost column
    #include.col.names is a boolean for whether you want to include the column names
    library(stringr)
    col.names <- function(s, old.names, new.names){
      for(i in 1:length(old.names)) names(s)[names(s)==old.names[i]]<-new.names[i]
      s
    }

    if(signif.boolean) {
      round.fn <- function(vec, n=2){
        formatC(
          signif(vec, digits=n),
          digits=n,
          format="fg",
          flag="#",
          big.mark = ','
        )
      }
    } else {
      round.fn <- function(vec, n=2){
        vec %>%
          round(n) %>%
          format(
            digits = n,
            nsmall = n,
            drop0trailing = FALSE,
            scientific = FALSE,
            big.mark = ','
          )
      }
    }

    d <- as.data.frame(d)
    d <- col.names(d, c(value, se), c('value','se'))
    stars <- qnorm(stars)
    asterisk <- data.frame(p = c(0,1,2,3), asterisk = c('','^{*}','^{**}','^{***}'))
    row.vars <- all.vars(formula[[2]])
    nrows <- length(row.vars)
    rows <- paste(row.vars, "", collapse='+ ')
    col.vars <- all.vars(formula[[3]])
    ncols <- length(col.vars)
    pos <- list()
    command <- list()
    names.col <- data.frame()
    names.row <- data.frame()
    names.change <- !is.na(names)
    if(names.change){
      names <- read.delim(names, header=T, sep='\t', stringsAsFactors=FALSE)
      names.col <- filter(names, variable %in% col.vars)
      names.row <- filter(names, variable %in% row.vars)
    }
    if(is.na(align.nests[1])) align.nests <- rep('c', ncols)

    #Headers
    relation <- plyr::ddply(d, col.vars, function(s) data.frame(column = 1))
    header <- relation
    if(ncols > 1){
      for(i in ncols:2){
        var <- col.vars[i]
        for(j in (i-1):1){
          var2 <- col.vars[j]
          header[, var] <- paste(header[, var2], header[, var], sep=' ')
        }
      }
    }
    col.order <- header[, ncols]

    #Latex column names
    latex <- plyr::ldply(seq(ncols), function(i){
      var <- col.vars[i]
      x <- header
      x$.variable <- relation[, var]
      x <- plyr::ddply(x, c(var, '.variable'), function(s) data.frame(n = nrow(s)))
      x$latex <- as.character(x[, '.variable'])
      x$.var <- var
      col.names(x, var, '.value')
    })

    if(names.change & nrow(names.col) > 1){
      for(i in 1:nrow(latex)){
        var <- latex[i, '.var']
        .variable <- latex[i, '.variable']
        for(j in 1:nrow(names.col)){
          var2 <- names.col[j, 'variable']
          old <- names.col[j, 'old']
          .latex <- names.col[j,'latex']
          if(var == var2 & .variable == old) latex[i, 'latex'] <- .latex
        }
      }
    }

    #Fill pos and command
    for(i in 1:ncols){
      var <- col.vars[i]
      pos[i] <- -1
      levels <- length(unique(header[, var]))
      space <- paste(rep('& ', nrows), collapse=' ')
      command[i] <- ''
      if(hline & i == 1) command[i] <- paste(command[[i]],'\\hline', sep='')
      command[i] <- paste(command[[i]], '\\up', sep=' ')
      if(i == ncols) command[i] <- paste(command[[i]],'\\down', sep='')
      command[i] <- paste(command[[i]], space, sep=' ')
      for(j in 1:levels){
        var2 <- as.character(unique(header[, var])[j])
        .cols <- latex[latex$.value == var2 & latex$.var == var, 'n']
        .latex <- latex[latex$.value == var2 & latex$.var == var,'latex']
        sub <- paste('\\multicolumn{', .cols, sep='')
        sub <- paste(sub, align.nests[i], sep='}{')
        sub <- paste(sub, '}{', sep='')
        sub <- paste(sub, .latex, sep='')
        sub <- paste(sub, '}',sep='')
        if(j == levels) sub <- paste(sub, '\\\\', sep=' ')
        if(j == 1) command[i] <- paste(command[[i]], sub, sep='')
        if(j != 1) command[i] <- paste(command[[i]], sub, sep=' & ')
      }
    }
    command[[1]] = paste0(left.column.label, command[[1]])

    #Bootstrap tables
    if(table.type=='Bootstrap'){
      s <- plyr::ddply(d, names(d)[-which(names(d)%in%c('samp', 'value'))], function(s) data.frame(se=sd(s$value)))
      d <- d %>% filter(samp == 0) %>% dplyr::select(-samp)
      d <- merge(d, s, all.x = T)
    }

    #Paste lower nest names
    if(ncols > 1){
      for(i in (ncols-1):1){
        d[, col.vars[ncols]] <- paste(d[, col.vars[i]], d[, col.vars[ncols]], sep=' ')
      }
    }

    #Add asterisk and se rows
    if(table.type == 'Summary' & class(d$value) != 'character'){
      d <-
        mutate(
          d,
          value = round.fn(vec = value, n = digits)
        )
    } else if(table.type != 'Summary') {

      d <-
        mutate(
          d,
          t = ifelse(se == 0, 0, abs(value - h.0) / se),
          p = (t > stars[1]) + (t > stars[2]) + (t > stars[3]),
          se = round.fn(vec = se, n = digits),
          value = round.fn(vec = value, n = digits)
        )
      d <- merge(d, asterisk)
      d <- mutate(d, value=paste(value, asterisk, sep=''))
      d <- d %>% dplyr::select(-c(t, p, asterisk)) %>% mutate(row.type = 'est')
      s <- mutate(d, row.type = 'se', value=paste0('(', str_trim(se), ')'))
      d <- dplyr::select(rbind(d, s) ,-se)
      rows <- paste(rows, "row.type", sep = "+ ")
    }

    d <- dcast(d, as.formula(paste(rows, "~", col.vars[ncols])), value.var='value', fill = empty.fill)
    if(table.type != 'Summary'){
      d <- d[, c(row.vars, as.character(col.order), 'row.type')]
    } else {
      d <- d[, c(row.vars, as.character(col.order))]
    }

    #Rows
    for(i in 1:nrows){
      var <- row.vars[i]
      d[, var] <- as.character(d[, var])
    }
    #Latex
    if(names.change & nrow(names.row) > 1){
      for(i in 1:nrow(names.row)){
        var <- names.row[i, 'variable']
        old <- names.row[i, 'old']
        .latex <- names.row[i,'latex']
        for(j in 1:nrows){
          var2 <- row.vars[j]
          for(k in 1:nrow(d)){
            .value <- d[k, var2]
            if(var2 == var & .value == old) d[k, var2] <- .latex
          }
        }
      }
    }

    #Add space after standard error rows
    if(table.type != 'Summary'){
      d <- mutate(d, standard.err.space = ifelse(vertical.space & (row.type == 'se'), '\\\\[-12 pt]', ''))
      d[, ncol(d)-2] <- paste0(d[, ncol(d)-2], d$standard.err.space)
      d <- dplyr::select(d, -standard.err.space)
    }

    #Duplicates
    if(remove.duplicates){
      if(table.type != 'Summary') d<-dplyr::select(d, -row.type)
      for(i in 1:nrows){
        var <- row.vars[i]
        for(j in nrow(d):2){
          if(d[j, var] == d[j-1, var]) d[j, var] <- ''
        }
      }
    } else if(table.type != 'Summary'){
      for(i in 1:nrows){
        var <- row.vars[i]
        for(j in nrow(d):2){
          if(d[j, 'row.type'] == 'se') d[j, var] <- ''
        }
      }
      d<-dplyr::select(d, -row.type)
    }


    #Spacing
    d[1,1] <- paste('\\up', d[1,1], sep=' ')
    d[nrow(d),1] <- paste(' \\down', d[nrow(d),1], sep=' ')

    #Align
    if(!is.na(align.cols)) align.cols = paste('l', align.cols, sep='')
    if(is.na(align.cols)) align.cols = paste('l', paste(rep('l', nrows), collapse=''), paste(rep('g', dim(d)[2]-nrows), collapse=''), sep='')

    #Hline
    if(hline) hline.value <- c(0, nrow(d))
    if(!hline) hline.value <- NULL


    #Print
    library('xtable')
    if(include.col.names){
      print(xtable(d, align=align.cols), include.rownames=FALSE, include.colnames=FALSE, floating=FALSE, sanitize.text=identity, add.to.row=list(pos=pos, command=unlist(command)), hline.after=hline.value)
      print(xtable(d, align=align.cols), file=out.file, include.rownames=FALSE, include.colnames=FALSE, floating=FALSE, sanitize.text=identity, add.to.row=list(pos=pos, command=unlist(command)), hline.after=hline.value)
    } else {
      print(xtable(d, align=align.cols), include.rownames=FALSE, include.colnames=FALSE, floating=FALSE, sanitize.text=identity, hline.after=hline.value)
      print(xtable(d, align=align.cols), file=out.file, include.rownames=FALSE, include.colnames=FALSE, floating=FALSE, sanitize.text=identity, hline.after=hline.value, only.contents = TRUE)
    }

    #Run sed files
    if(!is.na(sed.files)){
      plyr::l_ply(sed.files, function(i){
        system(paste("sed -i '' -f", i, out.file))
      })
    }
  }
robertlbray/brayr documentation built on June 25, 2020, 1:06 a.m.