R/humdrumR-class.R

Defines functions getColumn

# Humdrum table  ----

#' Humdrum tables (and their "fields")
#' 
#' In the [humdrumR] package, the fundamental data structure is called a **humdrum table**.
#' A humdrum table encodes all the information in a collection of one or more humdrum-syntax files
#' as a single [data.table][data.table::data.table] 
#' (A `data.table` is an "enhanced" version of R's standard [data.frame]).
#' Humdrum tables are stored "inside" every [humdrumRclass] object that you will work with, and various `humdrumR`
#' functions allow you to study or manipulate the them.
#' If you want to directly access the humdrum table within a [humdrumRclass] object, use the [getHumtab()] function.
#' 
#' 
#' In a humdrum table, by default, humdrum data is organized in a maximally "long" (or "tall")
#' format, with each and every single "token" in the original data represented by a single row in the table.
#' Even multiple-stops---tokens separated by spaces---are broken onto
#' their own rows. Meanwhile, each column in the humdrum table represents a single
#' piece of information associated with each token, which we call a **field**.
#' Throughout this documentation, you should keep in mind that a "token" refers
#' to a *row* in the humdrum table while a "field" refers to a *column*:
#' 
#' * Token = row
#' * Field = column
#' 
#' # Fields:
#' 
#' There are six types of fields in a humdrum table: 
#' 
#' 1. Data fields
#' 2. Structure fields
#' 3. Interpretation fields
#' 4. Formal fields
#' 5. Reference fields
#' 6. Grouping fields
#' 
#' When first created by a call to [readHumdrum()], every
#' humdrum table has at least nineteen fields: one data field (`Token`), two interpretation 
#' fields (`Tandem` and `Exclusive`), three formal fields, and thirteen structure fields. Additional
#' formal, interpretation, or reference fields
#' *may* be present depending on the content of the humdrum file(s), and you can create additional data fields
#' by using [within.humdrumR()][withinHumdrum], [mutate.humdrumR()], or other functions.
#' 
#' ### Data fields:
#' 
#' *Data* fields are used to describe individual data points
#' in humdrum data (as opposed to groups of points). 
#' Every humdrum table starts with a data
#' field called **Token**, which
#' contains `character` strings representing the original strings read from the humdrum files. 
#' Users can create as many additional data fields as they like. Every call to
#' [withinHumdrum()] generates new data fields. 
#' 
#' 
#' 
#' ### Structure fields:
#' 
#' Every humdrum table has thirteen *Structure* fields,
#' which describe where each data token was "located" in the original humdrum data: 
#' which file, which spine, which record, etc.
#' See the vignette on humdrum syntax to fully understand the terms here.
#' 
#' + *File info*:
#'     + `Filename` :: `character`
#'         + The unique name of the humdrum file. This may include an appended path 
#'           if more than one file with the same name were read from different directories 
#'           (see the [readHumdrum()] docs).
#'     + `Filepath` :: `character`
#'         + The full file name (always includes its full path).
#'     + `Label` :: `character`
#'         + A label specified during the call to [readHumdrum()], associated with a particular
#'          `readHumdrum` "REpath-pattern." If no label was specified, patterns are just labeled `"_n"`, where "`n`" is the 
#'          number of the pattern. 
#'     + `File` :: `integer`
#'         + A unique number associated with each file (ordered alphabetically, starting from `1`).
#'     + `Piece` :: `integer`
#'         + A number specifying the number of the *piece* in the corpus. 
#'           This is identical to the `File` field except when
#'           more than one piece were read from the same file.
#' + *Location info*:
#'     + `Spine` :: `integer`
#'         + The spine, numbered (from left-to-right) starting from `1`. 
#'         + This field is `NA` wherever `Global == TRUE`.
#'     + `Path` :: `integer`
#'         + The "spine path." Any time a `*^` spine path split occurs in
#'           the humdrum data, the right side of the split becomes a new "path." The original path
#'           is numbered `0` with additional paths numbered with integers to the right.
#'           (If there are no spine path splits, the `Path` field is all `0`s.)
#'         + This field is always `NA` when `Global == TRUE`. 
#'     + `ParentPath` :: `integer`
#'         + For spine paths (i.e., where `Path > 0`), which path was the parent from
#'           which this path split? Where `Path == 0`, parent path is also `0`.
#'     + `Record` :: `integer`
#'         + The record (i.e., line) number in the original file.
#'     + `DataRecord` :: `integer`
#'         + The *data* record enumeration in the file, starting from `1`.
#'     + `Stop` :: `integer`
#'         + Which token in a multi-stop token, numbered starting from `1`.
#'         + In files with no multi-stops, the `Stop` field is all `1`s.
#'         + This field is always `NA` when `Global == TRUE`.
#'     + `Global` :: `logical`
#'         + Did the token come from a global record (as opposed to a local record)?
#'         + When `Global == TRUE`, the `Spine`, `Path`, and `Stop` fields are always `NA`.
#' + *Token info*:
#'     + `Type` :: `character`
#'         + What type of record is it? 
#'             + `"G"` = global comment. 
#'             + `"L"` = local comment
#'             + `"I"` = interpretation
#'             + `"M"` = measure/barline 
#'             + `"D"` = non-null data 
#'             + `"d"` = null data
#'             + `"E"` = exclusive interpretation
#'             + `"S"` = spine-control tokens (`*^`, `*v`, `*-`)
#'         
#' 
#' 
#' ### Interpretation fields:
#' 
#' *Interpretation* fields describe interpretation metadata in the humdrum file(s).
#' Humdrum interpretations are tokens that "carry forward" to data points after them, unless cancelled out by a
#' subsequent interpretation. (See the humdrum syntax vignette for a detailed explanation.)
#' *All* humdrum data must have an *exclusive* interpretation
#' so humdrum tables always have an `Exclusive` (:: `character`) field indicating the
#' exclusive interpretation associated with each token/row of the `Token` field.
#' 
#' Humdrum data may, or may not, include additional *tandem* interpretations. A universal rule for parsing
#' tandem interpretations is impossible, because A) tandem interpretations can "overwrite" each other and B)
#' users can create their own tandem interpretations. The best we can do in all cases is 
#' identify *all* tandem interpretations that have appeared previously in the spine
#' (counting most recent first). All these previous interpretations are encoded in a single
#' character string in the `Tandem` field (see the [tandem()] docs for details). 
#' If working with non-standard interpretations, users can parse the `Tandem` field using the
#' [tandem()] function. 
#' If no tandem interpretations occur in a file, the `Tandem` field is full of empty strings (`""`).
#' 
#' Fortunately, many tandem interpretations are widely used and standardized, and these 
#' interpretations are known by `humdrumR`. Recognized interpretations (such as `*clefG4` and `*k[b-]`)
#' are automatically parsed into their own fields by a call to [readHumdrum()].
#' See the [readHumdrum()] documentation for more details.
#' 
#' 
#' ### Formal fields:
#' 
#' *Formal* fields indicate musical sections, or time windows within
#' a piece, including formal designations ("verse", "chorus", etc.) and measures/bars.
#' Humdrum data may or may not include formal metadata fields, indicated by the token `"*>"`.
#' Classified formal marks are put into fields matching their name.
#' Unclassified formal marks are placed in a field called `Formal` as a default.
#' Nested formal categories are appended with an underscore and a number for each level of descent:
#' `Formal_1, Formal_2, ..., Formal_N`.
#' If part of a section is not given a name in a lower hierarchical level, the field is simply
#' empty (`""`) at that point.
#' 
#' Humdrum data may, or may not, also include barlines (tokens beginning `"="`).
#' However, humdrum tables *always* include three formal fields related to barlines:
#' 
#' + `Bar` :: `integer` 
#'     + How many barline records (single or double) have passed before this token?
#'     + If no `"="` tokens occur in a file, `Bar` is all zeros.
#'     + Note that this field is independent of whether the barlines are labeled with numbers in the humdrum file!
#' + `DoubleBar` :: `integer`
#'     + How many *double*-barline records have passed before this token?
#'     + If no `"=="` tokens occur in a file, `DoubleBar` is all zeros.
#' + `BarLabel` :: `character`
#'     + Any characters that occur in a barline-token *after* an initial `"="` or `"=="`.
#'       These include the `"-"` in the common "implied barline" token `"=-"`,
#'       repeat tokens (like `"=:||"`), and also any *explicit* bar numbers.
#'     + Note that the `Bar` field always enumerate *every* bar record, while
#'       measure-number labels in humdrum data (which appear in the `BarLabel` field) may
#'       do weird things like skipping numbers, repeating numbers, or having suffixes (e.g., `"19a"`).
#'       If no barline tokens appear in the file, `BarLabel` is all empty strings (`""`).
#' 
#' If no barline tokens are present in a file, `Bar` and `DoubleBar` will be nothing but `0`s,
#' and `BarLabel` will be all `NA`.
#' 
#' ### Reference fields:
#' 
#' *Reference* fields describe any **Reference Records**
#' in the humdrum data. Every reference record (records beginning `"!!!"`) in any
#' humdrum file in a corpus read by [readHumdrum] is parsed into a field named
#' by the reference code: `"XXX"` in `"!!!XXX"`.
#' Reference tokens are all identical throughout
#' any humdrum piece. If a reference code appears in one file but not another, the field is
#' `NA` in the file which does not have the code. If no reference records appear in any
#' files read by [readHumdrum()], no reference fields are created.
#' 
#' Examples of common reference records are `"!!!COM:"` (composer) and `"!!!OTL:"` (original title).
#' Any humdrum data with these records will end up having `COM` and `OTL` fields in its humdrum table.
#' 
#' ### Grouping fields:
#' 
#' Grouping fields are special fields which may be created by calls to [group_by()][groupHumdrum].
#' These fields are deleted by calls to [ungroup()][groupHumdrum].
#' These fields are generally hidden/inaccessible to users.
#' 
#' 
#' @section Null data:
#' 
#' In humdrum syntax, there is no requirement that every spine-path contains data
#' in every record. Rather, spines are often padded with *null tokens*.
#' In some cases, entire records may be padded with null tokens.
#' Each type of humdrum record uses a different null token:
#' 
#' + *Intepretation*: `*`
#' + *Comment*: `!`
#' + *Barline*: `=`
#' + *Data*: `.`
#' 
#' Many `humdrumR` functions automatically ignore null data, unless you specifically tell them not to 
#' (usually, using `dataTypes` argument).
#' Whenever different [fields()] are created or [selected][selectedFields], `humdrumR` reevaluates
#' what data locations it considers null.
#' Note that `humdrumR` considers data locations to be "null" when
#' 
#' + the selected fields are all `character` data *and* the token is a one of `c(".", "!", "!!", "=", "*", "**")`; **or**
#' + the selected fields are all `NA` (including `NA_character_`).
#' 
#' When `humdrumR` reevaluates null data, the `Type` field is updated, setting data records to `Type == "d"`
#' for null data and `Type == "D"` for non-null data. 
#' This is the main mechanism `humdrumR` functions use to ignore null data: most functions
#' only look at data where `Type == "D"`.
#' 
#' Whenever you print or [export][writeHumdrum()] a [humdrumR object[humdrumRclass], null data in the selected fields
#' prints as `"."`---thus `NA` values print as `.`.
#' Thus, if you are working with numeric data with `NA` values, these `NA` values will print as `"."`.
#' 
#' 
#' @section Reshaping:
#' 
#' Breaking the complex syntax of humdrum data into the "flat" structure of a humdrum table, with every single token on one line
#' of a `data.table`, makes humdrum data easier to analyze.
#' Of course, thanks to the structure fields, we can easily
#' regroup and reform the original humdrum data or use the structure of the data (like spines) in our analyses.
#' However, in some cases, you might want to work with humdrum data in a different structure or "shape."
#' `humdrumR` has several options for ["collapsing"][collapseHumdrum()] tokens within humdrum tables, 
#' ["cleaving"][cleave()] different parts of the data into new fields,
#' or otherwise [reshaping humdrum data][humCoercion] into basic R data structures you might prefer.
#' 
#' @examples
#' 
#' humData <- readHumdrum(humdrumRroot, "HumdrumData/BachChorales/chor00[1-4].krn")
#' 
#' fields(humData)
#' 
#' getHumtab(humData)
#' getHumtab(humData, dataTypes = 'D')
#' 
#' @family {Core humdrum data representation}
#' @name humTable
NULL




getColumn <- function(humtab, pad = 'corpus') {
    switch(pad,
           corpus = humtab[  , frank(list(Spine, Path), ties.method = 'dense', na.last = 'keep')],
           piece  = humtab[  , frank(x = list(Spine, Path), ties.method = 'dense', na.last = 'keep'), by = Piece]$V1,
           dont   = {
               columns <- humtab[ , list(Column = cumsum(Stop == 1L), `_rowKey_` = `_rowKey_`), by = .(Piece, Record)]
               setorder(columns, `_rowKey_`)
               columns$Column
           })
    
}


#####Humtable methods


orderHumtab <- function(humtab) {
    # if (nrow(humtab) == 0L) return(humtab)
    # orderingcols <- c('File', 'Piece', 'Spine', 'Path', 'Record', 'Stop') 
    
    # can't sort by lists
    
    # setkey(humtab, File, Piece, Spine, Path, Record, Stop)
    setorder(humtab, File, Piece, Spine, Path, Record, Stop)
    
}

reKey <- function(humdrumR) {
    
    humtab <- humdrumR@Humtable
    humtab <- orderHumtab(humtab)
    humtab[ , `_rowKey_` := seq_len(nrow(humtab))]
    
    humdrumR
    
}

#######################################################-
# humdrumR S4 class ----
######################################################-

#' `humdrumR` class
#' 
#' This `S4` class is the basic unit of the 
#' [humdrumR] package.
#' Each `humdrumR object` represents data [read][readHumdrum()] from one or 
#' more humdrum files.
#' In the documentation, we refer to these objects interchangeably as 
#' "`humdrumR` corpora", "`humdrumR` objects," or `humdrumR` data(sets).
#' In coding examples we name them "`humData`."
#' Test is an object/variable is a `humdrumR` dataset using `is.humdrumR()`.
#' 
#' The most important part of a `humdrumR` object is the 
#' [humdrum table][humTable] it holds within it;
#' In essence, a `humdrumR` object is simply a wrapper around its
#' humdrum table, which helps users to
#' to visualize, [filter][subset.humdrumR()], [summarize][humSummary], and [manipulate][withinHumdrum]
#' the table in a variety of ways.
#' 
#' Basic information about the size and shape of `humdrumR` objects can be
#' obtained with calls to [nrecord, npiece, length, ncol, etc.][humSize].
#' More detailed summary information can be obtained with the humdrumR [corpus summary functions][humSummary].
#' `humdrumR` data can also be coerced to more basic R data types using [as.matrix, as.data.frame, etc.][humCoercion].
#' A number of helpful functions are also defined to "reshape" or reorganize the
#'  data (e.g., [cleave()], [rend()], [collapseHumdrum()]).
#' 
#' The most powerful features of [humdrumR] are the tools it gives you to...
#' 
#' + Print a readable view of the data in shorthand/curtailed humdrum syntax.
#' + Filter `humdrumR` data, using [subset.humdrumR()] and the standard `R` [indexing operators][base::Extract]: `[]` and `[[]]`.
#' + Apply arbitrary commands to [humtable][humTable] fields using the [with(in)Humdrum][withinHumdrum] routines,
#' and related tidyverse commands (`mutate()`, `summarize()`, etc.).
#' 
#' @section Viewing your Data: 
#' 
#' If you type the name of an object on the R command line, R will "print" the object in the console.
#' This can be also be done explicitly using the `humdrumR` method of the [print()] function.
#' The humdrumR print method contains a number of arguments, which can be manipulated directly in calls to `print()`.
#' However, the `humdrumR` print argument draws its defaults from global `humdrumR` options
#' which can be also controlled with the [humdrumR()] function.
#' Generaly, changing print options with [humdrumR()] is the best option, as once you change them,
#' all automatic printing will follow your new settings---this means you can avoid explicitly calling [print()].
#'
#' When printing, only the [selected fields][selectedFields] in the data are shown.
#' 
#' #### View types
#' 
#' There are three options for how to view `humdrumR` data, which can be toggled between 
#' using the `view` argument to [print()] or [humdrumR()].
#' Since `view` is the first argument to the [humdrumR()] function, you can switch between views by simply calling
#' `humdrumR('humdrum')` or `humdrumR('score')` or `humdrumR('table')`.
#' The options are:
#' 
#' + `"humdrum"` (the default): prints a humdrum-syntax score representation, with record numbers enumerated at the left side.
#' 
#'   When the `Token` field is selected, and you haven't applied any [filters][subset.humdrumR()],
#'   this view will show your original data as it was in the files you [read][readHumdrum()].
#'   
#'   If more than one field is selected, they are pasted together in the printed output.
#'   
#' + `"table"`: prints a view of the underlying [humdrum table][humTable].
#' 
#'   In addition to the [selected fields][selectedFields], the `Piece`, `Spine`, and `Record` fields will
#'   always print in the output table, as well as `Path` and `Stop` if any paths/stops are present.
#'   
#' + `"score"`: will (attempt to) show a rendered score of (the first piece) in your data.
#' 
#'   This view is only likely to work correctly if you are using Rstudio and connected to the internet.
#'   Score rendering is accomplished using Craig Sapp's [Humdrum Notation Plugin](https://plugin.humdrum.org/).
#'    
#' For `table` and `humdrum` views, if there are more than one pieces in the object, 
#' the beginning of the first piece is printed, followed by the end of the last piece;
#' How *much* of the first/last piece are printed is controlled by the `maxRecordsPerFile` print argument.
#' Both of these views also highlight the output with different colors representing different types of data tokens:
#' this can be disabled using `syntaxHighlight = FALSE`.
#' For `score` view, only the first piece is shown.
#'
#' 
#' 
#' @slot Humtable A [humdrum tables][humTable]---i.e, a [data.table::data.table()] with particular fields.
#' @slot Files A list of two elements. The first, "`Search`", contains a single character representing
#' the `pattern` used in the call to [readHumdrum()] which created this humdrumR object.
#' The second, "`Names`," is a vector of strings representing all the files which matched the `pattern`
#' and were read into the `humdrumR` object, with [names()] corresponding to their "subcorpora" labels (`Label`).
#' @slot Fields A [data.table] indicating the existing fields in the `humdrumR` object's
#' [humdrum table][humTable].
#' The fields are divided into five categories: "Data", "Structure", "Interpretation", "Formal", and "Reference."
#' @slot LoadTime A [POSIXct][base::DateTimeClasses] value, indicating the time at which [readHumdrum()] was
#' called to create this `humdrumR` object.
#' @slot Context A [data.table] with two columns: `Open` and `Close`.
#' Each row represents a contextual window to apply to the data.
#'
#' @param humdrumR ***HumdrumR data.***
#' 
#' Must be a [humdrumR data object][humdrumRclass].
#'
#' @examples 
#' 
#' humData <- readHumdrum(humdrumRroot, "HumdrumData/BachChorales/chor00[1-4].krn")
#' 
#' humData 
#' 
#' humData |> print(view = 'table')
#' 
#' @inheritParams humdrumR
#' @name humdrumRclass
#' @seealso {The actual data is stored in the internal [humdrum table][humTable].
#' You can set printing options globally using the [humdrumR()] function.}
#' @family Core humdrum data representation
#' @aliases humdrumRS4 humData
#' @export
setClass('humdrumR', 
         slots = c(Humtable = 'data.table',
                   Files = 'list',
                   Fields = 'data.frame',
                   LoadTime = 'POSIXct',
                   Context = 'data.table'
                   )) -> makeHumdrumR

setMethod('initialize', 'humdrumR',
          function(.Object, humtab, pattern, tandemFields) {
            # humtab = a humdrum table
            # pattern = the original file search pattern (string)
            # tandem col a logical vector indicating which columns are tandem fields

            fieldTable <- initFields(humtab, tandemFields)
            setcolorder(humtab, fieldTable$Name)
            humtab <- orderHumtab(humtab)
            humtab[ , `_rowKey_` := seq_len(nrow(humtab))]
            
            .Object@Humtable  <- humtab    
            .Object@Fields    <- fieldTable
            .Object@Files     <- list(Search = pattern, Names = unique(humtab$Filepath))
            .Object@LoadTime  <- Sys.time()
            .Object@Context   <- data.table(Open = integer(0), Close = integer(0))
            .Object
          })


structureTab <- function(..., groupby = list()) {
    
    fields <- as.data.frame(list(...))
    
    groupby <- checkWindows(fields[[1]], groupby)
    
    fields$Piece <- fields$Spine <- fields$Stop <- fields$File <- 1L
    fields$Record <- seq_len(nrow(fields))
    fields$Path <- fields$ParentPath <- 0L
    fields[names(groupby)] <- groupby
    
    as.data.table(fields)
}


# humdrumR core methods ####



## As/Is ####

#' @rdname humdrumRclass
#' @export
is.humdrumR <- function(x){
    inherits(x, 'humdrumR')  
} 


#' humdrumR coercion
#' 
#' Many users may wish to work with humdrum data,
#' without having to rely on `humdrumR`'s [with(in).humdrumR][withinHumdrum] functionality.
#' Rather, you'd like to just get "normal" `R` objects out of your humdrum data.
#' `humdrumR` defines a number of functions/methods for "coercing" [humdrum data][humdrumRclass] into
#' basic `R` data types.
#' 
#' @details
#' 
#' Generally, coercion works by evaluating a humdrumR object's the 
#' [selected fields][selectedFields] and forcing the result to be an atomic vector.
#' When multiple field are selected, they are pasted together, separated by `", "`.
#' If a field is not atomic (like a `list`, or `lm` object), a concise representation of the
#' list or object class is printed.
#' The [as.vector(humdrumR)][humCoercion] has the additional
#' option of coercing the resulting vector to a particular type using the `mode` argument.
#' 
#' The [as.matrix(humdrumR)][base::as.matrix()] method take things a step further by putting the evaluated
#' fields into a two-dimensional matrix, with rows representing records and columns indicating 
#' spine paths (see Padding section below).
#' [as.data.frame(humdrumR)][base::as.data.frame()] first calls `as.matrix` then converts the matrix to a
#' `data.frame`.
#' Note that `as.matrix(humdrumR)` places the *entire* corpus object into one matrix, even if there are multiple pieces.
#' In contrast, the plural `as.matrices` and `as.data.frames` call their respective singular versions 
#' separately on each individual file in a [humdrumR corpus][humdrumRclass] and return them all in a list.
#' The [row names][base::rownames()]  of the `matrix`/`data.frame`(s) consist of two integer values, 
#' separated by a `.`, representing: `Piece.Record`.
#'
#' The `as.lines` function converts a [humdrumR object][humdrumRclass] into a `character` vector of text lines,
#' with columns separated by the `sep` argument (defaults to `"\t"`), just as you'd see in a humdrum-syntax file.
#' Each line is a single row from a `as.matrix.humdrumR`, with padded values at the right side removed.
#' The matrix's `Piece.Record` [row names][base::rownames()] are preserved as the lines' [names][base::names()].
#'  
#' Note that multiple-stop token (where `Stop > 1L`) cannot by incorporated into the two 
#' dimensional `matrix`/`data.frame`. Thus, `as.matrix(humdrumR)` calls 
#' [collapseStops(collapseAtomic = TRUE, sep = " ")]
#' on the [humdrumR object][humdrumRclass] before creating a matrix.
#' 
#' @section Padding:
#' 
#' Different pieces in a single [humdrumR object][humdrumRclass]
#' often differ in the number of spines and/or spine paths they contain.
#' To squish them into a two dimensional object (`matrix` or `data.frame`) they must necessarily be padded to the same number of columns.
#' (Global comments---which actually have `NA` spines---are also padded, placing the record in column 1.)
#' The `pad` argument is a single atomic value which is used to pad the matrix.
#' 
#' Another consideration is the behavior of spine paths.
#' In the humdrum syntax, a spine path in a leftward spine "bumps" data in higher spines into new columns, as in this example:
#' 
#' ```
#' **kern  **kern
#' A       E
#' *^      *
#' A       C       E
#' G       B       D
#' *v      *v      *
#' A       C        
#' *-      *-
#' ```
#' 
#' At the beginning and end of the file, the second column holds data for the second spine.
#' However, in the middle of the file, the second column holds data from the second spine path of the first spine.
#' To make the spine structure clearer, `as.matrix(humdrumR)` has the option to pad spine paths.
#' For example, using `"_"` as our `pad` argument:
#' 
#' ```
#' **kern   _        **kern
#' A        _        E
#' *^       _        *
#' A        C        E
#' G        B        D
#' *v       *v       *
#' A        _        C        
#' *-       _        *-
#' ```
#' 
#' This aspect of the matrix padding behavior can be controlled with the `padPaths` argument, with three possible values/behaviors:
#' 
#' + `"corpus"`: Paths are padded such that spine-paths across all pieces in the corpus all align in the same columns.
#'   If even one file has a spine path, all the other files are padded so their spines stay aligned.
#'   This is the default behavior for `as.matrix(humdrumR)`.
#' + `"piece"`: Paths are padded, but only *within* each piece. The spines/paths between different pieces may not align.
#' + `"dont"`: Paths are not padded at all. 
#' 
#' 
#' @param humdrumR ***HumdrumR data.***
#' 
#' Must be a [humdrumR data object][humdrumRclass].
#' 
#' @param dataTypes ***Which types of humdrum record(s) to include.***
#' 
#' Defaults to `"GLIMDd"` for `as.lines()` and `as.matrix()`; `"Dd"` for `as.data.frame()`;
#' `"LIMDd"` for `as.matrices()` and `as.data.frames()`.
#' 
#' Must be a single `character` string. Legal values are `'G', 'L', 'I', 'M', 'D', 'd'` 
#' or any combination of these (e.g., `"LIM"`).
#' (See the [humdrum table][humTable] documentation for explanation.)
#' 
#' @param padPaths ***Determines how spine-paths are aligned in the output.***
#' 
#' Defaults to `"dont"` for `as.lines()`; `"corpus"` for `as.matrix()` and `as.data.frame()`;
#' `"piece"` for `as.matrices()` and `as.data.frames()`
#' 
#' Must be a single `character` string, `"corpus"`, `"piece"`, or `"dont"`.
#'
#' See the details for an explanation.
#'   
#' @param padder ***Used to fill in differences in the number of columns between files and/or spine paths.***
#' 
#' Defaults to `NA`.
#' 
#' Must be a single `atomic` value.
#' 
#' @param sep ***Separator to place between columns in collapsed lines.***
#' 
#' Defaults to `"\t"` (tab).
#' 
#' Must be a single `character` string.
#' 
#' @param mode ***The desired output class.***
#'   
#' Defaults to `"any"`.
#' 
#' Must be a single `character` string naming an [atomic vector type][base::vector] to coerce the output to (i.e., `logical` or `numeric`).
#'   
#' If set to `"any"`, the output type is simply whatever the type of the [selected field][selectedFields] is.
#' 
#' 
#' 
#' 
#' @name humCoercion
#' @export
setMethod('as.vector', 
          signature = c(x = 'humdrumR'),
          function(x, mode = 'any') {
	  checks(mode, xcharacter & xlen1 & xlegal(c('any', 'logical', 'numeric', 'integer', 'character')))

                    if (is.empty(x)) return(vector(mode, 0L))
                    
                    vec <- pullSelectedField(x, 'D')
                    if (mode != 'any') vec <- as(vec, mode)
                    vec
                    
                    })

#' @name humCoercion
#' @export
as.lines <- function(humdrumR, dataTypes = 'GLIMDd', padPaths = 'dont', padder = '', sep = '\t') {
    
          # dataTypes <- checkTypes(dataTypes, 'as.lines')
          checks(dataTypes, xrecordtypes)
          checks(humdrumR, xhumdrumR)
          checks(padPaths, xcharacter & xlen1 & xlegal(c('corpus', 'piece', 'dont', "don't")))
          checks(padder, xatomic & xlen1)
          checks(sep, xatomic & xlen1)
          
          mat <- as.matrix(humdrumR, dataTypes, padPaths = padPaths, padder = padder)
          lines <- applyrows(mat, paste, collapse = sep)
          
          
          lines <- stringr::str_replace_all(lines, paste0('(', sep, padder, ')+$'), '')
          # lines <- stringr::str_remove(lines, '\t+$')
          # 
          names(lines) <- rownames(mat)
          
          lines
                           
          
}

### As single matrix(like) ----

#' @name humCoercion
#' @aliases as.matrix
#' @export
as.matrix.humdrumR <- function(x, dataTypes = 'GLIMDd', padPaths = 'corpus', padder = NA) { 
    
    checks(x, xhumdrumR)
    dataTypes <- checkTypes(dataTypes, 'as.matrix.humdrumR')
    checks(padPaths, xcharacter & xlegal(c('corpus', 'piece', 'dont', "don't")))
    checks(padder, xatomic & xlen1)
    x <- collapseStops(x)
    humtab <- getHumtab(x, dataTypes)
    
    
    i <- data.table::frank(humtab[ , list(Piece, Record)], ties.method = 'dense')
    j <- getColumn(humtab, padPaths)
    j[is.na(j)] <- 1L
    
    field <- pullPrintable(x, selectedFields(x), dataTypes = dataTypes)[[1]]
    if (is.factor(field)) field <- as.character(field) # R does't allow factors in matrices
    # padder <- as(padder, class(field))
    
    output <- matrix(padder, nrow = max(i), ncol = max(j))
    
    output[cbind(i, j)] <- field
    rownames(output) <- unique(humtab[,c('Piece', 'Record')])[order(Piece, Record), paste0(Piece, '.', Record)]
    
    output
    
    
}             








#' @name humCoercion
#' @export
setMethod('as.data.frame', 
          signature = c(x = 'humdrumR'),
          function(x, dataTypes = 'Dd', padPaths = 'corpus', padder = NA) {
              
              as.data.frame(as.matrix.humdrumR(x, dataTypes = dataTypes, padPaths = padPaths, padder = padder), stringsAsFactors = FALSE)
          })

#' @name humCoercion
#' @export
setMethod('as.data.frame', 
          signature = c(x = 'humdrumR'),
          function(x, dataTypes = 'Dd', padPaths = 'corpus', padder = NA) {
              
              as.data.frame(as.matrix.humdrumR(x, dataTypes = dataTypes, padPaths = padPaths, padder = padder), stringsAsFactors = FALSE)
          })

### As (list of) matrix-like ####

#' @name humCoercion
#' @export
as.matrices <- function(humdrumR, dataTypes = 'LIMDd', padPaths = 'piece', padder = NA) {
    checks(humdrumR, xhumdrumR)
    dataTypes <- checkTypes(dataTypes, 'as.matrices')
    checks(padPaths, xcharacter & xlegal(c('corpus', 'piece', 'dont', "don't")))
    checks(padder, xatomic & xlen1)
    
    
    mat <- as.matrix.humdrumR(humdrumR, dataTypes = dataTypes, padPaths = padPaths, padder = padder)
    dontpad <- padPaths != 'corpus'
    
    piece <- as.integer(gsub('\\..*', '', rownames(mat)))
    lapply(unique(piece),
           \(p) {
               submat <- mat[piece == p, , drop = FALSE] 
               if (dontpad) {
                   submat <- submat[ , colSums(!is.na(submat)) > 0L, drop = FALSE]
               }
               
               submat
           })
}

#' @name humCoercion
#' @export 
as.data.frames <- function(humdrumR, dataTypes = 'LIMDd', padPaths = 'piece', padder = NA) {
    checks(humdrumR, xhumdrumR)
    dataTypes <- checkTypes(dataTypes, 'as.data.frames')
    checks(padPaths, xcharacter & xlegal(c('corpus', 'piece', 'dont', "don't")))
    checks(padder, xatomic & xlen1)
    
    
    lapply(as.matrices(humdrumR,dataTypes = dataTypes, padPaths = padPaths, padder = padder), 
           as.data.frame, stringsAsFactors = FALSE)
}





# Shape ####

## Size ----

#' [humdrumR data][humdrumRclass] size and shape
#' 
#' These functions can be used to quickly
#' get basic information about the size and "shape" of
#' a [humdrumR corpus objects][humdrumRclass].
#' For more details, use the [census()] or [spines()] functions instead.
#' 
#' @details 
#' 
#' The following functions are defined.
#' 
#' 
#' + `nfile` : The number of input files in the corpus. 
#'   + [length][base::length()]`(humdrumR)` is a synonym.
#' + `npiece`: The number of pieces in the corpus. (There may be multiple pieces per file.)
#' + `nrecord`: The number of records in the corpus. 
#'   + [nrow][base::nrow()]`(humdrumR)` is a synonym.
#' + `ntoken`: The number of tokens in the corpus.
#' + `ncol(humdrumR)`: Returns the maximum number of "columns" need to represent the data in a 2d matrix.
#'    Matches the default output from [as.matrix(humdrumR)][humCoercion].
#' + `dim(humdrumR)`: the same as `c(nrow(humdrumR), ncol(humdrumR))`.
#' 
#' @section Is/Any:
#' 
#' A few additional functions return quick `TRUE`/`FALSE` answers regarding a [humdrumR corpus][humdrumRclass]:
#' 
#' + `is.empty`: Returns `TRUE` is a corpus contains no *non-null* data tokens (`D` tokens).
#' + `anyPaths`: Returns `TRUE` if there are any spine paths (`Path > 0`) in any pieces in the corpus.
#' + `anyStops`: Returns `TRUE` if there are any multi-stops (`Stop > 1`) in any pieces in the corpus.
#' + `anySubcorpora`: Returns `TRUE` if the corpus was [read][readHumdrum()] with different regex patterns
#'     matching "subcorpora" labels.
#'    + `namesSubcorpora` returns the names of the subcorpora labels (`Label` field).
#' + `anyMultiPieceFiles`: Returns `TRUE` if any files contain more than one piece (`Piece != File`).
#' 
#' @param humdrumR ***HumdrumR data.***
#' 
#' Must be a [humdrumR data object][humdrumRclass].
#' 
#' @param dataTypes ***Which types of humdrum record(s) to include in the census.***
#' 
#' Defaults to `"GLIMDd"`.
#' 
#' Must be a single `character` string. Legal values are `'G', 'L', 'I', 'M', 'D', 'd'` 
#' or any combination of these (e.g., `"LIM"`).
#' (See the [humdrum table][humTable] documentation **Fields** section for explanation.)
#'     
#' @name humSize
#' @export
nrecord <- function(humdrumR, dataTypes = 'GLIMDd') {
          checks(humdrumR, xhumdrumR)
          dataTypes <- checkTypes(dataTypes, 'dataTypes', 'nrecord')
          humtab <- getHumtab(humdrumR, dataTypes = dataTypes)

          nrow(unique(humtab[ , list(Piece, Record)]))
          
}

#' @name humSize
#' @export
setMethod('nrow',  signature = c(x = 'humdrumR'), \(x) nrecord(x))



#' @name humSize
#' @export
ntoken <- function(humdrumR, dataTypes = 'GLIMDd') {
          checks(humdrumR, xhumdrumR)
          dataTypes <- checkTypes(dataTypes, 'dataTypes', 'ntoken')
          humtab <- getHumtab(humdrumR, dataTypes = dataTypes)
          
          nrow(humtab)
}

#' @name humSize
#' @export
npieces <- function(humdrumR) {
          checks(humdrumR, xhumdrumR)
          
          length(unique(getHumtab(humdrumR)$Piece))
}

#' @name humSize
#' @export
nfiles <- function(humdrumR) {
    checks(humdrumR, xhumdrumR)
    
    length(unique(getHumtab(humdrumR)$File))
}

#' @name humSize
#' @export
setMethod('length', signature = c(x = 'humdrumR'), \(x) npieces(x))


#' @name humSize
#' @export
setMethod('ncol', 
          signature = c(x = 'humdrumR'), 
          \(x) max(getColumn(getHumtab(x)), na.rm = TRUE))

#' @name humSize
#' @export
setMethod('dim',  signature = c(x = 'humdrumR'),  \(x) c(nrecord(x), ncol(x)))


#' @name humSize
#' @export
is.empty <- function(humdrumR){
    checks(humdrumR, xhumdrumR)
    nrow(getHumtab(humdrumR, 'D')) == 0L
} 



## Structure ----

#' @rdname humSize
#' @export
anyMultiPieceFiles <- function(humdrumR) {
    checks(humdrumR, xhumdrumR)
    nfiles(humdrumR) != npiece(humdrumR)
}

#' @rdname humSize
#' @export
anyPaths <- function(humdrumR) {
    checks(humdrumR, xhumdrumR)
    humtab <- getHumtab(humdrumR)
    
    any(humtab$Path > 0L, na.rm = TRUE)
    
}

#' @rdname humSize
#' @export
anyStops <- function(humdrumR) {
    checks(humdrumR, xhumdrumR)
    
    humtab <- getHumtab(humdrumR)
    any(humtab$Stop > 1L, na.rm = TRUE)
    
}





#' Does humdrumR corpus contain subcorpora?
#' 
#' [HumdrumR][humdrumRclass] objects can be divided into "subcorpora."
#' `anySubcorpora` and `namesSubcorpora` functions tell us if there are any subcorpora and, if so, what they are called.
#'
#' @name humSize
#' @export
anySubcorpora <- function(humdrumR){
    checks(humdrumR, xhumdrumR)
    
    humtab <- getHumtab(humdrumR)
    
    humtab[ , length(unique(Label)) > 1L]
}

#' @name humSize
#' @export
namesSubcorpora <- function(humdrumR) {
    checks(humdrumR, xhumdrumR)
    
    humtab <- getHumtab(humdrumR)
    
    humtab[ , unique(Label)]
}


is.ragged <- function(humdrumR) {
    # Do the pieces in the corpus vary in number of spines?
    
    humtab <- getHumtab(humdrumR, 'D')
    
    humtab$Column <- getHumtab(humtab, 'piece')
    ncols   <- humtab[!is.na(Column) , max(Column) , by = Filename]$V1
    nspines <- humtab[!is.na(Spine)  , max(Spine)  , by = Filename]$V1
    
    length(unique(ncols)) > 1L || length(unique(nspines)) > 1L
    
}


#################################-
# Humtable manipulation and access ####
###############################-



#' Access a Humdrum Table
#' 
#' The `getHumtab()` function extracts the humdrum table from a [humdrumR object][humdrumRclass].
#' 
#' @param humdrumR ***HumdrumR data.***
#' 
#' Must be a [humdrumR data object][humdrumRclass].
#' 
#' @param dataTypes ***Which types of humdrum record(s) to include in the output.***
#' 
#' Defaults to `"GLIMDd"`.
#' 
#' Must be a `character` string, which specifies which types of data tokens/records to extract.
#' Legal values are: `"G"` (global comments), `"L"` (local comments), `"I"` (interpretations),
#' `"M"` (barlines), `"D"` (non-null data), or `"d"` (null data).
#' Multiple types can be specified in a single string: e.g., `"GLIMD"`.
#' Note that `"I"` also grabs `"E"` (exclusive) and `"S"` (spine-control) tokens.
#' 
#' 
#' @rdname humTable
#' @export
getHumtab <- function(humdrumR, dataTypes = "GLIMDd") {
          humtab <- humdrumR@Humtable
          
          checks(humdrumR, xhumdrumR)
          dataTypes <- checkTypes(dataTypes, 'getHumtab')
          
          
          if (length(setdiff(c('G', 'L', 'I', 'M', 'D', 'd', 'S', 'E'), dataTypes))) {
              humtab <- humtab[Type %in% dataTypes]
          }
          
          humtab

}


`putHumtab<-` <- function(humdrumR, value, overwriteEmpty = 'GLIMDd') {
          # adds humtab into humdrumR
          # Drop determines whether record dataTypes that are 
          # absent from value are left unchanged (drop = FALSE)
          # or replaced with empty data tables (drop = TRUE)
          # If drop indicates a record type (i.e., GLIM) those types are dropped only
          if (!data.table::is.data.table(value)) .stop("putHumtab()<- requires a data.table value.")
    
    
          if (length(overwriteEmpty)) {
              if (overwriteEmpty == 'GLIMDd') {
                  humdrumR@Humtable <- value
                 return(humdrumR)
                }
                  overwriteEmpty <- checkTypes(overwriteEmpty, 'putHumtab<-')
              
          }
          
          overwriteTypes <- unique(value$Type)
          overwriteTypes <- union(overwriteTypes, overwriteEmpty)
          
          # humtab <- rbind(humdrumR@Humtable[!Type %in% overwriteTypes], value, fill = TRUE) 
          # This works the same as the whole forloop bs bellow, but it breaks with token...which only matters for $<-
          
          
          oldhumtab <- humdrumR@Humtable[!Type %in% overwriteTypes]
          
          newcol <- setdiff(colnames(value), colnames(oldhumtab))
          for(col in newcol) {
              class <- class(value[[col]])
              insert <- if (class == 'token') {
                 token(as(NA, class(value[[col]]@.Data)), Exclusive = getExclusive(value[[col]]))
              } else {
                  as(NA, class)
              }
              if (nrow(oldhumtab) == 0L) insert <- insert[0]
              oldhumtab <- cbind(oldhumtab, setNames(as.data.frame(insert), col))
          }
          humtab <- rbind(oldhumtab, value)
          #
          
          humtab <- orderHumtab(humtab)
          humdrumR@Humtable <- humtab
          
          humdrumR
}







##

update_humdrumR <- function(hum, Exclusive, Null, ...) UseMethod('update_humdrumR')
update_humdrumR.humdrumR <- function(hum,  Exclusive = TRUE, Dd = TRUE , ...) {
    humtab <- getHumtab(hum, 'GLIMDd')
    humtab <- update_humdrumR.data.table(humtab, Exclusive, Dd, ...)
    
    putHumtab(hum, overwriteEmpty = c('d')) <- humtab
    hum
}
update_humdrumR.data.table <- function(hum, Exclusive = TRUE, Dd = TRUE, ...) {
    
    if (Exclusive) hum <- update_Exclusive.data.table(hum, ...)
    if (Dd) hum <- update_Dd.data.table(hum, ...)
    hum
    
}


#
update_Exclusive <- function(hum, ...) UseMethod('update_Exclusive')
update_Exclusive.humdrumR <- function(hum, ...) {
    humtab <- getHumtab(hum, 'ID')
    
    fields <- selectedFields(hum)
    update_Exclusive.data.table(humtab, fields) # in place
    
    hum
    
}
update_Exclusive.data.table <- function(hum, fields = 'Token', ...) {
    
    exclusiveFields <- colnames(hum) %in% paste0('Exclusive.', fields)
    if (any(exclusiveFields)) {
        hum[ , Exclusive := do.call('.paste', c(hum[ , exclusiveFields, with = FALSE], list(na.if = all)))]
    } else {
        hum[ , Exclusive := Exclusive.Token]
    }
}

#
update_Dd <- function(hum, field, ...) UseMethod('update_Dd')
update_Dd.humdrumR <- function(hum, field = selectedFields(hum),  allFields = FALSE, ...) {
    
    if (allFields) field <- fields(hum, 'D')$Name
    humtab <- getHumtab(hum, 'GLIMDd')
    
    update_Dd.data.table(humtab, field = field) # in place
    hum
}
update_Dd.data.table <- function(hum, field = 'Token', ...) {
    
    hum[Type %in% c('d', 'D'), 
        Type := {
            null <- nullFields(.SD, field)
            
            ifelse(null, 'd', 'D')
            
        }]
    setindex(hum, NULL) # data.table is creating auto-index for some reason?
}

is.nullToken <- function(tokens) {
    if (is.list(tokens)) {
        lengths(tokens) == 0L
    } else {
        is.na(tokens) | tokens %in% c('*', '=', '!', '!!', '.', '**')
    }
}

nullFields <- function(hum, fields, reduce = '&') {
    nulls <- lapply(hum[ , fields, with = FALSE], is.nullToken)
    Reduce('&', nulls)
}



####################################################-
# Fields ----
####################################################-


## Manipulating the @Fields slot ----

checkFieldTypes <- function(types, argname, callname, includeSelected = TRUE) {
    valid <- c('Data', 'Structure', 'Interpretation', 'Formal', 'Reference', 'Grouping', if (includeSelected) 'selected')
    types <- matched(types, valid, nomatch = types)
    checks(types, #argname = argname, 
           xcharacter & xmaxlength(7) & xplegal(valid))
}

initFields <- function(humtab, tandemFields) {
    fields <- setdiff(colnames(humtab), '_rowKey_')
    fieldTable <- data.table(Name = fields, Class = sapply(humtab, class), Type = 'Reference')
    
    fieldTable[ , Type := {
        Type <- Type
        Type[Name == 'Token'] <- 'Data'
        Type[Name %in% c('Filename', 'Filepath', 'File', 'Label', 'Piece',
                         'Spine', 'Path', 'ParentPath', 'Stop',
                         'Record', 'DataRecord', 'Global', 'Type')] <- 'Structure'
        Type[Name %in% c('Exclusive', 'Tandem', tandemFields)] <- 'Interpretation'
        Type[grepl('^Formal', Name) | Name %in% c('Bar', 'DoubleBar', 'BarLabel')] <- 'Formal'
        Type                 
    }]
    
    humtab[ , Exclusive.Token := Exclusive] # changes this in place
    
    setorder(fieldTable, Type, Name)
    fieldTable[ , Selected := as.integer(Name == 'Token')]
    fieldTable[ , GroupedBy := FALSE]
    fieldTable[ , Complement := FALSE]
    fieldTable
}




fieldClass <- function(x) {
    xclass <- class(x)
    
    if (xclass == 'token') {
        xclass <- paste0(class(x@.Data), 
                        ' (', if (!is.null(x@Exclusive)) paste0('**', x@Exclusive, ' '), 
                        'tokens)')
    }
    if (xclass == 'list') {
        classes <- unique(sapply(x, class))
        xclass <-  paste0('list (of ',  harvard(paste0(setdiff(classes, 'NULL'), "s"), 'and'), ')')
    }
    
    xclass
}

updateFields <- function(humdrumR, selectNew = TRUE) {
    humtab <- getHumtab(humdrumR)
    
    fieldTable <- humdrumR@Fields
    fieldTable <- fieldTable[Name %in% colnames(humtab)] # removes fields that don't exist in humtab
    
    new <- setdiff(colnames(humtab), c(fieldTable$Name, '_rowKey_'))
    new <- new[!grepl('^_complement_|Exclusive\\.', new)]
    if (length(new)) {
        fieldTable <- rbind(fieldTable, 
                            data.table(Name = new, 
                                       Type = 'Data', 
                                       Class = '_tmp_', 
                                       Selected = 0L, 
                                       GroupedBy = FALSE,
                                       Complement = FALSE))
    }
    
    
    fieldTable$Class <- sapply(humtab[ , fieldTable$Name, with = FALSE], 
                               fieldClass)
    fieldTable$Complement <- paste0('_complement_', fieldTable$Name) %in% colnames(humtab)
    
    setorder(fieldTable, Type, Name)
    setcolorder(humtab, fieldTable$Name)
    
    if (length(new) && selectNew) fieldTable[ , Selected := match(Name, new, nomatch = 0L)]
    humdrumR@Fields <- fieldTable
    
    humdrumR
    
}


fillFields <- function(humdrumR, from = 'Token', to, where = NULL) {
    humtab <- getHumtab(humdrumR, 'GLIMDd')
    
    where <- if (!is.null(where)) eval(where, envir = humtab) else TRUE
    
    for (field in to) {
        if (class(humtab[[from]]) == class(humtab[[field]])) {
            
            hits <- is.na(humtab[[field]]) & where
            
            
            humtab[[field]][hits] <- humtab[[from]][hits]
        }
    }
    
    putHumtab(humdrumR) <- humtab
    
    update_Dd(humdrumR)
    
}

## Querying fields ----







#' List fields in a humdrumR object
#' 
#' Use the `fields()` function to list the current fields in 
#' a [humdrumRclass] object.
#' 
#' @section Querying Fields:
#' 
#' The `fields()` function takes a [humdrumR object][humdrumRclass]
#' and returns a [data.table()], with each
#' row describing an available field in the humdrum table.
#' The output table has five columns:
#' 
#' + `Name`
#'   + The field name.
#' + `Class`
#'   + The [class()] of the data in the field.
#' + `Type`
#'   + The type of field (described above). 
#'     Can be `"Data"`, `"Structure"`, `"Interpretation"`, `"Formal"`, `"Reference"`, or `"Grouping"`.
#' + `Selected`,
#'   + A `logical` indicating which fields are [selected][selectedFields()].
#' + `GroupedBy`
#'   + A `logical` indicating which, if any, fields are currently [grouping][humGrouping] the data.
#'
#' Using the [names()] function on a [humdrumR object][humdrumRclass] will
#' get just the field names, the same as `fields(humData)$Name`.
#' 
#' @param fieldTypes ***Which types of fields to list.***
#' 
#' Shows all fields by default.
#' 
#' Must be a `character` vector. Legal options are `"Data"`, `"Structure"`, `"Interpretation"`, `"Formal"`, `"Reference"`,
#' and `"Grouping"`.
#' You can also pass `"selected"` to extract only the [selected fields][selectedFields()].
#' Types can be [partially matched][partialMatching]---for example, `"S"` for `"Structure"`.
#'   
#' @seealso {To actually extract fields from [humdrumR data], see the [pull()] family of functions.}
#' @rdname humTable
#' @export
fields <- function(humdrumR, fieldTypes = c('Data', 'Structure', 'Interpretation', 'Formal', 'Reference', 'Grouping', 'selected')) { 

  checks(humdrumR, xhumdrumR)
  fieldTypes <- checkFieldTypes(fieldTypes, 'fieldTypes', 'fields')
            
  humdrumR@Fields[Type != 'Complement' & (Type %in% fieldTypes | ('selected' %in% fieldTypes & Selected == TRUE))]

}

dataFields <- function(humdrumR) fields(humdrumR, 'Data')$Name

fieldsInExpr <- function(humtab, expr) {
  ## This function identifies which, if any,
  ## fields in a humtable are referenced in an expression (or rhs for formula).
  if (is.humdrumR(humtab)) humtab <- getHumtab(humtab)          
  
  namesInExpr(colnames(humtab), expr)
}

fieldMatch <- function(humdrumR, fieldnames, callfun = 'fieldMatch', argname = 'fields', 
                       fieldTypes = c('Data', 'Structure', 'Interpretation', 'Formal', 'Reference', 'Grouping')) {
    checks(fieldnames, xcharnotempty, argname = fields, seealso = callfun)
    fields <- fields(humdrumR, fieldTypes = fieldTypes)$Name
    target <- pmatch(fieldnames, fields)
    
    
    nomatch <- is.na(target)
    if (all(nomatch)) {
        .stop("In the '{argname}' argument of your call to humdrumR::{callfun},", 
              ifelse = length(fieldnames),
              harvard(fieldnames, 'and', quote = TRUE),
              '<is not the name of a|are not names of> {tolower(harvard(fieldTypes, "or"))} field<|s>',
              'in your humdrumR object.')
    }
    
    if (any(nomatch)) {
        .warn('In the "{argname}" argument of your call to humdrumR::{callfun}, ',
              ifelse = length(argname),
              harvard(fieldnames[is.na(target)],  'and'),
              '<is not the name of a|are not names of> {tolower(harvard(fieldTypes, "or"))} field<|s>',
              'in your humdrumR object.')
        
        target <- target[!nomatch]
        
    }
    
    
    fields[target]
    
}


#' @rdname humTable
#' @export
names.humdrumR <- function(humdrumR) fields(humdrumR)[ , Name]



## Selecting fields ----


#' The "selected" fields of a [humdrumR object][humdrumRclass]
#' 
#' Every `humdrumR` object will have, at any given time, one or more of its
#' fields "selected."
#' The selected fields are the fields that are shown when a 
#' [humdrumR object][humdrumRclass] prints on the console.
#' (At the bottom of the printout, the selected fields are also marked by a `*`.)
#' The selected fields can also be queried directly using the `selectedFields()` function, or 
#' by inspecting the output of [fields()].
#' The selected fields also play other important roles in `humdrumR` 
#' (see details).
#' 
#' @details
#'
#' The "selected" fields play an important role in [humdrumR].
#' In addition to controlling what [fields()] you "see" in the console printout, 
#' the select fields are the fields that many [humdrumR][humdrumR] functions will automatically
#' apply themselves to.
#' For example, if you call [ditto()], [timecount()], or [kern()] on a [humdrumR data object][humdrumRclass],
#' these functions will be applied the selected field(s).
#' (However, most such functions are only applied to the *first* selected field, 
#' if there is more than one; see their own manuals for details.)
#' The first selected field is also passed as the hidden `.` variable in calls to [with()/within()/,
#' mutate()/summarize()/reframe()][withinHumdrum]---so if you don't remember what fields are selected
#' you can just put a `.`!
#' 
#' The selected fields also have a role in identifying "null" data.
#' Whenever new fields are selected, their data tokens are checked for `NA` values or null
#' tokens (`"."`).
#' Anywhere where *all* the selected fields are null, the `Type` field is updated to `"d"`;
#' wherever *any* field is **not** null, the `Type` field is updated to `"D"`.
#' Many functions ignore `d` (null data) tokens by default, so selecting fields can be a way controlling which data you
#' want to analyze and which you don't.
#'
#'
#' ## Selecting fields
#' 
#' Fields can be selected using the tidyverse `select()` function, 
#' and can use any of `select()`'s [special select features][dplyr::select()].
#' If you call `select()` with no argument, the original `Token` field is selected by default.
#'
#' If you use `select()` with a numeric selections or, like `select(1:3)`, fields are numbered in the (row) order
#' shown in call to [fields()].
#' Fields are always sorted first by `Type` (`Data` first), then by name.
#' If you provide a `fieldTypes` argument, the numeric selection is reduced to only those fields you choose,
#' matching with the row-numbers you'd see if you call [fields(humData, fieldTypes = ...)][fields()].
#' So, for example, `select(humData, 1:3, fieldTypes = 'Structure')` will select the first three structural fields.
#' You can also simply provide the keywords `"Data"`, `"Structure"`, 
#' `"Interpretation"`, `"Reference"`, or `"Formal"`
#' to select *all* fields of each [field type][fields()].
#' 
#' Note that when you call `select()` on [humdrumR data][humdrumRclass], 
#' the selected field(s) change **in place**,
#' meaning that the selection changes *even if you don't (re)assign the output*!
#' 
#' @param humdrumR,.data ***HumdrumR data.***
#' 
#' Must be a [humdrumR data object][humdrumRclass].
#' 
#' @param ... ***Which fields to output.***
#' 
#' If no arguments are provided, the `Token` field is selected.
#' 
#' These arguments can be any combination of `character` strings, numbers, or symbols used
#' to match fields in the `humdrumR` input using [tidyverse][dplyr::select()] semantics.
#' 
#' Unlike in tidyverse `select()`, field names can be [partially matched][partialMatching].
#' You can also include `character` strings [partially matching][partialMatching] 
#' `"Data"`, `"Structure"`, `"Interpretation"`, `"Formal"`, `"Reference"` or `"Grouping"`,
#' which will select all fields of those types (see [fields()] for further explanation).
#' 
#' @param fieldTypes ***Which field types are available for numeric selecting?***
#' 
#' Defaults to `"any"`, so all fields are counted for numeric selection.
#' 
#' Must be a `character` vector. Legal options are `"Data"`, `"Structure"`, `"Interpretation"`, `"Formal"`, `"Reference"`,
#' `"Grouping"`, and `"any"`, corresponding to the `Type` column in the output of [fields()].
#' Types can be [partially matched][partialMatching]---for example, `"S"` for `"Structure"`.
#' 
#'
#' @examples
#' 
#' humData <- readHumdrum(humdrumRroot, "HumdrumData/BachChorales/chor00[1-4].krn")
#' 
#' # see what is selected
#' selectedFields(humData)
#' 
#' # change selection
#' humData |> select(Spine, Record, Token) |> selectedFields()
#'
#' humData |> select(Structure)
#' 
#' humData |> select(4)
#' humData |> select(1:3, fieldTypes = 'Structure')
#' 
#' # effect of selection
#' 
#' humData |> select(Token) |> count()
#' humData |> select(Spine) |> count()
#'
#' @seealso {Use [fields()] to see what fields are available, and how they are ordered.
#' To actually *extract* fields, see [pullFields()].}
#' @export
selectedFields <- function(humdrumR) {
    fields(humdrumR)[Selected > 0L][order(Selected)]$Name
} 

selectFields <- function(humdrumR, fields = 'Token') {
    checks(humdrumR, xhumdrumR)
    
    fields <- fieldMatch(humdrumR, fields, 'selectFields', 'fields')
    
    fieldTable <- humdrumR@Fields
    
    fieldTable[ , Selected := match(Name, fields, nomatch = 0L)]
    
    # humdrumR@Fields <- #data.table::copy(fieldTable)
    
    humdrumR <- update_humdrumR.humdrumR(humdrumR, field = fields)
    humdrumR
}



#' @rdname selectedFields
#' @aliases select
#' @export 
select.humdrumR <- function(.data, ..., fieldTypes = "any") {
 
    exprs <- rlang::enexprs(...)
    fields <- if (length(exprs) == 0L) {
      'Token'
    } else {
      tidyselect_humdrumRfields(.data, exprs, fieldTypes, 'select.humdrumR')
    }
    selectFields(.data, fields)
}

#### select helpers ----

tidyselect_humdrumRfields <- function(humdrumR, exprs, fieldTypes, callname) {
  fields <- fields(humdrumR)
  
  types <- c('Data', 'Structure', 'Interpretation', 'Formal', 'Reference', 'Grouping')
  
  fieldTypes <- if ('any' %in% tolower(fieldTypes)) {
    types
  } else {
    fieldTypes <- checkFieldTypes(fieldTypes, 'fieldTypes', callname, includeSelected = FALSE)
  }
 
  
  # select by field type (character string only, partially matched)
  typeSelections <- lapply(exprs, 
                           \(expr) {
                             if (is.character(expr)) {
                               type <- types[pmatch(expr, types, nomatch = 0L)]
                               if (length(type)) fields[Type == type, Name]
                             }
                           })
  
  fieldExprs <- exprs[lengths(typeSelections) == 0L]
  
  # select by field names
  
  fieldExprs <- lapply(fieldExprs, withinExpression,
                       stopOnHit = TRUE,
                       applyTo = c('atomic', 'symbol'),
                       predicate = \(Type, Class) (Type == 'atomic' && Class =='character') | Type == 'symbol',
                       func = \(exprA) {
                           
                           name <- if (exprA$Type == 'symbol') exprA$Head else exprA$Args[[1]]
                           
                           pname <- pmatch(name, fields$Name)
                           if (!is.na(pname)) name <- fields$Name[pname]
                           
                           if (exprA$Type == 'symbol') exprA$Head <- name else exprA$Args[[1]] <- name
                           
                           exprA
                           
                       })
  
  fieldSelections <- local({
    fields <- fields[order(!Type %in% fieldTypes)]
    options <- fields$Name
    expr <- rlang::expr(c(!!!(fieldExprs)))
    tried <- try(options[tidyselect::eval_select(expr, setNames(options, options), strict = FALSE)], silent = TRUE)
    if (class(tried) == 'try-error') .stop('In a call to {callname}(), you can ONLY provide the names of humdrumR fields,',
                                           'or special tidyverse "select features" expressions involving those fields.',
                                            "You can't provide more complex/arbitrary expressions.")
    
    
    tried
  })
 
  ##
  selections <- union(fieldSelections, unlist(typeSelections))
  
  
  if (length(selections) == 0L) {
    exprs <- do.call('harvard', c(lapply(exprs, rlang::as_label), conjunction = '', quote = TRUE))
    .stop("The <expressions {exprs} don't|expression {exprs} doesn't> match any {harvard(fieldTypes, 'or')} fields in your humdrumR data.",
          ifelse = length(exprs) > 1)
  }
  
  selections
}



printableSelectedField <- function(humdrumR, 
                                   dataTypes = 'D', useToken = 'GLIM',
                                   null =  c('charNA2dot', 'NA2dot', 'dot2NA', 'asis')) {
    dataTypes <- checkTypes(dataTypes, 'printableSelectField')
    useToken <- checkTypes(useToken, 'printableSelectField', 'useToken')
    
    printableField <- pullPrintable(humdrumR, fields = selectedFields(humdrumR), 
                                    dataTypes = dataTypes,  useToken = useToken,
                                    collapse = TRUE)
    
    humtab <- getHumtab(humdrumR, dataTypes = 'GLIMDd')
    
    humtab[ , Printable := {
        print <- character(nrow(humtab))
        print[Type %in% useToken] <- Token[Type %in% useToken]
        print[Type %in% dataTypes] <- printableField$Printable
        print
    }]
    
    putHumtab(humdrumR) <- humtab
    
    updateFields(humdrumR)
}


getGroupingFields <- function(humdrumR, .by = NULL, withFunc = 'within.humdrumR') {
    
    if (is.null(.by)) {
        fields(humdrumR)[GroupedBy == TRUE]$Name 
    } else {
        fieldMatch(humdrumR, .by, callfun = withFunc, argname = '.by')
        # tidyselect_humdrumRfields(humdrumR, .by, fieldTypes = 'Data', callname = withFunc)
    }
    
}


## Extracting ("pull") fields ----




pullFields <- function(humdrumR, fields, dataTypes = 'D', 
                       null = 'charNA2dot',
                       drop = FALSE) {

    
    humtab <- getHumtab(humdrumR, dataTypes = dataTypes)
    selectedTable <- humtab[ , fields, with = FALSE]
    
    
    # selectedTable[] <- selectedTable[ , lapply(.SD, 
    #                                            \(field) {
    #                                                if (is.list(field)) {
    #                                                   field[filter] <- lapply(field[filter], '[', i = 0)
    #                                                } else {
    #                                                   field[filter] <- NA
    #                                                }
    #                                                field
    #                                            })]
    # decide how NA/null values are shown
    fieldTypes <- fields(humdrumR)[ , Type[match(colnames(selectedTable), Name)]]
    fieldTypes <- lapply(as.list(fieldTypes), 
                         \(fieldType) if (fieldType == 'Data') humtab$Type else rep(c(Interpretation = 'I', Formal = 'I',
                                                                                      Structure = 'D', Reference = 'G')[fieldType],
                                                                                      fieldType, length.out = nrow(selectedTable)))
    
    selectedTable[] <- mapply(naDots, selectedTable, fieldTypes, MoreArgs = list(null = null), SIMPLIFY = FALSE)
    
    # return
    if (length(fields) == 1L && drop) selectedTable[[1]] else selectedTable
    
}



pullSelectedField <- function(humdrumR, dataTypes = 'D', drop = TRUE, null = 'charNA2dot') {
    fieldInTable <- pullSelectedFields(humdrumR, dataTypes = dataTypes, null = null)[ , 1L, with = FALSE]
    
    if (drop) fieldInTable[[1]] else fieldInTable
    
}

pullSelectedFields <- function(humdrumR, dataTypes = 'D', null = 'charNA2dot') {
    
    pullFields(humdrumR, selectedFields(humdrumR), dataTypes = dataTypes, null = null)
    
}


pullPrintable <- function(humdrumR, fields, 
                          dataTypes = 'D', null = 'NA2dot',
                          useToken = c('G', 'L', 'I', 'M', 'S', 'E'), collapse = TRUE){
    
    fieldTable <- pullFields(humdrumR, union(fields, c('Type')), dataTypes = dataTypes, null = if (collapse) 'dot2NA' else null)
    
    Exclusives <- Filter(length, lapply(fieldTable[ , fields, with = FALSE], getExclusive))
    tandems <- unlist(unique(lapply(fieldTable, getTandem)))
    
    # change fields to character
    fieldTable[ , (fields) := lapply(fields, 
                                     \(field) {
                                         field <- fieldTable[[field]]
                                         
                                         if (is.list(field)) {
                                             field <- list2str(field)
                                         } else {
                                             if (is.token(field)) field <- field@.Data
                                             field[] <- as.character(field)
                                         }
                                         
                                         if (is.matrix(field)) {
                                             matrix[] <- str_pad(c(matrix), width = max(nchar(matrix)))
                                             field <- paste0('[', do.call('paste', as.data.frame(matrix)),  ']')
                                         }
                                         field[fieldTable$Type != 'G'] <- gsub('\t\t*', '', field[fieldTable$Type != 'G'])
                                         field
                                         
                                     })] 
    if (!collapse) return(fieldTable[ , fields, with = FALSE])                  
    
    ## collapse == TRUE:
    field <- Reduce(\(a, b) {
        ifelse(fieldTable$Type %in% c('I', 'M', 'd', 'S') & a == b, a, .paste(a, b, sep = '', na.if = all))
        
    }, fieldTable[, fields, with = FALSE])
    
    
    Type <- fieldTable$Type
    ## Do we need to grab any interpretations from the Token field?
    if (length(useToken) && any(grepl(captureRE(useToken), dataTypes))) {
        # humtab[, !Type %in% c('D', 'd')]
        fill <- Type %in% useToken & (is.na(field) | is.nullToken(field))
        token <- getHumtab(humdrumR, dataTypes = dataTypes)$Token # get token separately because we always want null = 'asis'
        
        if (length(tandems)) {
            tandemRE <- knownInterpretations[Name %in% tandems, RE]
            targets <- Type == 'I' & !is.na(token)
            fill[targets] <- fill[targets]  & Reduce('|', lapply(tandemRE, 
                                                                 stringi::stri_detect_regex, 
                                                                 str = token[targets]))
        }
        field[fill] <- token[fill]
    } 
    
    if (length(Exclusives)) {
        field[Type == 'E'] <- paste0('**', do.call('paste', c(Exclusives[fields], list(sep = '**'))))
    }
    
    

    field <- stringr::str_replace(field, '^\\.[ ,.]*\\.$', '.')
    field <- naDots(field, ifelse(is.na(field) | field == '.', 'd', Type), null)
    field[field == ''] <- "'"
    
    
    data.table(Printable = field)
    
}

### Exported pull functions ----

#' Extract field(s) from [humdrumR data][humdrumRclass]
#' 
#' Individual fields from the [humdrum table][humTable] can be extracted using `pull()`.
#' Multiple fields can be extracted using `pull_data.frame()`, `pull_data.table`, or `pull_tibble()`
#' ---the resulting data.frames are a column-subset of the humdrum table.
#' You can also use the `$` operator to extract a single field, just like `pull()`.
#'
#'
#' @details
#' 
#' The functions `pull()`, `pull.data.xxx()`, `pull.tibble()`, and `$` are 
#' the "escape hatch" to pull your 
#' data out of the [humdrumR data world][humdrumRclass] into "normal" R.
#' Use the `pull()` function or the `$` to access the actual vector content of a single field.
#' The other functions *always* return a `data.frame`/`data.table`/`tibble`, even if it has only one column.
#' 
#' Choose which field(s) to return using the `...`, `var`, or `name` arguments.
#' The `var` and `...` options use tidyverse style select semantics (see [select()][selectedFields]).
#' If no fields are indicated, the data's [selected fields][selectedFields] are pulled; in the case of `pull()` and `$`,
#' only the *first* selected field is pulled.
#' 
#' The `dataTypes` argument controls which *types* of data are pulled---by default, 
#' only non-null data (`Type == "D"`) is pulled.
#' The `$` operator can only grab non-null data.
#' 
#' The `null` argument controls how null data is returned, with four options: 
#' 
#' + `"NA2dot"` means all `NA` values are converted to `"."`; note that this will cause all output to be coerced to `character`.
#' + `"dot2NA"` means all `"."` are converted to `NA`.
#' + `"charNA2dot"` means `NA` values in `character` vectors are converted to `NA`, but not in other atomic types.
#' + `"asis"` means either `NA` or `"."` values may print, depending on what is in the field.
#' 
#' Note that `pull_tibble()` won't work if you don't independently load the `tibble` (or `tidyverse`) package---
#' i.e., call `library(tibble)`.
#' 
#' @param humdrumR,.data,x ***HumdrumR data.***
#' 
#' Must be a [humdrumR data object][humdrumRclass].
#' 
#' @param ... ***Which fields to output.***
#' 
#' If no arguments are provided, the object's [selected fields][selectFields] are pulled.
#' 
#' These arguments can be any combination of `character` strings, numbers, or symbols used
#' to match fields in the `humdrumR` input using [tidyverse][dplyr::select()] semantics.
#' 
#' Unlike in tidyverse `select()`, field names can be [partially matched][partialMatching].
#' You can also include `character` strings [partially matching][partialMatching] 
#' `"Data"`, `"Structure"`, `"Interpretation"`, `"Formal"`, `"Reference"` or `"Grouping"`,
#' which will select all fields of those types (see [fields()] for further explanation).
#' 
#' @param var ***Which field to output.***
#' 
#' Defaults to `selectedFields(humdrumR)[1]`.
#' 
#' Must be either a single `character` string or `symbol` which [partially matches][partialMatching] 
#' a field name, or a single whole-number, which selects the field by the row index of the [fields()] output.
#' If a negative number is provided, nth-to-last index is used---for example, `-1` would grab the last field.
#'   
#' @param dataTypes ***Which types of humdrum record(s) to include.***
#' 
#' Only non-null data tokens (`"D"`) are returned by default.
#' 
#' Must be a single `character` string. Legal values are `'G', 'L', 'I', 'M', 'D', 'd'` 
#' or any combination of these (e.g., `"LIM"`).
#' (See the [humdrum table][humTable] documentation for explanation.)
#' 
#' @param null ***How should null data points be output?***
#' 
#' Default is `"charNA2dot"`.
#' 
#' Must be a single character string, [partially matching][partialMatchng] `"NA2dot"`, `"dot2NA"`, `'charNA2dot"`, or `"asis"`.
#' 
#' @seealso {To know what fields are available to pull, use [fields()].
#'           To know what fields are selected---the default fields to pull---use [selectedFields()].}
#' @examples
#' 
#' humData <- readHumdrum(humdrumRroot, "HumdrumData/BachChorales/chor00[1-4].krn")
#' 
#' humData |> pull(Token)
#' humData$Token
#' 
#' humData |> pull_data.table(Token, Spine)
#' humData |> pull_tibble(everything())
#' 
#' @name pullHumdrum
#' @export
pull_data.table <- function(humdrumR, ..., dataTypes = 'D', null = 'charNA2dot') {
    checks(humdrumR, xhumdrumR)
    dataTypes <- checkTypes(dataTypes, 'pull_data.table')
    checks(null, xplegal(c('charNA2dot', 'NA2dot', 'dot2NA', 'asis')))
    
    exprs <- rlang::enexprs(...)
    fields <- if (length(exprs)) {
        tidyselect_humdrumRfields(humdrumR, exprs, fieldTypes = 'any', callname = 'pull.humdrumR')
    } else {
        selectedFields(humdrumR)
    }
    
    dt <- pullFields(humdrumR, fields, dataTypes = dataTypes, null = null)
    
    names <- .names(exprs)
    if (any(names != '')) names(dt)[names != ''] <- names[names != '']
    
    dt
}

#' @name pullHumdrum
#' @export
pull_data.frame <- function(humdrumR, ..., dataTypes = 'D', null = 'charNA2dot') {
    as.data.frame(pull_data.table(humdrumR, ..., dataTypes = dataTypes, null = null))
}

#' @name pullHumdrum
#' @export
pull_tibble <- function(humdrumR, ..., dataTypes = 'D', null = 'charNA2dot') {
    tibble::as_tibble(pull_data.table(humdrumR, ..., dataTypes = dataTypes, null = null))
}

#' @rdname pullHumdrum
#' @aliases pull
#' @export
pull.humdrumR <- function(.data, var, dataTypes = 'D', null = 'asis') {
    
    checks(.data, xhumdrumR)
    dataTypes <- checkTypes(dataTypes, 'pull.humdrumR', argname = 'var')
    checks(null, xplegal(c('charNA2dot', 'NA2dot', 'dot2NA', 'asis')))
    
    if (missing(var)) var <- selectedFields(.data)[1]
    var <- rlang::ensym(var)
    
    if (is.atomic(var)) checks(var, ((xcharacter | xwholenum) & xlen1))
    
    if (is.symbol(var)) var <- as.character(var)
    if (is.numeric(var)) {
        fields <- fields(.data)$Name
        if (var == 0 | abs(var) > length(fields)) .stop("Your numeric 'var' argument is larger",
                                                        "than the number of fields available to pull.")
        if (var < 0) var <- max(0, length(fields) + 1 + var)
        var <- fields[var]
    } else {
        var <- fieldMatch(.data, var, 'pull')
    }
    
    pullFields(.data, fields = var, dataTypes = dataTypes, null = null)[[1]]
    
    
}

#### $ methods ----

#' @rdname pullHumdrum
#' @export 
setMethod('$', signature = c(x = 'humdrumR'),
          function(x, name) {
              name <- as.character(name)
              
              match <- fieldMatch(x, name, callfun = '$', argname = 'name')
              
              getHumtab(x, 'D')[[match[1]]]
          })


#' @export
setMethod('$<-', signature = c(x = 'humdrumR'),
          function(x, name, value) {
              checks(value, (xvector | xinherits('token')) & xlen)
              
              humtab <- getHumtab(x, 'D')
              if (!(length(value) == 1L | length(value) == nrow(humtab))) .stop("When using humdrumR$<- value, the value must either be length 1",
                                                                                "or exactly the same length as the number of non-null data tokens in the",
                                                                                "humdrumR object's selected fields.")
              name <- as.character(name)
              
              if (name == 'Token') .stop("In your use of humdrumR$<-, you are trying to overwrite the 'Token' field, which is not allowed.",
                                         "This field should always keep the original humdrum data you imported.")
              
              structural <- c('Filename', 'Filepath', 'File', 'Label', 'Bar', 'DoubleBar', 'BarLabel', 'Formal',
                              'Piece', 'Spine', 'Path', 'Stop', 'Record', 'DataRecord', 'Global', 'Type')
              
              if (name %in% structural) .stop("In your use of humdrumR$<-, you are trying to overwrite the structural field '{match}', which is not allowed.",
                                              "For a complete list of structural fields, use the command fields(mydata, 'S').")
              isnew <- !name %in% colnames(humtab)
              humtab[[name]] <- value
              
              putHumtab(x, overwriteEmpty = c()) <- humtab
              x <- updateFields(x)
              
              x
              
          })




#### pull helpers ----


naDots <- function(field, types, null) {
    if (null == 'asis') return(field)
    na <- is.na(field)
    
    nulltoken <- c(G = '!!', I = '*', L = '!', d = '.', D = '.', M = '=', E = '**', S = '*')[types]
    
    
    if (null == 'dot2NA') {
        na <- na | field == nulltoken
        field[na] <- NA
    } else {
        if (null == 'charNA2dot') na <- is.character(field) & na
        field[na] <- nulltoken[na]
    }
    
    field   
}


####################################################-
# Print methods ----
#########################################################-

setMethod('show', signature = c(object = 'humdrumR'),
          function(object) {
                    print.humdrumR(object)
                    return(invisible(NULL)) # this is what show is supposed to do
          })

#' @rdname humdrumRclass
#' @export
print.humdrumR <- function(humdrumR, view = humdrumRoption('view'), 
                           dataTypes = humdrumRoption('dataTypes'), 
                           firstAndLast = TRUE, 
                           screenWidth = options('width')$width - 10L,
                           null = humdrumRoption('nullPrint'), 
                           syntaxHighlight =  humdrumRoption('syntaxHighlight'),
                           maxRecordsPerFile = if (length(humdrumR) == 1L) 800L else humdrumRoption('maxRecordsPerFile'), 
                           maxTokenLength = humdrumRoption('maxTokenLength'), 
                           censorEmptyRecords = humdrumRoption('censorEmptyRecords')) {
    
  checks(humdrumR, xhumdrumR)
  dataTypes <- checkTypes(dataTypes, "print_humdrumR")
  checks(view, xcharacter & xlen1 & xplegal(c('humdrum', 'score', 'table', 'tibble', 'data.frame')))
  checks(firstAndLast, xTF)
  checks(screenWidth, xwholenum & xpositive)
  checks(null, xcharacter & xlen1 & xlegal(c('NA2dot', 'dot2NA', 'charNA2dot', 'asis')))
  checks(syntaxHighlight, xTF)
  checks(maxRecordsPerFile, xwholenum & xpositive)
  checks(maxTokenLength, xwholenum & xpositive)
  checks(censorEmptyRecords, xwholenum & xpositive)
  

  
  if (is.empty(humdrumR)) {
    cat("\nEmpty humdrumR object\n")
    return(invisible(humdrumR))
  }
  
  local({# we (may) change humdrumR object in here.
      Npieces <- npieces(humdrumR)
      Nfiles <- nfiles(humdrumR) # needs to be done before firstLast indexing
      if (Npieces > 2L && firstAndLast) humdrumR <- humdrumR[c(1L, Npieces)]
      
      if (view == 'score') return(print_score(humdrumR, maxRecordsPerFile))
      
      tokmat <- if (view == 'humdrum') {
          tokmat_humdrum(humdrumR, dataTypes, null = null, censorEmptyRecords = censorEmptyRecords)
      } else {
          tokmat_humtable(humdrumR, dataTypes, null = null)
      }
      
      print_tokmat(tokmat, Nmorefiles = Npieces - length(humdrumR), maxRecordsPerFile, maxTokenLength, 
                   screenWidth = screenWidth, showCensorship = view == 'humdrum', syntaxHighlight = syntaxHighlight)
      
      
      
      if (length(humdrumR) > 1L) {
          cat('\n')
          cat('\thumdrumR corpus of', num2print(Npieces), 'pieces',
              if (Nfiles != Npieces) c('(in', num2word(Nfiles), plural(Nfiles, 'files)', 'file)')))
          
          
          if (anySubcorpora(humdrumR)) {
              subnames <- namesSubcorpora(humdrumR)
              cat(' (', num2word(length(subnames)), 
                  " subcorpora: ", 
                  paste(subnames, collapse = ', '), 
                  ')', sep = '')
          }
          cat('.\n')
          
      }
  })
  
  ## Fields
  showFields(humdrumR)
  
  showWindows(humdrumR)
  
  return(invisible(humdrumR))
  
}


tokmat_humtable <- function(humdrumR, dataTypes = 'D', null = c('charNA2dot', 'NA2dot', 'dot2NA', 'asis')) {
    structureFields <- c('Piece', 'Filename', 'Spine', 'Path', 'Record', 'Stop')
    selectedFields <- selectedFields(humdrumR)
    tokenTable <- pullPrintable(humdrumR, unique(c(structureFields, selectedFields)),
                                dataTypes = dataTypes, null = 'charNA2dot',
                                useToken = FALSE, collapse = FALSE) 
   
    
    setcolorder(tokenTable, unique(c(structureFields, selectedFields)))
    
    lastPiece <- max(tokenTable$Piece)
    Filenames <- tokenTable[ , unique(Filename)]
    if (!'Filename' %in% selectedFields) tokenTable[ , Filename := NULL]
    if (!'Path' %in% selectedFields && all(tokenTable$Path == 0, na.rm = TRUE)) tokenTable[, Path := NULL]
    if (!'Stop' %in% selectedFields && all(tokenTable$Stop == 1, na.rm = TRUE)) tokenTable[, Stop := NULL]
    

    tokmat <- do.call('cbind', as.list(tokenTable))
    tokmat[is.na(tokmat)] <- '<NA>'
    
    # syntax highlighting prep
    types <- fields(humdrumR)[, setNames(Type, Name)[colnames(tokenTable)]]
    types <- c(Data = 'D', Interpretation = 'I', Structure = 'N', 'Reference' = 'G', Formal = 'I')[types]
    types[colnames(tokenTable) %in% selectedFields & types == 'N'] <- 'n' # don't italicize structural fields that are selected
    syntax <- col(tokenTable)
    syntax[] <- types[syntax]
    syntax[tokmat == '.' | is.na(tokmat)] <- 'd'
    
    
    # add header/footer
    tokmat <- rbind(names(tokenTable), tokmat, names(tokenTable))
    # syntax <- rbind(syntax[1, ], syntax, syntax[1, ])
    syntax <- rbind('N', syntax, 'N')
    
    # output
    list(Tokmat = tokmat,  
         Piece = c(NA, tokenTable$Piece, NA), 
         Record = c(NA, tokenTable$Record, NA), 
         Filenames = Filenames,
         Global = logical(nrow(tokmat)), 
         Syntax = syntax)
    

    
}

tokmat_humdrum <- function(humdrumR, dataTypes = 'GLIMDd', censorEmptyRecords = Inf, null = c('charNA2dot', 'NA2dot', 'dot2NA', 'asis')) {

  # humdrumR <- printableSelectedField(humdrumR, dataTypes = dataTypes, null = null)
    
  tokmat <- as.matrix(humdrumR, dataTypes = union(c('S', 'E'), dataTypes), padPaths = 'corpus', padder = '')
  
  # removes "hanging stops" like "a . ." -> "a"
  # if (anyStops(humdrumR)) tokmat[] <- stringr::str_replace(tokmat, '( \\.)+$', '')
  #
  if (censorEmptyRecords < Inf) tokmat <- censorEmptySpace(tokmat, collapseNull = censorEmptyRecords)
  
  Filenames <- getHumtab(humdrumR)[ , unique(Filename)]
  Piece   <- gsub('\\..*$', '', rownames(tokmat))
  NRecord <- gsub('^[0-9]*\\.', '', rownames(tokmat))
  
  global <- stringr::str_detect(tokmat[ , 1], '^!!')
  
  
  # syntax highlighting
  syntax <- array(parseTokenType(tokmat, E = TRUE), dim = dim(tokmat))
  
  # add rownames (records)
  tokmat <- cbind(paste0(NRecord, ':  '), tokmat)
  syntax <- cbind('N', syntax)
  
  #output
  list(Tokmat = tokmat, 
       Piece = Piece, 
       Record = NRecord, 
       Filenames = Filenames, 
       Global = global, 
       Syntax = syntax)
  
}

print_tokmat <- function(parsed, Nmorefiles = 0, maxRecordsPerFile, maxTokenLength,
                         screenWidth = options('width')$width - 10, showCensorship = TRUE, syntaxHighlight = TRUE) {
   
    tokmat <- parsed$Tokmat
    Record <- parsed$Record
    Piece <- parsed$Piece
    global <- parsed$Global
    Filenames <- parsed$Filenames
    syntax <- parsed$Syntax
    
    ## censor lines beyond maxRecordsPerFile

    # 
    uniqRec <- tapply_inplace(Record, Piece, seq_along)
    censored <- ifelse(length(unique(Piece)) == 1L | Piece != max(Piece, na.rm = TRUE),
                       uniqRec >  maxRecordsPerFile,
                       uniqRec <= (max(uniqRec[Piece == max(Piece, na.rm = TRUE)], na.rm = TRUE) - maxRecordsPerFile)) & !is.na(Piece)
    tokmat <- tokmat[!censored, , drop = FALSE]
    syntax <- syntax[!censored, , drop = FALSE]
    global <- global[!censored]
    
    
    
    ## Trim and align columns, and collopse to lines
    tokmat[!global, ] <- trimTokens(tokmat[!global, , drop = FALSE], maxTokenLength = maxTokenLength)
    
    lines <- padColumns(tokmat, global, maxTokenLength, screenWidth, if (syntaxHighlight) syntax)
    
    starMessage <- attr(lines, 'message')
  
    # put in Piece indicators
    maxwidth <- min(screenWidth, sum(attr(lines, 'trueColWidth')))
    firsts <- tapply(seq_along(lines), Piece[!censored], min)
    lasts <- tapply(seq_along(lines), Piece[!censored], max)
    
    if (showCensorship) {
        # records of first and last non-censored lines of each file
        
        #  censored ranges (if any)
        ranges <- tapply(Record[censored], factor(Piece)[censored], 
                         \(nr) {
                             if (length(nr) > 1L) paste0(nr[1], '-', nr[length(nr)], ':') else paste0(nr[1], ':')
                         })
        anycensored <- !is.na(ranges)
        ranges <- ranges[anycensored]
        # ranges[is.na(ranges)] <- ":"
        # align : (colon)
        if (length(ranges)) {
            line_colon <- stringr::str_locate(lines, ':')[ , 'start'] - 7L
            range_colon <- stringr::str_locate(ranges, ':')[ , 'start']
            largest_colon <- max(line_colon, range_colon)
            lines <- paste0(strrep(' ', largest_colon - line_colon), lines)
            ranges <- paste0(strrep(' ', largest_colon - range_colon), ranges)
        }
        
        # 
        # 
        
        
        ranges <- stringr::str_pad(ranges, width = maxwidth, pad = ':', side = 'right')
        if (syntaxHighlight) ranges <- textstyle(ranges, style = 'italic')
        ranges <- paste0('\n', ranges)
        
        if (any(anycensored)) lines[lasts[-length(lasts[anycensored])]] <- paste0(lines[lasts[-length(lasts[anycensored])]], ranges[-length(ranges)])
        
        
        if (length(unique(Piece)) > 1L && tail(ranges, 1) != '') {
            lines[tail(firsts, 1)] <- paste0(gsub('^\n', '', tail(ranges, 1)), '\n', lines[tail(firsts, 1)])
        }
        
        # put filenames in
     
    }
    lines[firsts] <- paste0(stringr::str_pad(paste0(' vvv ', Filenames, ' vvv '), width = maxwidth, pad = '#', side = 'both'), '\n', lines[firsts])
    lines[lasts] <- paste0(lines[lasts], '\n', stringr::str_pad(paste0(' ^^^ ', Filenames, ' ^^^ '), width = maxwidth, pad = '#', side = 'both'))
    
    # if any lines have been censored due to screen size, put message at the end
    if (!is.null(starMessage)) {
        lines[length(lines)] <- paste0(lines[length(lines)], '\n', smartPadWrap(starMessage, maxwidth + 1L))
    }
    
    ##
    if (Nmorefiles > 0L) {
        
        message <- c('',
                     paste0('\t\t', .glue("({num2print(Nmorefiles)} <more pieces|other piece>...)", ifelse = Nmorefiles > 1L)),
                     '')
        lines <- append(lines, message, after = tail(firsts, 1) - 1L)
    }
    
    cat(lines, sep = '\n')
    
}


print_score <- function(humdrumR, maxRecordsPerFile) {
    selectedFields <- selectedFields(humdrumR)
    
  humdrumR <- printableSelectedField(humdrumR, dataTypes = 'GLIMDd', null = 'NA2dot')
    
  lines <- as.lines(humdrumR[1])
  
  toHNP(lines, .glue("Viewing the '{paste(selectedFields, collapse = '/')}' <fields|field>",
                     "using the PLUGIN.", ifelse = length(selectedFields) > 1L))
  
  invisible(NULL)
}


censorEmptySpace <- function(tokmat, collapseNull = 10L) {
    if (nrow(tokmat) < 50) return(tokmat)
    null <- apply(matrix(grepl('^\\.( \\.)*$', tokmat) | grepl('^=', tokmat), nrow = nrow(tokmat)), 1, all, na.rm = TRUE)
    
    chunks <- segments(!null)
    
    # newRN <- unlist(tapply(rownames(tokmat), chunks, \(x) if (length(x) <= collapseNull) x else c(x[1], paste0(x[2], '-', tail(x, 1)))))
    
    tokmat <- tapply(seq_len(nrow(tokmat)), chunks, simplify = FALSE, 
                                   \(i) {
                                       nbars <- sum(grepl('^=', tokmat[i, 1]))
                                       
                                       if (nbars == 1 || length(i) <= collapseNull) return(tokmat[i, , drop = FALSE])
                                       
                                       fill <- if (nbars > 0L) {
                                           bars <- tokmat[i, , drop = FALSE][grepl('^=', tokmat[i, 1]), 1]
                                           barnums <- stringr::str_extract(bars, '[0-9a-zA-Z]+')
                                           base <- strrep('=', length(bars))
                                           
                                           if (any(!is.na(barnums))) {
                                               barnums <- barnums[!is.na(barnums)]
                                               barnums <- paste(unique(c(barnums[1], tail(barnums, 1))), collapse = '-')
                                           } else {
                                               barnums <- ""
                                           }
                                           newRN <- paste(rownames(tokmat[i[c(2, length(i))], , drop = FALSE]), collapse = '-')
                                           
                                           paste0(base, barnums)
                                           
                                       } else {
                                           newRN <- paste(rownames(tokmat[i[c(2, length(i))], ]), collapse = '-')
                                           
                                           strrep('.', length(i) - 1)
                                       }
                                       newRN <- c(rownames(tokmat)[i[1]], newRN)
                                       # rbind(tokmat[i[1], , drop = FALSE], paste0('(', fill, ')'))
                                       tokmat <- rbind(tokmat[i[1], , drop = FALSE], fill)
                                       rownames(tokmat) <- newRN
                                       
                                       tokmat
                                       
                     })
    tokmat <- do.call('rbind', tokmat)
    rownames(tokmat) <- stringr::str_replace(rownames(tokmat), '-[0-9]+\\.', '-') # replace redundant fileNumber
    
    tokmat
}

padColumns <- function(tokmat, global, maxTokenLength, screenWidth = options('width')$width - 10L, syntax) {
    # This function takes a token matrix
    # and pads each token with the appropriate number of spaces
    # such that the lines will print as nicely aligned columns.
    # it also adds "***" where there are too many columns to fit on the screen.
    # Finally it collapses each row to a single line.

    tokmat[] <- stringr::str_replace(tokmat, '\t\t*', ' ')
    
    toklen <- nchar(tokmat)
    
    lenCol <- sapply(as.data.frame(toklen[!global, ]), max) + 2L
    # lenCol <- apply(toklen[!global, ], 2, max) + 2L
    
    if (sum(lenCol) < (screenWidth - 5L)) {
        # if there is extra space, fill it (up to maxTokenLength)
        lenCol[-1] <- pmin(lenCol[-1] + ((screenWidth - sum(lenCol)) %/% (length(lenCol) - 1L)), 
                           maxTokenLength)
    }
    screen <- cumsum(lenCol) <= screenWidth
    lenCol <- lenCol[screen]
    tokmat <- tokmat[ , screen, drop = FALSE]

    # do padding
    tokmat[!global,  ] <- padder(tokmat[!global, , drop = FALSE], lenCol)
    tokmat[global, 1L] <- padder(tokmat[global, 1L], lenCol[1]) # column 1 is record number!
    
    # colorize
   
    if (!is.null(syntax)) tokmat[] <- syntaxHighlight(tokmat, syntax)

    
    # collapse to lines
    tokmat[global, -1:-2L] <- ''
    lines <- do.call('paste0', as.data.frame(tokmat))
   

    longGlobal <- global & nchar(lines) > screenWidth
    longColumn <- !screen
    

    if (any(longColumn) || any(longGlobal)) {
        message <- if (any(!screen)) {
            lines[!global] <- paste0(lines[!global], '    ***')
            lines[ global] <- stringr::str_trunc(lines[global], width = sum(lenCol) + 7L, ellipsis = '***')
            
            paste0('(***', num2word(sum(!screen)), plural(sum(longColumn), ' spines/paths ' ,' spine/path '),  'not displayed due to screen size***)')
        } else {
            lines[ global] <- stringr::str_trunc(lines[global], width = screenWidth + 7L, ellipsis = '***')
            paste0('(***', num2word(sum(longGlobal)), ' global ', plural(sum(longGlobal), 'comments ' ,'comment '),  'truncated due to screen size***)')
        }
       
        attr(lines, 'message') <- message
        # lines[length(lines)] <- paste0(lines[length(lines)], '\n', message)
    } 
    
    
    attr(lines, 'trueColWidth') <- lenCol + 1L
    lines
    
}



showFields <-  function(humdrumR) {
          # This function is used to produce the human readable 
          # list fields used by print_humdrumR
          fields <- fields(humdrumR)[Type == 'Data' | Selected > 0 | GroupedBy == TRUE]

          ## prep for printing
          fields[ , Name := paste0(ifelse(Selected, '*', ' '), Name)]
          fields[ , Name := stringr::str_pad(Name, width = max(nchar(Name)), side = 'right')]
          # if (any(diff(fields$Selected) != 1)) fields[ , Name := paste0(Selected, Name)]
          
          fields[ , Print := paste0(Name, ' :: ', Class)]
          
          ## Print fields
          cat('\n')
          fields[(Type == 'Data' | Selected > 0L) & Type != 'Grouping',
                  { cat('  ', Type[1], 'fields:', '\n\t        ')
                    cat(Print, sep = '\n\t        ')
                    cat('\n')
                            }, 
                  by = Type]
          
          # grouping fields
          groupFields <- fields[GroupedBy == TRUE]
          
          if (nrow(groupFields)) {
              ngroups <- nrow(unique(getHumtab(humdrumR, 'D')[ , gsub('[ *]*', '', groupFields$Name), with = FALSE]))
              
              groupFields[ ,
                           { cat('   Grouping fields: (', num2print(ngroups), plural(ngroups, ' groups', ' only one "group"...'), ')\n\t        ', sep = '')
                               cat(Print, sep = '\n\t        ')
                               cat('\n')
                           }]
          }
          
          # cat('\t\tFields: ', paste(fieldprint, collapse = '\n\t\t        '), '\n', sep = '')
          
          invisible(fields)
}


showWindows <- function(humdrumR) {
    windowFrame <- humdrumR@Context
    
    if (nrow(windowFrame)) {
        overlap <- any(windowFrame$Depth > 1L)
        
        cat('   With ', num2print(nrow(windowFrame)),
        if (overlap) ' (overlapping)', ' contextual windows:\n', 
        sep = '')
        
        lengths <- windowFrame[ , lengths(Indices)]
        quants <- round(c(Shortest = min(lengths), Median = median(lengths), Longest = max(lengths)))
        if (length(unique(quants)) == 1L) {
            cat("\t\tAll windows length ==", quants[1],'\n')
        } else {
            quants <- setNames(unique(quants), tapply(names(quants), quants, paste, collapse = '/'))
            cat(paste0('\t\t', 
                        str_pad(paste0(names(quants), ' '), 
                                max(nchar(names(quants)) + 1L)), 
                        'length == ', 
                       quants), 
                 sep = '\n')
        }
    }
}
Computational-Cognitive-Musicology-Lab/humdrumR documentation built on Oct. 22, 2024, 9:28 a.m.