Nothing
#' A matrix-specific wrapper of \code{\link{updateValues}}
#' @description A matrix-specific wrapper of \code{\link{updateValues}} which can take in a matrix, data.frame, or file path, directly.
#' @inheritParams retrieve
#' @inheritParams updateValues
#' @param attributeName String naming the matrix attribute for which to upload data.
#' @param matrix A matrix or dataframe containing the data to upload to magma.
#'
#' Alternatively, a String specifying the file path of a file containing such data.
#'
#' No matter the provision method, colnames must be record identifiers, and rownames should match the values of 'options' associated with the target 'attribute'.
#' Check the 'See Also' section below for how to determine the needed 'options'.
#' @param separator String indicating the field separator to use if providing \code{matrix} as a file path.
#' Default = \code{","}.
#' @param revisions.only Logical. For troubleshooting purposes, when set to \code{TRUE}, no data will be sent to magma.
#' Instead, the list structure that would have been passed to the \code{revisions} input of \code{\link{updateValues}} is returned as output.
#' @return None directly.
#'
#' The function sends data to magma, and the only outputs are information reported via the console.
#' @details This function utilizes the magma/update function, documented here \url{https://mountetna.github.io/magma.html#update},
#' to upload data to a matrix attribute (named \code{attributeName}) of the \code{modelName} model of \code{projectName} project.
#'
#' \code{matrix} data are provided either as a matrix, dataframe, or file path which points toward such data.
#' If given as a file path, the \code{separator} input can be used to adjust for whether the file is a csv (the default, \code{separator = ","}), or tsv, \code{separator = "\t"}, or other format.
#'
#' Data is then validated by ensuring that all row names are among the valid 'options' of the target attribute (See the See Also section below for a note on how to explore these options yourself.).
#' Rows are reordered to be in the same order as these 'options'.
#'
#' For any missing 'options', rows of NAs are added.
#'
#' The data is then transformed and passed along to \code{\link{updateValues}}.
#'
#' The \code{updateValues()} function will summarize records to be updated and allow the user to double-check this information before proceeding.
#'
#' This user-prompt step can be bypassed (useful when running in a non-interactive way) by setting \code{auto.proceed = TRUE}, but NOTE:
#' It is a good idea to always check carefully before proceeding, if possible.
#' Data can be overwritten with NAs or zeros or the like, but improperly named records cannot be easily removed.
#'
#' @seealso
#' \code{\link{updateFromDF}} for a more flexible function for uploading multiple attributes-worth of (non-matrix) data at a time.
#'
#' \code{\link{updateValues}} for the more direct replica of \code{magma/update} which is even more flexible than \code{updateFromDF}, though a bit more complicated to use.
#'
#' \code{\link{retrieveTemplate}}, then check the \code{<output>$models$<modelName>$template$attributes$<attributeName>$validation$value} to explore the rownames that your matrix should have.
#'
#' \url{https://mountetna.github.io/magma.html#update} for documentation of the underlying \code{magma/update} function.
#'
#' @export
#' @examples
#'
#' if (interactive()) {
#' # First, we use magmaRset to create an object which will tell other magmaR
#' # functions our authentication token (as well as some other optional bits).
#' # When run in this way, it will ask you to give your token.
#' magma <- magmaRset()
#'
#' ### Note that you likely do not have write-permissions for the 'example'
#' # project, so this code can be expected to give an authorization error.
#'
#' ### Retrieve some data from magma, then update that same data.
#' mat <- retrieveMatrix(magma, "example", "rna_seq", "all", "gene_tpm")
#'
#' updateMatrix(
#' target = magma,
#' projectName = "example",
#' modelName = "rna_seq",
#' attributeName = "gene_tpm",
#' matrix = mat)
#' }
#'
#' @importFrom utils read.csv
updateMatrix <- function(
    target,
    projectName,
    modelName,
    attributeName,
    matrix,
    autolink = FALSE,
    dryRun = FALSE,
    separator = ",",
    auto.proceed = FALSE,
    revisions.only = FALSE,
    template = NULL,
    ...) {

    ### Read in matrix if passed as a string (file-location)
    if (is.character(matrix)) {
        matrix <- read.csv(
            matrix, sep = separator, row.names = 1, check.names = FALSE)
        # The first field is consumed as row names, so a file that parses to
        # zero data columns typically means the separator did not split lines.
        if (ncol(matrix) < 1) {
            stop("Parsing error. Is 'separator' correct?")
        }
    }

    # Column names become the record identifiers of the revisions, so they
    # must exist.
    if (is.null(colnames(matrix))) {
        stop("Colnames of matrix should be record names.")
    }

    ### Validate rownames (genes / value names)
    # Obtain 'validation' / rowname options, fetching the project template
    # only when the caller did not supply one.
    if (is.null(template)) {
        template <- retrieveTemplate(target, projectName)
    }
    row_options <-
        template$models[[modelName]]$template$attributes[[attributeName]]$validation$value

    if (length(row_options) < 1) {
        cat("WARNING: Target attribute does not have 'validation' info: no feature-names validation can be performed.\n\n")
    } else {
        # A matrix without rownames would pass the %in% check below
        # vacuously (all(logical(0)) is TRUE) and then be replaced entirely
        # by NA rows, so reject it explicitly.
        if (is.null(rownames(matrix))) {
            stop("Validation error: 'matrix' has no rownames, so its rows cannot be matched to the valid options for ", attributeName, ".")
        }
        if (!all(rownames(matrix) %in% row_options)) {
            stop("Validation error: rownames of 'matrix' are not valid options for ", attributeName,".")
        }
        # Pad and reorder in one step: match() yields NA for options absent
        # from the data, and indexing rows by NA produces an all-NA row for
        # both matrices and data.frames.
        option_rows <- match(row_options, rownames(matrix))
        matrix <- matrix[option_rows, , drop = FALSE]
        padded_names <- row_options
        if (is.data.frame(matrix)) {
            # data.frames require unique row names
            padded_names <- make.unique(as.character(padded_names))
        }
        rownames(matrix) <- padded_names
    }

    ### Transform into a nested list: model -> record -> attribute -> values
    # Iterate over column indices and attach record names afterwards, so
    # that names containing "-" are not converted to ".".
    rec_att_vals <- lapply(seq_len(ncol(matrix)), function(col_index) {
        values <- list(matrix[, col_index])
        names(values) <- attributeName
        values
    })
    names(rec_att_vals) <- colnames(matrix)

    revs <- list(rec_att_vals)
    # Because list(modelName = ...) would not substitute the value of modelName
    names(revs) <- modelName

    # Troubleshooting mode: hand back the revisions without contacting magma
    if (revisions.only) {
        return(revs)
    }

    ### Pass to updateValues() to:
    # Summarize to user
    # Check with the user before proceeding
    # Perform upload
    updateValues(
        target = target,
        projectName = projectName,
        revisions = revs,
        auto.proceed = auto.proceed,
        autolink = autolink,
        dryRun = dryRun,
        template = template,
        ...)
}
#' Easier to use wrapper of \code{\link{updateValues}}
#' @description A wrapper of \code{\link{updateValues}} which takes in updates in the form of a dataframe, csv, or tsv, with rows = records and columns = attributes.
#' @inheritParams retrieve
#' @inheritParams updateMatrix
#' @inheritParams updateValues
#' @param df A dataframe, containing the data to upload to magma.
#'
#' Alternatively, a String specifying the file path of a file containing such data.
#'
#' See below for additional formatting details.
#' @param separator String indicating the field separator to use if providing \code{df} as a file path.
#' Default = \code{","}.
#' Use \code{"\t"} for tsvs.
#' @param show.df Logical which sets whether the \code{df}-data should be printed out.
#' @param table.method "replace" or "append".
#' Sets the methodology used for building the \code{revisions} to request for table-type model updates:
#' \itemize{
#' \item "append": Add the currently defined records, attaching them to parents defined in the \code{df}.
#' \item "replace": Add the currently defined records, attaching them to parents defined in the \code{df}, AND unlink / remove all current records, of \code{modelName}, attached to parent records defined in the \code{df}.
#' }
#' @return None directly.
#'
#' The function sends data to magma, and the only outputs are information reported via the console.
#' @details This function provides a simple method for updating multiple attributes of multiple magma records provided as a rectangular dataframe, or equivalent file structure.
#' It utilizes the magma/update function, documented here \url{https://mountetna.github.io/magma.html#update},
#' to upload data after converting to the format required by that function.
#'
#' The user-indicated \code{df} is read in, presented to the user for inspection, then transformed to the necessary format and passed along to \code{\link{updateValues}}.
#'
#' The \code{updateValues()} function will then summarize records to be updated and allow the user to double-check this information before proceeding.
#'
#' This user-prompt step can be bypassed (useful when running in a non-interactive way) by setting \code{auto.proceed = TRUE}, but NOTE:
#' It is a good idea to always check carefully before proceeding, if possible.
#' Data can be overwritten with NAs or zeros or the like, or disconnected from parent records, but improperly named records cannot be easily removed.
#'
#' For "standard" models with explicit identifiers, the function targets the \code{df}'s row-indicated records and column-indicated attributes of the \code{modelName} model of \code{projectName} project.
#' In such cases, the first column of \code{df} must contain the identifiers of the records you wish to update.
#'
#' For table-type models which do not have explicit identifiers, the function creates records per each row of the given \code{df} with the requested values filled in for column-indicated attributes of the \code{modelName} model of \code{projectName} project, and attaches these records to the indicated parent records.
#' In such cases, a column named as the parent model must exist in \code{df} to provide the parent identifiers of all requested new data.
#' In such cases, \code{table.method} must also be given as either \code{"append"} or \code{"replace"}.
#'
#' \code{df} can be provided either as a data.frame directly, or as a file path pointing to a file containing such data.
#' If given as a file path, the \code{separator} input can be used to adjust for whether the file is a csv (the default, \code{separator = ","}), or tsv, \code{separator = "\t"}, or other format.
#'
#' The \code{df} data structure when targeting 'standard' models:
#' \itemize{
#' \item Rows = records, with the first column indicating the record identifiers.
#' \item Columns = represent the data desired to be given for each attribute.
#' \item Column Names (or the top row when providing a file) = attribute names.
#' Except for the first column (ignored as this column's data are used as identifiers), all column names must be valid attribute names of the target \code{modelName}.
#' }
#'
#' The \code{df} data structure when targeting table-type models:
#' \itemize{
#' \item Rows = records, but no identifiers are needed.
#' \item Columns = represent the data desired to be given for each attribute.
#' \item Column Names (or the top row when providing a file) = attribute names.
#' At least one column must be named after the parent model and must represent parent model identifiers.
#' }
#'
#' @section Use Case. Using this function to change records' identifiers:
#'
#' To do so, provide a file or dataframe where
#' 1) The first column, named something random (its name will be ignored), contains current identifiers;
#' 2) Some other column, named as the attribute which is treated as the identifier for the model, contains the new identifiers
#'
#' To determine the identifier attribute's name, you can use \code{\link{retrieveTemplate}}:
#'
#' \code{retrieveTemplate(<target>, <projectName>)$models$<modelName>$template$identifier}.
#'
#'
#' @seealso
#' \code{\link{updateMatrix}} for uploading matrix data
#'
#' \code{\link{updateValues}} for a more direct replica of \code{magma/update} which is more flexible, though a bit more complicated to use.
#'
#' \url{https://mountetna.github.io/magma.html#update} for documentation of the underlying \code{magma/update} function.
#' @export
#' @examples
#'
#' if (interactive()) {
#' # First, we use magmaRset to create an object which will tell other magmaR
#' # functions our authentication token (as well as some other optional bits).
#' # When run in this way, it will ask you to give your token.
#' magma <- magmaRset()
#'
#' ### Note that you likely do not have write-permissions for the 'example'
#' # project, so this code can be expected to give an authorization error,
#' # yet still provides a good example of how to structure your code.
#'
#' ### Case 1: A 'standard' model which has an identifier:
#' # Retrieving some example data from magma to use as our update
#' df <- retrieve(
#' magma, projectName = "example", modelName = "rna_seq",
#' recordNames = "all",
#' attributeNames = c("tube_name", "biospecimen", "cell_number")
#' )
#' df
#' # Keys to note:
#' # - the first column of this df holds the identifiers of all records we
#' # wish to target. (The fact that this column is properly named as
#' # 'tube_name' does not matter.)
#' # - all subsequent columns hold the new values and are named as the
#' # attributes we wish to update.
#'
#' # To update values of the 'standard'-type "rna_seq" model
#' updateFromDF(
#' target = magma,
#' projectName = "example",
#' modelName = "rna_seq",
#' df = df)
#'
#' ### Case 2: A 'table' model which has no identifiers:
#' # Retrieving some example data from magma to use as our update
#' table <- retrieve(
#' magma, projectName = "example", modelName = "demographic",
#' recordNames = "all",
#' attributeNames = c("subject", "name", "value")
#' )
#' table
#' # Keys to note:
#' # - the first column of this df holds the parent record identifiers we
#' # wish to target. The fact that this column is properly named as
#' # 'subject' does matter, but this column does not need to have
#' # been the first column.
#' # - all subsequent columns hold the new values and are named as the
#' # attributes we wish to update.
#'
#' ## Key decision:
#' # For table models, you must additionally decide whether to 'append' to
#' # (meaning to add them in addition to all previous records which
#' # attach to the same parents), or 'replace' (meaning clear all previous
#' # records attaching to the same parents this update hits, so that only
#' # the values within this update will exist) current values of the
#' # target model with your update's values.
#' # This choice is given to the 'table.method' input.
#'
#' # To update values of the 'table'-type "demographic" model
#' updateFromDF(
#' target = magma,
#' projectName = "example",
#' modelName = "demographic",
#' table.method = "replace",
#' df = table)
#'
#' }
#'
#' @importFrom utils read.csv
#' @importFrom stats setNames
updateFromDF <- function(
    target,
    projectName,
    modelName,
    df,
    table.method = NULL,
    autolink = FALSE,
    dryRun = FALSE,
    separator = ",",
    show.df = TRUE,
    auto.proceed = FALSE,
    revisions.only = FALSE,
    template = NULL,
    ...) {

    ### Read in df if passed as a string (file-location)
    if (is.character(df)) {
        df <- read.csv(
            df, sep = separator, check.names = FALSE, header = TRUE)
        # A file that parses to fewer than two columns usually means the
        # separator did not split the lines.
        if (ncol(df) < 2) {
            stop("Parsing error. Is 'separator' correct?")
        }
    }

    # Let the user inspect what will be converted into revisions
    if (show.df) {
        cat("Data recieved:\n")
        print(df)
    }

    # Fetch the project template only when the caller did not supply one
    if (is.null(template)) {
        template <- retrieveTemplate(target, projectName)
    }
    isTable <- .is_table_model(target, projectName, modelName, template)

    ### Validation & convert revisions
    if (isTable) {
        ### Targeting table models, no identifiers and special replace case
        if (is.null(table.method)) {
            stop("'", modelName, "' model of '", projectName, "' project is a table-type model, but 'table.method' input is not set. Set it to \"append\" or \"replace\".")
        }
        # Require a single string so that a longer vector gets this clear
        # message rather than a failure of the 'if' condition itself.
        valid_method <- is.character(table.method) &&
            length(table.method) == 1 &&
            table.method %in% c("append", "replace")
        if (!valid_method) {
            stop("'table.method' must be \"append\" or \"replace\", but is: \"", table.method, "\".")
        }

        parentModelName <- template$models[[modelName]]$template$parent
        if (!parentModelName %in% names(df)) {
            stop("Parent attribute, ", parentModelName, ", must be given when updating records of table-type models.")
        }

        # Prep revisions
        # Table records have no identifiers of their own, so temporary ids
        # are prepended as the first column.
        # NOTE(review): presumably .df_to_revisions() treats the first column
        # as record identifiers -- confirm against its definition.
        tempIDs <- paste0("::temp-id-", seq_len(nrow(df)))
        df <- cbind('__IDS__' = tempIDs, df)
        revs <- .df_to_revisions(df, modelName = modelName)

        # Add to revisions for 'replace' method: each parent record gets the
        # full set of temp ids attached to it in this update.
        if (table.method == "replace") {
            parents <- unique(df[[parentModelName]])
            parent_revs <- lapply(
                parents,
                function(this) {
                    # %in% rather than ==, so that an NA in the parent column
                    # cannot inject NA ids into every other parent's list.
                    setNames(
                        list(tempIDs[df[[parentModelName]] %in% this]),
                        modelName)
                }
            )
            names(parent_revs) <- parents
            revs[[parentModelName]] <- parent_revs
        }
    } else {
        ### Targeting "standard" models, with identifiers
        # Check the column count before touching df[,1], so a data.frame with
        # too few columns gets this message rather than an indexing error.
        if (ncol(df) < 2) {
            stop("df has one column, but at least 2 are required.")
        }
        # Validate that 1st column, which should be IDs, is all unique values
        if (any(duplicated(df[,1]))) {
            stop("Values of 1st column (record identifiers) must be unique.")
        }

        # Prep revisions
        revs <- .df_to_revisions(df, modelName = modelName)
    }

    # Troubleshooting mode: hand back the revisions without contacting magma
    if (revisions.only) {
        return(revs)
    }

    ### Pass to updateValues() to:
    # Summarize to user
    # Check with the user before proceeding
    # Perform upload
    updateValues(
        target = target,
        projectName = projectName,
        revisions = revs,
        autolink = autolink,
        dryRun = dryRun,
        auto.proceed = auto.proceed,
        template = template,
        ...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.