R/SamHeader.R

Defines functions SamHeader parseSamHeaderLines SamHeader.character SamHeader.Sam as.data.frame.SamHeader

Documented in as.data.frame.SamHeader parseSamHeaderLines SamHeader SamHeader.character SamHeader.Sam

# samHeader.R - Construct and work with the header from a *.sam file

#' Return a SamHeader object
#'
#' S3 method that returns an S3 SamHeader objet.
#'
#' @section Internal implementation:
#'   A \code{SamHeader} object is supposed to be
#'   opaque to the user, with internals accesible via \code{SamSource(x)},
#'   \code{as.data.frame(x)}, and \code{areHeaderTagsParsed(x)}.
#'
#'   Currently this object is structured as a \code{list} with two elements:
#'   \code{header} which is the header data frame and
#'   \code{areHeaderTagsParsed}, which is \code{TRUE} when the record sub-tags
#'   are parsed parsed and \code{FALSE} otherwise. Currently this is always
#'   false. The object also has an attribute, \code{source}, which is the source
#'   object. This structure may change on a patch release with no notice.
#'
#' @param x For \code{SamHeader(x)}, this is the object to extract or build a
#'   sam header object from. It can be a character vector of unparsed header
#'   lines or a \code{Sam} object. For \code{as.data.frame(x)}, it can only be a
#'   \code{SamHeader} object.
#'
#' @param source The \code{SamSource} object to associate with a header. If
#'   \code{x} is a \code{Sam} object, it will use that objects \code{SamSource},
#'   otherwise it will use a default \code{SamSource()}
#'
#' @param ... Required for S3 object method implementation. Not currently used.
#'
#' @return For \code{SamHeader()}, a \code{SamHeader} object.
#'
#'   For \code{as.data.frame(SamHeader)}, a data frame as generated by
#'   \code{\link{parseSamHeaderLines}},
#'
#' @seealso Sam, SamSource, as.data.frame.SamSource
#'
#' @export
SamHeader <- function(x, ...) {
	UseMethod("SamHeader")
}

#' Parse strings as sam file header lines
#'
#' @param x Vector of string, each a sam file header
#'
#' @return A SamHeader data frame. Currently this has only two columns, the
#'   first giving the tag name, the second the original full line. Not the final
#'   version of this.
#'
#' @export
parseSamHeaderLines <- function( x ) {
	tag= substr(trimws(x), 2, 3)
	if (any(substr( x, 1, 1 ) != "@")) {
		stop( "Header lines must begin with \"@\"" )
	}
	if (any(substr( tag, 2, 2 ) == ":")) {
		warning( "Possible use of one character tags in headers" )
	}
	data.frame(
		tag=tag,
		record = trimws(sub("^@..\\t", "", x)),
		row.names= 1:length(x),
		stringsAsFactors = FALSE
	)
}

#' @rdname SamHeader
#' @export
SamHeader.character <- function( x, source= SamSource(NULL), ... ) {
	return( structure(
		class= "SamHeader", source= source,
		list( header= parseSamHeaderLines(x), areHeaderTagsParsed= FALSE )
	))
}

#' @rdname SamHeader
#' @export
SamHeader.Sam <- function( x, ... ) {

	return( structure(
		class= "SamHeader", source= attr(x, "source"),
		list( header= x$header, areHeaderTagsParsed= x$areHeaderTagsParsed )
	))
}

#' @rdname  SamHeader
#' @export
as.data.frame.SamHeader <- function(x, ...) {
	return(x$header)
}
jefferys/SamSeq documentation built on Oct. 25, 2019, 8:11 a.m.