R/validity-methods.R

################################################################################
# Validity methods:
#
# These are delicate, because they are effectively at the S4 infrastructure
# level, in between "new" and the constructor. Some of the issues that might
# otherwise go here can be also be dealt with in the constructors, if needed.
################################################################################
validbiom = function(object){
	
	# All the required top-level keys are present
	biomreqdkeys = c("id", "format", "format_url", "type", "generated_by", "date",
									 "rows", "columns", "matrix_type", "matrix_element_type", "shape", "data")
  
	if( !all(biomreqdkeys %in% names(object)) ){
		missingkeysmessage = paste("", 
					"Not all required top-level keys are present in biom-object.",
					"Required keys are:", paste0(biomreqdkeys, collapse="\n"),
					sep="\n")
		return(missingkeysmessage)
	}

	met = matrix_element_type(object)
	if(!identical(length(met), 1L)){
		return("biom matrix_element_type field should have only 1 element")
	}
	if( !met %in% c("int", "float", "unicode") ){
		return("biom matrix_element_type value is unsupported.")
	}
	
	# Basic biom_shape value validity.
	bshape = biom_shape(object)
	if(!inherits(bshape, "integer")){
		return("problem with biom shape value type")
	}
	if(!identical(length(bshape), 2L)){
		return("problem with biom shape value length")
	}
	if(any(bshape < 0)){
		return("problem with biom shape value: negative value")
	}
	
	# Matrix shape and number of row/col elements matches
	if( ncol(object) > length(object$columns) ){
		return("shape field specifies more cols than are present in metadata")
	}
	if( nrow(object) > length(object$rows) ){
		return("shape field specifies more rows than are present in metadata")
	}
	if( ncol(object) < length(object$columns) ){
		return("more metadata cols than specified by shape field")
	}
	if( nrow(object) < length(object$rows) ){
		return("more metadata rows than specified by shape field")
	}	
	
	# The type field has an acceptable value
	biomtypevals = c("OTU table",
		"Pathway table",
		"Function table",
		"Ortholog table",
		"Gene table",
		"Metabolite table",
		"Taxon table"
	)
	if( !object$type %in% biomtypevals ){
		return("type field has unsupported value")
	}
	
	# The matrix_type has an acceptable value
	biommatrixtypes = c("sparse", "dense")
	if( !object$matrix_type %in% biommatrixtypes ){
		return("matrix_type field has unsupported value")
	}
	
	# The matrix_element_type has an acceptable value
	biommatrixelemtypes = c("int", "float", "unicode")
	if( !object$matrix_element_type %in% biommatrixelemtypes ){
		return("matrix_element_type field has unsupported value")
	}	
	
	# If sparse, all data fields should have length of 3 (row, col, val)
	if( identical(object$matrix_type, "sparse") ){
		if( !all(sapply(object$data, length)==3L) ){
			return("Some data fields for this sparse biom format do not have 3 elements")
		}
	}
	# If dense, data fields should have length equal to ncol
	if( identical(object$matrix_type, "dense") ){
		if( !all(sapply(object$data, length)==ncol(object)) ){
			return(paste("Some data fields for this dense biom format", 
									 "do not have the expected number columns:", ncol(object), sep=" "))
		}
	}	
	
	# metadata ids -- rownames and colnames -- must be unique
	dupr = duplicated(rownames(object))
	dupc = duplicated(colnames(object))
	if( any(dupr) ){
		return(paste("The following row ids were duplicated: \n",
					paste0(rownames(object)[dupr], collapse="\n"), sep=""))
	}
	if( any(dupc) ){
		return(paste("The following col ids were duplicated: \n",
					paste0(colnames(object)[dupc], collapse="\n"), sep=""))
	}	
	
	# If we get all the way here without returning a string,
	# then consider it a valid object.
	# In which case should return TRUE.
	return(TRUE)
}
################################################################################
## assign the function as the validity method for the biom-class
setValidity("biom", validbiom)
################################################################################
joey711/biomformat0 documentation built on May 19, 2019, 3:01 p.m.