R/subarray.R

Defines functions subarray mask unmask factor.to.character

########################
######  subarray  ######
########################

#subarray: returns a subarry, according to the given dimnames list
subarray <- 
  function(arr, dnames=NULL, useDimNames = FALSE, keepDimension = TRUE, 
           mask = NULL){
	MAX_DIM <- 4 #maximal dimension untill which the access to the array is 
               #realized directly (arr[dnames[[1]], dnames[[2]], ...] ) which is
               #faster. For higher dimensions, an index.matrix will be created.

#################### first checks for correct use of function ##################
  
  if(is.null(dnames) | (length(dnames) == 0)){
    warning("Since dnames is NULL or of length zero, ",
            "the whole array was returned")
    return(arr)
  }
  
  #checking for correct class of arr:
  if(!is.vector(arr) & !is.array(arr) & !is.data.frame(arr) & !is.matrix(arr))
    stop("The class of arr can't be interpreted!")
  else if(!is.data.frame(arr)) arr <- as.array(arr)

  if(is.null(dimnames(arr)))
    stop("Make sure, that arr has dimnames at least in one dimension!")

  if(useDimNames){
    if(is.null(names(dimnames(arr))) | is.null(names(dnames))){
      stop("Please make sure, that the names of the dimensions of arr and the ",
           "names of the components of dnames exist. ",
           "Or set useDimNames to FALSE!")
    }
  }
  
  if(length(dnames) > length(dim(arr))){
    stop("Your array has only ", length(dim(arr)), 
         " dimensions, whereas your list of dimension names has ", 
         length(dnames)," components!")
  }
    
######################## prepare arr and dnames for correct use ################
  #optionally mask arr, in case some dimnames exist more than once:
  if(!is.null(mask)){
   arr <- mask(arr, mask=mask)
  }
  
  ndim <- length(dim(arr))  #number of dimensions
  dimnamesNotExisting <- NULL
  dimnamesExisting <- NULL
	
  # first, add dimnames to the array, if they don't exist, and save, in which 
  # dimensions there are dimnames and in which not. 
  for(i in 1:ndim){
    if(is.null(dimnames(arr)[[i]])){
      dimnames(arr)[[i]] <- paste("X", i , 1:dim(arr)[i], sep="")
      dimnamesNotExisting <- c(dimnamesNotExisting, i)
    }else{
      dimnamesExisting <- c(dimnamesExisting, i)
    }
  }
  
  # now sort dnames according to arr. If components of dnames are NULL, it is 
  # supposed, that the whole dimension has to be chosen
  dnames.tmp <- list()

  compOfDnames <- 1:length(dnames)
  #first priority have named components - sort them first:
  if(useDimNames){
    for(i in 1:length(dnames)){
      if(is.null(dnames[[i]])) next
      if(!is.null(names(dnames)[i]) & (names(dnames)[i] != "")){
        find <- match(names(dnames)[i], names(dimnames(arr)))
        if(!is.na(find)){
          dnames.tmp[[find]] <- dnames[[i]]
          compOfDnames <- compOfDnames[-match(i, compOfDnames)]
          if(!is.na(match(find, dimnamesExisting))){
            dimnamesExisting <- dimnamesExisting[-match(find, dimnamesExisting)]
          }
          next
        }
        warning("Couldn't find dimension ", names(dnames)[i], 
                " in array! Try setting useDimNames to FALSE.")
      }
    }
  }
  
  for(i in compOfDnames){
    if(is.null(dnames[[i]])) next
    #if a component of list is at a right position, leave it there:
    if(!is.na(match(i, dimnamesExisting))){
      dnames.tmp[[i]] <- dnames[[i]]
      dimnamesExisting <- dimnamesExisting[-match(i, dimnamesExisting)]
      next
    }
    #if not, put it at the first dimension, where dimnames exist:
    if(length(dimnamesExisting) == 0){
      warning("The component ", i , " of dnames could not be used!")
      next
    }
    dnames.tmp[[dimnamesExisting[1]]] <- dnames[[i]]
    dimnamesExisting <- dimnamesExisting[-1]
  }
  
  dnames <- dnames.tmp
  length(dnames) <- ndim
  names(dnames) <- names(dimnames(arr))
  #if a component of dnames is null, take all dimnames of arr in this dimension
  for(i in 1:ndim){
    if(is.null(dnames[[i]]))
      dnames[[i]] <- dimnames(arr)[[i]]
    else{
      #to avoid problems with dimnames being numbers:
      dnames[[i]] <- as.character(dnames[[i]])
      #check if entries in dnames exist in arr:
      find <- match(dnames[[i]], dimnames(arr)[[i]])      
      # if one element of find is NA, then the respective dimname does not exist
      # in arr:
      for(j in 1:length(find)){
      	if(is.na(find[j]))
      		stop(dnames[[i]][j], " does not exist in dimension ", i, 
               " of the array! Make sure, dnames is correctly defined.")
      }
    }
  }
  
	#now, creating the dimension vector of the subarray:
	dim.vector <- NULL
	# in case of keepDimensions being FALSE, dimnamesNotExisting has to
	# be changed, which will be done, using dontKeep
  dontKeep <- rep(0, times=length(dimnamesNotExisting))  
  
	for(i in 1:ndim){
		dim.vector <- c(dim.vector, length(dnames[[i]]))
    # if keepDimension is FALSE, then we need to remove those dimensions from 
    # dimnamesNotExisting, which have only one element: 
    # (import for later removing of dimnames which have been added temporarily)
    if(!keepDimension & (dim.vector[i] == 1)){
      find <- which(dimnamesNotExisting > i)
      dontKeep[find] <- dontKeep[find] +1
    }
	}
  dimnamesNotExisting<- dimnamesNotExisting - dontKeep
######################## the core functionality ################################
 
	# now we need to write the corresponding content of the original array into a 
  # subarray. Untill dimension of MAX_DIM this is realized directly, 
  # from then it is realized using index matrices:
	if(ndim <= MAX_DIM){
		if(ndim == 1)
			subarray <- arr[ dnames[[1]] ]
		if(ndim == 2)
			subarray <- arr[ dnames[[1]] , dnames[[2]] ]
		if(ndim == 3)
			subarray <- arr[ dnames[[1]] , dnames[[2]] , dnames[[3]] ]
		if(ndim == 4)
			subarray <- arr[ dnames[[1]] , dnames[[2]] , dnames[[3]] , dnames[[4]] ]
    
		if(keepDimension & !is.data.frame(subarray))
      subarray <- array(subarray, dim = dim.vector, dimnames = dnames)
	}
	else{
    id.matrix <- create.index.matrix(dnames, arr)
    
    # if we dont want to keep Dimension, we need to have dimnames and dimension 
    # vectors, where all single entries are deleted:
    if(!keepDimension){
      dnames.tmp <- list()
      dim.vector.tmp <- vector()
      
      # Go through each dimension. if it has more than one element copy the 
      # corresponding dnames and dim.vector element to the 
      # temporary equivalences
      for(i in 1:ndim){
        if(length(dnames[[i]]) > 1){
          dnames.tmp[[length(dnames.tmp) + 1]] <- dnames[[i]]
          dim.vector.tmp <- c(dim.vector.tmp, dim.vector[i])
        }
      }
      dnames <- dnames.tmp
      dim.vector <- dim.vector.tmp
    }
		subarray <- array(arr[id.matrix], dim = dim.vector, dimnames = dnames)
	}

  #remove dimnames, where added:
  if(length(dimnamesNotExisting) != 0){
    for(i in 1:length(dimnamesNotExisting))
      dimnames(subarray)[[dimnamesNotExisting[i]]] <- vector()
  }
  #in case, mask was used, unmask the result:
	if(!is.null(mask)){
	  subarray <- unmask(subarray, mask=mask)
	}
	return(subarray)
}
############################## finish subarray #################################

  
############################
######	subarray<-		######
############################
	
#subarray<-: accesses a subarry, according to the given dimnames list

"subarray<-" <- function(arr, dnames, useDimNames = FALSE, mask = NULL, value){
	MAX_DIM <- 4 # maximal dimension untill which the access to the array is 
               # realized via character vectors
###################### first checks for correct use of function ################
  if(is.null(dnames) | (length(dnames) == 0)){
    stop("Dnames is NULL or of length zero!")
  }
  #checking for correct class of arr:
  if(!is.vector(arr) & !is.array(arr) & !is.data.frame(arr) & !is.matrix(arr))
    stop("The class of arr can't be interpreted!")
  else if(!is.data.frame(arr)){ arr <- as.array(arr)}
  # for data.frames, we need to make sure, that no column contains factors:
  else {
    arr <- factor.to.character(arr)
    if(is.data.frame(value)){
      value <- factor.to.character(value)
    }
  }
  #checking for correct class of value:
  if(!is.vector(value) & !is.array(value) & !is.data.frame(value) & 
    !is.matrix(value)){
    stop("The class of value can't be interpreted!")
  }
    
	# saving, if only one value has to be assigned (TRUE) or a whole subarray
  SINGLE_VALUE <- is.vector(value) & (length(value) == 1)
  if(!SINGLE_VALUE & !is.data.frame(value)){  value <- as.array(value)  }
  
  if(is.null(dimnames(arr))){
    stop("Make sure, that arr has dimnames at least in one dimension!")}
      
  if(useDimNames){
    if(is.null(names(dimnames(arr))) | is.null(names(dnames))){
      stop("Please make sure, that names of the dimensions of arr and names of",
           " the components of dnames exist. Or set useDimNames to FALSE!")
    }
  }
  
  if(length(dnames) > length(dim(arr))){
    stop("Your array has only ", length(dim(arr)), 
         " dimensions, whereas your list of dimension names has ", 
         length(dnames)," components!")
  }
  
################# prepare arr and dnames for correct use #######################
	#Optionally mask arr, in case some dimnames exist more than once:
	if(!is.null(mask)){
    arr <- mask(arr, mask=mask)
	}
	
  ndim <- length(dim(arr))  #number of dimensions
  dimnamesNotExisting <- NULL
  dimnamesExisting <- NULL
  
  # first, add dimnames to the array, if they don't exist, and save, in which 
  # dimensions there are dimnames, in which not and, if useDimNames is set, 
  # which dimensions are named:
  for(i in 1:ndim){
    if(is.null(dimnames(arr)[[i]])){
      dimnames(arr)[[i]] <- paste("X", i , 1:dim(arr)[i], sep="")
      dimnamesNotExisting <- c(dimnamesNotExisting, i)
    }else{
      dimnamesExisting <- c(dimnamesExisting, i)
    }
  }
  
  # now sort dnames according to arr. If components of dnames are NULL, 
  # it is supposed, that the whole dimension has to be chosen
  dnames.tmp <- list()
  
  compOfDnames <- 1:length(dnames)
  #first priority have named components - sort them first:
  if(useDimNames){
    for(i in 1:length(dnames)){
      if(is.null(dnames[[i]])) next
      if(!is.null(names(dnames)[i]) & (names(dnames)[i] != "")){
        find <- match(names(dnames)[i], names(dimnames(arr)))
        if(!is.na(find)){
          dnames.tmp[[find]] <- dnames[[i]]
          compOfDnames <- compOfDnames[-match(i, compOfDnames)]
          if(!is.na(match(find, dimnamesExisting))){
            dimnamesExisting <- dimnamesExisting[-match(find, dimnamesExisting)]
          }
          next
        }
        warning("Couldn't find dimension ", names(dnames)[i], 
                " in array! Try setting useDimNames to FALSE.")
      }
    }
  }
  
  for(i in compOfDnames){
    if(is.null(dnames[[i]])) next
    #if a component of list is at a right position, leave it there:
    if(!is.na(match(i, dimnamesExisting))){
      dnames.tmp[[i]] <- dnames[[i]]
      dimnamesExisting <- dimnamesExisting[-match(i, dimnamesExisting)]
      next
    }
    #if not, put it at the first dimension, where dimnames exist:
    if(length(dimnamesExisting) == 0){
      warning("The component ", i , " of dnames could not be used!")
      next
    }
    dnames.tmp[[dimnamesExisting[1]]] <- dnames[[i]]
    dimnamesExisting <- dimnamesExisting[-1]
  }
  dnames <- dnames.tmp
  length(dnames) <- ndim
  names(dnames) <- names(dimnames(arr))
  #if a component of dnames is null, take all dimnames of arr in this dimension
  for(i in 1:ndim){
    if(is.null(dnames[[i]]))
      dnames[[i]] <- dimnames(arr)[[i]]
    else{
      #to avoid problems with dimnames being numbers:
      dnames[[i]] <- as.character(dnames[[i]])
      #check if entries in dnames exist in arr:
      find <- match(dnames[[i]], dimnames(arr)[[i]])      
      # if one element of find is NA, then the respective dimname does not 
      # exist in arr:
      for(j in 1:length(find)){
      	if(is.na(find[j]))
      		stop(dnames[[i]][j], " does not exist in dimension ", i, 
               " of the array!  Make sure, dnames is correctly defined.")
      }
    }
  }
  
  # in case of value being an array, check, if the amount of data to write into
  # the subarray corresponds with the fields in the subarray:
  if(!SINGLE_VALUE){    
    # check, if the amount of data to write into the subarray corresponds with 
    # the fields in the subarray
    freeSpace <- prod(sapply(dnames, length))
    if(freeSpace != prod(dim(value)))
      stop("The space in the subarray (", freeSpace, 
           ") does not correspond with the amount of data in value (", 
           length(value),")!")    
  }
	
#########################  the core functionality  #############################
  
	# now we need to write the corresponding content of the original array into 
  # a subarray. Untill dimension of MAX_DIM this is realized directly, from then
  # it is realized using index matrices:
	if(ndim <= MAX_DIM){
		if(ndim == 1)
			arr[ dnames[[1]] ] <- value
		if(ndim == 2)
			arr[ dnames[[1]] , dnames[[2]] ] <- value
		if(ndim == 3)
			arr[ dnames[[1]] , dnames[[2]] , dnames[[3]] ] <- value
		if(ndim == 4)
			arr[ dnames[[1]] , dnames[[2]] , dnames[[3]] , dnames[[4]] ] <- value

	}
	else{	
		id.matrix <- create.index.matrix(dnames, arr)
		arr[id.matrix] <- value
	}

  #remove dimnames, where added:
  if(length(dimnamesNotExisting) != 0){
    for(i in 1:length(dimnamesNotExisting))
      dimnames(arr)[[dimnamesNotExisting[i]]] <- vector()
  }
	#in case, mask was used, unmask the result:
	if(!is.null(mask)){
	  arr <- unmask(arr, mask=mask)
	}
	return(arr)
}	

##########################  finish subarray<-  #################################

 ##################
#####  mask  #####
##################

# renames same dimnensionnames of an array
mask <- function(arr, mask='#', namesAlso = FALSE){
  if(is.array(arr) | is.matrix(arr) | is.data.frame(arr)){
    # mask all dimnames
    newDnames <- dimnames(arr)
    for(dim in 1:length(dim(arr))){
      while(!identical(unique(newDnames[[dim]]), newDnames[[dim]])){
        find <- match(unique(newDnames[[dim]]), newDnames[[dim]])
        newDnames[[dim]][-find] <- paste(newDnames[[dim]][-find], mask, sep="")
      }
    }
    dimnames(arr) <- newDnames
    # now mask the names of dimensions:
    if(namesAlso){
      newNames <- names(dimnames(arr))
      while(!identical(unique(newNames), newNames)){
        find <- match(unique(newNames), newNames)
        newNames[-find] <- paste(newNames[-find], mask, sep="")
      }
      names(dimnames(arr)) <- newNames
    }
  }
  return(arr)
}
################################  finish mask  #################################

####################
#####  unmask  #####
####################

# returns an array, which has been modified with mask, into its original state
#(make sure to use same mask!)
unmask <- function(arr, mask='#', namesAlso = FALSE){
  if(is.array(arr) | is.matrix(arr) | is.data.frame(arr)){
    for(dim in 1:length(dim(arr))){
      tmp <- gsub(pattern=paste("(",mask,")+", sep=""), 
                  replacement = "", x = dimnames(arr)[[dim]])
      dimnames(arr)[[dim]] <- tmp
    }
    if(namesAlso){
      names(dimnames(arr)) <- gsub(pattern=paste("(",mask,")+", sep=""), 
                         replacement = "", x = names(dimnames(arr)))
    }
  }
  return(arr)
}
#################################  finish unmask  ##############################

#########################
## factor.to.character ##
#########################

# This function takes a data.frame and converts each column, which is given as
# a Factor into a character column.
factor.to.character <- function(dataFrame, mask = NULL){
  if(!is.null(mask)){
    dataFrame <- mask(dataFrame, mask)
  }
  if(is.data.frame(dataFrame)){
    dNames <- dimnames(dataFrame) # since 'as.data.frame' applies 'make.names'
                                  # to each dimension, save the dimnames
    dataFrame <-rapply(dataFrame, as.character, classes="factor", how="replace")
    dataFrame <- as.data.frame(dataFrame, stringsAsFactors=FALSE)
    dimnames(dataFrame) <- dNames # and write them back into the dataFrame
  }
  else {
    warning("The function 'factor.to.character' expects a data.frame. Since ",
            "something else was handed over, nothing was changed!")
  }
  if(!is.null(mask)){
    dataFrame <- unmask(dataFrame, mask)
  }
  return(dataFrame)
}
############################# finish factor.to.character #######################
pik-piam/nitrogen documentation built on Nov. 5, 2019, 12:48 a.m.