R/baseIO.R

Defines functions h5_to_matrix matrix_to_h5 h5_to_df df_to_h5

Documented in df_to_h5 h5_to_df h5_to_matrix matrix_to_h5

# ---  base data IO

# packages
library(hdf5r)
library(Matrix)

#' Data frame to h5
#'
#' Data frame is converted to and saved the h5 file
#' @param df Data frame of cell annotation or gene annotation
#' @param h5 The h5 file
#' @param gr_name The group name represents the property of the data frame.
#' @export
#'
df_to_h5 <- function(df, h5, gr_name=NULL){
  # if(!gr_name %in% names(h5)){
  #   h5df <- h5$create_group(gr_name)
  # }
  # else{h5df <- h5[[gr_name]]
  # }
  h5df <- h5$create_group(gr_name)
  h5df[['index']] = rownames(df)
  if(ncol(df)>0){
    h5df[['colnames']] = colnames(df)
  }
  # factor to levels,charactor to levels,logical to levels
  for(k in names(df)){
    if(is.factor(df[[k]])){
      h5df[[k]]<- as.integer(df[[k]]) - 1L # for 0 begin
      h5df[[paste0(k,'_levels')]]<- levels(df[[k]])
      h5attr(h5df[[k]], 'origin_dtype') = 'category'
    }
    if(is.character(df[[k]])){
      str_to_lvl <- factor(df[[k]])
      h5df[[k]]<- as.integer(str_to_lvl) - 1L
      h5df[[paste0(k,'_levels')]]<- levels(str_to_lvl)
      h5attr(h5df[[k]], 'origin_dtype') = 'string'
    }
    if(is.logical(df[[k]])){
      h5df[[k]] <- as.integer(df[[k]])
      h5attr(h5df[[k]], 'origin_dtype') = 'bool'
    }
    if(any(is.numeric(df[[k]]),is.integer(df[[k]]))){
      h5df[[k]] <- df[[k]]
      h5attr(h5df[[k]], 'origin_dtype') = 'number'
    }
  }
}

#' H5 to dataframe
#'
#' H5 gruop is converted to usable data frame including the observes annotation (cell annotation) and variables annotation (gene annotation)
#' @param h5df The hdf5 group saved the data frame using list mode.
#' @return A data frame(annotation)
#' @export
#'
h5_to_df <- function(h5df){
  df_list <- list()
  df_list[['index']] <- h5df[['index']][]
  for(k in names(h5df)){
    if(length(h5attr_names(h5df[[k]]))>0){
      df_dtype <- h5attr(h5df[[k]], 'origin_dtype')
      if(df_dtype == 'category'){
        e0 <- h5df[[k]][] + 1L
        lvl <- h5df[[paste0(k,'_levels')]][]
        df_list[[k]] <- structure(.Data = e0, .Label = lvl, class = 'factor')
      }
      if(df_dtype == 'string'){
        e0 <- h5df[[k]][] + 1L
        lvl <- h5df[[paste0(k,'_levels')]][]
        df_list[[k]] <- as.character(structure(.Data = e0, .Label = lvl, class = 'factor'))
      }
      if(df_dtype == 'bool'){
        df_list[[k]] <- as.logical(h5df[[k]][])
      }
      if(df_dtype == 'number'){
        df_list[[k]] <- h5df[[k]][]
      }
    }
  }
  df = as.data.frame(df_list, row.names = 'index')
  if('colnames' %in% names(h5df)){
    cnames <- h5df[['colnames']][]
    df = df[,cnames]
  }else{
    df=df
  }
  return(df)
}


#' Matrix to H5 format
#'
#' The matrix including the dense matrix and sparse matrix is converted to the matrix in h5 format or is stored into the h5 file.
#' @param mat The matrix object including matrix(R) and sparse matrix(Matrix package)
#' @param h5 The H5 file name that we write in
#' @param gr_name The h5 gorup store the matrix (dense matrix or sparse matrix)
#' @param save.obs.name The rownames isn't be saved(FALSE by defualt)
#' @param save.var.name The colnames isn't be saved(FALSE by defualt)
#' @export
#'
matrix_to_h5 <- function(mat, h5, gr_name = NULL, save.obs.name = FALSE, save.var.name = FALSE){
  if(!gr_name %in% names(h5)){
    h5mat = h5$create_group(gr_name)
  }
  else{
    h5mat = h5[[gr_name]]
  }
  if('dgCMatrix' %in% class(mat)){
    h5mat[['values']] <- slot(object = mat, name = 'x')
    h5mat[['indices']] <- slot(object = mat, name = 'i')
    h5mat[['indptr']] <- slot(object = mat, name = 'p')
    h5mat[['dims']] <- rev(slot(object = mat, name = 'Dim'))
    if(save.obs.name & save.var.name){
      h5mat[['var_names']] <- slot(object = mat, name = 'Dimnames')[[1]]
      h5mat[['obs_names']] <- slot(object = mat, name = 'Dimnames')[[2]]
    }
    h5attr(h5mat, 'datatype') <- 'SparseMatrix'
  }
  else if('matrix' %in% class(mat)){
    h5mat[['matrix']] <- mat
    h5mat[['dims']] <- rev(dim(mat))
    if(save.obs.name & save.var.name){
      h5mat[['var_names']] <- slot(object = mat, name = 'Dimnames')[[1]]
      h5mat[['obs_names']] <- slot(object = mat, name = 'Dimnames')[[2]]
    }
    h5attr(h5mat, 'datatype') <- 'Array'
    warning(paste0(substitute(gr_name), ' is dense matrix'))#2
  }
  else if('Graph' %in% class(mat)){
    h5mat[['values']] <- slot(object = mat, name = 'x')
    h5mat[['indices']] <- slot(object = mat, name = 'i')
    h5mat[['indptr']] <- slot(object = mat, name = 'p')
    h5mat[['dims']] <- rev(slot(object = mat, name = 'Dim'))
    h5attr(h5mat, 'datatype') <- 'SparseMatrix'
  }else{
    stop('The matrix type is wrong')
  }
}

#' H5 to Matrix format
#'
#' H5 group is converted to usable matrice including dense matrices and sparse matrices.
#' Dense matrices(by R) and sparse matrices(constructed by Matrix package)
#' @param h5 The name of the group the stores the matrix data in h5 file
#' @param obs_names The observes names, such as cell names
#' @param var_names The variables names, such as gene names
#' @return dense matrix or sparse matrix
#' @export
#'
h5_to_matrix <-  function(h5mat, obs.name=NULL, var.name=NULL){
  if(all(c('obs_names','var_names') %in% names(h5mat))){
    obs.name = h5mat[['obs_names']][]
    var.name = h5mat[['var_names']][]
  }
  if(h5attr(h5mat, 'datatype') == 'SparseMatrix'){
    mat <- Matrix::sparseMatrix(i = h5mat[['indices']][],
                                p = h5mat[['indptr']][],
                                x = h5mat[['values']][],
                                dims = rev(h5mat[['dims']][]),
                                index1 = FALSE)
    if(!is.null(obs.name) & !is.null(var.name)){
      dimnames(mat) <- list(var.name, obs.name)
    }
    else{
      warning('There are no dimnames in the sparse matrix')
    }
  }
  else if(h5attr(h5mat, 'datatype') == 'Array'){
    mat <- h5mat[['matrix']][,]
    if(!is.null(obs.name) & !is.null(var.name)){
      dimnames(mat) <- list(var.name, obs.name)
    }
    else{
      warning('There are no dimnames in the matrix')
    }
  }
  return(mat)
}
JiekaiLab/RIOH5 documentation built on June 5, 2021, 8:37 a.m.