# R/create_eqtl_input_h5.R
#
# Defines functions: create_eqtl_input_h5, add_geno_pheno_covar_h5, add_snp_info_h5, add_gene_info_h5
#
# Documented in: add_gene_info_h5, add_geno_pheno_covar_h5, add_snp_info_h5, create_eqtl_input_h5

#' Creating eQTL input hdf5 file templates.
#'
#' Generates an hdf5 file with the basic groups for eQTL analysis.
#'
#' @param file_name The name (path) of the file to be created
#'
#' @return Called for its side effect: an HDF5 file is created at
#'   \code{file_name} with the groups phenotypes, genotypes and covars (each
#'   with the subgroups col_info and row_info) and the group K_mx.
#'   Returns \code{NULL} invisibly.
#' @keywords eQTL, HDF5
#'
#' @import rhdf5
#' @export
create_eqtl_input_h5 <- function(file_name) {
    # Registered up front so HDF5 handles are released even when one of the
    # creation calls below raises an error.
    on.exit(H5close(), add = TRUE)

    top_level_groups <- c("phenotypes", "genotypes", "covars")
    info_subgroups <- c("col_info", "row_info")

    h5createFile(file_name)

    # A parent group is created before its subgroups.
    for (group in top_level_groups) {
        h5createGroup(file_name, group)
        for (subgroup in info_subgroups) {
            h5createGroup(file_name, paste(group, subgroup, sep = "/"))
        }
    }

    # K_mx is a bare group without col_info / row_info subgroups.
    h5createGroup(file_name, "K_mx")

    invisible(NULL)
}

#' Adding genotypes, phenotypes and covariates to hdf5 file
#'
#' Use in-memory objects to create the phenotypes, genotypes and covars
#' datasets in a given hdf5 file. For every object that is supplied, the
#' matrix is written to \code{<group>/matrix}, its column names (if present)
#' to \code{<group>/col_info/id} and its row names (if present) to
#' \code{<group>/row_info/id}. Objects left as \code{NULL} are skipped with a
#' message.
#'
#' @param file_name Name of the HDF5 file to add the information to
#' @param phenotypes Phenotype matrix to save
#' @param genotypes Genotype matrix to save
#' @param covars Covariate matrix to save
#'
#' @return Saves the given objects into the hdf5 generated by the
#'   create_eqtl_input_h5() function. Returns \code{NULL} invisibly.
#' @keywords eQTL, HDF5
#'
#' @import rhdf5
#' @export
add_geno_pheno_covar_h5 <- function(file_name,
                                    phenotypes = NULL,
                                    genotypes = NULL,
                                    covars = NULL) {
    # Registered up front so HDF5 handles are released even when one of the
    # writes below raises an error.
    on.exit(H5close(), add = TRUE)

    # Writes row or column names of one matrix to <group>/<info_group>/id,
    # or reports that there are none.
    save_ids <- function(ids, group, axis, info_group) {
        if (is.null(ids)) {
            message("No ", axis, " names for ", group, " detected")
            return(invisible(NULL))
        }
        message("Saving ", axis, " names to ", info_group, "/id")
        h5write(ids, file_name, paste(group, info_group, "id", sep = "/"))
    }

    # Writes one matrix and its dimnames under `group`. `label` is the word
    # used in the progress messages. `preallocate` controls whether the
    # dataset is created explicitly (unchunked, compression level 0) before
    # the data is written.
    save_matrix <- function(mat, group, label, preallocate) {
        if (is.null(mat)) {
            message("No ", label, " given")
            return(invisible(NULL))
        }

        message("Saving ", label)
        matrix_path <- paste(group, "matrix", sep = "/")
        if (preallocate) {
            h5createDataset(file_name,
                            matrix_path,
                            c(nrow(mat), ncol(mat)),
                            chunk = NULL, level = 0)
        }
        h5write(mat, file_name, matrix_path)

        save_ids(colnames(mat), group, "column", "col_info")
        save_ids(rownames(mat), group, "row", "row_info")
    }

    save_matrix(phenotypes, "phenotypes", "phenotypes", preallocate = TRUE)
    save_matrix(genotypes, "genotypes", "genotypes", preallocate = TRUE)
    # covars/matrix is written without creating the dataset first (unchanged
    # behaviour), which leaves dataset creation to h5write().
    save_matrix(covars, "covars", "covariates", preallocate = FALSE)

    invisible(NULL)
}

#' Adding SNP information to an HDF5 file
#'
#' Use in-memory objects to add SNP information to a given HDF5 file. The
#' chromosome (with any "chr" text removed), position and id columns are
#' written to \code{genotypes/col_info/geno_chr},
#' \code{genotypes/col_info/geno_pos} and \code{genotypes/col_info/id}.
#'
#' @param file_name Name of the HDF5 file to add the information to
#' @param snp_info Dataframe with SNP information for all SNPs present in genotypes/matrix
#' @param id_col Column name of snp_info that contains id information
#' @param chr_col Column name of snp_info that contains chromosome information
#' @param pos_col Column name of snp_info that contains position information
#'
#' @return Saves the given objects into the hdf5 generated by the
#'   create_eqtl_input_h5() function. Returns \code{NULL} invisibly.
#' @keywords eQTL, HDF5
#'
#' @import rhdf5
#' @export
add_snp_info_h5 <- function(file_name = NULL,
                            snp_info = NULL,
                            id_col = "id",
                            chr_col = "chromosome",
                            pos_col = "position") {
    # The NULL defaults are placeholders only; both arguments are required.
    if (is.null(file_name) || is.null(snp_info)) {
        stop("Both 'file_name' and 'snp_info' must be supplied", call. = FALSE)
    }

    # Registered up front so HDF5 handles are released even when one of the
    # writes below raises an error.
    on.exit(H5close(), add = TRUE)

    # Extract every column before touching the file, so a wrong column name
    # fails before anything has been written.
    snp_chr <- gsub("chr", "", as.character(snp_info[, chr_col]))
    snp_pos <- snp_info[, pos_col]
    if (is.factor(snp_pos)) {
        # as.numeric() on a factor returns level codes, not the values.
        snp_pos <- as.character(snp_pos)
    }
    snp_pos <- as.numeric(snp_pos)
    snp_id <- as.character(snp_info[, id_col])

    h5write(snp_chr, file_name, "genotypes/col_info/geno_chr")
    h5write(snp_pos, file_name, "genotypes/col_info/geno_pos")
    h5write(snp_id, file_name, "genotypes/col_info/id")

    invisible(NULL)
}

#' Adding gene information to an HDF5 file
#'
#' Use in-memory objects to add gene information to a given HDF5 file. The
#' chromosome (with any "chr" text removed), start and end columns are written
#' to \code{phenotypes/col_info/pheno_chr}, \code{phenotypes/col_info/pheno_start}
#' and \code{phenotypes/col_info/pheno_end}. Gene symbols and entrez ids are
#' written to \code{phenotypes/col_info/pheno_symbol} and
#' \code{phenotypes/col_info/entrez} only when the corresponding column name
#' is given.
#'
#' @param file_name Name of the HDF5 file to add the information to
#' @param gene_info Dataframe with gene information for all genes present in phenotypes/matrix
#' @param id_col Column name of gene_info that contains id information.
#'   Currently not used by this function; no id dataset is written.
#' @param chr_col Column name of gene_info that contains chromosome information
#' @param start_col Column name of gene_info that contains start positions
#' @param end_col Column name of gene_info that contains end positions
#' @param entrez_col Column name of gene_info that contains entrez ids
#' @param symbol_col Column name of gene_info that contains gene symbols
#' @return Saves the given objects into the hdf5 generated by the
#'   create_eqtl_input_h5() function. Returns \code{NULL} invisibly.
#' @keywords eQTL, HDF5
#'
#' @import rhdf5
#' @export
add_gene_info_h5 <- function(file_name = NULL,
                             gene_info = NULL,
                             id_col = "id",
                             chr_col = "chromosome",
                             start_col = "start",
                             end_col = "end",
                             entrez_col = NULL,
                             symbol_col = NULL) {
    # The NULL defaults are placeholders only; both arguments are required.
    if (is.null(file_name) || is.null(gene_info)) {
        stop("Both 'file_name' and 'gene_info' must be supplied", call. = FALSE)
    }

    # Registered up front so HDF5 handles are released even when one of the
    # writes below raises an error.
    on.exit(H5close(), add = TRUE)

    # as.numeric() on a factor returns level codes, not the values, so
    # factors go through character first.
    column_as_numeric <- function(col_name) {
        values <- gene_info[, col_name]
        if (is.factor(values)) {
            values <- as.character(values)
        }
        as.numeric(values)
    }

    # Extract every column before touching the file, so a wrong column name
    # fails before anything has been written.
    gene_chr <- gsub("chr", "", as.character(gene_info[, chr_col]))
    gene_start <- column_as_numeric(start_col)
    gene_end <- column_as_numeric(end_col)
    gene_symbol <- if (!is.null(symbol_col)) as.character(gene_info[, symbol_col])
    gene_entrez <- if (!is.null(entrez_col)) as.character(gene_info[, entrez_col])

    h5write(gene_chr, file_name, "phenotypes/col_info/pheno_chr")
    h5write(gene_start, file_name, "phenotypes/col_info/pheno_start")
    h5write(gene_end, file_name, "phenotypes/col_info/pheno_end")

    if (!is.null(symbol_col)) {
        h5write(gene_symbol, file_name, "phenotypes/col_info/pheno_symbol")
    }
    if (!is.null(entrez_col)) {
        h5write(gene_entrez, file_name, "phenotypes/col_info/entrez")
    }

    invisible(NULL)
}
# jinhyunju/eQTLtools documentation built on May 19, 2019, 10:35 a.m.