# Input-output functions handling .gct files.
# -------------------------------------------------------
# Reads a .gct file and splits it into main data and meta data.
#
# Arguments:
#   file_name  path of the .gct file.
#
# Returns the list built by parseAnnotatedMatrix(): the numeric main data
# together with the row and column meta data.
readGct = function(file_name) {
  # The second line of the file holds the tab-separated dimension fields.
  gct_header = read.table(file_name, skip=1, nrows=1, sep="\t")

  # Field `pos` of the header line, or 0 when the line has no such field or
  # the field is empty. Indexing a non-existent data frame column raises an
  # error instead of returning NULL, so the field count is checked explicitly.
  headerField = function(pos) {
    if (ncol(gct_header) < pos || is.na(gct_header[1, pos])) {
      return(0L)
    }
    gct_header[1, pos]
  }

  # NOTE(review): with a two-field header both counts fall back to 0, so every
  # column after the first is treated as data -- confirm this is the wanted
  # behaviour for such files.
  meta_nrow = headerField(4) # rows to skip
  meta_ncol = headerField(3) # columns to skip

  # Everything from the third line on is read as one flat table holding main
  # data, row meta data and column meta data. header=FALSE keeps the column id
  # row as the first row of the table.
  main_mat = fread(file_name, skip=2, header=FALSE)

  # Separate main data and meta data.
  parseAnnotatedMatrix(as.data.frame(main_mat), meta_nrow, meta_ncol)
}
# Reads only the top of a .gct file: the column id row, the column meta data
# rows and the first data row. This is enough for parseAnnotatedMatrix() to
# return the meta data layout without loading the whole file.
#
# Arguments:
#   file_name  path of the .gct file.
#
# Returns the list built by parseAnnotatedMatrix() for that partial table.
readGctMeta = function(file_name) {
  # The second line of the file holds the tab-separated dimension fields.
  gct_header = read.table(file_name, skip=1, nrows=1, sep="\t")

  # Field `pos` of the header line, or 0 when the line has no such field or
  # the field is empty (same fallback as readGct).
  headerField = function(pos) {
    if (ncol(gct_header) < pos || is.na(gct_header[1, pos])) {
      return(0L)
    }
    gct_header[1, pos]
  }
  meta_nrow = headerField(4)
  meta_ncol = headerField(3)

  # Column id row + meta_nrow column meta data rows + first data row.
  # header=FALSE, as in readGct, keeps the column id row as the first row of
  # the table instead of letting fread turn it into column names.
  meta_data = fread(file_name, skip=2, nrows=meta_nrow+2, header=FALSE)

  parseAnnotatedMatrix(as.data.frame(meta_data), meta_nrow, meta_ncol)
}
# Splits a flat annotated table (as read from a .gct file) into main data and
# meta data.
#
# Layout used by the indexing below (all positions are 1-indexed):
#   row 1                       column id row
#   rows 2 .. meta_nrow + 1     column meta data, field name in column 1
#   rows meta_nrow + 2 ..       data rows
#   columns 1 .. meta_ncol + 1  row id and row meta data
#   columns meta_ncol + 2 ..    data columns (samples)
#
# Arguments:
#   mat        data frame holding the whole table, column id row included.
#   meta_nrow  number of column meta data rows; non-negative whole number.
#   meta_ncol  number of row meta data columns; non-negative whole number.
#
# Returns a list with
#   $data      numeric matrix of the main data,
#   $meta_col  data frame with one row per data column: "id" followed by one
#              column per column meta data field,
#   $meta_row  data frame with one row per data row, columns named after the
#              first row of mat.
#
# Stops if meta_nrow or meta_ncol is not a whole number, is negative, or if
# mat has no data row or data column left after the meta data.
parseAnnotatedMatrix = function(mat, meta_nrow, meta_ncol) {
  # Input check. The value is tested rather than the storage type, so a
  # whole-number double such as 3 is accepted as well as 3L.
  isWholeNumber = function(v) {
    is.numeric(v) && length(v) == 1 && !is.na(v) && v == round(v)
  }
  if (!isWholeNumber(meta_nrow)) {
    stop("meta_nrow is not an integer")
  }
  if (!isWholeNumber(meta_ncol)) {
    stop("meta_ncol is not an integer")
  }
  if (meta_nrow < 0 || meta_ncol < 0) {
    stop("meta_nrow and meta_ncol must be non-negative")
  }
  # Without this check the ranges below would count backwards and silently
  # select the wrong rows or columns.
  if (nrow(mat) < meta_nrow + 2 || ncol(mat) < meta_ncol + 2) {
    stop("mat is too small for the given meta_nrow and meta_ncol")
  }

  # +2: one position for the id row/column, one to step past the meta data.
  data_rows = (meta_nrow + 2):nrow(mat)
  data_cols = (meta_ncol + 2):ncol(mat)
  header_rows = seq_len(meta_nrow + 1) # id row plus column meta data rows
  annot_cols = seq_len(meta_ncol + 1)  # id column plus row meta data columns

  # Main data matrix. drop=FALSE keeps a single row or column two-dimensional.
  x = as.matrix(mat[data_rows, data_cols, drop=FALSE])
  class(x) = "numeric" # convert to numeric while keeping the matrix shape

  # Column meta data, transposed so that each data column (sample) is a row
  # and each type of meta data is a named column.
  meta_col = as.data.frame(t(mat[header_rows, data_cols, drop=FALSE]))
  # Field names come from column 1 of the meta data rows; this is empty when
  # meta_nrow is 0, leaving only "id".
  colnames(meta_col) = c("id", as.character(mat[seq_len(meta_nrow) + 1, 1]))

  # Row meta data, one row per data row (peptide), named by the first row.
  meta_row = as.data.frame(mat[data_rows, annot_cols, drop=FALSE])
  colnames(meta_row) = as.vector(t(mat[1, annot_cols, drop=FALSE]))

  list(data=x, meta_col=meta_col, meta_row=meta_row)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.