converts text file to filevector format

Description

The file provides the data to be converted to filevector format. The file may provide the data only (no row and column names), in which case the column/row names may be left empty or provided in separate files (in which case it is assumed that names are provided only for the imported columns/rows – see the skip options). There is an option to skip a number of the first rows and columns. The row and column names may also be provided in the file itself, in which case one needs to specify the row/column number that provides the column/row names. Unless the option "R_matrix" is set to TRUE, it is assumed that the number of columns is the same across the whole file. If "R_matrix" is set to TRUE, it is assumed that both column and row names are provided in the file, and that the first line contains one column fewer than the other lines (as is the case with files produced from R using the function write.table(...,col.names=TRUE,row.names=TRUE)).

Usage

1
2
3
4
text2databel(infile, outfile, colnames, rownames, skipcols, skiprows,
  transpose = FALSE, R_matrix = FALSE, type = "DOUBLE",
  cachesizeMb = 64, readonly = TRUE, naString = "NA",
  unlinkTmpTransposeFiles = TRUE)

Arguments

infile

input text file name

outfile

output filevector file name; if missing, it is set to infile+".filevector"

colnames

where are the column names stored? If missing, no column names; if integer, this denotes the row of the input file where the column names are specified; if character string then the string specifies the name of the file with column names

rownames

where are the row names stored? If missing, no row names; if integer, this denotes the column of the input file where the row names are specified; if character string then the string specifies the name of the file with row names

skipcols

how many columns of the input file to skip

skiprows

how many rows of the input file to skip

transpose

whether the file is to be transposed

R_matrix

if true, the file format is assumed to follow the format of R data matrix produced with write.table(...,col.names=TRUE,row.names=TRUE)

type

data DatABEL type to use ("DOUBLE", "FLOAT", "INT", "UNSIGNED_INT", "UNSIGNED_SHORT_INT", "SHORT_INT", "CHAR", "UNSIGNED_CHAR")

cachesizeMb

cache size for the resulting 'databel-class' object

readonly

whether the resulting 'databel-class' object should be opened in readonly mode

naString

the string used for missing data (default: NA)

unlinkTmpTransposeFiles

Boolean to indicate whether the intermediate "_fvtmp.fvi/d" files should be deleted. Default: TRUE. These intermediate files are generated while transposing the filevector files.

Value

The converted file is stored in the file system; a databel-class object connected to that file is returned.

Author(s)

Yurii Aulchenko

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
cat("this is an example which you can run if you can write to the
file system\n")

## Not run: 

# Build a small random matrix with row and column names.
NC <- 5
NR <- 10
data <- matrix(rnorm(NC * NR), ncol = NC, nrow = NR)
rownames(data) <- paste0("r", seq_len(NR))
colnames(data) <- paste0("c", seq_len(NC))
data

# Write the matrix as text in four layouts (both dimnames, column names
# only, row names only, no names), plus the names in separate files.
write.table(data, file = "test_matrix_dimnames.dat", row.names = TRUE,
            col.names = TRUE, quote = FALSE)
write.table(data, file = "test_matrix_colnames.dat", row.names = FALSE,
            col.names = TRUE, quote = FALSE)
write.table(data, file = "test_matrix_rownames.dat", row.names = TRUE,
            col.names = FALSE, quote = FALSE)
write.table(data, file = "test_matrix_NOnames.dat", row.names = FALSE,
            col.names = FALSE, quote = FALSE)
write(colnames(data), file = "test_matrix.colnames")
write(rownames(data), file = "test_matrix.rownames")

# Replace 'data' with the matrix read back from the converted file, so the
# identical() checks below compare against what was actually stored in the
# text files (presumably needed because write.table() does not write
# doubles at full precision).
text2databel(infile = "test_matrix_dimnames.dat",
             outfile = "test_matrix_dimnames", R_matrix = TRUE)
x <- databel("test_matrix_dimnames")
data <- as(x, "matrix")
data

# Convert each text layout to filevector format and check the result.

# No names in the data file; both sets of names come from separate files.
text2databel(infile = "test_matrix_NOnames.dat",
             outfile = "test_matrix_NOnames.fvf",
             colnames = "test_matrix.colnames",
             rownames = "test_matrix.rownames")
x <- databel("test_matrix_NOnames.fvf")
if (!identical(data, as(x, "matrix"))) stop("not identical data")

# Same input, stored transposed; transpose back before comparing.
text2databel(infile = "test_matrix_NOnames.dat",
             outfile = "test_matrix_NOnames_T.fvf",
             colnames = "test_matrix.colnames",
             rownames = "test_matrix.rownames", transpose = TRUE)
x <- databel("test_matrix_NOnames_T.fvf")
if (!identical(data, t(as(x, "matrix")))) stop("not identical data")

# Row names in column 1 of the data file; column names from a file.
text2databel(infile = "test_matrix_rownames.dat",
             outfile = "test_matrix_rownames.fvf",
             rownames = 1, colnames = "test_matrix.colnames")
x <- databel("test_matrix_rownames.fvf")
if (!identical(data, as(x, "matrix"))) stop("not identical data")

# Column names in row 1 of the data file; row names from a file.
text2databel(infile = "test_matrix_colnames.dat",
             outfile = "test_matrix_colnames.fvf",
             colnames = 1, rownames = "test_matrix.rownames")
x <- databel("test_matrix_colnames.fvf")
if (!identical(data, as(x, "matrix"))) stop("not identical data")

# Both dimnames in the data file, in write.table() layout.
text2databel(infile = "test_matrix_dimnames.dat",
             outfile = "test_matrix_dimnames.fvf", R_matrix = TRUE)
x <- databel("test_matrix_dimnames.fvf")
if (!identical(data, as(x, "matrix"))) stop("not identical data")

# Extended matrix in a non-R layout: the data are padded with filler (-100)
# in the leading rows/columns, the column names sit in row 2 and the row
# names in column 3. Assigning the names turns 'newmat' into a character
# matrix.
newmat <- matrix(-100, ncol = NC + 3, nrow = NR + 2)
newmat[3:(NR + 2), 4:(NC + 3)] <- data
newmat[2, 4:(NC + 3)] <- paste0("c", seq_len(NC))
newmat[3:(NR + 2), 3] <- paste0("r", seq_len(NR))
newmat
write.table(newmat, file = "test_matrix_strange.dat",
            col.names = FALSE, row.names = FALSE, quote = FALSE)

# Tell text2databel which row/column of the file holds the names.
text2databel(infile = "test_matrix_strange.dat",
             outfile = "test_matrix_strange.fvf",
             colnames = 2, rownames = 3)
x <- databel("test_matrix_strange.fvf")
if (!identical(data, as(x, "matrix"))) stop("not identical data")


## End(Not run)