g.inspectfile = function(datafile, desiredtz = "", params_rawdata = c(),
configtz = c(), ...) {
#get input variables
input = list(...)
if (length(input) > 0 || length(params_rawdata) == 0) {
# or if params_[...] aren't specified (so need to be filled with defaults).
# So, inside GGIR this will not be used, but it is used when g.inspectfile is used on its own
# as if it was still the old g.inspectfile function
params = extract_params(params_rawdata = params_rawdata,
input = input,
params2check = "rawdata") # load default parameters
params_rawdata = params$params_rawdata
rm(params)
}
# note that if the file is an RData file then this function will not be called
# the output of this function for the original datafile is stored inside the RData file in the form of object I
getbrand = function(filename = c(), datafile = c()) {
sf = c(); isitageneactive = c(); mon = c(); dformat = c() #generating empty variables
extension = unlist(strsplit(filename,"[.]"))
if (tolower(extension[length(extension)]) == "gz") {
extension = extension[length(extension) - 1]
} else {
extension = extension[length(extension)]
}
switch(extension,
"bin" = { dformat = FORMAT$BIN },
"cwa" = ,
"CWA" = { mon = MONITOR$AXIVITY
dformat = FORMAT$CWA
},
"gt3x" = { mon = MONITOR$ACTIGRAPH
dformat = FORMAT$GT3X
},
"GT3X" = { mon = MONITOR$ACTIGRAPH
dformat = FORMAT$GT3X
if (file.access(datafile, 2) == 0) { # test for write access to file
# rename file to be lower case gt3x extension
file.rename(from = datafile, to = gsub(pattern = ".GT3X", replacement = ".gt3x", x = datafile))
datafile = gsub(pattern = ".GT3X", replacement = ".gt3x", x = datafile)
warning("\nWe have renamed the GT3X file to gt3x because GGIR dependency read.gt3x cannot handle uper case extension")
} else {
stop("\nGGIR needs to change the file extension from GT3X to gt3x, but it does not seem to have write permission to the file.")
}
},
"csv" = { dformat = FORMAT$CSV
testheader = read.csv(datafile, nrow = 1, skip = 0, header = FALSE)
if (grepl("ActiGraph", testheader[1], fixed=TRUE)) {
mon = MONITOR$ACTIGRAPH
} else {
testcsv = read.csv(datafile, nrow = 10, skip = 10)
testcsvtopline = read.csv(datafile, nrow = 2,skip = 1)
if (ncol(testcsv) == 2 && ncol(testcsvtopline) < 4) {
mon = MONITOR$GENEACTIV
} else if (ncol(testcsv) >= 4 && ncol(testcsvtopline) >= 4) {
mon = MONITOR$AXIVITY
} else {
stop(paste0("\nError processing ", filename, ": unrecognised csv file format.\n"))
}
}
},
"wav" = { stop(paste0("\nError processing ", filename, ": Axivity .wav file format is no longer supported.\n")) },
{ stop(paste0("\nError processing ", filename, ": unrecognised file format.\n")) }
)
if (ismovisens(datafile)) {
dformat = FORMAT$BIN
mon = MONITOR$MOVISENS
sf = 64
header = "no header"
} else if (dformat == FORMAT$BIN) { # .bin and not movisens
# try read the file as if it is a geneactiv and store output in variable 'isitageneactive'
isitageneactive = GGIRread::readGENEActiv(filename = datafile, start = 0, end = 1)
if (length(isitageneactive) >= 1) {
if (all(names(isitageneactive) %in% c("header", "data.out") == TRUE)) {
mon = MONITOR$GENEACTIV
tmp = unlist(strsplit(unlist(as.character(isitageneactive$header$SampleRate))," "))[1]
# occasionally we'll get a decimal seperated by comma; if so, replace the comma with a dot
tmp = sub(",", ".", tmp, fixed = TRUE)
sf = as.numeric(tmp)
#also try to read sf from first page header
sf_r = sf
csvr = c()
suppressWarnings(expr = {
try(expr = {csvr = as.matrix(read.csv(datafile, nrow = 10,
skip = 200, sep = ""))
}, silent = TRUE)
})
if (length(csvr) > 1) {
for (ii in 1:nrow(csvr)) {
tmp3 = unlist(strsplit(as.character(csvr[ii,1]),"quency:")) #part of 'frequency'
if (length(tmp3) > 1) {
# occasionally we'll get a decimal seperated by comma; if so, replace the comma with a dot
tmp3 = sub(",", ".", tmp3, fixed = TRUE)
sf_r = as.numeric(tmp3)
}
}
if (length(sf_r) > 0 && !is.na(sf_r)) {
if (sf_r != sf && abs(sf_r - sf) > 5) { # use pageheader sample frequency if it is not the same as header sample frequency
sf = sf_r
warning(paste0("sample frequency used from page header: ", sf, " Hz"))
}
}
}
} else {
stop(paste0("\nError processing ", filename, ": possibibly a corrupt GENEActive file"))
}
} else {
stop(paste0("\nError processing ", filename, ": unrecognised .bin file"))
}
} else if (dformat == FORMAT$CSV) { #no checks for corrupt file yet...maybe not needed for csv-format?
if (mon == MONITOR$GENEACTIV) {
tmp = read.csv(datafile, nrow = 50, skip = 0)
tmp = as.character(tmp[which(as.character(tmp[,1]) == "Measurement Frequency"),2])
tmp = as.numeric(unlist(strsplit(tmp," "))[1])
# occasionally we'll get a decimal seperated by comma; if so, replace the comma with a dot
tmp = sub(",", ".", tmp, fixed = TRUE)
sf = as.numeric(tmp)
} else if (mon == MONITOR$ACTIGRAPH) {
tmp = read.csv(datafile, nrow = 9, skip = 0)
tmp = colnames(tmp)
tmp = as.character(unlist(strsplit(tmp,".Hz"))[1])
# tmp3 = as.character(unlist(strsplit(tmp2,"yy.at."))[2])
# following suggestion by XInyue on github https://github.com/wadpac/GGIR/issues/102 replaced by:
tmp = as.character(unlist(strsplit(tmp, ".at.",fixed = T))[2])
# occasionally we'll get a decimal seperated by comma; if so, replace the comma with a dot
tmp = sub(",", ".", tmp, fixed = TRUE)
sf = as.numeric(tmp)
} else if (mon == MONITOR$AXIVITY) {
# sample frequency is not stored
tmp = read.csv(datafile, nrow = 100000, skip = 0)
tmp = as.numeric(as.POSIXct(tmp[, 1], origin = "1970-01-01"))
sf = length(tmp) / (tmp[length(tmp)] - tmp[1])
sf = floor((sf) / 5 ) * 5 # round down to nearest integer of 5, we never want to assume that there is more frequency content in a signal than there truly is
}
} else if (dformat == FORMAT$CWA) {
PP = GGIRread::readAxivity(datafile, start = 1, end = 10, desiredtz = desiredtz)
H = PP$header
sf = H$frequency
} else if (dformat == FORMAT$GT3X) {
info = try(expr = {read.gt3x::parse_gt3x_info(datafile, tz = desiredtz)},silent = TRUE)
if (inherits(info, "try-error") == TRUE || is.null(info)) {
warning(paste0("\nFile info could not be extracted from ", datafile), call. = FALSE)
sf = NULL # set to NULL in order to tell other GGIR functions that file was corrupt
} else {
info = info[lengths(info) != 0] # remove odd NULL in the list
sf = info[["Sample Rate"]]
}
}
invisible(list(dformat = dformat, mon = mon, sf = sf, datafile = datafile))
}
#======================================================================
# main script
filename = unlist(strsplit(as.character(datafile),"/"))
filename = filename[length(filename)]
monnames = c("genea", "geneactive", "actigraph", "axivity", "movisens", "verisense") #monitor names
fornames = c("bin", "csv", "wav", "cwa", "csv", "gt3x") #format names
if (length(filename) == 0) {
warning("no files to analyse", call. = FALSE)
}
if (length(params_rawdata[["rmc.firstrow.acc"]]) == 0) {
INFI = getbrand(filename, datafile)
mon = INFI$mon
dformat = INFI$dformat
sf = INFI$sf
datafile = INFI$datafile
} else {
dformat = FORMAT$AD_HOC_CSV
mon = MONITOR$AD_HOC
Pusercsvformat = read.myacc.csv(rmc.file = datafile,
rmc.nrow = 5,
rmc.dec = params_rawdata[["rmc.dec"]],
rmc.firstrow.acc = params_rawdata[["rmc.firstrow.acc"]],
rmc.firstrow.header = params_rawdata[["rmc.firstrow.header"]],
rmc.header.length = params_rawdata[["rmc.header.length"]],
rmc.col.acc = params_rawdata[["rmc.col.acc"]],
rmc.col.temp = params_rawdata[["rmc.col.temp"]],
rmc.col.time = params_rawdata[["rmc.col.time"]],
rmc.unit.acc = params_rawdata[["rmc.unit.acc"]],
rmc.unit.temp = params_rawdata[["rmc.unit.temp"]],
rmc.unit.time = params_rawdata[["rmc.unit.time"]],
rmc.format.time = params_rawdata[["rmc.format.time"]],
rmc.bitrate = params_rawdata[["rmc.bitrate"]],
rmc.dynamic_range = params_rawdata[["rmc.dynamic_range"]],
rmc.unsignedbit = params_rawdata[["rmc.unsignedbit"]],
rmc.origin = params_rawdata[["rmc.origin"]],
rmc.desiredtz = params_rawdata[["rmc.desiredtz"]],
rmc.configtz = params_rawdata[["rmc.configtz"]],
rmc.sf = params_rawdata[["rmc.sf"]],
rmc.headername.sf = params_rawdata[["rmc.headername.sf"]],
rmc.headername.sn = params_rawdata[["rmc.headername.sn"]],
rmc.headername.recordingid = params_rawdata[["rmc.headername.sn"]],
rmc.header.structure = params_rawdata[["rmc.header.structure"]],
rmc.check4timegaps = params_rawdata[["rmc.check4timegaps"]],
rmc.scalefactor.acc = params_rawdata[["rmc.scalefactor.acc"]],
desiredtz = desiredtz,
configtz = configtz)
if (inherits(Pusercsvformat$header, "character") && Pusercsvformat$header == "no header") {
sf = params_rawdata[["rmc.sf"]]
} else {
sf = as.numeric(Pusercsvformat$header["sample_rate",1])
}
if (is.null(sf) || is.na(sf)) {
stop("\nFile header doesn't specify sample rate. Please provide rmc.sf value to process ", datafile)
} else if (sf == 0) {
stop("\nFile header doesn't specify sample rate. Please provide a non-zero rmc.sf value to process ", datafile)
}
}
if (mon == MONITOR$GENEACTIV && dformat == FORMAT$CSV) {
stop(paste0("The GENEActiv csv reading functionality is deprecated in",
" GGIR from version 2.6-4 onwards. Please, use either",
" the GENEActiv bin files or try to read the csv files with",
" GGIR::read.myacc.csv"), call. = FALSE)
}
if (dformat == FORMAT$BIN) {
if (mon == MONITOR$GENEACTIV) {
H = GGIRread::readGENEActiv(filename = datafile, start = 0, end = 1)$header
} else if (mon == MONITOR$MOVISENS) {
H = "file does not have header" # these files have no header
xmlfile = paste0(dirname(datafile), "/unisens.xml")
if (file.exists(xmlfile)) {
# as we are only interested in two character fields
# try extract value by reading xml as text file to avoid having to add
# software dependencies
header = as.character(read.csv(xmlfile, nrow = 1))
tmp1 = unlist(strsplit(header, "measurementId="))[2]
ID = gsub(pattern = " ",replacement = "", unlist(strsplit(tmp1, " timestampStart"))[1])
header = paste0(read.csv(xmlfile, nrow = 10, skip = 2), collapse = " ")
tmp1 = unlist(strsplit(header, "sensorSerialNumber value="))[2]
SN = unlist(strsplit(tmp1, "/>"))[1]
header = data.frame(serialnumber = SN, ID = ID)
if (length(header) > 1) {
H = t(header)
}
filename = unlist(strsplit(as.character(datafile),"/"))
filename = filename[length(filename) - 1]
}
}
} else if (dformat == FORMAT$CSV) {
if (mon == MONITOR$ACTIGRAPH) {
H = read.csv(datafile, nrow = 9, skip = 0)
} else if (mon == MONITOR$AXIVITY) {
H = "file does not have header" # these files have no header
}
} else if (dformat == FORMAT$CWA) {
PP = GGIRread::readAxivity(datafile, start = 1, end = 10, desiredtz = desiredtz)
H = PP$header
} else if (dformat == FORMAT$AD_HOC_CSV) { # csv data in a user-specified format
header = Pusercsvformat$header
} else if (dformat == FORMAT$GT3X) { # gt3x
info = try(expr = {read.gt3x::parse_gt3x_info(datafile, tz = desiredtz)},silent = TRUE)
if (inherits(info, "try-error") == TRUE || is.null(info)) {
warning(paste0("\nFile info could not be extracted from ", datafile), call. = FALSE)
sf = NULL # set to NULL in order to tell other GGIR functions that file was corrupt
H = NULL
header = NULL
} else {
info = info[lengths(info) != 0] # remove odd NULL in the list
H = matrix("", length(info), 2)
H[, 1] = names(info)
for (ci in 1:length(info)) {
if (inherits(info[[ci]], "POSIXct") == TRUE) {
H[ci, 2] = format(info[[ci]])
} else {
H[ci, 2] = as.character(info[[ci]])
}
}
sf = as.numeric(H[which(H[,1] == "Sample Rate"), 2])
}
}
if (is.null(sf) || sf == 0) {
warning(paste0("\nSample frequency not recognised in ", basename(datafile)), call. = FALSE)
}
if (dformat != FORMAT$AD_HOC_CSV && is.null(sf) == FALSE) {
H = as.matrix(H)
if (ncol(H) == 3 && dformat == FORMAT$CSV && mon == MONITOR$ACTIGRAPH) {
if (length(which(is.na(H[,2]) == FALSE)) == 0) {
H = as.matrix(H[,1])
}
}
if (ncol(H) == 1 && dformat == FORMAT$CSV) {
if (mon == MONITOR$ACTIGRAPH) {
vnames = c("Number:","t Time","t Date",":ss)","d Time","d Date","Address:","Voltage:","Mode =")
Hvalues = Hnames = rep(" ",length(H))
firstline = colnames(H)
for (run in 1:length(H)) {
for (runb in 1:length(vnames)) {
tmp = unlist(strsplit(H[run],vnames[runb]))
if (length(tmp) > 1) {
Hnames[run] = paste(tmp[1], vnames[runb], sep = "")
Hvalues[run] = paste(tmp[2], sep = "")
}
}
}
H = cbind(Hnames,Hvalues)
H = rbind(c("First line",firstline),H)
} else {
H = cbind(c(1:length(H)),H)
}
}
if (dformat == FORMAT$CWA) {
header = data.frame(value = H, row.names = rownames(H), stringsAsFactors = TRUE)
} else {
if ((mon == MONITOR$GENEACTIV && dformat == FORMAT$BIN) || (mon == MONITOR$MOVISENS && length(H) > 0)) {
varname = rownames(as.matrix(H))
H = data.frame(varname = varname,varvalue = as.character(H), stringsAsFactors = TRUE)
} else if (dformat != FORMAT$AD_HOC_CSV) {
if (length(H) > 1 && class(H)[1] == "matrix") H = data.frame(varname = H[,1],varvalue = H[,2], stringsAsFactors = TRUE)
}
}
if (dformat != FORMAT$CWA && length(H) > 1 && (class(H)[1] == "matrix" || class(H)[1] == "data.frame")) {
RowsWithData = which(is.na(H[,1]) == FALSE)
header = data.frame(value = H[RowsWithData, 2], row.names = H[RowsWithData, 1], stringsAsFactors = TRUE)
}
if (H[1,1] == "file does not have header") { #no header
header = "no header"
}
if (mon == MONITOR$ACTIGRAPH && dformat != FORMAT$GT3X) {
verisense_check = substr(colnames(read.csv(datafile,nrow = 1)[1]), start = 36, stop = 44)
if (identical('Verisense', toString(verisense_check))) {
mon = MONITOR$VERISENSE
}
}
}
if (!is.null(sf)) {
# detect dot or comma separator in the data file
op <- options(warn = -1) # turn off warnings
on.exit(options(op))
suppressWarnings(expr = {decn = g.dotorcomma(datafile, dformat, mon,
rmc.dec = params_rawdata[["rmc.dec"]])})
options(warn = 0) # turn on warnings
} else {
decn = "."
}
monc = mon
monn = ifelse(mon > 0, monnames[mon], "unknown")
dformc = dformat
dformn = fornames[dformat]
invisible(list(header = header, monc = monc, monn = monn,
dformc = dformc, dformn = dformn, sf = sf, decn = decn, filename = filename))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.