#' Imports categorical data
#'
#' @param startCol character. The column containing the start of each epoch.
#' In hh:mm:ss or mm:ss format, or just a number representing seconds (see timeMaster).
#' @param endCol character. The column containing the end of each epoch.
#' @param codingCol numeric vector. The column(s) containing categories. These
#' will be converted to factors, searching across all files for possible levels.
#' @param catName character. The name of the category
#' @param namefilt character. A name or part of name, to filter among all files in path.
#' @param removeSec numeric or numeric vector. number of seconds to be subtracted from the
#' @param idOrder
#' @param idSep
#' @param ...
#' @param path
#'
#' @return
#' @export
#'
#' @examples
readCategories = function(path,
startCol, endCol,codingCol, catName, namefilt = NA,
removeSec = 0,
idOrder= c("id","session","group"), #the order of identifiers in the filenames
idSep = "_", #the separator of identifiers in the filename
... #additional options to be passed to read.table
){
# debug
# path = "data/csv"
# namefilt = ""
# catName = "SELF"
# codingCol=c(4:6,8)
# removeSec = 0
# startCol =1
# endCol = 2
# sep = ";"
# idOrder = c("id","session")
# idSep = "_"
#############
# path ="R/manuTest"
# #"..\\Minutaggi_CC\\minutaggi_Emanuele_exchange_CC"",
# catName = "PIRS"
# removeSec = 0
# startCol =1; endCol = 2; sep = ";"; codingCol = 3:4;
# idOrder = c("x", "session", "id");
# #idOrder = c("x"; "session"; "id"; "x");
# idSep = "_"
#
l = list(...) # l = list(sep=";")
l = c(list(path,namefilt,idOrder,idSep),l)
l$stringsAsFactors = FALSE
# l$as.is = TRUE
if(is.null(l$colClasses)) l$colClasses = "character"
if(is.null(l$header)) l$header = TRUE
imp = do.call(genericIO, l)
lf= imp$lf
sess= imp$sess
dyadIds = imp$dyadIds
group = imp$group
role = imp$role
filenames = imp$filenames
shortNames = imp$shortNames
ndyads = imp$ndyads
nFiles = imp$nFiles
if(length(removeSec)==1) {message("removeSec = ",removeSec," was used for all files")
removeSec = rep(removeSec, length(lf))
} else if(length(removeSec)!=length(lf)) stop("removeSec must be defined for every file (n=",length(lf),")")
cat("File name","\t","seconds removed\r\n")
#debug: iFile = 1; file = lf[[iFile]]
listCat = Map(function(file,iFile){
cat(shortNames[iFile],"\t",removeSec[iFile],"\r\n")
# file = as.data.frame(file)
#remove na shit
if(nrow(file)>0){
file[file==""] = NA
file = file[rowSums(is.na(file)) != ncol(file),]
# file = file[,colSums(is.na(file)) != nrow(file) | seq_along(file) %in% codingCol]
file = file[,seq_along(file)<=max(c(max(codingCol),startCol,endCol))]
file[is.na(file)] = "NA" #use character NA to keep smooth subsequent analyses
#convert shitty time formats to seconds
for(i in 1:length(file[[startCol]])) {
result = tryCatch({
a = timeMaster(file[[startCol]][i],out = "sec",add = -removeSec[iFile])
b = timeMaster(file[[endCol]][i],out = "sec",add = -removeSec[iFile])
if(b-a <0 )
cat("--negative duration spotted at line:",i,"\r\n")
if(a<0 )
cat("--negative start time (after removeSec) spotted at line:",i,"\r\n")
if(b<0 )
cat("--negative end time (after removeSec) spotted at line:",i,"\r\n")
if(i>1){ #check if start is after the previous end. Epochs should NOT overlap
if(a<timeMaster(file[[endCol]][i-1],out = "sec",add = -removeSec[iFile]))
cat("--overlapping epochs spotted at line:",i,"\r\n")
}
}, error = function(e) {
cat("--invalid time value found at line:",i,"\r\n")
}, finally = {
})
}
file[[startCol]] = timeMaster(file[[startCol]],out = "sec",add = -removeSec[iFile])
file[[endCol]] = timeMaster(file[[endCol]],out="sec",add = -removeSec[iFile])
if(any(is.na(file[[startCol]]))) stop("NA was found in startCol on line ", which(is.na(file[[startCol]])))
if(any(is.na(file[[endCol]]))) stop("NA was found in startCol on line ", which(is.na(file[[endCol]])))
deltaSec = file[[endCol]] - file[[startCol]]
deltaSec[deltaSec==0] =1
# if(any(file[[endCol]]<0)) stop("After removeSec negative times were found in 'end' column in file " ,shortNames[iFile])
# if(any(file[[startCol]]<0)) stop("After removeSec negative times were found in 'start' column in file ",shortNames[iFile])
#if(length(deltaSec[deltaSec<0])>0) warning("negative durations spotted in file ",shortNames[iFile],"\r\n",which(deltaSec[deltaSec<0],arr.ind = T))
#trim leading and ending whitespaces
for(i in 1:ncol(file)){
if(is.character(file[[i]])){
file[[i]] = gsub("^\\s+|\\s+$", "", file[[i]])
}
}
res = data.frame("start"=file[[startCol]], "end" =file[[endCol]], "delta"=deltaSec)
#add the coding columns, which will be converted to factor later
res = cbind(res, file[, codingCol])
#add all other columns that should be characters if colClasses was not overridden
res = cbind(res, file[-c(startCol,endCol,codingCol)])
} else {
res <- data.frame(matrix(ncol = 3+length(codingCol), nrow = 0))
}
res
},lf,seq_along(lf))
experiment = DyadExperiment(path,Map(function(session,nSession){
#for each type of signal, add a new DyadSignal to the present DyadSession
signalList = list(
DyadCategory(name=catName, data=session )
)
ses = DyadSession(groupId = group[[nSession]], sessionId= sess[[nSession]], dyadId = dyadIds[[nSession]],
signalList=signalList, s1Name = NA, s2Name = NA, fileName = shortNames[[nSession]] )
names(ses) = catName
return(ses)
},listCat,seq_along(listCat)))
names(experiment) = paste(group,dyadIds,sess,sep="_")
#now for every coding, run across all files to search all possible levels, then transform to factor.
for(cate in 1:(3+length(codingCol))){
xxx = c()
yyy = c()
for(i in 1:length(experiment)){
xxx=c(xxx,experiment[[i]][[catName]][,cate])
yyy=c(yyy,colnames(experiment[[i]][[catName]][cate]))
}
codes = sort(unique(xxx))
titles = sort(unique(yyy))
#are there uncoherent colnames?
if(length(titles)>1){
cat0("\r\n\r\nIn column ",codingCol[cate], " various names where found and coherced to the most common:\r\n")
cat(titles)
if(paste0("X",cate) %in% titles) titles = titles[-which(paste0("X",cate) == titles)]
titles = names(table(yyy))[which.max(table(yyy))]
for(i in 1:length(experiment)){
colnames(experiment[[i]][[catName]])[cate] = titles
}
}
if(cate >3){
cat0("\r\n\r\nFor coding '",titles,"', the following levels where detected:\r\n")
report = table(xxx)
names(dimnames(report)) = ""
print(report)
# cat0("\r\n --------------------")
for(i in 1:length(experiment)){
experiment[[i]][[catName]][,cate] = factor(experiment[[i]][[catName]][,cate],levels = codes )
}
}
}
return(experiment)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.