## wrapper functions - these are the main functions that you will use to train a model, classify data using a pre-trained model, and do cross-validation
trainModel = function(annotations, accelerometers=NULL, GPS=NULL, winSize=60,
                      modelName, names=NULL, strat=TRUE,
                      ntree=500, mtry=NULL, replace=TRUE,
                      nsample=10000, nodesize=1, sampsize=10000) {
  # Train a model, starting either from raw sensor data or from features
  # that were computed earlier.
  #
  # ARGUMENTS:
  #   annotations:    directory holding the annotation files
  #   accelerometers: directory (or directories) holding raw accelerometer files
  #   GPS:            directory holding GPS files
  #   winSize:        window length in seconds (default 60)
  #   modelName:      where the trained model is written (e.g. "~/myModel.RData")
  #   names:          (optional) restrict training to these participants
  #   strat:          use stratified sampling in the random forest, i.e. draw
  #                   equal amounts of each activity type (default TRUE)
  #   ntree:          number of trees in the random forest (default 500)
  #   mtry:           number of variables tried at each split (NULL leaves the
  #                   choice to the random forest code)
  #   replace:        sample cases with replacement? (default TRUE)
  #   nsample:        number of data samples picked BEFORE the forest is trained
  #   nodesize:       minimum size of terminal nodes (default 1)
  #   sampsize:       random forest sampling parameter: size of sample to draw
  #
  # The three steps below are delegated to helpers: annotations are turned into
  # per-window labels, sensors into feature directories, and both are handed to
  # trainFromFeatures(), whose result is also this function's result.

  # step 1: bout-format annotations -> window-format labels
  windowLabelDir = annotationsToLabels(annotations, winSize, names)

  # step 2: one feature directory per device
  deviceFeatDirs = sensorsToFeatures(accelerometers, GPS, winSize, names)
  nDevices = length(deviceFeatDirs)
  if (nDevices == 0) {
    # neither GPS nor accelerometer input produced a feature directory
    stop("there was a problem extracting features")
  }

  # step 3: fit and save the model
  cat("\ntraining model from", nDevices, "devices\n")
  trainFromFeatures(windowLabelDir, deviceFeatDirs,
                    winSize=winSize, modelName=modelName, names=names,
                    strat=strat, ntree=ntree, mtry=mtry, replace=replace,
                    nsample=nsample, nodesize=nodesize, sampsize=sampsize)
}
classify = function(accelerometers=NULL, GPS=NULL, modelName, saveDir, names=NULL) {
  # Apply a previously trained model to new data, given either raw sensor
  # data or feature directories computed earlier.
  #
  # ARGUMENTS:
  #   accelerometers: directory with raw accelerometer files (supply GPS if NULL)
  #   GPS:            directory with GPS files (supply accelerometers if NULL)
  #   modelName:      path to the saved, pre-trained model (.Rdata format)
  #   saveDir:        directory that receives the output
  #   names:          (optional) only these identifiers are processed

  # the window size is read back from the saved model
  modelWinSize = loadModel(modelName, "winSize")

  # turn the sensor inputs into feature directories (paths are returned)
  deviceFeatDirs = sensorsToFeatures(accelerometers, GPS, modelWinSize, names)
  if (length(deviceFeatDirs) == 0) {
    # nothing came back from the feature step, so there is nothing to classify
    stop("No data directories found")
  }
  cat("\n")

  # run the classification itself
  testAllDir(deviceFeatDirs, modelName, saveDir, names)
}
looXval = function(annotations, accelerometers=NULL, GPS=NULL, winSize=60,
                   saveDir, names=NULL, strat=TRUE,
                   ntree=500, mtry=NULL, replace=TRUE,
                   nsample=10000, nodesize=1, sampsize=10000) {
  # function to do leave-one-participant-out cross-validation
  # INPUTS:
  # annotations: path to directory containing annotation files
  # accelerometers: path to directory containing raw accelerometer files
  # GPS: path to directory containing GPS files
  # winSize: window size, in seconds (default is 60)
  # saveDir: path to directory where predictions will be saved
  # names: (optional) only cross-validate over these participants
  #        (default: every entry listed in the annotations directory)
  # strat: Boolean - use stratified sampling in the random forest - choose
  #        equal amounts of each activity type when you're training (default is TRUE)
  # ntree: number of trees in the random forest (default is 500)
  # mtry: number of variables randomly sampled as candidates at each split in a tree
  # replace: Should sampling of cases be done with or without replacement? (default is TRUE)
  # nsample: number of data samples to choose BEFORE you train the random forest
  # nodesize: minimum size of terminal nodes (default=1)
  # sampsize: random forest sampling parameter: size of sample to draw
  # OUTPUT:
  # invisible NULL; predictions are written under saveDir by classify()
  if (is.null(names)) {
    names = list.files(annotations)
  }
  if (length(names) < 2) {
    # with fewer than two participants there is nobody left to train on
    stop("leave-one-out cross-validation needs at least two participants")
  }
  # every fold reuses the same temporary model file
  modelName = file.path(saveDir, "xvalModelTemp.Rdata")
  # make sure the temporary model does not linger if a fold fails part-way
  on.exit(if (file.exists(modelName)) file.remove(modelName), add=TRUE)
  # seq_along() visits every participant; `length(names)` alone would only
  # yield the single last index
  for (i in seq_along(names)) {
    testName = names[i]
    trainNames = names[-i]
    # a bare sprintf() inside a loop is never printed, so emit it explicitly
    cat(sprintf("Cross-validating participant %s\n", testName))
    # first train the model on everyone except the held-out participant
    trainModel(annotations=annotations, accelerometers=accelerometers, GPS=GPS,
               winSize=winSize, modelName=modelName, names=trainNames,
               strat=strat, ntree=ntree, mtry=mtry, replace=replace,
               nsample=nsample, nodesize=nodesize, sampsize=sampsize)
    # then test the model on the held-out participant
    classify(accelerometers=accelerometers, GPS=GPS, modelName=modelName,
             saveDir=saveDir, names=testName)
    # delete the temporary model file
    file.remove(modelName)
  }
  invisible(NULL)
}
sensorsToFeatures = function(accelerometers=NULL, GPS=NULL, winSize, names=NULL) {
  # extract features (if they don't already exist) and return feature
  # directories from raw sensor directories
  # INPUTS:
  # accelerometers: path(s) to directory containing raw accelerometer files;
  #                 each element is handled as a separate device
  # GPS: path to a GPS directory or to a single GPS file
  # winSize: window size, in seconds
  # names: (optional) extract features from these participants
  # OUTPUT:
  # character vector of feature directories, GPS first (if given), then one
  # per accelerometer; character(0) when neither input is supplied
  # initialization
  featDirs = character(0)
  # do GPS features
  if (!is.null(GPS)) { # check if there are GPS features to compute
    if (!file.exists(GPS)) {
      # the GPS path doesn't exist
      stop("GPS file/directory not found")
    }
    if (file.info(GPS)$isdir) {
      # the GPS path is a directory
      if (isFeatureDirectory(GPS)) {
        # the path already points at a feature directory - use it as is
        GPSFeatDir = GPS
      } else {
        # set up the feature directory name: GPS_Features_(winSize)
        GPSFeatDir = paste(GPS, "Features", as.character(winSize), sep="_")
        # extract GPS features from a directory
        extractFeatsPALMSDir(GPS, GPSFeatDir, winSize, names)
      }
    } else {
      # the GPS path is a single file
      # set up the feature directory name: (GPS without extension)_Features_(winSize)
      # file_path_sans_ext lives in the tools package; qualify it so the call
      # works even when tools has not been attached
      GPSFeatDir = paste(tools::file_path_sans_ext(GPS), "Features",
                         as.character(winSize), sep="_")
      # extract GPS features from a single file
      extractFeatsPALMSOneFile(GPS, GPSFeatDir, winSize)
    }
    # add the GPS feature directory to the list of feature directories
    featDirs = c(featDirs, GPSFeatDir)
  }
  # do accelerometer features
  if (!is.null(accelerometers)) { # check if there are accelerometer features to compute
    for (acc in accelerometers) { # loop through accelerometers (e.g., wrist, hip)
      if (!file.exists(acc)) {
        # the accelerometer path doesn't exist
        stop("accelerometer directory not found")
      }
      if (isFeatureDirectory(acc)) {
        # the path already points at a feature directory - use it as is
        accFeatDir = acc
      } else {
        # set up the feature directory name: acc_Features_(winSize)
        accFeatDir = paste(acc, "Features", as.character(winSize), sep="_")
        # extract accelerometer features from a directory
        extractAccelerometerFeatures(acc, accFeatDir, winSize, names)
      }
      # add the accelerometer feature directory to the list of feature directories
      featDirs = c(featDirs, accFeatDir)
    }
  }
  # return the list of feature directories
  return(featDirs)
}
isFeatureDirectory = function(dir) {
  # check if the directory is a feature directory
  # The test used here: the first entry listed inside `dir` is itself a
  # directory.
  # INPUTS:
  # dir: path to inspect
  # OUTPUT:
  # TRUE if the first entry of `dir` is a directory, FALSE otherwise
  # (including when `dir` is empty, missing, or not a directory)
  entries = list.files(dir, full.names=TRUE)
  if (length(entries) == 0) {
    # nothing to inspect; indexing [1] here would give NA and make the
    # `if` on file.info()$isdir fail with "missing value where TRUE/FALSE needed"
    return(FALSE)
  }
  # isTRUE() also turns an NA from file.info() into FALSE
  isTRUE(file.info(entries[1])$isdir)
}
trainFromFeatures = function(labelDir, featDirs, winSize, modelName, names=NULL,
                             strat=TRUE, ntree=500, mtry=NULL, sampsize=10000,
                             replace=TRUE, nsample=10000, nodesize=1) {
  # Fit a model from features that have already been computed and write it
  # to disk.
  #
  # ARGUMENTS:
  #   labelDir:  directory holding the label files (window format)
  #   featDirs:  feature directories to train from
  #   winSize:   window length in seconds; stored alongside the model
  #   modelName: where the model is saved (e.g. "~/myModel.RData")
  #   names:     (optional) restrict training to these participants
  #   strat:     use stratified sampling in the random forest (default TRUE)
  #   ntree:     number of trees in the random forest (default 500)
  #   mtry:      number of variables tried at each split
  #   sampsize:  random forest sampling parameter: size of sample to draw
  #   replace:   sample cases with replacement? (default TRUE)
  #   nsample:   number of data samples picked BEFORE the forest is trained
  #   nodesize:  minimum size of terminal nodes (default 1)

  # fall back to every entry in the label directory when no names are given
  participants = if (is.null(names)) list.files(labelDir) else names

  # NOTE: `rf`, `hmm` and `winSize` must keep exactly these names, because
  # save() below stores the objects under the names of the variables.

  # first level: random forest
  rf = trainRF(labelDir, featDirs, names=participants, strat=strat,
               ntree=ntree, mtry=mtry, replace=replace, nsample=nsample,
               nodesize=nodesize, sampsize=sampsize)

  # second level: HMM, trained with the fitted forest
  hmm = trainHMM(labelDir, rf, participants)

  # make sure the target directory is there before writing
  outputDir = dirname(modelName)
  if (!file.exists(outputDir)) {
    dir.create(outputDir, recursive=TRUE)
  }

  # persist forest, HMM and window size in one file
  save(rf, hmm, winSize, file=modelName)
  cat("model saved to", modelName, "\n")
}
testAllDir = function(featDirs, modelName, saveDir, names=NULL) {
  # function to classify test data from features
  # INPUTS:
  # featDirs: path to list of feature directories
  # modelName: path to saved model that will be applied
  # saveDir: path to a directory where you want the output saved
  # names: (optional) if provided, only process these identifiers
  #        (default: every entry listed in the first feature directory)
  if (is.null(names)) {
    names = list.files(featDirs[1])
  }
  # saveDir1 is directory where unsmoothed predictions are saved
  saveDir1 = file.path(saveDir, "Level1")
  # seq_along() gives an empty loop when there are no names; 1:length(names)
  # would instead run with indices 1 and 0 and pass NA / empty identifiers on
  for (i in seq_along(names)) {
    # first level: random forest predictions go to saveDir1
    testRF(featDirs, modelName, saveDir1, names[i])
    # second level: HMM reads from saveDir1 and writes to saveDir
    testHMM(saveDir1, modelName, saveDir, names[i])
  }
  cat("predictions saved to", saveDir, "\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.