View source: R/classify_summary_statistics.R
classify_summary_statistics | R Documentation |
This function takes data, typically a data frame of events, and classifies them into different behavioural states using one of several algorithms.
classify_summary_statistics(
dta,
method = "hmm",
states = 2,
family = stats::gaussian(),
...
)
dta |
data to be classified, can be anything |
method |
method for classifying data; currently supports "hmm" for a hidden Markov model and "kmeans" for k-means clustering (the examples below also use "embc", "agnes" and "diana") |
states |
number of states to classify the data into |
family |
By default `stats::gaussian()` |
... |
any additional inputs for depmixS4::depmix, stats::kmeans, cluster::agnes, cluster::diana or EMbC::embc functions, depending on method selected |
the data's classification based on the chosen algorithm
Forgy, E. W. (1965). Cluster analysis of multivariate data: efficiency vs interpretability of classifications. Biometrics, 21, 768–769.
Hartigan, J. A. and Wong, M. A. (1979). Algorithm AS 136: A K-means clustering algorithm. Applied Statistics, 28, 100–108. doi: 10.2307/2346830.
Lloyd, S. P. (1957, 1982). Least squares quantization in PCM. Technical Note, Bell Laboratories. Published in 1982 in IEEE Transactions on Information Theory, 28, 128–137.
MacQueen, J. (1967). Some methods for classification and analysis of multivariate observations. In Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability, eds L. M. Le Cam & J. Neyman, 1, pp. 281–297. Berkeley, CA: University of California Press.
Ingmar Visser and Maarten Speekenbrink (2010). depmixS4: An R Package for Hidden Markov Models. Journal of Statistical Software, 36(7), p. 1-21.
Lawrence R. Rabiner (1989). A tutorial on hidden Markov models and selected applications in speech recognition. Proceedings of IEEE, 77-2, p. 267-295.
Kaufman, L. and Rousseeuw, P.J. (1990). Finding Groups in Data: An Introduction to Cluster Analysis. Wiley, New York.
Anja Struyf, Mia Hubert and Peter J. Rousseeuw (1996) Clustering in an Object-Oriented Environment. Journal of Statistical Software 1. http://www.jstatsoft.org/v01/i04
Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating Robust Clustering Techniques in S-PLUS, Computational Statistics and Data Analysis, 26, 17–37.
Lance, G.N., and W.T. Williams (1966). A General Theory of Classificatory Sorting Strategies, I. Hierarchical Systems. Computer J. 9, 373–380.
Belbin, L., Faith, D.P. and Milligan, G.W. (1992). A Comparison of Two Approaches to Beta-Flexible Clustering. Multivariate Behavioral Research, 27, 417–433.
Gower, J. C. (1971) A general coefficient of similarity and some of its properties, Biometrics 27, 857–874.
Garriga, J., Palmer, J.R., Oltra, A. and Bartumeus, F., 2016. Expectation-maximization binary clustering for behavioural annotation. PLoS One, 11(3), p.e0151984.
Garriga, J., Palmer, J.R.B., Oltra, A. and Bartumeus, F., 2014. EMbC: expectation-maximization binary clustering. arXiv preprint arxiv:1503.04059, 1.
## Not run:
#######################################################
# data prep
#######################################################
# Load the example dataset used by every example below.
data(bee_eater)

# Bounds of the period to keep. The format string is passed by name rather
# than positionally, and the bounds get their own names so they do not mask
# stats::start() / stats::end().
crop_start <- as.POSIXct("2015-07-01", format = "%Y-%m-%d", tz = "UTC")
crop_end <- as.POSIXct("2016-06-01", format = "%Y-%m-%d", tz = "UTC")
PAM_data <- create_crop(bee_eater, crop_start, crop_end)

# Twilight events computed from the light record; reused below as `twl`.
twl <- GeoLight::twilightCalc(
  PAM_data$light$date,
  PAM_data$light$obs,
  LightThreshold = 2,
  ask = FALSE
)

# Summary statistics that the classification examples operate on.
to_classify <- create_summary_statistics(
  PAM_data,
  method = "flap",
  twl = twl
)

# Drop every row that contains a missing value.
to_classify <- to_classify[complete.cases(to_classify), ]
#######################################################
# k-means example
#######################################################
# Columns handed to the classifier.
feature_cols <- c("cum_altitude_change", "night_P_diff")

# Classify into two states with k-means and keep the cluster labels.
classification <- classify_summary_statistics(
  to_classify[, feature_cols],
  method = "kmeans",
  states = 2
)$cluster

# Attach the per-event labels to the pressure record.
pressure_classification <- create_merged_classification(
  from = to_classify$start,
  to = to_classify$end,
  classification = classification,
  add_to = PAM_data$pressure
)

# Pressure trace, with each observation coloured by its label.
plot(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  type = "l"
)
points(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  col = pressure_classification + 1,
  pch = 16
)
#######################################################
# HMM example
#######################################################
# Columns handed to the classifier.
# NOTE(review): the original call carried a commented-out alternative input,
# `to_classify$night_P_diff + to_classify$cum_altitude_change`; it was never
# executed and is kept here only as a reminder.
feature_cols <- c("cum_altitude_change", "night_P_diff")

# Classify into two states with the "hmm" method and keep the labels.
classification <- classify_summary_statistics(
  to_classify[, feature_cols],
  method = "hmm",
  states = 2
)$cluster

# Attach the per-event labels to the pressure record.
pressure_classification <- create_merged_classification(
  from = to_classify$start,
  to = to_classify$end,
  classification = classification,
  add_to = PAM_data$pressure
)

# Pressure trace, with each observation coloured by its label.
plot(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  type = "l"
)
points(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  col = pressure_classification + 1,
  pch = 16
)
#######################################################
# EMBC example
#######################################################
# Columns handed to the classifier.
feature_cols <- c("cum_altitude_change", "night_P_diff")

# Keep the whole result: both `$cluster` and `$output` are used below.
# `states` is left at its default, as in the original call.
classification <- classify_summary_statistics(
  to_classify[, feature_cols],
  method = "embc"
)

# Attach the per-event labels to the pressure record.
pressure_classification <- create_merged_classification(
  from = to_classify$start,
  to = to_classify$end,
  classification = classification$cluster,
  add_to = PAM_data$pressure
)

# Pressure trace; point colours are looked up in the `C` slot of the
# returned output object, indexed by label + 1.
plot(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  type = "l"
)
points(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  col = classification$output@C[pressure_classification + 1],
  pch = 16
)
#######################################################
# agnes example
#######################################################
# Columns handed to the classifier.
feature_cols <- c("cum_altitude_change", "night_P_diff")

# Keep the whole result: `$output` is plotted, `$cluster` is merged below.
classification <- classify_summary_statistics(
  to_classify[, feature_cols],
  method = "agnes",
  states = 2
)

# Second diagnostic plot of the fitted object.
plot(classification$output, main = "agnes", which.plot = 2)

# Attach the per-event labels to the pressure record.
pressure_classification <- create_merged_classification(
  from = to_classify$start,
  to = to_classify$end,
  classification = classification$cluster,
  add_to = PAM_data$pressure
)

# Pressure trace, with each observation coloured by its label.
plot(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  type = "l"
)
points(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  col = pressure_classification + 1,
  pch = 16
)
#######################################################
# diana example
#######################################################
# Columns handed to the classifier.
feature_cols <- c("cum_altitude_change", "night_P_diff")

# Keep the whole result: `$output` is plotted, `$cluster` is merged below.
classification <- classify_summary_statistics(
  to_classify[, feature_cols],
  method = "diana",
  states = 2
)

# Second diagnostic plot of the fitted object.
plot(classification$output, which.plot = 2, main = "diana")

# Attach the per-event labels to the pressure record.
pressure_classification <- create_merged_classification(
  from = to_classify$start,
  to = to_classify$end,
  classification = classification$cluster,
  add_to = PAM_data$pressure
)

# Pressure trace, with each observation coloured by its label.
plot(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  type = "l"
)
points(
  PAM_data$pressure$date,
  PAM_data$pressure$obs,
  col = pressure_classification + 1,
  pch = 16
)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.