Description Usage Arguments Value Author(s) Examples
Overlapping or pseudo-overlapping observation clustering within k-dimensional partitions.
1 2 |
id |
A vector or data.frame representing a unique identifier or key. |
kdim |
A vector or data.frame representing a "k-dimensional" index across which clusters cannot be formed. |
startdate |
A date vector of each observation's start date. |
enddate |
A date vector of each observation's end date. |
slack |
a positive numeric value representing the gap in days over which a cluster can be formed. the default is |
restartindex |
If |
kdpec
Returns a data.frame containing the column or group of columns used to uniquely identify each observation, along with the following:
kdimidx |
K-Dimensional Index. A sequential ID indexing each k-dimensional set. |
episode |
A sequential ID indexing each episodic cluster. |
Robert P. Bronaugh
## Not run:
# Merge a patient's claims for a specific diagnosis together.
# kdim prevents episode clustering across patient and diagnosis (i.e., each
# combination of PatientID and Diagnosis becomes a partition across which
# episodic clusters cannot be formed).
# Every column is referenced as epclaims$..., so attach()/detach() is not
# needed. library() is used instead of require() so that a missing sqldf
# fails immediately rather than at the first sqldf() call.
library(sqldf)
data(epclaims)

# Draw one horizontal line per time span on a fresh plot.
#   start, end       : vectors giving each span's start and end (x axis)
#   y                : vertical position of each span
#   col              : a single colour, or one colour per span
#   ylim, xlab, ylab : passed through to plot()
# The x axis runs from 3 before the earliest start to 3 after the latest
# end (days, for Date input) so that no span is clipped on the right.
plot_spans <- function(start, end, y, col, ylim, xlab, ylab) {
  xlim <- c(min(start) - 3, max(end) + 3)
  # type = "n" only sets up the axes; the spans are added by segments().
  plot(xlim, ylim, type = "n", xlim = xlim, ylim = ylim,
       xlab = xlab, ylab = ylab)
  segments(start, y, end, y, col = col, lwd = 3)
}

# restartindex = TRUE starts the episode index over at 1 for each
# k-dimensional partition
kd <- kdpec(id = epclaims$ClaimNumber,
            kdim = cbind(epclaims$PatientID, epclaims$Diagnosis),
            startdate = epclaims$ServiceStart,
            enddate = epclaims$ServiceEnd,
            restartindex = TRUE)
# print the id, k-dimensional partition index (kdimidx), and the episodes
print(kd)

# restartindex = FALSE; this second result is the one used in the steps below
kd <- kdpec(epclaims$ClaimNumber,
            cbind(epclaims$PatientID, epclaims$Diagnosis),
            epclaims$ServiceStart, epclaims$ServiceEnd,
            restartindex = FALSE)
print(kd)

# merge episode indexes with original data
ep.2 <- sqldf("SELECT ep.PatientID
,ep.ClaimNumber
,ep.Diagnosis
,ep.ServiceStart
,ep.ServiceEnd
,kd.kdimidx
,kd.episode
FROM epclaims ep
INNER JOIN kd
ON ep.ClaimNumber = kd.id")

# plot time spans of original records
washcol <- wash("gry", 0.8)
plot_spans(epclaims$ServiceStart, epclaims$ServiceEnd,
           y = seq_len(nrow(epclaims)), col = washcol,
           ylim = c(0, 15),
           xlab = "length of service",
           ylab = "claim record index")

# plot time spans of original records. Color by assigned k-dim index
washcol <- c(wash("blu1", 1), wash("grn2", 1), wash("org", 1), wash("red1", 1))
plot_spans(ep.2$ServiceStart, ep.2$ServiceEnd,
           y = seq_len(nrow(ep.2)), col = washcol[ep.2$kdimidx],
           ylim = c(0, 15),
           xlab = "length of service",
           ylab = "claim record index")

# merge records to get the full length of each episode
# (as.Date(..., origin = "1970-01-01") restores the Date class in case
# tapply() returns plain day counts)
ep.episodes <- data.frame(
  "kdimidx" = tapply(ep.2$kdimidx, ep.2$episode, min),
  "episodeStart" = as.Date(tapply(ep.2$ServiceStart, ep.2$episode, min),
                           origin = "1970-01-01"),
  "episodeEnd" = as.Date(tapply(ep.2$ServiceEnd, ep.2$episode, max),
                         origin = "1970-01-01"))

# plot the length of service of each episode. kdimidx, not claim
# records, are on the y axis; colors represent each kdimidx
plot_spans(ep.episodes$episodeStart, ep.episodes$episodeEnd,
           y = ep.episodes$kdimidx, col = washcol[ep.episodes$kdimidx],
           ylim = c(0, 4),
           xlab = "length of episode",
           ylab = "k-dimensional index")
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.