Nothing
seqgroups <- function(alldata = NULL, labels = NULL, lag = 1, adjacent = TRUE,
onezero = NULL, tailed = 2, test = "homogeneity", output = "all") {
cat('\n\nLag Sequential Analysis of data that are in segments (e.g, for multiple dyads or groups\n\n')
cat("\nRequested 'tail' (1 or 2) for Significance Tests =", tailed, "\n")
if (!adjacent) {
adjacent <- 0
} else if (adjacent && is.null(onezero)) {
adjacent <- 1
} else if (adjacent && !is.null(onezero)) {
adjacent <- 2
}
alldata <- matrix(alldata, ncol = 1)
# are all data values numeric?
if ((all(sapply(alldata, is.numeric))) == TRUE) {
# create a version of alldata without the numeric segment codes, in order to get the max code value
dattemp <- alldata[alldata < 1000]
ncodes <- max(dattemp)
if (is.null(labels)) {
labels <- 1:ncodes
for (lupe in 1:ncodes) labels[lupe] <- paste("Code", lupe)
}
}
# if any data values are characters, treat them all as strings & provide integer values for the analyses
if ((any(sapply(alldata, is.character))) == TRUE) {
# create a version of alldata without the "segment" values, in order to get the code labels
dattemp <- alldata[nzchar(gsub("segment+", "", alldata))]
labels <- unique(dattemp)
newdat <- matrix(-9999, length(alldata), 1)
for (lupe in 1:length(alldata)) {
# replace code values that aren't segment codes with integers
if (alldata[lupe, 1] != "segment") newdat[lupe, 1] <- which(labels == alldata[lupe, 1], arr.ind = F)
if (alldata[lupe, 1] == "segment") newdat[lupe, 1] <- 999999
}
alldata <- newdat
ncodes <- length(labels)
}
# pooling data for stationarity analyses
if (test == "stationarity") {
totdata <- 0
for (x in 1:nrow(alldata)) {
if (alldata[x] < 1000) {
totdata <- rbind(totdata, alldata[x])
}
}
totdata <- matrix(totdata[2:nrow(totdata)], ncol = 1)
freqst <- matrix(0, ncodes, ncodes)
for (y in 1:length(totdata)) {
if (y + lag <= length(totdata)) {
freqst[totdata[y], totdata[y + lag]] <- freqst[totdata[y], totdata[y +
lag]] + 1
}
}
}
# out <- matrix(0, 1, (ncodes * ncodes + 1))
each <- matrix(0, 1, (ncodes * ncodes))
start <- 2
lastone <- 0
for (a in 1:nrow(alldata)) {
freqs <- matrix(0, ncodes, ncodes)
data <- 0
for (b in start:nrow(alldata)) {
if (alldata[b] < 1000) {
data <- rbind(data, alldata[b])
end <- b
}
if (alldata[b] >= 1000) {
start <- b + 1
break
}
}
data <- matrix(data[2:length(data)], ncol = 1)
lastone <- rbind(lastone, data[nrow(data)])
freqs <- matrix(0, ncodes, ncodes)
for (c in 1:nrow(data)) {
if (c + lag <= nrow(data)) {
freqs[data[c], data[c + lag]] <- freqs[data[c], data[c + lag]] + 1
}
}
each <- rbind(each, matrix(t(freqs), 1, (ncodes * ncodes)))
if (end == nrow(alldata)) {
break
}
}
lastone <- matrix(c(lastone[2:nrow(lastone), 1], lastone[nrow(lastone), 1]), ncol = 1)
if (test == "homogeneity") {
freqst <- (matrix(t(colSums(each)), ncodes, ncodes))
freqst <- t(freqst)
}
rowtott <- rowSums(freqst)
pp <- matrix(0, ncodes, ncodes)
each <- rbind(each[2:nrow(each), ], matrix(t(freqst), 1, ncol(each)))
# initializing
lrx2sh <- 0
for (dindex in 1:nrow(each)) {
lrx2t <- matrix(0)
freqs <- t(matrix(t(each[dindex, ]), ncodes, ncodes))
rowtots <- matrix(rowSums(freqs))
coltots <- matrix(colSums(freqs), ncol = ncodes)
ntrans <- sum(rowtots)
prows <- rowtots / ntrans
pcols <- coltots / ntrans
tprob <- matrix(-9999, ncodes, ncodes)
et <- matrix(-9999, ncodes, ncodes)
expfreq <- matrix(-9999, ncodes, ncodes)
zadjres <- matrix(-9999, ncodes, ncodes)
pzadjres <- matrix(-1, ncodes, ncodes)
yulesq <- matrix(-9999, ncodes, ncodes)
var <- matrix(-9999, ncodes, ncodes)
min <- matrix(-9999, ncodes, ncodes)
kappa <- matrix(-9999, ncodes, ncodes)
zkappa <- matrix(-9999, ncodes, ncodes)
pzkappa <- matrix(1, ncodes, ncodes)
n <- ntrans + 1
nr <- rowtots
nr[lastone[dindex, 1]] <- nr[lastone[dindex, 1]] + 1
# Warning message for specification error.
if ((adjacent == 0) & (any(diag(freqs) == 0))) {
cat("\n\nWarning:\nYou have indicated that consecutive or adjacent codes can never repeat")
cat("\n(adjacent = 0), yet repeating codes have been found in the data. See the ")
cat("\nmain diagonal of the frequency matrix. This specification error will result ")
cat("\nin faulty computations for LRX2, z-values, and adjusted residuals.\n")
}
for (i in 1:ncodes) {
for (j in 1:ncodes) {
# Note: more refined computations for when adjacent codes cannot repeat appear below,
# after the above 2 loops are complete.
if ((adjacent == 0) && (ntrans - rowtots[i] > 0)) {
pcols[j] <- coltots[j] / (ntrans - rowtots[i])
}
if ((adjacent == 0) && (ntrans - rowtots[j] > 0)) {
expfreq[i, j] <- (rowtots[i] * coltots[j]) / (ntrans - rowtots[j])
}
if ((adjacent == 0) && (n - nr[i] > 0)) {
et[i, j] <- (nr[i] * nr[j]) / (n - nr[i])
}
if (adjacent == 1) {
et[i, j] <- (nr[i] * nr[j]) / n
expfreq[i, j] <- (rowtots[i] * coltots[j]) / ntrans
}
# transitional probabilities.
if (rowtots[i] > 0) {
tprob[i, j] <- freqs[i, j] / rowtots[i]
}
# tablewise LRX2
if ((freqs[i, j] > 0) && (expfreq[i, j] > 0)) {
lrx2t <- lrx2t + 2 * (freqs[i, j] * log(freqs[i, j] / expfreq[i, j]))
}
# LRX2 for stationarity/homogeneity.
if (dindex < nrow(each)) {
if (rowtott[i] > 0) {
pp[i, j] <- freqst[i, j] / rowtott[i]
}
if ((tprob[i, j] > 0) && (pp[i, j] > 0)) {
lrx2sh <- lrx2sh + 2 * (freqs[i, j] * log(tprob[i, j] / pp[i, j]))
}
}
# adjusted residuals (z values) & sig levels.
if ((expfreq[i, j] * (1 - pcols[j]) * (1 - prows[i])) > 0) {
zadjres[i, j] <- ((freqs[i, j] - expfreq[i, j])) / sqrt(expfreq[i, j] *
(1 - pcols[j]) * (1 - prows[i]))
pzadjres[i, j] <- (1 - pnorm(abs(zadjres[i, j]))) * tailed
}
# Yule's Q; see Bakeman & Gottman, 1997, p. 129.
a <- freqs[i, j]
b <- rowtots[i] - freqs[i, j]
c <- coltots[j] - freqs[i, j]
d <- ntrans - rowtots[i] - coltots[j] + freqs[i, j]
if ((a * d + b * c) > 0) {
yulesq[i, j] <- (a * d - b * c) / (a * d + b * c)
}
# kappas, z values & sig levels.
var[i, j] <- (nr[i] * nr[j] * (n - nr[j]) * (n - nr[i])) / (n^2 * (n - 1))
if (var[i, j] > 0) {
zkappa[i, j] <- (freqs[i, j] - et[i, j]) / sqrt(var[i, j])
if (nr[i] <= nr[j]) {
min[i, j] <- nr[i]
} else {
min[i, j] <- nr[j]
}
if (min[i, j] - et[i, j] != 0) {
kappa[i, j] <- (freqs[i, j] - et[i, j]) / (min[i, j] - et[i, j])
if (kappa[i, j] < 0) {
kappa[i, j] <- (freqs[i, j] - et[i, j]) / et[i, j]
}
pzkappa[i, j] <- (1 - pnorm(abs(zkappa[i, j]))) * tailed
}
}
}
}
if ((adjacent == 0) || (adjacent == 2)) {
# maximum likelihood estimation of the expected cell frequencies using
# iterative proportional fitting (Wickens, 1989, pp. 107-112).
rsumsf <- rowSums(freqs)
csumsf <- colSums(freqs)
if (is.null(onezero)) {
onezero <- matrix(1, ncodes, ncodes)
diag(onezero) <- 0
}
# The two previous commands create a matrix of ones and
# zeros that is used in estimating the expected cell
# frequencies. A "1" indicates that the expected frequency
# for a given cell is to be estimated, whereas a "0"
# indicates that the expected frequency for the cell
# should NOT be estimated, typically because it is a
# structural zero (codes that cannot follow one another).
# By default, the matrix that is created by the above
# commands has zeros along the main diagonal, and ones
# everywhere else, which will be appropriate for most data
# sets. However, if yor data happen to involve structural
# zeros that occur in cells other than the cells along the
# main diagonal, then you must create a ONEZERO matrix
# with ones and zeros that is appropriate for your data
# before running any of the commands below. Enter your
# ONEZERO matrix now.
expfreq <- onezero
for (ipfloop in 1:100) {
# adjusting by row.
xr <- matrix(0, ncodes, 1)
rsumse <- rowSums(expfreq)
for (r in 1:ncodes) {
if (rsumse[r] > 0) {
xr[r] <- rsumsf[r] / rsumse[r]
}
}
for (i in 1:ncodes) {
for (j in 1:ncodes) {
if (onezero[i, j] == 1) {
expfreq[i, j] <- expfreq[i, j] * xr[i]
}
}
}
# adjusting by column.
xc <- matrix(0, 1, ncodes)
csumse <- colSums(expfreq)
for (c in 1:ncodes) {
if (csumse[c] > 0) {
xc[c] <- csumsf[c] / csumse[c]
}
}
for (i in 1:ncodes) {
for (j in 1:ncodes) {
if (onezero[i, j] == 1) {
expfreq[i, j] <- expfreq[i, j] * xc[j]
}
}
}
rdiffs <- rsumsf - rowSums(expfreq)
cdiffs <- csumsf - colSums(expfreq)
if ((max(rdiffs) < 1e-04) & (max(cdiffs) < 1e-04)) {
break
}
}
cat("Maximum likelihood estimation of the expected cell frequencies using iterative proportional fitting ")
if ((max(rdiffs) < 1e-04) & (max(cdiffs) < 1e-04)) {
cat("converged after the following number of iterations:", ipfloop, "\n\n")
} else {
cat(
"did NOT converge after the following number of iterations:", ipfloop,
"\n\n"
)
}
# tablewise LRX2
lrx2t <- matrix(0)
for (i in 1:ncodes) {
for (j in 1:ncodes) {
if ((freqs[i, j] > 0) & (expfreq[i, j] > 0)) {
lrx2t <- lrx2t + 2 * freqs[i, j] * log(freqs[i, j] / expfreq[i, j])
}
}
}
# adjusted residuals for matrices with structural zeros (Christensen, 1997, p. 357).
# constructing the design matrix. 345
x <- matrix(1, nrow = ncodes^2, ncol = 1)
y <- matrix(0, nrow = ncodes^2, ncol = ncodes - 1)
z <- matrix(0, nrow = ncodes^2, ncol = ncodes - 1)
for (i in 1:(ncodes - 1)) {
for (j in 1:ncodes) {
y[i * ncodes + j, i] <- 1
z[(((j - 1) * ncodes) + (i + 1)), i] <- 1
}
}
des1 <- cbind(x, y, z)
# pruning values corresponding to cells with structural zeros.
onezero2 <- matrix(t(onezero), ncodes^2, 1)
dm1 <- matrix(t(expfreq), ncodes^2, 1)
dm2 <- matrix(-9999, 1, 1)
des2 <- matrix(-9999, 1, ncol(des1))
for (p in 1:(ncodes^2)) {
if (onezero2[p] == 1) {
dm2 <- rbind(dm2, dm1[p])
des2 <- rbind(des2, des1[p, ])
}
}
dm2 <- dm2[2:nrow(dm2), 1]
des2 <- des2[2:nrow(des2), ]
dm2 <- diag(dm2)
if (det(t(des2) %*% dm2 %*% des2) != 0) {
a <- des2 %*% (solve(t(des2) %*% dm2 %*% des2)) %*% t(des2) %*% dm2
acounter <- 1
for (i in 1:ncodes) {
for (j in 1:ncodes) {
if (onezero[i, j] != 0) {
zadjres[i, j] <- (freqs[i, j] - expfreq[i, j]) / sqrt(expfreq[i, j] *
(1 - a[acounter, acounter]))
acounter <- acounter + 1
}
}
}
} else {
cat("\n\nWarning:\nA nonsingular matrix has been identified, which means that proper adjusted")
cat("\nresiduals cannot be computed for this data, probably because there are no values for one")
cat("\nor more codes. Try recoding using sequential integers, and redo the analyses.")
cat("\nThe adjusted residuals that are printed below are based on equation 5 from")
cat("\nBakeman & Quera (1995, p. 274), and are close approximations to the proper values.")
cat("\nThe procedures recommended by Bakemen & Quera (1995, p. 276), Haberman (1979), and")
cat("\nChristensen (1997) cannot be conducted with nonsingular matrices.\n\n")
}
for (i in 1:ncodes) {
for (j in 1:ncodes) {
if (onezero[i, j] == 0) {
zadjres[i, j] <- 0
yulesq[i, j] <- 0
kappa[i, j] <- 0
zkappa[i, j] <- 0
pzadjres[i, j] <- 1
pzkappa[i, j] <- 1
}
}
}
}
# if (outfile == 1) {
# outf <- freqs
# } else if (outfile == 2) {
# outf <- tprob
# } else if (outfile == 3) {
# outf <- zadjres
# } else if (outfile == 4) {
# outf <- yulesq
# } else if (outfile == 5) {
# outf <- kappa
# }
# out <- rbind(out, cbind(dindex, matrix(t(outf), 1, (ncodes * ncodes))))
b <- as.matrix(labels[1:ncodes])
if ((dindex < nrow(each)) && (output == "all")) {
cat("\n\n\nGroup/Segment Number:", dindex, "\n")
} else if (dindex == nrow(each)) {
cat("\n\n\nPooled Data:\n\n")
cat("\nNumber of cases/groups/segments:", nrow(each) - 1, "\n")
dfsh <- ncodes * (ncodes - 1) * ((nrow(each) - 1) - 1)
plrx2sh <- 1 - pchisq(abs(lrx2sh), dfsh)
lchisq <- cbind(lrx2sh, dfsh, plrx2sh)
cat("\n\nLikelihood Ratio (Chi-Square) Test of Homogeneity/Stationarity = ",
round(lrx2sh,2),", df = ",dfsh,", p = ",round(plrx2sh,5),"\n\n",sep='')
}
if ((dindex == nrow(each)) || (output == "all")) {
b <- as.matrix(labels[1:ncodes])
bb <- rbind(b, "Totals")
cfreq <- rbind(cbind(freqs, rowtots), cbind(coltots, ntrans))
rownames(cfreq) <- bb
colnames(cfreq) <- bb
cat("\nCell Frequencies, Row & Column Totals, & N\n\n")
print(cfreq)
if ((adjacent == 0) || (adjacent == 2)) {
cat("\n\nThe processed ONEZERO matrix appears below. In the ONEZERO matrix,")
cat("\na 0 indicates a structural zero, and a 1 indicates that an expected ")
cat("\ncell frequency will be estimated.\n")
cat("\nONEZERO matrix:\n\n")
rownames(onezero) <- b
colnames(onezero) <- b
print(onezero)
}
cat("\n\nExpected Values/Frequencies\n\n")
rownames(expfreq) <- b
colnames(expfreq) <- b
print(round(expfreq, 2))
cat("\n\nTransitional Probabilities\n\n")
rownames(tprob) <- b
colnames(tprob) <- b
print(round(tprob, 2))
if (adjacent == 1) {
dft <- (ncodes - 1)^2
} else {
dft <- (ncodes - 1)^2 - (ncodes^2 - sum(onezero))
}
plrx2t <- 1 - pchisq(abs(lrx2t), dft)
tlr <- cbind(lrx2t, dft, plrx2t)
cat("\n\nTablewise Likelihood Ratio (Chi-Square) test = ",round(lrx2t,2),
", df = ",dft,", p = ",round(plrx2t,5),"\n",sep='')
cat("\n\nAdjusted Residuals\n\n")
rownames(zadjres) <- b
colnames(zadjres) <- b
print(round(zadjres, 2))
cat("\n\nSignificance Levels for the Adjusted Residuals\n\n")
rownames(pzadjres) <- b
colnames(pzadjres) <- b
print(round(pzadjres, 4))
cat("\n\nYule's Q Values\n\n")
rownames(yulesq) <- b
colnames(yulesq) <- b
print(round(yulesq, 2))
cat("\n\nUnidirectional Kappas\n\n")
rownames(kappa) <- b
colnames(kappa) <- b
print(round(kappa, 2))
cat("\n\nz values for the Unidirectional Kappas\n\n")
rownames(zkappa) <- b
colnames(zkappa) <- b
print(round(zkappa, 2))
cat("\n\nSignificance Levels for the Unidirectional Kappas\n\n")
rownames(pzkappa) <- b
colnames(pzkappa) <- b
print(round(pzkappa, 4))
}
}
seqg_output <- list(
freqs = freqs, expfreqs = expfreq, probs = tprob, chi = lchisq, adjres = zadjres,
p = pzadjres, YulesQ = yulesq, kappas = kappa, z = zkappa, pk = pzkappa, output=output
)
return(invisible(seqg_output))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.