Nothing
####################################################################################################################
#
# get.shw
# liest Conquest-Outputfiles (*.shw) als R-Objekte ein
#
# Version: 1.8.0
# Published:
# Author: Sebastian Weirich
# Maintainer:
#
# 14.10.2011 MH: gestabled
# 1.2.1 (2011-10-03, NH): Benennung von Spalten in Output geändert, entsprechen jetzt Namen in ZKD-Ergebnisstruktur
# 1.2.0 (2011-10-01, NH): Bugfix: Regression jetzt auch für nur eine Dimension ohne Namen auslesbar
# Konfidenzintervallberechnung für DIF vereinfacht
# Benennung von Konfidenzintervallen in Output geändert in ci.lb.X bzw. ci.ub.X
#
# 28.09.2011 NH: verschönert, default für dif.term auf NULL gesetzt, parameter datei in file geändert
# 16.09.2011 MH: Regression auslesen, wird als ...$regression an return-Liste rangehangen
# 19.08.2011 SW: DIF, Konfidenzintervalle auf 99%, außerdem Grenzen für absoluten und sig. DIF frei variierbar
# 08.08.2011 MH: auf stable gesetzt wegen besserer sourcebarkeit
# 05.01.2011 NH: Kriterien für signifikanten DIF angepasst: 0.43 und 0.64 statt 0.3 und 0.6
# 14.10.2011 SW: "trim" durch "crop" ersetzt, depends no longer on "gdata"
# 25.11.2011 SW: "cat" durch "eatTools:::sunk" ersetzt
# 28.11.2011 SW: Fehler wenn alle Items verankert sind also also kein einziger Standardfehler bestimmt wird,
# fuehrte zu falscher Benennung der Spalten: gefixed ...
# 28.02.2012 SW: liest nun auch Kovarianz- und Korrelationstabelle ein
#
####################################################################################################################
### dif.term ... Name des DIF-terms (character), muss nur angegeben werden, wenn DIF-Analysen eingelesen werden sollen.
### split.dif ... sollen bei DIF-Analysen nur die Werte der ersten Gruppe zurückgegeben werden?
### abs.dif.bound ... positive Zahl; Schwellenwert für absoluten DIF
### sig.dif.bound ... positive Zahl; Schwellenwert für Signifikanzkriterium
## Hilfsfunktion zur Berechnung von DIF-Konfidenzintervallen
confidence <- function ( par , error, alpha ) {
z.alpha <- abs(qnorm(alpha/2))
ci.lower <- par - 2 * z.alpha * error
ci.upper <- par + 2 * z.alpha * error
ci <- data.frame ( ci.lower, ci.upper )
colnames (ci) <- c ( paste ( "ci.lb", 100 - 100 * alpha, sep = "."), paste ( "ci.ub", 100 - 100 * alpha, sep = "."))
return (ci)
}
get.shw <- function (file, dif.term = NULL, split.dif = TRUE, abs.dif.bound = 0.64, sig.dif.bound = 0.43) {
funVersion <- "get.shw_1.7.0"
allInput <- readLines(file)
fDeviance <- grep("Final Deviance", allInput) ### find the row with the final deviance
stopifnot(length(fDeviance) == 1)
fDeviance <- as.numeric(unlist(lapply (strsplit(allInput[fDeviance], " +"), FUN=function(ll) {ll[length(ll)]}) ))
# find terms
termNameLines <- grep("TERM", allInput)
nTerms <- length(termNameLines)
if (nTerms == 0) {
eatTools:::sunk(paste(funVersion, ": No TERM-statement found in file ", file, ".\n", sep = ""))
stop()
}
termNames <- strsplit(allInput[termNameLines], ":")
termNames <- crop(sapply(termNames, "[[", 2))
# find first and last line for each term
termStart <- termNameLines + 6
termEnd <- grep("An asterisk", allInput)[1:nTerms]
termEnd <- termEnd - 2
eatTools:::sunk(paste(funVersion, ": Found ", nTerms, " term(s): ", paste(termNames, collapse = ", "), "\n", sep = ""))
# initialize output list
outputList <- vector(nTerms, mode = "list")
names(outputList) <- termNames
# read data for each term
for (i in seq(along = termNames)) {
termLines <- seq(termStart[i], termEnd[i])
outputColNamesLine <- termStart[i] - 2
outputColNames <- strsplit(allInput[outputColNamesLine], " +")
outputColNames <- c("item.nr", "item.name", outputColNames[[1]][-c(1:2)])
outputColNames <- gsub("\\^", "", outputColNames)
outputColNames <- outputColNames[sort(c(seq(along = outputColNames), grep("CI", outputColNames)))]
# name columns according to zkd conventions
mnsqCols <- grep("MNSQ", outputColNames)
if (length(mnsqCols) == 2) {
outputColNames[mnsqCols] <- c ( "outfit", "infit" )
outputColNames[mnsqCols + 3] <- c ( "outfit.t", "infit.t" )
}
ciCols <- grep("CI", outputColNames)
if (length(ciCols) == 4 ) {
outputColNames[ciCols] <- c ("outfit.ci.lb", "outfit.ci.ub", "infit.ci.lb", "infit.ci.ub")
}
# read term input
termInput <- crop(allInput[termLines])
termInput <- gsub("\\* ", " NA", termInput)
substituteString <- paste(c("\\(", ")", ","), collapse = "|")
termInput <- gsub(substituteString, " ", termInput)
# find missing values and replace them with NA
termOutput <- strsplit(termInput, " +")
nInputCols <- sapply(termOutput, length)
correctInputLines <- which(nInputCols == max(nInputCols))
missingInputLines <- which(nInputCols != max(nInputCols))
lastSpace <- gregexpr(" [[:graph:]]", termInput[correctInputLines])[[1]]
posEstimateCol <- regexpr("ESTIMATE", allInput[outputColNamesLine])
lastSpace <- lastSpace[lastSpace > (posEstimateCol - 3)]
for (ii in seq(along = missingInputLines)) {
for (iii in seq(along = lastSpace)) {
isSpace <- substr(termInput[missingInputLines[ii]], lastSpace[iii] + 2, lastSpace[iii] + 2) == " "
if (isSpace) {
termInput[missingInputLines[ii]] <- paste(substr(termInput[missingInputLines[ii]], 1, lastSpace[iii]),
"NA", substring(termInput[missingInputLines[ii]], lastSpace[iii] + 3), sep = "")
}
}
}
termOutput <- strsplit(termInput, " +")
maxNInputCols <- max(nInputCols)
# check number of output colnames and number of input columns
if (maxNInputCols < length(outputColNames)) {
eatTools:::sunk(paste(funVersion, ": Several columns empty for term '", termNames[i], "' in file: '", file, "'. Outputfile may be corrupted. Please check!\n", sep = ""))
maxNInputCols <- length(outputColNames)
}
if (maxNInputCols > length(outputColNames)) {
if (maxNInputCols == length(outputColNames) + 1) {
if ( !is.null(dif.term) ) {
if( termNames [i] != dif.term ) {
eatTools:::sunk(paste(funVersion, ": Found one more column than column names. Expect missing column name before 'ESTIMATE'. Check outputfile for term '",
termNames[i], "' in file: '", file, "'. \n", sep = ""))
}
}
ind.name <- which(outputColNames == "ESTIMATE")
outputColNames <- c(outputColNames[1:ind.name - 1], "add.column", outputColNames[ind.name:length(outputColNames)])
}
if (maxNInputCols > length(outputColNames) + 1) {
eatTools:::sunk(paste(funVersion, ": Found more columns than column names. Check outputfile for term '", termNames[i], "' in file: '", file, "'. \n", sep = ""))
outputColNames <- c(outputColNames, rep("add.column", maxNInputCols - length(outputColNames)))
}
}
# make output data frame
tempTermOutput <- do.call(rbind, lapply( termOutput, FUN=function(ii) { c(gsub("\\*","",ii), rep(NA,times = maxNInputCols - length(ii) ))} ))
tempTermOutput[tempTermOutput == "NA"] <- NA
options(warn = -1)
termOutput <- data.frame(apply(tempTermOutput, 2, as.numeric))
characterCols <- which(colMeans(is.na(termOutput)) == 1)
termOutput[, characterCols] <- tempTermOutput[, characterCols]
colnames(termOutput) <- outputColNames
options(warn = 0)
if ("ESTIMATE" %in% colnames(termOutput)[characterCols]) {
eatTools:::sunk(paste(funVersion, ": 'ESTIMATE' column for term '", termNames[length(termNames)], "' in file: '", file, "' does not seem to be a numeric value. \n", sep = ""))
}
# read DIF term (if DIF term is specified)
if (!is.null(dif.term)) {
if (sum(termNames == dif.term) == 0) {
eatTools:::sunk(paste(funVersion, ": Term declared as DIF: '", dif.term, "' was not found in file: '", file, "'. \n", sep = ""))
}
# compute absolute DIF and DIF significance
if (termNames[i] == dif.term) {
eatTools:::sunk(paste(funVersion, ": Treat '", termNames[i], "' as DIF term.\n", sep = ""))
abs.dif = 2 * termOutput$ESTIMATE
ci.90 <- confidence ( abs.dif, termOutput$ERROR, alpha = .10 )
ci.95 <- confidence ( abs.dif, termOutput$ERROR, alpha = .05 )
ci.99 <- confidence ( abs.dif, termOutput$ERROR, alpha = .01 )
sig.90 <- ifelse(abs(abs.dif) > abs.dif.bound &
abs(ci.90 [ , 1]) > sig.dif.bound &
abs(ci.90 [ , 2]) > sig.dif.bound, 1, 0)
sig.95 <- ifelse(abs(abs.dif) > abs.dif.bound &
abs(ci.95 [ , 1]) > sig.dif.bound &
abs(ci.95 [ , 2]) > sig.dif.bound, 1, 0)
sig.99 <- ifelse(abs(abs.dif) > abs.dif.bound &
abs(ci.99 [ , 1]) > sig.dif.bound &
abs(ci.99 [ , 2]) > sig.dif.bound, 1, 0)
termOutput <- cbind(termOutput, abs.dif, ci.90, ci.95, ci.99, sig.90, sig.95, sig.99, filename = file)
if (split.dif == TRUE) {
termOutput <- termOutput[1:(nrow(termOutput)/2), ]
}
}
}
outputList[[i]] <- termOutput
}
# read regression coefficients
regrStatement <- grep("REGRESSION COEFFICIENTS", allInput) + 2
isRegression <- length(regrStatement) > 0
if ( isRegression) {
regrEnd <- grep("An asterisk next", allInput)
regrEnd <- regrEnd[which(regrEnd > regrStatement)][1] - 2
dimensionsLine <- grep("Regression Variable",allInput)
stopifnot(length(dimensionsLine) ==1)
# check dimensions
nameDimensions <- unlist(strsplit(allInput[dimensionsLine], " +"))[-1]
nDimensions <- length(nameDimensions)
if ( nDimensions == 0 ) {
nameDimensions <- "Dimension1"
nDimensions <- length (nameDimensions)
}
eatTools:::sunk(paste(funVersion, ": Found ",nDimensions," dimension(s): ",paste(nameDimensions, collapse=", "),"\n",sep=""))
regrStart <- grep("CONSTANT",allInput)
regrStart <- regrStart[regrStart <= regrEnd][1]
regrInput <- crop(allInput[regrStart:regrEnd])
regrInput <- gsub("\\(|)", "", regrInput)
regrInput <- gsub("\\*", " NA", regrInput)
regrInput <- strsplit(regrInput," +")
regrNames <- sapply (regrInput, "[", 1 )
tempRegrOutput <- do.call(rbind, regrInput)
tempRegrOutput[tempRegrOutput == "NA"] <- NA
options(warn = -1)
regrOutput <- data.frame(matrix (apply(tempRegrOutput, 2, as.numeric), nrow = nrow(tempRegrOutput), byrow = FALSE))
options(warn = 0)
characterCols <- which(colMeans(is.na(regrOutput)) == 1)
regrOutput[, characterCols] <- tempRegrOutput[, characterCols]
colnames(regrOutput) <- c("reg.var", paste(rep(c("coef","error"),nDimensions), rep(nameDimensions,each=2),sep="_") )
regrOutput$filename <- file
outputList$regression <- regrOutput
}
### Kovarianz-/ Korrelationsmatrix einlesen: schwierig, also Trennen nach ein- vs. mehrdimensional. Eindimensional: zweimal "-----" zwischen Beginn und Ende des COVARIANCE-Statements
### allInput muss mit "Scan" neu eingelesen werden, "readLines" klappt nicht ...
allInput <- scan(file,what="character",sep="\n",quiet=TRUE)
korStart <- grep("COVARIANCE/CORRELATION MATRIX", allInput)
korEnd <- grep("An asterisk next", allInput)
korEnd <- min(korEnd[korEnd > korStart])
korStriche <- grep("-----",allInput)
korStriche <- korStriche[korStriche > korStart & korStriche < korEnd]
if(length(korStriche) == 2) { ### eindimensional!
varRow <- grep("Variance", allInput)
variance <- as.numeric( unlist( lapply(strsplit(allInput[varRow]," +"), FUN=function(ll) {ll[length(ll)]}) ) )
names(variance) <- "variance"
outputList$cov.structure <- variance
}
if(length(korStriche) > 2) { ### mehrdimensional!
bereich <- allInput[ (min(korStriche) + 1) : (max(korStriche) - 1 ) ]
bereich <- bereich[ -grep("----",bereich)]
bereich <- strsplit(crop(bereich)," +")
for (ii in 2:(length(bereich)-1) ) {
if(ii <= length(bereich[[ii]]) ) {
bereich[[ii]] <- c(bereich[[ii]][1:(ii-1)], NA, bereich[[ii]][ii:length(bereich[[ii]])])
}
if(ii > length(bereich[[ii]]) ) {
bereich[[ii]] <- c(bereich[[ii]][1:(ii-1)], NA)
}
}
bereich.data.frame <- asNumericIfPossible(data.frame(do.call("rbind", bereich[-1]),stringsAsFactors=FALSE), verbose = FALSE)
colnames(bereich.data.frame) <- bereich[[1]]
outputList$cov.structure <- bereich.data.frame
}
outputList$final.deviance <- fDeviance
return(outputList)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.