Nothing
# zum Einbinden
#source("/Users/Felix/Documents/R/Funktionen/FUtils.r")
print("Standardfunktionen 'FUtils.R' werden geladen ...")
# @param df: data.frame with relevant variables
# @param control: vector containing column numbers or column names of variables in the data.frame that should be controlled for (you can only use ONE kind - either numbers or names)
# @param lower.tri.method: by default, in the lower triangle the zero-order-correlations are shown, and in the upper triangle the partial correlations; only possible option at the moment: lower.tri.method = "par" --> in both triangles the partial correlations are shown
parMatrix <- function(df, control, lower.tri.method = "cor") {
require(ggm)
options(warn=-1)
cat("Coefficients are controlled for:\n")
cat(colnames(df[control]))
cat("\n\n")
cat("above diagonal: partial correlations, below: zero-order-correlations\n\n")
# mark columns consisting only of NAs (these would crash the procedure)
skipList <- novar.colnums(df)
dims <- ncol(df)
dnames <- colnames(df)
# if the control parameter consists of strings: transform to numbers
if (is.character(control[1])) {
control <- which(colnames(df) %in% control)
}
par <- matrix(rep(0,dims ^ 2), nrow=dims, ncol=dims, dimnames = list(dnames, dnames))
ret <- par
# calculate partial correlations
for (i in 1:ncol(df)) {
for (j in 1:ncol(df)) {
if (i != j & sum(control==i)==0 & sum(control==j)==0 & !(i %in% skipList) & !(j %in% skipList)) {
par[i,j] <- ggm::pcor(c(i, j, control), var(df, na.rm = TRUE, use="p"))
}
if ((i %in% skipList) | (j %in% skipList)) par[i,j] <- NA
}
}
# calculate zero-order-correlations
co <- cor(df, use = "p")
ret <- par
if (lower.tri.method != "par") {
ret[lower.tri(ret)] <- co[lower.tri(co)]
}
diag(ret) <- 1
options(warn=1)
return(ret)
}
# f1: formula, e.g. ~sex+age (controlling for sex and age)
# skip: which columns should be skipped? (you should provide column names)
parMatrix.res <- function(df, f1, skip=c("")) {
# mark columns consisting only of NAs (these would crash the procedure)
# ... and columns for which is controlled (this would result in an invalid formula)
skipList <- novar.colnums(df)
t1 <- attr(terms(f1), "term.labels")
controls <- which(colnames(df) %in% t1)
skipList <- c(skipList, controls)
skips <- which(colnames(df) %in% skip)
skipList <- c(skipList, skips)
ret <- df
for (i in 1:ncol(ret)) {
if (!(i %in% skipList)) ret[,i] <- NA
}
# calculate partial correlations
for (i in 1:ncol(df)) {
if (!(i %in% skipList)) {
# Die jeweilige Formel konstruieren
new <- paste(colnames(df)[i], "~.")
f2 <- update(f1, new)
# lineare Regression: für die angegebene Formel kontrollieren
l1 <- lm(f2, df, na.action=na.exclude)
# Residuen abspeichern
#ret[names(l1$residuals),i] <- l1$residuals
ret[,i] <- resid(l1)
}
}
return(ret)
}
# Hilfsfunktion save_var: berechnet die Varianz, gibt aber keinen Fehler aus, wenn nur NAs vorkommen (gibt in diesem Fall wieder NA zurück)
save_var <- function (vec) {
if (is.nan(mean(vec, na.rm=TRUE))) {return(NA) } else {
v <- var(vec, na.rm=TRUE)
return(v)
}
}
novar.colnums <- function(df){
options(warn=-1)
# mark columns with no variance
delList <- c()
for (i in 1:ncol(df)) {if (is.factor(df[,i])) delList <- c(delList, i)}
for (i in 1:ncol(df)) {
if (!(i %in% delList) & (is.na(save_var(df[,i])) | save_var(df[,i]) == 0)) delList <- c(delList, i)
}
options(warn=1)
return(delList)
}
# remove columns with no variance from data frame
novar.rm <- function(df){
delList <- novar.colnums(df)
if (length(delList) > 0) {
print(paste("Column", colnames(df)[delList]," has no variance and is removed from the data set"))
df <- df[,-delList]
}
return(df)
}
# remove columns consisting of NAs from data frame
NArm <- function(df, index=2, del = "", cols=NULL){
if (is.null(cols)) cols <- 1:ncol(df)
dels <- c()
NAs <- apply(df[,cols], index, function(x) sum(is.na(x) | x==del))
le <- apply(df[,cols], index, length)
dels <- NAs == le
if (index==1) {df2 <- df[!dels,]} else
if (index==2) {df2 <- df[,!dels]}
return(df2)
}
# calculates average correlation using Fisher's Z-transformation, averaging the Z-values and back-transforming the average to a Pearson correlation.
cor.average <- function(corlist) {
fz <- function(co) {
return(0.5 * log((1 + co)/(1 - co)))
}
f <- mean(fz(corlist))
return((exp(2*f)-1)/(exp(2*f)+1))
}
# draws a correlation matrix where the significance of correlations is marked with a sign:
# *** = p < 0.001
# ** = p < 0.01
# * = p < 0.05
# † = p < 0.10
#
# @param digits: the number of digits to which values are rounded (default = 2)
corMatrix <- function(df, digits=2, method="pearson", exact=NULL) {
dims <- ncol(df)
ids <- nrow(df)
dnames <- colnames(df)
ps <- matrix(rep(1,dims ^ 2), nrow=dims, ncol=dims, dimnames = list(dnames, dnames))
# calculate p-values of correlations
for (i in 1:ncol(df)) {
for (j in 1:ncol(df)) {
if (sum(!is.na(df[,i])) > 3 & sum(!is.na(df[,j])) > 3) ps[i,j] <- cor.test(df[,i], df[,j], use="p", method=method, exact=exact)$p.value
}
}
# calculate zero-order-correlations
co <- round(cor(df, use = "p", method=method), digits)
for (i in 1:ncol(co)) {
for (j in 1:ncol(df)) {
if (ps[i,j]<0.001) co[i,j] <- paste(co[i,j], "***") else
if (ps[i,j]<0.01) co[i,j] <- paste(co[i,j], "**") else
if (ps[i,j]<0.05) co[i,j] <- paste(co[i,j], "*") else
if (ps[i,j]<0.1) co[i,j] <- paste(co[i,j], "†")
if (i==j) co[i,j] = 1
}
}
ret <- list(cor = co, annotation = "***: p < 0.001; **: p < 0.01; *: p < 0.05; †: p < 0.1")
return(ret)
}
#------------------------------------------------------------
# ---- Proper Item Scaling --------------------------------------
#------------------------------------------------------------
cronbach <- function(items, keys = NULL, scoring = mean, verbose="long", full.scores = NULL, scale=FALSE){
library(psych)
if (is.null(keys)) keys = c(rep(1, ncol(items)))
if (!is.null(full.scores)) {
full.scores <- as.matrix(full.scores)
}
valids <- !(apply(items, 1, function(x) all(is.na(x))))
if (any(valids==FALSE)) {
print(paste(sum(!valids),"participants removed because of lacking data (only NAs)"));
items <- items[valids,]
full.scores <- full.scores[valids,]
}
items.recoded <- t((keys == -1)*(max(items, na.rm = TRUE)-t(items)) + (keys == 1)*t(items))
if (scale==TRUE) items.recoded <- apply(items.recoded, 2, scale)
scores <- apply(items.recoded, 1, scoring, na.rm=TRUE)
scores[apply(items, 1, function(x) sum(!is.na(x))<2)] <- NA
n <- length(scores[!is.na(scores)])
getAlpha <- function(i, k) {
# alpha berechnen
scores <- as.matrix(i) %*% as.matrix(k)
item.var <- diag(var(i, use = "pairwise"))
cov.scales <- cov(scores, use = "pairwise")
var.scales <- diag(cov.scales)
cor.scales <- cor(scores, use = "pairwise")
sum.item.var <- item.var %*% abs(k)
num.item <- diag(t(abs(k)) %*% abs(k))
alpha.scale <- (var.scales - sum.item.var) * num.item/((num.item -
1) * var.scales)
return(as.numeric(round(alpha.scale,3)))
}
alpha <- getAlpha(items, keys)
print(paste("Alpha (",ncol(items)," Items; n=",n,"): ",alpha, sep=""))
print("----------------------------")
# scale if item deleted
if (verbose=="long" & ncol(items)>2) {
if (!is.null(full.scores)) {
print(round(cor(data.frame(scores, full.scores), use="p"),2)[1,])
}
res <- data.frame(label = vector(mode="character", 0), alpha = vector(mode="numeric", 0), diff =vector(mode="numeric", 0))
if (!is.null(full.scores)) {
for (f in 1:ncol(full.scores)) res <- cbind(res, vector(mode="numeric", 0))
print("----------------------------")
}
for (i in 1:ncol(items)) {
alpha_m1 <- getAlpha(items[,-i], keys[-i])
sc_m1 <- apply(items.recoded[,-i], 1, scoring, na.rm=TRUE)
resline <- data.frame(label = paste("Ohne Item",i,"(",colnames(items)[i],")"), alpha = round(alpha_m1,3), diff = round(alpha_m1-alpha,3))
if (!is.null(full.scores)) {
for (f in 1:ncol(full.scores)) resline <- cbind(resline, round(cor(sc_m1, full.scores[,f], use="p"),2))
}
res <- rbind(res, resline)
}
if (!is.null(full.scores)) {
colnames(res)[4:ncol(res)] <- colnames(full.scores)
}
print(res)
}
attr(scores, "alpha") <- alpha
return(scores)
}
# Hilfsfunktion für copy.clipboard.RTF:
getStars <- function(val) {
res <- ""
if (val <= 0.05) res <- "*"
if (val <= 0.01) res <- "**"
if (val <= 0.001) res <- "***"
return(res)
}
#------------------------------------------------------------
# ---- Copy RTF-formatted table to clipboard ---------------
#------------------------------------------------------------
# suppress: < this value, cells content is invisible
# highlight: >= this value cells are printed in bold
# greyed: < this value cells are printed in italic
# stars: a matrix of p-values with the same dimensions like mat
#
# Bsp: copy.clipboard.RTF(res)
copy.clipboard.RTF <- function(mat, row.names = TRUE, col.names=TRUE, suppress = 0, highlight = 1, greyed = 0, stars=NULL, font="Helvetica") {
# standard rtf header
#rtf_header <- paste("{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 ",font,"}}\r", sep="")
#rtf_header <- paste("{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf330{\\fonttbl\\f0\\fswiss\\fcharset0 ",font,";}",sep="");
if (mode(mat)=="character") num <- FALSE
rtf_header <- paste("{\\rtf1\\ansi{\\fonttbl\\f0\\fswiss\\fcharset0 ",font,";}",sep="");
rtf_outro <- "\r}"
#rtf_string stores the content
rtf_string <- "";
# Header-Row with colnames (if requested)
if (col.names){
for (col in 1:ncol(mat)) rtf_string <- paste(rtf_string, "\\tab ", colnames(mat)[col], sep="")
rtf_string <- paste(rtf_string, "\\par ", sep="")
}
for (r in 1:nrow(mat)) {
if (row.names) rtf_string <- paste(rtf_string, rownames(mat)[r],sep="")
for (col in 1:ncol(mat)) {
if (!is.na(mat[r,col])) {
if (num==TRUE) {
# Opening tag for bold printing
if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "{\\b ", sep="")
# Opening tag for italic printing
if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "{\\i ", sep="")
# Insert Cells (only if >= suppress)
if (abs(mat[r,col])>suppress) {
rtf_string <- paste(rtf_string, "\\tab ", mat[r,col], sep="")
if (!is.null(stars)) {
rtf_string <- paste(rtf_string, getStars(stars[r,col]), sep="")
}
} else {rtf_string <- paste(rtf_string, "\\tab ", sep="")}
# Closing tag for bold printing
if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "}", sep="")
# Closing tag for italic printing
if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "}", sep="")
} else {
rtf_string <- paste(rtf_string, "\\tab ", mat[r,col], sep="")
}
} else {
rtf_string <- paste(rtf_string, "\\tab ", sep="")
}
}
# new paragraph after each line
rtf_string <- paste(rtf_string, "\\par ", sep="")
}
clipboard <- pipe("pbcopy", open="w")
write(paste(rtf_header, rtf_string, rtf_outro, sep=""), clipboard)
close(clipboard)
}
copy2rows.clipboard.RTF <- function(mat, mat2, row.names = TRUE, col.names=TRUE, suppress = 0, highlight = 1, greyed = 0, stars=FALSE, font="Helvetica", labels = c("","")) {
# standard rtf header
rtf_header <- paste("{\\rtf1\\ansi{\\fonttbl\\f0\\fswiss\\fcharset0 ",font,";}",sep="");
rtf_outro <- "\r}"
#rtf_string stores the content
rtf_string <- "";
# Header-Row with colnames (if requested)
if (col.names){
# Für die extra-Label-Spalte noch ein Tab
rtf_string <- paste(rtf_string, "\\tab ", sep="")
for (col in 1:ncol(mat)) rtf_string <- paste(rtf_string, "\\tab ", colnames(mat)[col], sep="")
rtf_string <- paste(rtf_string, "\\par ", sep="")
}
for (r in 1:nrow(mat)) {
if (row.names) rtf_string <- paste(rtf_string, rownames(mat)[r],sep="")
# Extra Labels 1
rtf_string <- paste(rtf_string, "\\tab ", labels[1],sep="")
for (col in 1:ncol(mat)) {
if (!is.na(abs(mat[r,col]))) {
# Opening tag for bold printing
if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "{\\b ", sep="")
# Opening tag for italic printing
if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "{\\i ", sep="")
# Insert Cells (only if >= suppress)
if (abs(mat[r,col])>suppress) {rtf_string <- paste(rtf_string, "\\tab ", mat[r,col], sep="")}
else {rtf_string <- paste(rtf_string, "\\tab ", sep="")}
# Closing tag for bold printing
if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "}", sep="")
# Opening tag for italic printing
if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "}", sep="")
} else {
rtf_string <- paste(rtf_string, "\\tab ", sep="")
}
}
# new paragraph after each line
rtf_string <- paste(rtf_string, "\\par ", sep="")
# Extra Labels 1
rtf_string <- paste(rtf_string, "\\tab ", labels[2], sep="")
for (col in 1:ncol(mat2)) {
if (!is.na(abs(mat2[r,col]))) {
# Opening tag for bold printing
if (abs(mat2[r,col])>=highlight) rtf_string <- paste(rtf_string, "{\\b ", sep="")
# Opening tag for italic printing
if (abs(mat2[r,col])<greyed) rtf_string <- paste(rtf_string, "{\\i ", sep="")
# Insert Cells (only if >= suppress)
if (abs(mat2[r,col])>suppress) {rtf_string <- paste(rtf_string, "\\tab ", mat2[r,col], sep="")}
else {rtf_string <- paste(rtf_string, "\\tab ", sep="")}
# Closing tag for bold printing
if (abs(mat2[r,col])>=highlight) rtf_string <- paste(rtf_string, "}", sep="")
# Opening tag for italic printing
if (abs(mat2[r,col])<greyed) rtf_string <- paste(rtf_string, "}", sep="")
} else {
rtf_string <- paste(rtf_string, "\\tab ", sep="")
}
}
# new paragraph after each line
rtf_string <- paste(rtf_string, "\\par ", sep="")
}
clipboard <- pipe("pbcopy", open="w")
write(paste(rtf_header, rtf_string, rtf_outro, sep=""), clipboard)
close(clipboard)
}
#------------------------------------------------------------
# ---- Get marginal significant correlation for a given number of participants -----------------
#------------------------------------------------------------
# Helper: Transform correlation to Fisher's Z
r2Z <- function(r) {
return(0.5 * log((1 + r)/(1 - r)))
}
# Helper: REcode Fisher's to correlation
Z2r <- function(Z) {
return((exp(2*Z)-1)/(exp(2*Z)+1))
}
# get the marginal correlation that is significant in a certain sample size
margin.correlation <- function(n, p=0.025) {
Z_crit <- abs(qnorm(p))*sqrt(1/(n-3))
return(Z2r(Z_crit))
}
# Calculated probability that the difference of two correlations is not significant
corr.diff <- function(r1, r2, n1, n2) {
z <- abs(r2Z(r1)-r2Z(r2))/sqrt((1/(n1-3))+(1/(n2-3)))
return(1 - round(pnorm(z),2))
}
#------------------------------------------------------------
# ---- Moderation Analysis ---------------------------------
#------------------------------------------------------------
# ... gracefully borrows code from the QuantPSyc package by Thomas D. Fletcher
mod.anal <- function(formule, data, slope.z=1, plot=FALSE, main = "", xlab = NULL, ylab = NULL, zlab=NULL, ylimit=1.5) {
# center moderator:
varnames <- rownames(attr(terms(formule), "factors"))
x1 <- varnames[2]
data[x1] <- data[x1]-mean(data[x1], na.rm=TRUE)
x2 <-varnames[3]
data[x2] <- data[x2]-mean(data[x2], na.rm=TRUE)
y <- varnames[1]
lm1 <- lm(formule, data)
print(summary(lm1))
z <- lm1$model[[3]]
zhi <- slope.z * sd(z, na.rm = TRUE)
zlo <- -slope.z * sd(z, na.rm = TRUE)
zme <- 0
b0 <- summary(lm1)$coef[1, 1]
b1 <- summary(lm1)$coef[2, 1]
b2 <- summary(lm1)$coef[3, 1]
b3 <- summary(lm1)$coef[4, 1]
x.zhi <- (b1 + b3 * zhi)
x.zlo <- (b1 + b3 * zlo)
x.zme <- (b1 + b3 * zme)
int.zhi <- (b0 + b2 * zhi)
int.zlo <- (b0 + b2 * zlo)
int.zme <- (b0 + b2 * zme)
seb.zhi <- sqrt(vcov(lm1)[2, 2] + 2 * zhi * vcov(lm1)[2,
4] + zhi^2 * vcov(lm1)[4, 4])
seb.zlo <- sqrt(vcov(lm1)[2, 2] + 2 * zlo * vcov(lm1)[2,
4] + zlo^2 * vcov(lm1)[4, 4])
seb.zme <- sqrt(vcov(lm1)[2, 2] + 2 * zme * vcov(lm1)[2,
4] + zme^2 * vcov(lm1)[4, 4])
td <- qt(0.975, df = summary(lm1)$df[2])
zhi.u <- x.zhi + td * seb.zhi
zhi.l <- x.zhi - td * seb.zhi
zlo.u <- x.zlo + td * seb.zlo
zlo.l <- x.zlo - td * seb.zlo
zme.u <- x.zme + td * seb.zme
zme.l <- x.zme - td * seb.zme
mat <- matrix(NA, 3, 5)
colnames(mat) <- c("INT", "Slope", "SE", "LCL", "UCL")
rownames(mat) <- c("at zHigh", "at zMean", "at zLow")
mat[1, ] <- c(int.zhi, x.zhi, seb.zhi, zhi.l, zhi.u)
mat[2, ] <- c(int.zme, x.zme, seb.zme, zme.l, zme.u)
mat[3, ] <- c(int.zlo, x.zlo, seb.zlo, zlo.l, zlo.u)
if (is.null(ylab)) ylab <- paste("raw",y)
if (is.null(xlab)) xlab <- paste("centered ",x1)
if (is.null(zlab)) zlab <- x2
yhi <- mean(lm1$model[[1]], na.rm = TRUE) + ylimit * sd(lm1$model[[1]], na.rm = TRUE)
ylo <- mean(lm1$model[[1]], na.rm = TRUE) - ylimit * sd(lm1$model[[1]], na.rm = TRUE)
plot(lm1$model[[2]], lm1$model[[1]], main = main, xlab = xlab, ylab = ylab, ylim = c(ylo, yhi))
abline(mat[1, 1], mat[1, 2], col = 4)
abline(mat[3, 1], mat[3, 2], col = 2)
abline(mat[2, 1], mat[2, 2], lty = 2, col = 1)
legend(locator(1), paste(c("+","-"),slope.z," SD on ",zlab,sep=""), lty = 1, col = c(4,
2))
return(list(sim.slopes = mat))
}
#------------------------------------------------------------
# ---- Regressionstools --------------------------------------
#------------------------------------------------------------
xyplot.r <- function(...) {
library(lattice)
library(grid)
xyplot(...,
panel = function(x,y) {
panel.dotplot(x,y)
panel.lmline(x,y)
r = round(cor(x,y, use="p"),3)
grid.text(paste("r=", r), y = unit(0.1, "npc"), gp=gpar(...))
}
)
}
#------------------------------------------------------------
# Hierarchische multiple Regression: inkrementelle Validität
hmreg <- function(AV, UV1, UV2, data=NULL) {
if (is.null(data)) {
AV_name <- deparse(substitute(AV))
UV1_name <- deparse(substitute(UV1))
UV2_name <- deparse(substitute(UV2))
} else {
AV_name <- AV
UV1_name <- UV1
UV2_name <- UV2
AV <- data[,AV]
UV1 <- data[,UV1]
UV2 <- data[,UV2]
}
delList <- is.na(AV) | is.na(UV1) | is.na(UV2)
AV <- AV[!delList]
UV1 <- UV1[!delList]
UV2 <- UV2[!delList]
l1 <- lm(AV~UV1)
l2 <- lm(AV~UV2)
l3 <- lm(AV~UV1+UV2)
s1 <- summary(l1)
s2 <- summary(l2)
s3 <- summary(l3)
a1 <- anova(l1)
a2 <- anova(l2)
a12 <- anova(l1,l3)
a21 <- anova(l2,l3)
res <- matrix(NA, nrow=4, ncol=9, dimnames=list(c(UV1_name, UV2_name, paste(UV1_name,"+",UV2_name), paste(UV2_name,"+",UV1_name)),c("R2", "adj.R2","adj.R2.change","F.model","p.model","F.change", "p.change", "df1","df2")))
res[1,"R2"] <- s1$r.squared
res[2,"R2"] <- s2$r.squared
res[3,"R2"] <- s3$r.squared
res[4,"R2"] <- r2 <- s3$r.squared
res[1,"adj.R2"] <- s1$adj.r.squared
res[2,"adj.R2"] <- s2$adj.r.squared
res[3,"adj.R2"] <- s3$adj.r.squared
res[4,"adj.R2"] <- s3$adj.r.squared
res[1,"adj.R2.change"] <- s1$adj.r.squared
res[2,"adj.R2.change"] <- s2$adj.r.squared
res[3,"adj.R2.change"] <- s3$adj.r.squared - s1$adj.r.squared
res[4,"adj.R2.change"] <- s3$adj.r.squared - s2$adj.r.squared
res[1,"F.model"] <- s1$fstatistic[1]
res[2,"F.model"] <- s2$fstatistic[1]
res[3,"F.model"] <- s3$fstatistic[1]
res[4,"F.model"] <- s3$fstatistic[1]
res[1,"p.model"] <- s1$coefficients[2,4]
res[2,"p.model"] <- s2$coefficients[2,4]
res[3,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
res[4,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
res[1,"F.change"] <- a1$F[1]
res[2,"F.change"] <- a2$F[1]
res[3,"F.change"] <- a12$F[2]
res[4,"F.change"] <- a21$F[2]
res[1,"p.change"] <- a1$P[1]
res[2,"p.change"] <- a2$P[1]
res[3,"p.change"] <- a12$P[2]
res[4,"p.change"] <- a21$P[2]
res[1,"df1"] <- a1$Df[1]
res[2,"df1"] <- a2$Df[1]
res[3,"df1"] <- a12$Df[2]
res[4,"df1"] <- a21$Df[2]
res[1,"df2"] <- a1$Df[2]
res[2,"df2"] <- a2$Df[2]
res[3,"df2"] <- a12$Res.Df[2]
res[4,"df2"] <- a21$Res.Df[2]
r2.part1 <- round(cor(AV, lm(UV1~UV2)$resid)^2,3)
r2.part2 <- round(cor(AV, lm(UV2~UV1)$resid)^2,3)
print(paste("Unique variance component for",UV1_name,":",r2.part1,"; =",round(r2.part1*100/r2,2),"%"))
print(paste("Unique variance component for",UV2_name,":",r2.part2,"; =",round(r2.part2*100/r2,2),"%"))
print(paste("Joint variance component:",round(r2-r2.part1-r2.part2,3),"; =",round((r2-r2.part1-r2.part2)*100/r2,2),"%"))
round(res,3)
}
hmreg2 <- function(f1, f2, f3, data=NULL) {
l1 <- lm(f1)
l2 <- lm(f2)
l3 <- lm(f3)
s1 <- summary(l1)
s2 <- summary(l2)
s3 <- summary(l3)
a1 <- anova(l1)
a2 <- anova(l2)
a12 <- anova(l1,l3)
a21 <- anova(l2,l3)
res <- matrix(NA, nrow=4, ncol=9, dimnames=list(c(UV1_name, UV2_name, paste(UV1_name,"+",UV2_name), paste(UV2_name,"+",UV1_name)),c("R2", "adj.R2","adj.R2.change","F.model","p.model","F.change", "p.change", "df1","df2")))
res[1,"R2"] <- s1$r.squared
res[2,"R2"] <- s2$r.squared
res[3,"R2"] <- s3$r.squared
res[4,"R2"] <- r2 <- s3$r.squared
res[1,"adj.R2"] <- s1$adj.r.squared
res[2,"adj.R2"] <- s2$adj.r.squared
res[3,"adj.R2"] <- s3$adj.r.squared
res[4,"adj.R2"] <- s3$adj.r.squared
res[1,"adj.R2.change"] <- s1$adj.r.squared
res[2,"adj.R2.change"] <- s2$adj.r.squared
res[3,"adj.R2.change"] <- s3$adj.r.squared - s1$adj.r.squared
res[4,"adj.R2.change"] <- s3$adj.r.squared - s2$adj.r.squared
res[1,"F.model"] <- s1$fstatistic[1]
res[2,"F.model"] <- s2$fstatistic[1]
res[3,"F.model"] <- s3$fstatistic[1]
res[4,"F.model"] <- s3$fstatistic[1]
res[1,"p.model"] <- s1$coefficients[2,4]
res[2,"p.model"] <- s2$coefficients[2,4]
res[3,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
res[4,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
res[1,"F.change"] <- a1$F[1]
res[2,"F.change"] <- a2$F[1]
res[3,"F.change"] <- a12$F[2]
res[4,"F.change"] <- a21$F[2]
res[1,"p.change"] <- a1$P[1]
res[2,"p.change"] <- a2$P[1]
res[3,"p.change"] <- a12$P[2]
res[4,"p.change"] <- a21$P[2]
res[1,"df1"] <- a1$Df[1]
res[2,"df1"] <- a2$Df[1]
res[3,"df1"] <- a12$Df[2]
res[4,"df1"] <- a21$Df[2]
res[1,"df2"] <- a1$Df[2]
res[2,"df2"] <- a2$Df[2]
res[3,"df2"] <- a12$Res.Df[2]
res[4,"df2"] <- a21$Res.Df[2]
r2.part1 <- round(cor(AV, lm(UV1~UV2)$resid)^2,3)
r2.part2 <- round(cor(AV, lm(UV2~UV1)$resid)^2,3)
print(paste("Unique variance component for",UV1_name,":",r2.part1,"; =",round(r2.part1*100/r2,2),"%"))
print(paste("Unique variance component for",UV2_name,":",r2.part2,"; =",round(r2.part2*100/r2,2),"%"))
print(paste("Joint variance component:",round(r2-r2.part1-r2.part2,3),"; =",round((r2-r2.part1-r2.part2)*100/r2,2),"%"))
round(res,3)
}
#------------------------------------------------------------
# ---- Sonstige Tools --------------------------------------
#------------------------------------------------------------
# medianSplit does an intelligent medianSplit: if groups are defined, the groupwise median is
# employed; it also is checked whether "<" or "<=" leads to more equal group sizes
# demo: all4$QInvSplit = medianSplit(all4$QInv2, group = all4$IV)
medianSplit <- function(var, group = NULL, varname=NULL, labels = c("low", "high")) {
# find the group median
if (!is.null(group)) {
group <- as.factor(group)
groupmedian <- aggregate(var, list(group), median, na.rm=TRUE)
names(groupmedian) <- c("group", "md")
dat <- data.frame(var, group)
dat <- merge(dat, groupmedian, by="group")
} else {
groupmedian <- median(var, na.rm=TRUE)
dat <- data.frame(var, md = groupmedian)
}
# check if "<" or "<=" leads to more equal groups
relFreq1 <- abs((sum(dat["var"] <= dat["md"], na.rm=TRUE)/nrow(dat)) - 0.5)
relFreq2 <- abs((sum(dat["var"] < dat["md"], na.rm=TRUE)/nrow(dat)) - 0.5)
if (relFreq1 < relFreq2) {
"%smaller%" <- function(x,y) {x <= y}
} else {
"%smaller%" <- function(x,y) {x < y}
}
mS <- as.factor(as.vector(ifelse(dat["var"] %smaller% dat["md"], labels[1], labels[2])))
return(mS)
}
# performs a test of normality (but you should better use shapiro.test)
ks.norm <- function(x) {
ks <- ks.test(x, "pnorm", mean=mean(x), sd=sqrt(var(x)))
return(ks)
}
reverse <- function(x) {
return((max(x, na.rm=TRUE)+1)-x)
}
posify <- function(x) {
if (min(x, na.rm=TRUE)>=1) {return(x)}
else {return(x+abs(min(x, na.rm=TRUE))+1)}
}
skewness <- function (x, na.rm = FALSE, ...) {
# A function implemented by D. Wuertz
# Description:
# Returns the value of the skewness of a
# distribution function. Missing values
# can be handled.
# FUNCTION:
# Warnings:
if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
warning("argument is not numeric or logical: returning NA")
return(as.numeric(NA))}
# Remove NAs:
if (na.rm) x = x[!is.na(x)]
# Skewness:
n = length(x)
if (is.integer(x)) x = as.numeric(x)
skewness = sum((x-mean(x))^3/sqrt(var(x))^3)/length(x)
# Return Value:
skewness
}
# performs a transformation for skewed distributions.
# reversing is applied automatically depending on the skewness.
# func: the function to be applied
trans <- function(x, func=log10) {
if (skewness(x, na.rm=TRUE)>0) {
return(func(posify(x)))
} else {
return(reverse(func(reverse(x))))
}
}
autoTrans <- function(x, plot=FALSE, verbose=TRUE) {
# testen, ob eine Transformation überhaupt nötig ist:
test <- shapiro.test(x)$p.value
if (test > 0.1) {
if (verbose) print(paste(deparse(substitute(x))," - AutoTrans: no transformation needed, p=",test, "AutoTrans NO TRANS"))
if (plot) plot(density(x, na.rm=TRUE), main=paste("Shapiro-Wilks: p=", round(test,5)), sub="no transformation needed")
attr(x, "method") <- "no transformation needed"
attr(x, "p.value") <- test
return(x)
}
FUNCS <- c(log10, sqrt, function(y) 1/y)
trans_str <- c("log10", "sqrt", "1/x")
x2 <- matrix(NA, length(x), length(FUNCS))
for (i in 1:length(FUNCS)) x2[,i] <- trans(x, FUNCS[[i]])
s <- vector(mode="numeric", length=3)
for (i in 1:length(FUNCS)) s[i] <- shapiro.test(x2[,i])$p.value
if (plot) {
par(mfcol=c(1,3))
for (i in 1:length(FUNCS)) {
color <- (i==which.max(s))+1
plot(density(x2[,i], na.rm=TRUE), main=paste("Shapiro-Wilks: p=", round(s[i],5)), col=color)
}
}
res <- x2[,which.max(s)]
attr(res, "method") <- trans_str[which.max(s)]
attr(res, "p.value") <- s[which.max(s)]
if (verbose) print(paste(deparse(substitute(x)),"- AutoTrans:",attr(res, "method"), ", p=",attr(res, "p.value"), "AutoTrans",ifelse(s[which.max(s)]>=0.1,"SUCESS", "FAILED")))
return(res)
}
na.omit.cols <- function(df, cols) {
validList <- apply(df, 1, function(x) sum(is.na(x[cols]))==0)
return(df[validList,])
}
jitter.binary <- function(a, jitt=0.5, y1=0, y2=1) {
ifelse(a==0, runif(length(a), y1, y1+jitt), runif(length(a), y2-jitt, y2))
}
# HLM braucht zwei Dateien: eine mit Level-1 Daten, eine mit Level-2 Daten
# IDs geben die Zugehörigkeit zur Gruppe auf Level2 an
# - IDs müssen alle die gleiche Länge haben (also z.B. 001 bis 100)
# - Die Fälle auf Level-1 müssen nach IDs geordnet sein
# Formatierung: HLM erwartet eine fixe Spaltenzahl, daher sprintf;
#
# HLM-Formatangabe: (A4,2F12.3) (A4 = ID, dann 2 x 12.3 Format)
# Bsp.-Export:
# Level 1
# HLM.export("H1.dat", IA2$id, IA2$effect2, IA2$tstep2)
# Level 2
# HLM.export("H2.dat", l2preds$id, l2preds$int, l2preds$aut, l2preds$IV, l2preds$relsatis)
HLM.export <- function(fname, id, ...) {
sprintf.NA <- function(var, NA_str = "99999") {
var[is.na(var)] = NA_str
return(sprintf("%12.3f", var))
}
args <- list(...)
output <- data.frame(sprintf("%04.0f", id))
for (i in 1:length(args)) {
output <- cbind(output, sprintf.NA(args[[i]]))
}
write.table(output, fname, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="")
}
#TODO: Die Funkntion ist noch nicht so ganz fertig ...
cor.evol <- function(x,y, start=10, steps=NULL, fun=list(cor.test, corb), est_name=c("estimate", "cor.est"), ci_name=c("conf.int","cor.ci")) {
indices <- length(fun)
delList <- is.na(x) | is.na(y)
x <- x[-delList]
y <- y[-delList]
if (is.null(steps)) steps <- length(x)
delta <- length(x)/steps
cor.evol <- array(NA, dim = c(steps, 3, indices))
for (j in 1:indices) {
for (i in 1:steps) {
pos <- round(start + i*delta)
if(pos>length(x)) pos <- length(x)
if (j==2) co <- fun[[j]](x[1:pos],y[1:pos], nboot=100)
else co <- fun[[j]](x[1:pos],y[1:pos])
cor.evol[i,1,j] <- co[[est_name[j]]]
cor.evol[i,2,j] <- co[[ci_name[j]]][1]
cor.evol[i,3,j] <- co[[ci_name[j]]][2]
}
}
val <- 1:steps
plot(NA, type="n", ylim=c(-1,1), xlim=c(1,steps), xlab="n", ylab="Correlation", xaxt="n")
axis(1, at=1:steps, labels=round((1:steps)*delta))
for (j in 1:indices) {
lines(val, cor.evol[val,1,j], col=j)
lines(val, cor.evol[val,2,j], lty="dashed", col=j)
lines(val, cor.evol[val,3,j], lty="dashed", col=j)
}
abline(h=0, col="grey40")
cor.evol <- cor.evol[start:length(x),,]
return(cor.evol)
}
#cor.evol(final$int, final$aut, fun=list(cor.test), est_name="estimate", ci_name="conf.int")
#co <- cor.evol(final$int, final$aut, steps=10)
#cor.evol(final$relsatis, final$dist1, fun=list(cor.test), est_name="estimate", ci_name="conf.int")
#------------------------------------------------------------
# Usage Countdown
## before loop:
# start <- countdown(0) #remember the starting time and shows "Go!"
## in loop:
# countdown(count, max, start)
countdown <- function(count, max=100, start=NULL) {
if (count==0) {
if (is.null(start)) cat("Go! ") else cat("Go! ")
}
if (count > 0 & count <= max) {
if (is.null(start)) {
cat("\b\b\b\b\b")
cat(paste(sprintf("%03.0f", round((count*100)/max,0))," %", sep=""))
} else {
# Die ETA abschätzen ...
ti <- Sys.time()
ETA <- (difftime(ti, start, units="secs")/count)*(max-count)
cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
msg <- paste(sprintf("%3.0f", round((count*100)/max,0))," %; ETA = ", sprintf("%3.0f",round(ETA))," sec.", sep="")
cat(msg)
return(msg)
}
}
if (count == max) {
cat("\n")
if (is.null(start)) print("Finished.") else print(paste("Finished. Elapsed time = ",round(difftime(ti, start, units="secs"),1)," sec."))
}
if (count==0) return(Sys.time())
}
# Funktion nimmt eine lange Liste von Variablen und schiebt eine Variable von einer Spalte in Reihen,
# sortiert nach einer ID.
# Bsp.:
# > df
# id ilabel value
# 1 A item1 1
# 2 A item2 2
# 3 A item3 3
# 4 B item1 4
# 5 B item2 5
# 6 B item3 6
# > swap(df, 1, 3, labels=c("i1","i2","i3"))
# id i1 i2 i3
# 1 A 1 2 3
# 2 B 4 5 6
swapCol <- function(df, idCol, valueCol, labels=NULL) {
df[,idCol] <- as.factor(df[,idCol])
vpn <- length(levels(df[,idCol]))
slice <- df[df[,idCol]==levels(df[,idCol])[1],]
vars <- nrow(slice)
res <- data.frame(id=vector(mode="character", 0), data= matrix(NA, 0, vars))
start <- Sys.time()
countdown(0)
for (i in 1:vpn) {
slice <- df[df[,idCol]==levels(df[,idCol])[i],]
res <- rbind(res, data.frame(id=levels(df[,idCol])[i], data=t(slice[,valueCol])))
countdown(i, vpn, start)
}
if (!is.null(labels)) colnames(res)[2:ncol(res)] <- labels
return(res)
}
bargraph.CI2 <- function(...) {
# CI mit bootstrap
library(Hmisc)
library(sciplot)
my.ci <- function(x) c(smean.cl.boot(x)[2],smean.cl.boot(x)[3])
bargraph.CI(..., ci.fun=my.ci, sub="95% confidence intervals with bootstrap")
}
lineplot.CI2 <- function(...) {
# CI mit bootstrap
library(Hmisc)
library(sciplot)
my.ci <- function(x) c(smean.cl.boot(x)[2],smean.cl.boot(x)[3])
lineplot.CI(..., ci.fun=my.ci, sub="95% confidence intervals with bootstrap")
}
#------------------------------------
#-- Item Selection
#------------------------------------
# set = dataframe with items
# key = keys of items
# dels = positions to be deleted
# tset = TRUE/FALSE-vector with test set
# add = variables to be correlated with resulting scale (before and after deletion)
delInfo <- function(set, key, dels = NULL, tset=1, add=NULL) {
if (!is.null(dels)) {
set1 <- set[tset,-dels]
set2 <- set[!tset,-dels]
key1 <- key[-dels]
} else {
set1 <- set[tset,]
set2 <- set[!tset,]
key1 <- key
}
set.full <- set[tset,]
full <- cronbach(set.full, key, verbose="none")
short <- cronbach(set1, key1, full.scores = data.frame(full, add))
short2 <- cronbach(set2, key1)
diff <- colMeans(set1, na.rm=TRUE)
trenn <- apply(set1, 2, function(x) abs(cor(x, full, use="p")))
plot(data.frame(diff, trenn))
text(data.frame(diff+0.15, trenn), colnames(set1))
}
getStanine.z <- function() {
stan <- c(0.04, 0.11, 0.23, 0.4, 0.6, 0.77, 0.89, 0.96)
round(qnorm(stan),2)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.