R/FUtils.r

# zum Einbinden
#source("/Users/Felix/Documents/R/Funktionen/FUtils.r")

print("Standardfunktionen 'FUtils.R' werden geladen ...")


# @param df: data.frame with relevant variables
# @param control: vector containing column numbers or column names of variables in the data.frame that should be controlled for (you can only use ONE kind - either numbers or names)
# @param lower.tri.method: by default, in the lower triangle the zero-order-correlations are shown, and in the upper triangle the partial correlations; only possible option at the moment: lower.tri.method = "par" --> in both triangles the partial correlations are shown

parMatrix <- function(df, control, lower.tri.method = "cor") {

	require(ggm)
	options(warn=-1)

	cat("Coefficients are controlled for:\n")
	cat(colnames(df[control]))
	cat("\n\n")
	cat("above diagonal: partial correlations, below: zero-order-correlations\n\n")
	
	# mark columns consisting only of NAs (these would crash the procedure)
	skipList <- novar.colnums(df)
	
	dims <- ncol(df)
	dnames <- colnames(df)
	
	# if the control parameter consists of strings: transform to numbers
	if (is.character(control[1])) {
		control <- which(colnames(df) %in% control)
	}
	

	par <- matrix(rep(0,dims ^ 2), nrow=dims, ncol=dims, dimnames = list(dnames, dnames))
	ret <- par

# calculate partial correlations
	for (i in 1:ncol(df)) {
		for (j in 1:ncol(df)) {
			if (i != j & sum(control==i)==0 & sum(control==j)==0 & !(i %in% skipList) & !(j %in% skipList)) {
				par[i,j] <- ggm::pcor(c(i, j, control), var(df, na.rm = TRUE, use="p"))
			}	
			if ((i %in% skipList) | (j %in% skipList)) par[i,j] <- NA
		}

	}	

	# calculate zero-order-correlations
	co <- cor(df, use = "p")	
	ret <- par
	
	if (lower.tri.method != "par") {
		ret[lower.tri(ret)] <- co[lower.tri(co)]
	}
	
	diag(ret) <- 1

	options(warn=1)
	return(ret)
	
}


# f1: formula, e.g. ~sex+age (controlling for sex and age)
# skip: which columns should be skipped? (you should provide column names)

parMatrix.res <- function(df, f1, skip=c("")) {

		# mark columns consisting only of NAs (these would crash the procedure)
		# ... and columns for which is controlled (this would result in an invalid formula)
		skipList <- novar.colnums(df)

		t1 <- attr(terms(f1), "term.labels")
		controls <- which(colnames(df) %in% t1)
		skipList <- c(skipList, controls)
		
		skips <- which(colnames(df) %in% skip)
		skipList <- c(skipList, skips)

		ret <- df
		for (i in 1:ncol(ret)) {
			if (!(i %in% skipList)) ret[,i] <- NA
		}

	# calculate partial correlations
		for (i in 1:ncol(df)) {
			if (!(i %in% skipList)) {
				
				# Die jeweilige Formel konstruieren
				new <- paste(colnames(df)[i], "~.")
				f2 <- update(f1, new)
				
				# lineare Regression: für die angegebene Formel kontrollieren
				l1 <- lm(f2, df, na.action=na.exclude)
				
				# Residuen abspeichern
				#ret[names(l1$residuals),i] <- l1$residuals
				ret[,i] <- resid(l1)
			}	

		}	

		return(ret)
}


# Hilfsfunktion save_var: berechnet die Varianz, gibt aber keinen Fehler aus, wenn nur NAs vorkommen (gibt in diesem Fall wieder NA zurück)
save_var <- function (vec) {
	if (is.nan(mean(vec, na.rm=TRUE))) {return(NA) } else {
		v <- var(vec, na.rm=TRUE)
		return(v)
	}	
	
}


novar.colnums <- function(df){

	options(warn=-1)

	# mark columns with no variance
	delList <- c()
	for (i in 1:ncol(df)) {if (is.factor(df[,i])) delList <- c(delList, i)}
	for (i in 1:ncol(df)) {
		if (!(i %in% delList) & (is.na(save_var(df[,i])) | save_var(df[,i]) == 0)) delList <- c(delList, i)	
	}
	
	options(warn=1)
	
	return(delList)
}




# remove columns with no variance from data frame
novar.rm <- function(df){
	delList <- novar.colnums(df)

	if (length(delList) > 0) {
		print(paste("Column", colnames(df)[delList]," has no variance and is removed from the data set"))
		df <- df[,-delList]
	}
	
	return(df)
}



# remove columns consisting of NAs from data frame
NArm <- function(df, index=2, del = "", cols=NULL){

	if (is.null(cols)) cols <- 1:ncol(df)

	dels <- c()
	
	NAs <- apply(df[,cols], index, function(x) sum(is.na(x) | x==del))
	le <- apply(df[,cols], index, length)
	dels <- NAs == le
	
	if (index==1) {df2 <- df[!dels,]} else
	if (index==2) {df2 <- df[,!dels]}
	
	return(df2)
}



# calculates average correlation using Fisher's Z-transformation, averaging the Z-values and back-transforming the average to a Pearson correlation.

cor.average <- function(corlist) {
	
	fz <- function(co) {
		return(0.5 * log((1 + co)/(1 - co)))
	}
	
	f <- mean(fz(corlist))
	return((exp(2*f)-1)/(exp(2*f)+1))
	
}



# draws a correlation matrix where the significance of correlations is marked with a sign:
# *** = p < 0.001
#  ** = p < 0.01
#   * = p < 0.05
#   † = p < 0.10
#
# @param digits: the number of digits to which values are rounded (default = 2)

corMatrix <- function(df, digits=2, method="pearson", exact=NULL) {

	dims <- ncol(df)
	ids <- nrow(df)
	dnames <- colnames(df)

	ps <- matrix(rep(1,dims ^ 2), nrow=dims, ncol=dims, dimnames = list(dnames, dnames))

# calculate p-values of correlations
	for (i in 1:ncol(df)) {
		for (j in 1:ncol(df)) {
			if (sum(!is.na(df[,i])) > 3 & sum(!is.na(df[,j])) > 3) ps[i,j] <- cor.test(df[,i], df[,j], use="p", method=method, exact=exact)$p.value
		}
	}	

	# calculate zero-order-correlations
	co <- round(cor(df, use = "p", method=method), digits)

	for (i in 1:ncol(co)) {
		for (j in 1:ncol(df)) {
			if (ps[i,j]<0.001) co[i,j] <- paste(co[i,j], "***") else 
			if (ps[i,j]<0.01) co[i,j] <- paste(co[i,j], "**") else 
			if (ps[i,j]<0.05) co[i,j] <- paste(co[i,j], "*") else 
			if (ps[i,j]<0.1) co[i,j] <- paste(co[i,j], "†")			
			if (i==j) co[i,j] = 1
		}

	}	

	ret <- list(cor = co, annotation = "***: p < 0.001; **: p < 0.01; *: p < 0.05; †: p < 0.1")
	return(ret)
}




#------------------------------------------------------------
# ----  Proper Item Scaling --------------------------------------
#------------------------------------------------------------

cronbach <- function(items, keys = NULL, scoring = mean, verbose="long", full.scores = NULL, scale=FALSE){
	library(psych)
	
	if (is.null(keys)) keys = c(rep(1, ncol(items)))
	if (!is.null(full.scores)) {
		full.scores <- as.matrix(full.scores)
	}
	
	valids <- !(apply(items, 1, function(x) all(is.na(x))))
	if (any(valids==FALSE)) {
		print(paste(sum(!valids),"participants removed because of lacking data (only NAs)"));
		items <- items[valids,]
		full.scores <- full.scores[valids,]
	}
	

	items.recoded <- t((keys == -1)*(max(items, na.rm = TRUE)-t(items)) + (keys == 1)*t(items))
	if (scale==TRUE) items.recoded <- apply(items.recoded, 2, scale)
	
	scores <- apply(items.recoded, 1, scoring, na.rm=TRUE)
	
	scores[apply(items, 1, function(x) sum(!is.na(x))<2)] <- NA
	n <- length(scores[!is.na(scores)])
	
	getAlpha <- function(i, k) {
		# alpha berechnen
		scores <- as.matrix(i) %*% as.matrix(k)
		item.var <- diag(var(i, use = "pairwise"))
	    cov.scales <- cov(scores, use = "pairwise")
	    var.scales <- diag(cov.scales)
	    cor.scales <- cor(scores, use = "pairwise")
	    sum.item.var <- item.var %*% abs(k)
	    num.item <- diag(t(abs(k)) %*% abs(k))
	    alpha.scale <- (var.scales - sum.item.var) * num.item/((num.item - 
	        1) * var.scales)
	
		return(as.numeric(round(alpha.scale,3)))
	}


	alpha <- getAlpha(items, keys)
	print(paste("Alpha (",ncol(items)," Items; n=",n,"): ",alpha, sep=""))
	print("----------------------------")
	
	# scale if item deleted
	if (verbose=="long" & ncol(items)>2) {
				if (!is.null(full.scores)) {
			print(round(cor(data.frame(scores, full.scores), use="p"),2)[1,])
		}
		
		res <- data.frame(label = vector(mode="character", 0), alpha = vector(mode="numeric", 0), diff =vector(mode="numeric", 0))
		if (!is.null(full.scores)) {
			for (f in 1:ncol(full.scores)) res <- cbind(res, vector(mode="numeric", 0))
			print("----------------------------")
		}
				
		for (i in 1:ncol(items)) {
			
			alpha_m1 <- getAlpha(items[,-i], keys[-i])
			sc_m1 <- apply(items.recoded[,-i], 1, scoring, na.rm=TRUE)
			
			resline <- data.frame(label = paste("Ohne Item",i,"(",colnames(items)[i],")"), alpha = round(alpha_m1,3), diff = round(alpha_m1-alpha,3))
			
			if (!is.null(full.scores)) {					
				for (f in 1:ncol(full.scores)) resline <- cbind(resline, round(cor(sc_m1, full.scores[,f], use="p"),2))
			}
				
			res <- rbind(res, resline)
		}
		
		if (!is.null(full.scores)) {
			colnames(res)[4:ncol(res)] <- colnames(full.scores)
		}
		
		print(res)
	}
	
	attr(scores, "alpha") <- alpha
	return(scores)
}


# Hilfsfunktion für copy.clipboard.RTF:

getStars <- function(val) {
	
	res <- ""
	
	if (val <= 0.05) res <- "*"
	if (val <= 0.01) res <- "**"
	if (val <= 0.001) res <- "***"
	
	return(res)
}


#------------------------------------------------------------
# ----  Copy RTF-formatted table to clipboard ---------------
#------------------------------------------------------------
# suppress: < this value, cells content is invisible
# highlight: >= this value cells are printed in bold
# greyed: < this value cells are printed in italic
# stars: a matrix of p-values with the same dimensions like mat
#
# Bsp: copy.clipboard.RTF(res)

copy.clipboard.RTF <- function(mat, row.names = TRUE, col.names=TRUE, suppress = 0, highlight = 1, greyed = 0, stars=NULL, font="Helvetica") {
	
	# standard rtf header
	#rtf_header <- paste("{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 ",font,"}}\r", sep="")
	#rtf_header <- paste("{\\rtf1\\ansi\\ansicpg1252\\cocoartf949\\cocoasubrtf330{\\fonttbl\\f0\\fswiss\\fcharset0 ",font,";}",sep="");
	
	
	if (mode(mat)=="character") num <- FALSE
	
	rtf_header <- paste("{\\rtf1\\ansi{\\fonttbl\\f0\\fswiss\\fcharset0 ",font,";}",sep="");
	rtf_outro <- "\r}"
	
	#rtf_string stores the content
	rtf_string <- "";
	
	# Header-Row with colnames (if requested)
	if (col.names){
		for (col in 1:ncol(mat)) rtf_string <- paste(rtf_string, "\\tab ", colnames(mat)[col], sep="")
		rtf_string <- paste(rtf_string, "\\par ", sep="")
	}
	
	for (r in 1:nrow(mat)) {
		
		
		if (row.names) rtf_string <- paste(rtf_string, rownames(mat)[r],sep="")
		
		for (col in 1:ncol(mat)) {
			
			if (!is.na(mat[r,col])) {
			
				
				if (num==TRUE) {
					# Opening tag for bold printing
					if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "{\\b ", sep="")
					# Opening tag for italic printing
					if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "{\\i ", sep="")
						
				# Insert Cells (only if >= suppress)
					if (abs(mat[r,col])>suppress) {
						rtf_string <- paste(rtf_string, "\\tab ", mat[r,col], sep="")
						if (!is.null(stars)) {
							rtf_string <- paste(rtf_string, getStars(stars[r,col]), sep="")
						}
					} else {rtf_string <- paste(rtf_string, "\\tab ", sep="")}
			
					# Closing tag for bold printing
					if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "}", sep="")
					# Closing tag for italic printing
					if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "}", sep="")
				} else {
					rtf_string <- paste(rtf_string, "\\tab ", mat[r,col], sep="")
				}			
			
			} else {
				rtf_string <- paste(rtf_string, "\\tab ", sep="")
			}
		}
		
		# new paragraph after each line
		rtf_string <- paste(rtf_string, "\\par ", sep="")
	}
	
	clipboard <- pipe("pbcopy", open="w")
	write(paste(rtf_header, rtf_string, rtf_outro, sep=""), clipboard)
	close(clipboard)
}


copy2rows.clipboard.RTF <- function(mat, mat2, row.names = TRUE, col.names=TRUE, suppress = 0, highlight = 1, greyed = 0, stars=FALSE, font="Helvetica", labels = c("","")) {
	
	# standard rtf header
	rtf_header <- paste("{\\rtf1\\ansi{\\fonttbl\\f0\\fswiss\\fcharset0 ",font,";}",sep="");
	rtf_outro <- "\r}"
	
	#rtf_string stores the content
	rtf_string <- "";
	
	# Header-Row with colnames (if requested)
	if (col.names){
		
		# Für die extra-Label-Spalte noch ein Tab
		rtf_string <- paste(rtf_string, "\\tab ", sep="")
		
		for (col in 1:ncol(mat)) rtf_string <- paste(rtf_string, "\\tab ", colnames(mat)[col], sep="")
		rtf_string <- paste(rtf_string, "\\par ", sep="")
	}
	
	for (r in 1:nrow(mat)) {
		
		
		if (row.names) rtf_string <- paste(rtf_string, rownames(mat)[r],sep="")
		
		# Extra Labels 1
		rtf_string <- paste(rtf_string, "\\tab ", labels[1],sep="")
		
		for (col in 1:ncol(mat)) {
			
			if (!is.na(abs(mat[r,col]))) {
				# Opening tag for bold printing
				if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "{\\b ", sep="")
				# Opening tag for italic printing
				if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "{\\i ", sep="")
			
				# Insert Cells (only if >= suppress)
				if (abs(mat[r,col])>suppress) {rtf_string <- paste(rtf_string, "\\tab ", mat[r,col], sep="")}
				else {rtf_string <- paste(rtf_string, "\\tab ", sep="")}
			
				# Closing tag for bold printing
				if (abs(mat[r,col])>=highlight) rtf_string <- paste(rtf_string, "}", sep="")
				# Opening tag for italic printing
				if (abs(mat[r,col])<greyed) rtf_string <- paste(rtf_string, "}", sep="")
			} else {
				rtf_string <- paste(rtf_string, "\\tab ", sep="")
			}			
		}
		
		# new paragraph after each line
		rtf_string <- paste(rtf_string, "\\par ", sep="")
		
		# Extra Labels 1
		rtf_string <- paste(rtf_string, "\\tab ", labels[2], sep="")
		
		for (col in 1:ncol(mat2)) {
			
			if (!is.na(abs(mat2[r,col]))) {
				# Opening tag for bold printing
				if (abs(mat2[r,col])>=highlight) rtf_string <- paste(rtf_string, "{\\b ", sep="")
				# Opening tag for italic printing
				if (abs(mat2[r,col])<greyed) rtf_string <- paste(rtf_string, "{\\i ", sep="")
			
				# Insert Cells (only if >= suppress)
				if (abs(mat2[r,col])>suppress) {rtf_string <- paste(rtf_string, "\\tab ", mat2[r,col], sep="")}
				else {rtf_string <- paste(rtf_string, "\\tab ", sep="")}
			
				# Closing tag for bold printing
				if (abs(mat2[r,col])>=highlight) rtf_string <- paste(rtf_string, "}", sep="")
				# Opening tag for italic printing
				if (abs(mat2[r,col])<greyed) rtf_string <- paste(rtf_string, "}", sep="")
			} else {
				rtf_string <- paste(rtf_string, "\\tab ", sep="")
			}
			
		}
		
		# new paragraph after each line
		rtf_string <- paste(rtf_string, "\\par ", sep="")
	}
	
	clipboard <- pipe("pbcopy", open="w")
	write(paste(rtf_header, rtf_string, rtf_outro, sep=""), clipboard)
	close(clipboard)
}


#------------------------------------------------------------
# ----  Get marginal significant correlation for a given number of participants  -----------------
#------------------------------------------------------------

# Helper: Transform correlation to Fisher's Z
r2Z <- function(r) {
	return(0.5 * log((1 + r)/(1 - r)))
}

# Helper: REcode  Fisher's to correlation
Z2r <- function(Z) {
	return((exp(2*Z)-1)/(exp(2*Z)+1))
}

# get the marginal correlation that is significant in a certain sample size
margin.correlation <- function(n, p=0.025) {
	Z_crit <- abs(qnorm(p))*sqrt(1/(n-3))
	return(Z2r(Z_crit))
}


# Calculated probability that the difference of two correlations is not significant
corr.diff <- function(r1, r2, n1, n2) {
	z <- abs(r2Z(r1)-r2Z(r2))/sqrt((1/(n1-3))+(1/(n2-3)))
	return(1 - round(pnorm(z),2))
}


#------------------------------------------------------------
# ----  Moderation Analysis ---------------------------------
#------------------------------------------------------------
# ... gracefully borrows code from the QuantPSyc package by Thomas D. Fletcher

mod.anal <- function(formule, data, slope.z=1, plot=FALSE, main = "", xlab = NULL, ylab = NULL, zlab=NULL, ylimit=1.5) {
    

	# center moderator:
	varnames <- rownames(attr(terms(formule), "factors"))
	x1 <- varnames[2]
	   data[x1] <- data[x1]-mean(data[x1], na.rm=TRUE)
	x2 <-varnames[3]
	   data[x2] <- data[x2]-mean(data[x2], na.rm=TRUE)
	y <- varnames[1]
		
	
	lm1 <- lm(formule, data)
	print(summary(lm1))

	z <- lm1$model[[3]]
    zhi <- slope.z * sd(z, na.rm = TRUE)
    zlo <- -slope.z * sd(z, na.rm = TRUE)
    zme <- 0

    b0 <- summary(lm1)$coef[1, 1]
    b1 <- summary(lm1)$coef[2, 1]
    b2 <- summary(lm1)$coef[3, 1]
    b3 <- summary(lm1)$coef[4, 1]

    x.zhi <- (b1 + b3 * zhi)
    x.zlo <- (b1 + b3 * zlo)
    x.zme <- (b1 + b3 * zme)
    int.zhi <- (b0 + b2 * zhi)
    int.zlo <- (b0 + b2 * zlo)
    int.zme <- (b0 + b2 * zme)
    seb.zhi <- sqrt(vcov(lm1)[2, 2] + 2 * zhi * vcov(lm1)[2, 
        4] + zhi^2 * vcov(lm1)[4, 4])
    seb.zlo <- sqrt(vcov(lm1)[2, 2] + 2 * zlo * vcov(lm1)[2, 
        4] + zlo^2 * vcov(lm1)[4, 4])
    seb.zme <- sqrt(vcov(lm1)[2, 2] + 2 * zme * vcov(lm1)[2, 
        4] + zme^2 * vcov(lm1)[4, 4])
    td <- qt(0.975, df = summary(lm1)$df[2])
    zhi.u <- x.zhi + td * seb.zhi
    zhi.l <- x.zhi - td * seb.zhi
    zlo.u <- x.zlo + td * seb.zlo
    zlo.l <- x.zlo - td * seb.zlo
    zme.u <- x.zme + td * seb.zme
    zme.l <- x.zme - td * seb.zme
    mat <- matrix(NA, 3, 5)
    colnames(mat) <- c("INT", "Slope", "SE", "LCL", "UCL")
    rownames(mat) <- c("at zHigh", "at zMean", "at zLow")
    mat[1, ] <- c(int.zhi, x.zhi, seb.zhi, zhi.l, zhi.u)
    mat[2, ] <- c(int.zme, x.zme, seb.zme, zme.l, zme.u)
    mat[3, ] <- c(int.zlo, x.zlo, seb.zlo, zlo.l, zlo.u)

	if (is.null(ylab)) ylab <- paste("raw",y)
	if (is.null(xlab)) xlab <- paste("centered ",x1)
	if (is.null(zlab)) zlab <- x2

	
    yhi <- mean(lm1$model[[1]], na.rm = TRUE) + ylimit * sd(lm1$model[[1]], na.rm = TRUE)
    ylo <- mean(lm1$model[[1]], na.rm = TRUE) - ylimit * sd(lm1$model[[1]], na.rm = TRUE)
    plot(lm1$model[[2]], lm1$model[[1]], main = main, xlab = xlab, ylab = ylab, ylim = c(ylo, yhi))
    abline(mat[1, 1], mat[1, 2], col = 4)
    abline(mat[3, 1], mat[3, 2], col = 2)
    abline(mat[2, 1], mat[2, 2], lty = 2, col = 1)
    legend(locator(1), paste(c("+","-"),slope.z," SD on ",zlab,sep=""), lty = 1, col = c(4, 
        2))
	
	return(list(sim.slopes = mat))
}


#------------------------------------------------------------
# ---- Regressionstools  --------------------------------------
#------------------------------------------------------------

xyplot.r <- function(...) {
	
	library(lattice)
	library(grid)
	
xyplot(...,
		
		panel = function(x,y) {
			panel.dotplot(x,y)
			panel.lmline(x,y)
			r = round(cor(x,y, use="p"),3)
			grid.text(paste("r=", r), y = unit(0.1, "npc"), gp=gpar(...))
		}		
)

}

#------------------------------------------------------------
# Hierarchische multiple Regression: inkrementelle Validität

hmreg <- function(AV, UV1, UV2, data=NULL) {
	
	if (is.null(data)) {
		AV_name <- deparse(substitute(AV))
		UV1_name <- deparse(substitute(UV1))
		UV2_name <- deparse(substitute(UV2))	
	} else {
		AV_name <- AV
		UV1_name <- UV1
		UV2_name <- UV2
		
		AV <- data[,AV]
		UV1 <- data[,UV1]
		UV2 <- data[,UV2]
	}
	

	delList <- is.na(AV) | is.na(UV1) | is.na(UV2)
	AV <- AV[!delList]
	UV1 <- UV1[!delList]
	UV2 <- UV2[!delList]

	l1 <- lm(AV~UV1)
	l2 <- lm(AV~UV2)
	l3 <- lm(AV~UV1+UV2)
	
	s1 <- summary(l1)
	s2 <- summary(l2)
	s3 <- summary(l3)
	
	a1 <- anova(l1)
	a2 <- anova(l2)
	a12 <- anova(l1,l3)
	a21 <- anova(l2,l3)
	
	
	res <- matrix(NA, nrow=4, ncol=9, dimnames=list(c(UV1_name, UV2_name, paste(UV1_name,"+",UV2_name), paste(UV2_name,"+",UV1_name)),c("R2", "adj.R2","adj.R2.change","F.model","p.model","F.change", "p.change", "df1","df2")))

	res[1,"R2"] <- s1$r.squared
	res[2,"R2"] <- s2$r.squared
	res[3,"R2"] <- s3$r.squared
	res[4,"R2"] <- r2 <- s3$r.squared

	res[1,"adj.R2"] <- s1$adj.r.squared
	res[2,"adj.R2"] <- s2$adj.r.squared
	res[3,"adj.R2"] <- s3$adj.r.squared
	res[4,"adj.R2"] <- s3$adj.r.squared
	
	res[1,"adj.R2.change"] <- s1$adj.r.squared
	res[2,"adj.R2.change"] <- s2$adj.r.squared
	res[3,"adj.R2.change"] <- s3$adj.r.squared - s1$adj.r.squared
	res[4,"adj.R2.change"] <- s3$adj.r.squared - s2$adj.r.squared
	
	res[1,"F.model"] <- s1$fstatistic[1]
	res[2,"F.model"] <- s2$fstatistic[1]
	res[3,"F.model"] <- s3$fstatistic[1]
	res[4,"F.model"] <- s3$fstatistic[1]

	res[1,"p.model"] <- s1$coefficients[2,4]
	res[2,"p.model"] <- s2$coefficients[2,4]
	res[3,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
	res[4,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
	
	res[1,"F.change"] <- a1$F[1]
	res[2,"F.change"] <- a2$F[1]
	res[3,"F.change"] <- a12$F[2]
	res[4,"F.change"] <- a21$F[2]

	res[1,"p.change"] <- a1$P[1]
	res[2,"p.change"] <- a2$P[1]
	res[3,"p.change"] <- a12$P[2]
	res[4,"p.change"] <- a21$P[2]
	
	
	res[1,"df1"] <- a1$Df[1]
	res[2,"df1"] <- a2$Df[1]
	res[3,"df1"] <- a12$Df[2]
	res[4,"df1"] <- a21$Df[2]

	res[1,"df2"] <- a1$Df[2]
	res[2,"df2"] <- a2$Df[2]
	res[3,"df2"] <- a12$Res.Df[2]
	res[4,"df2"] <- a21$Res.Df[2]
	
	r2.part1 <- round(cor(AV, lm(UV1~UV2)$resid)^2,3)
	r2.part2 <- round(cor(AV, lm(UV2~UV1)$resid)^2,3)

	
	print(paste("Unique variance component for",UV1_name,":",r2.part1,"; =",round(r2.part1*100/r2,2),"%"))
	print(paste("Unique variance component for",UV2_name,":",r2.part2,"; =",round(r2.part2*100/r2,2),"%"))
	print(paste("Joint variance component:",round(r2-r2.part1-r2.part2,3),"; =",round((r2-r2.part1-r2.part2)*100/r2,2),"%"))

	round(res,3)
}
	
	
	
	
hmreg2 <- function(f1, f2, f3, data=NULL) {
	
	l1 <- lm(f1)
	l2 <- lm(f2)
	l3 <- lm(f3)
	
	s1 <- summary(l1)
	s2 <- summary(l2)
	s3 <- summary(l3)
	
	a1 <- anova(l1)
	a2 <- anova(l2)
	a12 <- anova(l1,l3)
	a21 <- anova(l2,l3)
	
	
	res <- matrix(NA, nrow=4, ncol=9, dimnames=list(c(UV1_name, UV2_name, paste(UV1_name,"+",UV2_name), paste(UV2_name,"+",UV1_name)),c("R2", "adj.R2","adj.R2.change","F.model","p.model","F.change", "p.change", "df1","df2")))

	res[1,"R2"] <- s1$r.squared
	res[2,"R2"] <- s2$r.squared
	res[3,"R2"] <- s3$r.squared
	res[4,"R2"] <- r2 <- s3$r.squared

	res[1,"adj.R2"] <- s1$adj.r.squared
	res[2,"adj.R2"] <- s2$adj.r.squared
	res[3,"adj.R2"] <- s3$adj.r.squared
	res[4,"adj.R2"] <- s3$adj.r.squared
	
	res[1,"adj.R2.change"] <- s1$adj.r.squared
	res[2,"adj.R2.change"] <- s2$adj.r.squared
	res[3,"adj.R2.change"] <- s3$adj.r.squared - s1$adj.r.squared
	res[4,"adj.R2.change"] <- s3$adj.r.squared - s2$adj.r.squared
	
	res[1,"F.model"] <- s1$fstatistic[1]
	res[2,"F.model"] <- s2$fstatistic[1]
	res[3,"F.model"] <- s3$fstatistic[1]
	res[4,"F.model"] <- s3$fstatistic[1]

	res[1,"p.model"] <- s1$coefficients[2,4]
	res[2,"p.model"] <- s2$coefficients[2,4]
	res[3,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
	res[4,"p.model"] <- 1-pf(s3$fstatistic[1], s3$fstatistic[2], s3$fstatistic[3])
	
	res[1,"F.change"] <- a1$F[1]
	res[2,"F.change"] <- a2$F[1]
	res[3,"F.change"] <- a12$F[2]
	res[4,"F.change"] <- a21$F[2]

	res[1,"p.change"] <- a1$P[1]
	res[2,"p.change"] <- a2$P[1]
	res[3,"p.change"] <- a12$P[2]
	res[4,"p.change"] <- a21$P[2]
	
	
	res[1,"df1"] <- a1$Df[1]
	res[2,"df1"] <- a2$Df[1]
	res[3,"df1"] <- a12$Df[2]
	res[4,"df1"] <- a21$Df[2]

	res[1,"df2"] <- a1$Df[2]
	res[2,"df2"] <- a2$Df[2]
	res[3,"df2"] <- a12$Res.Df[2]
	res[4,"df2"] <- a21$Res.Df[2]
	
	r2.part1 <- round(cor(AV, lm(UV1~UV2)$resid)^2,3)
	r2.part2 <- round(cor(AV, lm(UV2~UV1)$resid)^2,3)

	
	print(paste("Unique variance component for",UV1_name,":",r2.part1,"; =",round(r2.part1*100/r2,2),"%"))
	print(paste("Unique variance component for",UV2_name,":",r2.part2,"; =",round(r2.part2*100/r2,2),"%"))
	print(paste("Joint variance component:",round(r2-r2.part1-r2.part2,3),"; =",round((r2-r2.part1-r2.part2)*100/r2,2),"%"))

	round(res,3)
}
	
	
	
	
#------------------------------------------------------------
# ----  Sonstige Tools --------------------------------------
#------------------------------------------------------------

# medianSplit does an intelligent medianSplit: if groups are defined, the groupwise median is 
# employed; it also is checked whether "<" or "<=" leads to more equal group sizes
# demo: all4$QInvSplit = medianSplit(all4$QInv2, group = all4$IV)

medianSplit <- function(var, group = NULL, varname=NULL, labels = c("low", "high")) {
	
	# find the group median
	if (!is.null(group)) {
		group <- as.factor(group)
		groupmedian <- aggregate(var, list(group), median, na.rm=TRUE)
		names(groupmedian) <- c("group", "md")
	
		dat <- data.frame(var, group)
		dat <- merge(dat, groupmedian, by="group")
	} else {
		groupmedian <- median(var, na.rm=TRUE)
		dat <- data.frame(var, md = groupmedian)
	}
	

	# check if "<" or "<=" leads to more equal groups
	relFreq1 <- abs((sum(dat["var"] <= dat["md"], na.rm=TRUE)/nrow(dat)) - 0.5)
	relFreq2 <- abs((sum(dat["var"] < dat["md"], na.rm=TRUE)/nrow(dat)) - 0.5)

	if (relFreq1 < relFreq2) {
		"%smaller%" <- function(x,y) {x <= y}
	} else {
		"%smaller%" <- function(x,y) {x < y}
	}

	mS <- as.factor(as.vector(ifelse(dat["var"] %smaller% dat["md"], labels[1], labels[2])))

	return(mS)
}


# performs a test of normality (but you should better use shapiro.test)
ks.norm <- function(x) {
	ks <- ks.test(x, "pnorm", mean=mean(x), sd=sqrt(var(x)))
	return(ks)
}


reverse <- function(x) {
	return((max(x, na.rm=TRUE)+1)-x)
}

posify <- function(x) {
	if (min(x, na.rm=TRUE)>=1) {return(x)}
	else {return(x+abs(min(x, na.rm=TRUE))+1)}
}

skewness <- function (x, na.rm = FALSE, ...) {   
	# A function implemented by D. Wuertz
 
    # Description:
    #   Returns the value of the skewness of a
    #   distribution function. Missing values
    #   can be handled.
   
    # FUNCTION:
   
    # Warnings:
    if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) {
        warning("argument is not numeric or logical: returning NA")
        return(as.numeric(NA))}
       
    # Remove NAs:
    if (na.rm) x = x[!is.na(x)]

    # Skewness:
    n = length(x)
    if (is.integer(x)) x = as.numeric(x)
    skewness = sum((x-mean(x))^3/sqrt(var(x))^3)/length(x)
   
    # Return Value:
    skewness 
}



# performs a transformation for skewed distributions.
# reversing is applied automatically depending on the skewness.
# func: the function to be applied

trans <- function(x, func=log10) {
	if (skewness(x, na.rm=TRUE)>0) {
		return(func(posify(x)))
	} else {
		return(reverse(func(reverse(x))))
	}
}



autoTrans <- function(x, plot=FALSE, verbose=TRUE) {
	
	# testen, ob eine Transformation überhaupt nötig ist:
	test <- shapiro.test(x)$p.value
	if (test > 0.1) {
		
		if (verbose) print(paste(deparse(substitute(x))," - AutoTrans: no transformation needed, p=",test, "AutoTrans NO TRANS"))
		if (plot) plot(density(x, na.rm=TRUE), main=paste("Shapiro-Wilks: p=", round(test,5)), sub="no transformation needed")
		
		attr(x, "method") <- "no transformation needed"
		attr(x, "p.value") <- test	
		return(x)
	}
	
	FUNCS <- c(log10, sqrt, function(y) 1/y)
	trans_str <- c("log10", "sqrt", "1/x")
	
	x2 <- matrix(NA, length(x), length(FUNCS))
	for (i in 1:length(FUNCS)) x2[,i] <- trans(x, FUNCS[[i]])

	s <- vector(mode="numeric", length=3) 
	for (i in 1:length(FUNCS)) s[i] <- shapiro.test(x2[,i])$p.value
	
	if (plot) {
		par(mfcol=c(1,3))
		for (i in 1:length(FUNCS)) {
			color <- (i==which.max(s))+1
			plot(density(x2[,i], na.rm=TRUE), main=paste("Shapiro-Wilks: p=", round(s[i],5)), col=color)
		}
	}
	
	res <- x2[,which.max(s)]
	attr(res, "method") <- trans_str[which.max(s)]
	attr(res, "p.value") <- s[which.max(s)]
	
	if (verbose) print(paste(deparse(substitute(x)),"- AutoTrans:",attr(res, "method"), ", p=",attr(res, "p.value"), "AutoTrans",ifelse(s[which.max(s)]>=0.1,"SUCESS", "FAILED")))
	return(res)
}

na.omit.cols <- function(df, cols) {
	
	validList <- apply(df, 1, function(x) sum(is.na(x[cols]))==0)
	return(df[validList,])
	
}

jitter.binary <- function(a, jitt=0.5, y1=0, y2=1) {
	ifelse(a==0, runif(length(a), y1, y1+jitt), runif(length(a), y2-jitt, y2))
}


# HLM braucht zwei Dateien: eine mit Level-1 Daten, eine mit Level-2 Daten
# IDs geben die Zugehörigkeit zur Gruppe auf Level2 an
#	- IDs müssen alle die gleiche Länge haben (also z.B. 001 bis 100)
#	- Die Fälle auf Level-1 müssen nach IDs geordnet sein
# Formatierung: HLM erwartet eine fixe Spaltenzahl, daher sprintf; 
#
# HLM-Formatangabe: (A4,2F12.3) (A4 = ID, dann 2 x 12.3 Format)
# Bsp.-Export:
# 	Level 1
#	HLM.export("H1.dat", IA2$id, IA2$effect2, IA2$tstep2)
# 	Level 2
#	HLM.export("H2.dat", l2preds$id, l2preds$int, l2preds$aut, l2preds$IV, l2preds$relsatis)
HLM.export <- function(fname, id, ...) {

	sprintf.NA <- function(var, NA_str = "99999") {
		var[is.na(var)] = NA_str
		return(sprintf("%12.3f", var))
	}
	
	args <- list(...)
	
	output <- data.frame(sprintf("%04.0f", id))
	for (i in 1:length(args)) {
		output <- cbind(output, sprintf.NA(args[[i]]))
	}

	write.table(output, fname, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="")
	
}




#TODO: Die Funkntion ist noch nicht so ganz fertig ...

cor.evol <- function(x,y, start=10, steps=NULL, fun=list(cor.test, corb), est_name=c("estimate", "cor.est"), ci_name=c("conf.int","cor.ci")) {
	
	indices <- length(fun)
	delList <- is.na(x) | is.na(y)
	x <- x[-delList]
	y <- y[-delList]
	
	if (is.null(steps)) steps <- length(x)
	delta <- length(x)/steps
	
	cor.evol <- array(NA, dim = c(steps, 3, indices))
	
	for (j in 1:indices) {
	
		for (i in 1:steps) {
			
			pos <- round(start + i*delta)
			if(pos>length(x)) pos <- length(x)
			
			if (j==2) co <- fun[[j]](x[1:pos],y[1:pos], nboot=100)
			else co <- fun[[j]](x[1:pos],y[1:pos])
			cor.evol[i,1,j] <- co[[est_name[j]]]
			cor.evol[i,2,j] <- co[[ci_name[j]]][1]
			cor.evol[i,3,j] <- co[[ci_name[j]]][2]
		}
		
	}
	
	
	val <- 1:steps
	
	plot(NA, type="n", ylim=c(-1,1), xlim=c(1,steps), xlab="n", ylab="Correlation", xaxt="n")
	axis(1, at=1:steps, labels=round((1:steps)*delta))
	
	for (j in 1:indices) {
		lines(val, cor.evol[val,1,j], col=j)
		lines(val, cor.evol[val,2,j], lty="dashed", col=j)
		lines(val, cor.evol[val,3,j], lty="dashed", col=j)
	}
	
	abline(h=0, col="grey40")
	
	
	cor.evol <- cor.evol[start:length(x),,]
	return(cor.evol)
}
#cor.evol(final$int, final$aut, fun=list(cor.test), est_name="estimate", ci_name="conf.int")
#co <- cor.evol(final$int, final$aut, steps=10)
#cor.evol(final$relsatis, final$dist1, fun=list(cor.test), est_name="estimate", ci_name="conf.int")




#------------------------------------------------------------
# Usage Countdown

## before loop:
# start <- countdown(0)	#remember the starting time and shows "Go!"

## in loop:
# countdown(count, max, start)

countdown <- function(count, max=100, start=NULL) {
	
	if (count==0) {
		if (is.null(start)) cat("Go!  ") else cat("Go!                   ")
	}
	
	if (count > 0 & count <= max) {
		if (is.null(start)) {
			cat("\b\b\b\b\b") 
			cat(paste(sprintf("%03.0f", round((count*100)/max,0))," %", sep=""))
		} else {
			
			# Die ETA abschätzen ...
		  	ti <- Sys.time()
		  	ETA <- (difftime(ti, start, units="secs")/count)*(max-count)
		
			cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
			msg <- paste(sprintf("%3.0f", round((count*100)/max,0))," %; ETA = ", sprintf("%3.0f",round(ETA))," sec.", sep="") 
			cat(msg)
			return(msg)
		}
		
	}
	
	if (count == max) {
		cat("\n")
		if (is.null(start)) print("Finished.") else print(paste("Finished. Elapsed time = ",round(difftime(ti, start, units="secs"),1)," sec."))
	}
	
	if (count==0) return(Sys.time())
	
}





# Funktion nimmt eine lange Liste von Variablen und schiebt eine Variable von einer Spalte in Reihen, 
# sortiert nach einer ID.
# Bsp.:
# > df
#   id ilabel value
# 1  A  item1     1
# 2  A  item2     2
# 3  A  item3     3
# 4  B  item1     4
# 5  B  item2     5
# 6  B  item3     6
# > swap(df, 1, 3, labels=c("i1","i2","i3"))
#   id i1 i2 i3
# 1  A  1  2  3
# 2  B  4  5  6

swapCol <- function(df, idCol, valueCol, labels=NULL) {
	
	df[,idCol] <- as.factor(df[,idCol])
	vpn <- length(levels(df[,idCol]))
	slice <- df[df[,idCol]==levels(df[,idCol])[1],]	
	vars <- nrow(slice)
	
	res <- data.frame(id=vector(mode="character", 0), data= matrix(NA, 0, vars))
	
	start <- Sys.time()
	countdown(0)
	
	for (i in 1:vpn) {
		slice <- df[df[,idCol]==levels(df[,idCol])[i],]
		res <- rbind(res, data.frame(id=levels(df[,idCol])[i], data=t(slice[,valueCol])))
		countdown(i, vpn, start)
	}
	
	if (!is.null(labels)) colnames(res)[2:ncol(res)] <- labels
	return(res)
}



bargraph.CI2 <- function(...) {
	# CI mit bootstrap
	library(Hmisc)
	library(sciplot)
	my.ci <- function(x) c(smean.cl.boot(x)[2],smean.cl.boot(x)[3])

	bargraph.CI(..., ci.fun=my.ci, sub="95% confidence intervals with bootstrap")
}



lineplot.CI2 <- function(...) {
	# CI mit bootstrap
	library(Hmisc)
	library(sciplot)
	my.ci <- function(x) c(smean.cl.boot(x)[2],smean.cl.boot(x)[3])

	lineplot.CI(..., ci.fun=my.ci, sub="95% confidence intervals with bootstrap")
}




#------------------------------------
#-- Item Selection
#------------------------------------

# set = dataframe with items
# key = keys of items
# dels = positions to be deleted
# tset = TRUE/FALSE-vector with test set
# add = variables to be correlated with resulting scale (before and after deletion)
delInfo <- function(set, key, dels = NULL, tset=1, add=NULL) {
	if (!is.null(dels)) {
		set1 <- set[tset,-dels]
		set2 <- set[!tset,-dels]
		key1 <- key[-dels]
	} else {
		set1 <- set[tset,]
		set2 <- set[!tset,]
		key1 <- key
	}
	set.full <- set[tset,]
	
	full <- cronbach(set.full, key, verbose="none")
	short <- cronbach(set1, key1, full.scores = data.frame(full, add))
	
	short2 <- cronbach(set2, key1)
	
	diff <- colMeans(set1, na.rm=TRUE)
	trenn <- apply(set1, 2, function(x) abs(cor(x, full, use="p")))
	plot(data.frame(diff, trenn))
	text(data.frame(diff+0.15, trenn), colnames(set1))
}



getStanine.z <- function() {
	stan <- c(0.04, 0.11, 0.23, 0.4, 0.6, 0.77, 0.89, 0.96)
	round(qnorm(stan),2)
}

Try the FUtils package in your browser

Any scripts or data that you put into this service are public.

FUtils documentation built on May 2, 2019, 4:40 p.m.