Nothing
#drastically simplified, March 14, 2009 from two loops to 1 matrix operation
#modified July 2, 2013 to allow not counting the diagonal
#modified 08/31/23 to not use the lower.tri but rather the complete matrix
"count.pairwise" <-
function (x, y=NULL,diagonal=TRUE)
{ .Deprecated("pairwiseCount",msg="count.pairwise is deprecated. Please use the pairwiseCount function.")
if(is.null(y)) {n <- t(!is.na(x)) %*% (!is.na(x)) } else { n <- t(!is.na(x)) %*% (!is.na(y)) }
if(!diagonal) diag(n) <- NA
return(n) }
pairwiseDescribe <- function(x,y=NULL,diagonal=FALSE,...) {
cp <- pairwiseCount(x,y=y,diagonal=diagonal)
#cp <- as.vector(cp[lower.tri(cp,diag=diagonal)]) #this overestimates the and also gives the wrong answer if diagonal=TRUE
cp <- as.vector(cp)
describe(cp,...)
}
# replaces count.pairwise
# slightly improved
# "pairwiseCount" <-
# function (x, y=NULL,diagonal=TRUE)
# { x <- !is.na(x)
# if(is.null(y)) {n <- t(x) %*% (x) } else { n <- t(x) %*% (!is.na(y)) }
# if(!diagonal & is.null(y)) diag(n) <- NA
# return(n) }
#improvement using crossprod reduces memory load and speeds it up noticably.
#For 255K cases and 953 variables, the timings are 114 seconds versus 193 before
#timings seem to vary as square of variables (from 100 to 800 at least)
"pairwiseCount" <-
function (x, y=NULL,diagonal=TRUE)
{ x <- !is.na(x)
if(is.null(y)) {n <- crossprod(x)} else { n <- crossprod(x,!is.na(y)) }
if(!diagonal & is.null(y)) diag(n) <- NA
return(n) }
#doesn't work yet
"pairwiseDelete" <- function(x,cut=0) {
if(isCorrelation(x)) {#we have already found correlations, get rid of the least number of NAs possible
nvar <- ncol(x)
nmissing <-apply(x,2, function(xx) sum(is.na(xx)))
max.missing <- max(nmissing,na.rm=TRUE)
select <- (nmissing == max.missing)
newx <- x[!select,!select]
} else { #do a count pairwise and then get rid of the least number <= cut
if(ncol(x)!= nrow(x) ) {pc <- pairwiseCount(x)}
#OK, it is a matrix of counts
nbad <- apply(pc,2, function(xx) sum(xx < cut))
max.bad <- max(nbad)
select <- (nbad == max.bad)
newx <- pc[!select,!select] }
return(newx)
}
# created July 30, 2018 to impute correlations
"pairwiseImpute" <- function(keys,R,fix=FALSE) {
cl <- match.call()
if(!isCorrelation(R)) {message("Correlations found from raw data")
cp <- pairwiseCount(R)
R <- cor(R,use="pairwise")
} else {cp <- NULL
cpij <- NA}
n.var <- ncol(R)
diag(R) <- NA
if(is.list(keys)) {select <- sub("-","",unlist(keys))
select <- select[!duplicated(select)] } else {
keys <- keys2list(keys)
select <- selectFromKeyslist(colnames(R),keys)
select <- select[!duplicated(select)]}
R <- R[select,select,drop=FALSE]
if(!is.null(cp)) cp <- cp[select,select,drop=FALSE]
keynames <- colnames(keys)
keys <- make.keys(R,keys)
n.keys <- dim(keys)[2]
n.items <- dim(keys)[1]
abskeys <- abs(keys)
keynames <- colnames(keys)
num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale
n.keys <- ncol(keys)
#brute force
av.r <- matrix(NA,n.keys,n.keys)
count.r <- matrix(NA,n.keys,n.keys)
percent <- matrix(NA,n.keys,n.keys)
size <- matrix(NA,n.keys,n.keys)
for(i in 1:n.keys) {
for( j in 1:n.keys) {
selectij <- R[keys[,i] >0,keys[,j] > 0]
if(!is.null(cp)) {cpij <- cp[keys[,i] >0,keys[,j] > 0]}
av.r[i,j] <- mean(selectij,na.rm=TRUE)
size[i,j] <- mean(cpij,na.rm=TRUE)
count.r[i,j] <- sum(!is.na(selectij))
percent[i,j] <- count.r[i,j]/(count.r[i,j] + sum(is.na(selectij)))
}
}
colnames(av.r) <- rownames(av.r) <- colnames(keys)
colnames(count.r) <- rownames(count.r) <- colnames(keys)
colnames(percent) <- rownames(percent) <- colnames(keys)
colnames(size) <- rownames(size) <- colnames(keys)
if(is.null(cp)) size <- NULL
if(fix) {
for(i in 1:n.keys) {
for( j in 1:n.keys) {
temp <- which(is.na( R[keys[,i] > 0,keys[,j]>0]))
R[keys[,i] > 0,keys[,j]>0][temp] <- av.r[i,j]
}
}
diag(R) <- 1
result <- list(av.r =av.r,count=count.r,percent=percent,imputed=R,Call=cl)
} else {
result <- list(av.r =av.r,count=count.r,percent=percent,size=size,Call=cl) }
class(result) <- c("psych","pairwise")
return(result)
}
#modified July 20th to give more useful summaries
"pairwiseReport" <- function(x,y=NULL,cut=0,diagonal=FALSE,...) {
cl <- match.call()
if(isCorrelation(x)) {#we have already found correlations, flag those that are missing
report <- which(is.na(x),arr.ind=TRUE)
} else {
#do a count pairwise and then get report those below <= cut
if(NCOL(x)!=NROW(x)) {pc <- pairwiseCount(x,y)} else {pc <- x}
if(is.null(y)) {cp <- as.vector(pc[lower.tri(pc,diag=diagonal)])
des <- describe(cp,...)} else {des <- NULL}
report <- which((pc <= cut),arr.ind=TRUE)
}
if(length(report >0)) {if(is.null(y)) report <- report[report[,1] > report[,2],,drop=FALSE]
N <- rep(0,dim(report)[1])
for (i in 1:dim(report)[1] ){N[i] <- pc[report[i,1],report[i,2]]}
report <- cbind(report,N)
if(is.null(y) ) {
df <- data.frame(rows=colnames(x)[report[,1]],cols=colnames(x)[report[,2]],N=N) } else { #although this can be pretty big
df <- data.frame(rows =colnames(x)[report[,1]],cols =colnames(y)[report[,2]], N=N) }
result <- list(description=des, rows=table(df$rows),cols=table(df$cols),cut = cut, df=df, Call=cl)} else {
result <- list(description=des,rows=NULL,cols=NULL,cut=cut,df=NULL,Call=cl)}
class(result) <- c("psych","pairwiseCounts")
return(result)
}
#Added Feb 1, 2021 to allow smaller samples for faster processing of enormous data sets (e.g. sapa)
"pairwiseSample" <- function(x,y=NULL,diagonal=FALSE,size=100,...) {
cl <- match.call()
n.items <- NCOL(x)
size <- min(size,n.items)
ss <- sample(n.items,size)
#do a count pairwise and then get report those below <= cut
if(NCOL(x)!=NROW(x)) {pc <- pairwiseCount(x[ss])}
cp <- as.vector(pc[lower.tri(pc,diag=diagonal)])
des <- describe(cp,quant=c(.05,.25,.5,.75,.95))
result <- des
# class(result) <- c("psych","pairwiseCounts")
return(result)
}
"pairwisePlot" <- function(x,y=NULL,upper=TRUE,diagonal=TRUE,labels=TRUE,show.legend=TRUE,n.legend=10,colors=FALSE,gr=NULL,minlength=6,xlas=1,ylas=2,main="Relative Frequencies",count=TRUE,...) {
if(count){ r <- pairwiseCount(x=x,y=y,diagonal=diagonal)} else {r <- x}
if(!upper) r[col (r) < row(r) ] <- NA #blank out the upper diagonal
if(!diagonal) r[col(r) == row(r)] <- NA
nvar <- NROW(r)
nf <- NCOL(r)
MAR <- 5
if(is.null(colnames(x))) colnames(r) <- paste0("V",1:nf)
if(is.null(rownames(x))) rownames(r) <- paste0("V",1:nvar)
if(!labels) {minlength <- NULL
max.len <- 1} else {
max.len <- min(max(nchar(rownames(r)))/6,minlength)}
r <- r/max(r,na.rm=TRUE)
zlim <- c(0,1)
if(colors) {
if(missing(gr)) {gr <- colorRampPalette(c("red","white","blue"))}
colramp <- gr(n.legend)
} else {
colramp <- grey((n.legend:0)/n.legend)}
#colramp <- adjustcolor(colramp,alpha.f =alpha)
if(nvar != nf) { r <- t(r) }
#if(!is.null(select)) {r <- r[select,select]
# pval <- pval[select,select]
# nvar <- length(select)
# }
#reverse the order of the columns (if square)
ord1 <- seq(nvar,1,-1)
#if(nf == nvar) {
r <- r[,ord1]
#}
#reorder the columns to allow image to work
#MAR <- 5
par(mar = c(MAR +max.len,MAR+max.len, 4, .5))
line <- NA
tick <- TRUE
if(show.legend) { #set it up to do two plots
layout(matrix(c(1,2),nrow=1),widths=c(.9,.1),heights=c(1,1))
}
image(r,col=colramp,axes=FALSE,main=main,zlim=zlim)
box()
if(labels) {
if(!is.null(minlength)) {
rownames(r) <- abbreviate(rownames(r),minlength = minlength)
colnames(r) <- abbreviate(colnames(r),minlength = minlength)
max.len <- max(nchar(rownames(r)))/6}
at1 <- (0:(nf-1))/(nf-1)
at2 <- (0:(nvar-1)) /(nvar-1)
lab1 <- rownames(r)
lab2 <- colnames(r)
axis(2,at=at2,labels=lab2,las=ylas,...)
tick <- FALSE
axis(1,at=at1,labels=lab1,las=xlas,line=line,tick=tick,...)
}
#screen 2
leg <- matrix(seq(from=zlim[1],to=zlim[2],by =(zlim[2] - zlim[1])/n.legend),nrow=1)
par(mar=c(MAR,0, 4,3))
image(leg,col=colramp,axes=FALSE,zlim=zlim)
at2 <- seq(0,1,1/n.legend)
labels =seq(zlim[1],zlim[2],(zlim[2]-zlim[1])/(length(at2)-1))
axis(4,at=at2,labels =labels,las=2,...)
#put them back in the logical order
ord1 <- seq(nvar,1,-1)
r <- r[,ord1 ]
invisible(r)
}
"pairwiseZero" <- function(x,y=NULL, min=0, short=TRUE) {
nvar <- NCOL(x)
ynull <- is.null(y)
if(ynull) y <- x
ny <- NCOL(y)
count <- 0
loc <- list()
rn <- colnames(x)
cn <- colnames(y)
for(i in 1:(nvar-(ynull))) {
for(j in (min(i,ny) + (ynull)):ny) {
minxy <- min(tableF(x[,i],y[,j]), na.rm=TRUE)
if(minxy <= min) {count <- count + 1
loc[[count]] <- c(rn[i],cn[j])}
}
}
if(count>0) {cat("\n Number of cells with values <= ", min," = ", count,"\n")
count.df <- matrix(unlist(loc), ncol=2, byrow=TRUE)
colnames(count.df) <- c("row", "column")
print(count.df)
tables <- list()
for (i in 1:count) {
cat("\n Item pair ", count.df[i,],"\n")
tables[[i]] = tableF(x[,count.df[i,1]],y[,count.df[i,2]] )
if(!short) {# cat("\n" ,count.df[i, ],"\n" )
print(tables[[i]])}
}
names(tables) <- paste(count.df[,1],count.df[,2])
} else {cat("\n Minimum cell all exceed", min)}
result <- list(items=count.df, tables=tables)
invisible(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.