AnthroStats: Anthropology Statistics

#' Sex estimation following Walker 2008
#' 
#'
#' @param X = data.frame containing individual scores, glabella, orbits, mental, mastoid, and nuchal
#' @param population = Either "American-English" or "Native-American"
#' @param equations = Either "all" or any combination of "GlMaMe", "GlMa", "GlMe", "MeMa", "OrMe", and "NuMa"
#' @param research = TRUE or FALSE if research statistics should be calculated
#'
#' @keywords walker_cranial.a
#' @export
#' walker_cranial.a()

walker_cranial.a <- function(X = NULL, population = "American-English", equations = "all", research = FALSE) {
	#If all equations create column length	
	if(length(equations) == 1 && equations[1] == "all") {
		equations <- c("GlMaMe", "GlMa", "GlMe", "MeMa", "OrMe", "NuMa")
	}
	
	clength <- length(equations) 
	results <- data.frame(matrix(NA, ncol=(1 + (clength*4)), nrow=nrow(X))) #results data.frame
	
	columnnames <- c("ID") #First column is ID
	for(i in equations) {
		columnnames <- c(columnnames, paste(i, "_d_score", sep=""))
		columnnames <- c(columnnames, paste(i, "_sex", sep=""))
		columnnames <- c(columnnames, paste(i, "_probability", sep=""))
		columnnames <- c(columnnames, paste(i, "_correct_classification", sep=""))
	}
	colnames(results) <- columnnames #set columnnames for results

	if(research) {
		research_temp <- data.frame(matrix(NA, ncol=clength, nrow=nrow(X))) #research_temp data.frame 
		colnames(research_temp) <- equations #sets columnnames for research
		X$sex <- tolower(X$sex)
	}

	#I really don't like re-creating variables that are static, but this probably won't reduce performance too much
	d_score <- function(temp) {
		if(population == "American-English") {
			if(!is.na(temp$glabella) && !is.na(temp$mastoid) && !is.na(temp$mental) && xx == "GlMaMe") {
				score_temp <- (temp$glabella * -1.375) + (temp$mastoid * -1.185) + (temp$mental * -1.151) + 9.128    #Equation 1
				correct_classification_male <- 88.4
				correct_classification_female <- 86.4
			}
			if(!is.na(temp$glabella) && !is.na(temp$mastoid) && xx == "GlMa") {
				score_temp <- (temp$glabella * -1.568) + (temp$mastoid * -1.459) + 7.434 #Equation 2
				correct_classification_male <- 85.4
				correct_classification_female <- 82.9
			}
			if(!is.na(temp$glabella) && !is.na(temp$mental) && xx == "GlMe") {
				score_temp <- (temp$glabella * -1.525) + (temp$mental * -1.485) + 7.372 #Equation 3
				correct_classification_male <- 86.6
				correct_classification_female <- 82.1
			}
			if(!is.na(temp$mental) && !is.na(temp$mastoid) && xx == "MeMa") {
				score_temp <- (temp$mental * -1.629) + (temp$mastoid * -1.415) + 7.382 #Equation 4
				correct_classification_male <- 79.9
				correct_classification_female <- 83.6
			}
			if(!is.na(temp$orbit) && !is.na(temp$mental) && xx == "OrMe") {
				score_temp <- (temp$orbit * -1.007) + (temp$mental * -1.850) + 6.018 #Equation 5
				correct_classification_male <- 78.1
				correct_classification_female <- 82.9
			}
			if(!is.na(temp$nuchal) && !is.na(temp$mastoid) && xx == "NuMa") {
				score_temp <- (temp$nuchal * -0.7) + (temp$mastoid * -1.559) + 5.329 #Equation 6
				correct_classification_male <- 76.8
				correct_classification_female <- 77.9
			}
		}
		if(population == "Native-American") {
			if(!is.na(temp$orbit) && !is.na(temp$mental) && xx == "OrMe") {
				score_temp <- (temp$orbit * -0.499) + (temp$mastoid * -1.599) + 5.329 #Equation 7
				correct_classification_male <- 78.1
				correct_classification_female <- 77.9
			}
			if(!is.na(temp$mental) && !is.na(temp$mastoid) && xx == "MeMa") {
				score_temp <- (temp$mental * -0.576) + (temp$mastoid * -1.136) + 4.765 #Equation 8
				correct_classification_male <- 74.1
				correct_classification_female <- 72.7
			}
			if(!is.na(temp$glabella) && !is.na(temp$mastoid) && xx == "GlMa") {
				score_temp <- (temp$glabella * -0.797) + (temp$mastoid * -1.085) + 5.025 #Equation 9 
				correct_classification_male <- 69.5
				correct_classification_female <- 82.9
			}
		}
		return(c(score_temp, (1 / (1 + exp(1) ^ -score_temp)), correct_classification_male, correct_classification_female)) #Returns into nested loops the results
	}

	#loop for calculating results
	for(i in 1:nrow(X)) {
		temp <- X[i, drop=FALSE] #by row
		results[i,1] <- temp$ID #copy ID of specimen
		for(xx in equations) {
			score_temp <- d_score(temp)
			if(score_temp[1] < 0) {
				results[i,][[paste(xx,"_d_score", sep="")]] <- score_temp[1]
				results[i,][[paste(xx,"_sex", sep="")]] <- "Male"
				results[i,][[paste(xx,"_probability", sep="")]] <- (1 - score_temp[2])
				results[i,][[paste(xx,"_correct_classification", sep="")]] <- score_temp[3]
			}
			if(score_temp[1] > 0) {
				results[i,][[paste(xx,"_d_score", sep="")]] <- score_temp[1]
				results[i,][[paste(xx,"_sex", sep="")]] <- "Female"
				results[i,][[paste(xx,"_probability", sep="")]] <- score_temp[2]
				results[i,][[paste(xx,"_correct_classification", sep="")]] <- score_temp[4]
			}
			if(research) { 
				if(results[i,][[paste(xx,"_sex", sep="")]] == temp$sex) {
					rr <- 1
				}
				else rr <- 0
				research_temp[i,][[xx]] <- rr
			}		
		}#End of internal for loop
	}#End of for loop

	#calculate research statistics
	if(research) {
		research_results <- data.frame(matrix(0, ncol=3, nrow=ncol(research_temp))) 
		colnames(research_results) <- c("Equation","Correct_classification","Incorrect_classification")
		for(i in 1:ncol(research_temp)) {
			FP <- length(research_temp[research_temp[,i] == 0,][,i])
			TP <- length(research_temp[research_temp[,i] == 1,][,i])
			correct_classification <- performance.a(X=data.frame(FP,TP), type = "PPV")
			incorrect_classification <- performance.a(X=data.frame(FP,TP), type = "FDR")
			research_results[i,1] <- colnames(research_temp)[i]
			research_results[i,2] <- correct_classification[1,1]
			research_results[i,3] <- incorrect_classification[1,1]
		}
		return(list(results, research_results, research_temp))
	}
	
	return(results)
}