##
## This version is for the R Extension
##
## Edit 10:02 03/10/2013 1: altered call to use PACKAGE dll gdm
## Edit 10:26 08/10/2013 1: added wordsize function to determine if 32bit or 64bit OS.
## Edit 08:00 22/11/2013 1: fixed complaints from R-Forge checking
## Edit 13:27-US-EST 24/2/14 1: Added formatGDMData and createSitePair functions -MDL
#
"gdm" <-
function (data, geo=FALSE, splines=NULL, quantiles=NULL)
{
options(warn.FPU = FALSE)
##
## sanity check on the data table
##
if(!is.data.frame(data)){
warning("The input site-pair data table is not a data frame, so the GDM results may not be correct.")
}
if ( ncol(data) < 6 )
stop("Not enough columns in data")
if ( nrow(data) < 1 )
stop("Not enough rows in data")
##
## check that the response data is [0..1]
##
rtmp <- data[,1]
if (length(rtmp[rtmp<0]) > 0)
stop("Response data has negative values")
if (length(rtmp[rtmp>1]) > 0)
stop("Response data has values greater than 1")
##
## current data format is response,weights,X0,Y0,X1,Y1 before any predictor data (thus 6 leading columns)
##
LEADING_COLUMNS <- 6
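## e.g. with geo=FALSE and two environmental predictors, the expected column order is:
## distance, weights, xCoord.S1, yCoord.S1, xCoord.S2, yCoord.S2, s1.pred1, s1.pred2, s2.pred1, s2.pred2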
if ( geo )
{
nPreds <- ( ncol(data) - LEADING_COLUMNS ) / 2 + 1
}
else
{
nPreds <- ( ncol(data) - LEADING_COLUMNS ) / 2
}
if ( nPreds < 1 )
stop("Data has NO predictors")
##
## setup the predictor name list
##
if ( geo )
{
if ( nPreds > 1 )
predlist <- c("Geographic", names(data)[(LEADING_COLUMNS+1):(LEADING_COLUMNS+nPreds-1)])
else
predlist <- c("Geographic")
}
else
{
predlist <- names(data)[(LEADING_COLUMNS+1):(LEADING_COLUMNS+nPreds)]
}
##
## deal with the splines and quantiles
##
if (is.null(quantiles))
{
##
## generate quantiles internally from the data
##
if ( is.null(splines) )
{
nSplines <- 3
quantvec <- rep(0, nPreds * nSplines)
splinvec <- rep(nSplines, nPreds)
if ( geo )
{
## get quantiles for the geographic distance
v <- sqrt((data[,3]-data[,5])^2 + (data[,4]-data[,6])^2)
quantvec[1] <- min(v)
quantvec[2] <- median(v)
quantvec[3] <- max(v)
if ( nPreds > 1 )
{
## get quantiles for the environmental predictors
for (i in seq(from = 1, to = nPreds-1, by = 1))
{
v <- c(data[,i+LEADING_COLUMNS], data[,i+LEADING_COLUMNS+nPreds-1])
index = i * nSplines
quantvec[index+1] <- min(v)
quantvec[index+2] <- median(v)
quantvec[index+3] <- max(v)
}
}
}
else
{
## get quantiles for the environmental predictors after skipping geographic preds
for (i in seq(from = 1, to = nPreds, by = 1))
{
v <- c(data[,i+LEADING_COLUMNS], data[,i+LEADING_COLUMNS+nPreds])
index = (i-1) * nSplines
quantvec[index+1] <- min(v)
quantvec[index+2] <- median(v)
quantvec[index+3] <- max(v)
}
}
}
else
{
##
## otherwise check that the supplied splines vector has enough data and minimum spline values of 3
##
if ( length(splines) != nPreds )
{
stop(paste("splines argument has", length(splines), "items but needs", nPreds, "items."))
}
## count the total number of user defined splines to dimension the quantiles vector
quantvec <- rep(0, sum(splines))
splinvec <- splines
if ( geo )
{
if ( splines[1] < 3 )
stop("Must greater than or equal to 3 splines per predictor")
## get quantiles for the geographic distance
v <- sqrt((data[,3]-data[,5])^2 + (data[,4]-data[,6])^2)
quantvec[1] <- min(v) ## 0% quantile
quantvec[splines[1]] <- max(v) ## 100% quantile
quant_increment <- 1.0 / (splines[1]-1)
this_increment <- 1
for (i in seq(from = 2, to = (splines[1]-1), by = 1))
{
## mid % quantiles
quantvec[i] <- quantile(v,quant_increment*this_increment)
this_increment <- this_increment + 1
}
if ( nPreds > 1 )
{
## get quantiles for the environmental predictors
current_quant_index <- splines[1]
for (i in seq(from = 1, to = nPreds-1, by = 1))
{
num_splines <- splines[i+1]
if ( num_splines < 3 )
stop("Must greater than or equal to 3 splines per predictor")
v <- c(data[,i+LEADING_COLUMNS], data[,i+LEADING_COLUMNS+nPreds-1])
quantvec[current_quant_index+1] <- min(v) ## 0% quantile
quantvec[current_quant_index+num_splines] <- max(v) ## 100% quantile
quant_increment <- 1.0 / (num_splines-1)
this_increment <- 1
for (j in seq(from = 2, to = (num_splines-1), by = 1))
{
## mid % quantiles
quantvec[current_quant_index+j] <- quantile(v,quant_increment*this_increment)
this_increment <- this_increment + 1
}
current_quant_index <- current_quant_index + num_splines
}
}
}
else
{
## get quantiles for the environmental predictors
current_quant_index <- 0
for (i in seq(from = 1, to = nPreds, by = 1))
{
num_splines <- splines[i]
if ( num_splines < 3 )
stop("Must greater than or equal to 3 splines per predictor")
v <- c(data[,i+LEADING_COLUMNS], data[,i+LEADING_COLUMNS+nPreds])
quantvec[current_quant_index+1] <- min(v) ## 0% quantile
quantvec[current_quant_index+num_splines] <- max(v) ## 100% quantile
quant_increment <- 1.0 / (num_splines-1)
this_increment <- 1
for (j in seq(from = 2, to = (num_splines-1), by = 1))
{
## mid % quantiles
quantvec[current_quant_index+j] <- quantile(v,quant_increment*this_increment)
this_increment <- this_increment + 1
}
current_quant_index <- current_quant_index + num_splines
}
}
}
}
else
{
##
## user defined quantiles supplied as an argument
##
if ( is.null(splines) )
{
## check that there are nPreds * 3 quantiles in the user defined vector
if ( length(quantiles) != (nPreds * 3) )
{
stop(paste("There should be", (nPreds * 3), "items in the quantiles argument, not", length(quantiles), "items."))
}
## now check that each of the three quantiles for each predictor are in ascending order
for (i in seq(from = 1, to = nPreds, by = 1))
{
index = i * 3
if ((quantiles[index-1] < quantiles[index-2] ) ||
(quantiles[index] < quantiles[index-2]) ||
(quantiles[index] < quantiles[index-1]))
{
stop(paste("Wrong quantiles for predictor:", predlist[i]))
}
}
nSplines <- 3
quantvec <- quantiles
splinvec <- rep(nSplines, nPreds)
}
else
{
## check that there are sum(splines) quantiles in the user defined vector
if ( length(quantiles) != sum(splines) )
{
stop(paste("There should be", sum(splines), "items in the quantiles argument, not", length(quantiles), "items."))
}
## now check that each of the quantiles for each predictor are in ascending order
index = 0
for (i in seq(from = 1, to = nPreds, by = 1))
{
for (j in seq(from = 2, to = splines[i], by = 1))
{
if (quantiles[index+j] < quantiles[index+j-1])
{
stop(paste("Wrong quantiles for predictor:", predlist[i]))
}
}
index <- index + splines[i]
}
quantvec <- quantiles
splinvec <- splines
}
}
p1 <- 0
p2 <- 0
p3 <- 0
p4 <- 0
p5 <- rep(0,times=length(quantvec))
p6 <- rep(0,times=nrow(data))
p7 <- rep(0,times=nrow(data))
p8 <- rep(0,times=nrow(data))
##
## Call the dll function
##
z <- .C( "GDM_FitFromTable",
paste(getwd()),
as.matrix(data),
as.integer(geo),
as.integer(nPreds),
as.integer(nrow(data)),
as.integer(ncol(data)),
as.integer(splinvec),
as.double(quantvec),
gdmdev = as.double(p1),
nulldev = as.double(p2),
expdev = as.double(p3),
intercept = as.double(p4),
coeffs = as.double(p5),
response = as.double(p6),
preddata = as.double(p7),
ecodist = as.double(p8),
PACKAGE = "gdm"
)
## call <- match.call()
m <- match.call(expand.dots = F)
gdmOb = structure(list(dataname = m[[2]],
geo = geo,
sample = nrow(data),
gdmdeviance = z$gdmdev,
nulldeviance = z$nulldev,
explained = z$expdev,
intercept = z$intercept,
predictors = predlist,
coefficients = z$coeffs,
quantiles = quantvec,
splines = splinvec,
creationdate = date(),
observed = z$response,
predicted = z$preddata,
ecological = z$ecodist))
class(gdmOb) = c("gdm", "list")
return(gdmOb)
}
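##
## Example usage (a sketch, not run; 'sitePairTab' is a hypothetical site-pair table
## in the format described above, e.g. as produced by gdm.formatsitepair):
##
## mod <- gdm(sitePairTab, geo=TRUE)                       # default of 3 splines per predictor
## mod4 <- gdm(sitePairTab, geo=TRUE,
##             splines=rep(4, length(mod$predictors)))     # 4 splines per predictor, including geographic distance
## summary(mod)
## plot(mod)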
"plot.gdm" <-
function (model, plot.layout = c(2,2), plot.color = rgb(0.0,0.0,1.0), plot.linewidth=2.0)
{
options(warn.FPU = FALSE)
PSAMPLE <- 200
preddata <- rep(0,times=PSAMPLE)
##
## establish what plot layout to use
##
thisplot <- 0
one_page_per_plot <- FALSE
if ((plot.layout[1]==1) && (plot.layout[2]==1))
one_page_per_plot <- TRUE
else
par(mfrow=plot.layout)
##
## apply the link function and plot.....
##
plot( model$ecological,
model$observed,
xlab="Predicted Ecological Distance",
ylab="Observed Compositional Dissimilarity", type="n" )
points( model$ecological, model$observed, pch=20, cex=0.25, col=plot.color )
overlayX <- seq( from=min(model$ecological), to=max(model$ecological), length=PSAMPLE )
overlayY <- 1 - exp( - overlayX )
lines( overlayX, overlayY, lwd=plot.linewidth )
thisplot <- thisplot + 1
##
## use the raw data and plot.....
##
if (one_page_per_plot)
{
dev.new()
dev.next()
}
plot( model$predicted,
model$observed,
xlab="Predicted Compositional Dissimilarity",
ylab="Observed Compositional Dissimilarity", type="n" )
points( model$predicted, model$observed, pch=20, cex=0.25, col=plot.color )
overlayX <- overlayY <- seq( from=min(model$predicted), to=max(model$predicted), length=PSAMPLE )
lines( overlayX, overlayY, lwd=plot.linewidth )
thisplot <- thisplot + 1
##
## determine the max of all the predictor data
##
preds <- length(model$predictors)
predmax <- 0
splineindex <- 1
for ( i in 1:preds )
{
## only if the sum of the coefficients associated with this predictor is > 0.....
numsplines <- model$splines[i]
if ( sum(model$coefficients[splineindex:(splineindex+numsplines-1)]) > 0 )
{
## get predictor plot Y-data
z <- .C( "GetPredictorPlotData",
pdata = as.double(preddata),
as.integer(PSAMPLE),
as.double(model$coefficients[splineindex:(splineindex+numsplines-1)]),
as.double(model$quantiles[splineindex:(splineindex+numsplines-1)]),
as.integer(numsplines),
PACKAGE = "gdm"
)
v <- max(z$pdata)
if (v > predmax ) predmax <- v
}
splineindex <- splineindex + numsplines
}
##
## plot the predictors with non-zero sum of coefficients
##
splineindex <- 1
for ( i in 1:preds )
{
## only if the sum of the coefficients associated with this predictor is > 0.....
numsplines <- model$splines[i]
if ( sum(model$coefficients[splineindex:(splineindex+numsplines-1)]) > 0 )
{
if (one_page_per_plot)
{
dev.new()
dev.next()
}
else
{
thisplot <- thisplot + 1
if (thisplot > (plot.layout[1] * plot.layout[2]))
{
dev.new()
dev.next()
thisplot <- 1
par(mfrow=plot.layout)
}
}
## get predictor plot Y-data
z <- .C( "GetPredictorPlotData",
pdata = as.double(preddata),
as.integer(PSAMPLE),
as.double(model$coefficients[splineindex:(splineindex+numsplines-1)]),
as.double(model$quantiles[splineindex:(splineindex+numsplines-1)]),
as.integer(numsplines),
PACKAGE = "gdm"
)
plot( seq(from=model$quantiles[[(i*3)-2]],to=model$quantiles[[(i*3)]], length=PSAMPLE),
z$pdata,
xlab=model$predictors[i],
ylab=paste("f(", model$predictors[i], ")", sep="" ),
ylim=c(0,predmax), type="l" )
}
splineindex <- splineindex + numsplines
}
}
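##
## Example usage (a sketch, not run; 'mod' is a fitted gdm object as above):
##
## plot(mod)                                   # default 2 x 2 panel layout
## plot(mod, plot.layout=c(1,1))               # one plot per page
## plot(mod, plot.color="red", plot.linewidth=1.5)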
"predict.gdm" <-
function (model, data)
{
options(warn.FPU = FALSE)
#
# Call the dll function
#
predicted <- rep(0,times=nrow(data))
z <- .C( "GDM_PredictFromTable",
as.matrix(data),
as.integer(model$geo),
as.integer(length(model$predictors)),
as.integer(nrow(data)),
as.double(model$quantiles),
as.integer(model$splines),
as.double(c(model$intercept,model$coefficients)),
preddata = as.double(predicted),
PACKAGE = "gdm"
)
return(z$preddata)
}
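##
## Example usage (a sketch, not run; 'newSitePairs' is a hypothetical site-pair table
## with the same column structure as the table used to fit 'mod'):
##
## predDissim <- predict(mod, newSitePairs)    # vector of predicted compositional dissimilarities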
"gdm.transform" <-
function (model, data)
{
options(warn.FPU = FALSE)
#
# Call the dll function
#
transformed <- matrix(0,nrow(data),ncol(data))
z <- .C( "GDM_TransformFromTable",
as.integer(nrow(data)),
as.integer(ncol(data)),
as.integer(model$geo),
as.integer(length(model$predictors)),
as.integer(model$splines),
as.double(model$quantiles),
as.double(model$coefficients),
as.matrix(data),
trandata = as.double(transformed),
PACKAGE = "gdm"
)
##
## Convert transformed from a vector into a data frame before returning...
##
nRows <- nrow(data)
nCols <- ncol(data)
##
## z$trandata is the transformed data vector created in GDM_TransformFromTable
##
myVec <- z$trandata
pos <- 1
for (i in seq(from = 1, to = nCols, by = 1))
{
tmp <- myVec[seq(from=pos, to=pos+nRows-1)]
transformed[,i] <- tmp
pos <- pos + nRows
}
return(transformed)
}
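##
## Example usage (a sketch, not run; 'envTab' is a hypothetical table of predictor values,
## one row per site, assumed here to hold coordinate columns first when the model uses
## geographic distance):
##
## transEnv <- gdm.transform(mod, envTab)      # predictors rescaled into GDM (spline-transformed) space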
"summary.gdm" <-
function (model)
{
print( "", quote=F )
print( "", quote=F )
print( "GDM Modelling Summary", quote=F );
print( paste( "Creation Date: ", model$creationdate ), quote=F );
print( "", quote=F )
## call <- match.call()
m <- match.call(expand.dots = F)
print( paste( "Name: ", m[[2]] ), quote=F )
print( "", quote=F )
print( paste( "Data: ", model$dataname ), quote=F )
print( "", quote=F )
print( paste( "Samples: ", model$sample ), quote=F )
print( "", quote=F )
print( paste( "Use Geographical Distance: ", model$geo ), quote=F )
print( "", quote=F )
print( paste( "NULL Deviance: ", model$nulldeviance ), quote=F )
print( paste( "GDM Deviance: ", model$gdmdeviance ), quote=F )
print( paste( "Deviance Explained: ", model$explained ), quote=F )
print( "", quote=F )
print( paste( "Intercept: ", model$intercept ), quote=F )
print( "", quote=F )
thiscoeff <- 1
thisquant <- 1
for ( i in 1:length(model$predictors) ) {
print( paste( "Predictor ",i,": ",model$predictors[[i]], sep="" ), quote=F )
print( paste( "Splines: ",model$splines[[i]], sep="" ), quote=F )
numsplines <- model$splines[[i]]
for ( j in 1:numsplines ) {
if ( j == 1 ) print( paste( "Min Quantile: ",model$quantiles[[thisquant]], sep="" ), quote=F )
else if ( j == numsplines ) print( paste( "Max Quantile: ",model$quantiles[[thisquant]], sep="" ), quote=F )
else print( paste( round(100/(numsplines-1),digits=2),"% Quantile: ",model$quantiles[[thisquant]], sep="" ), quote=F )
thisquant <- thisquant + 1
}
for ( j in 1:numsplines ) {
print( paste( "Coefficient[",j,"]: ",model$coefficients[[thiscoeff]], sep="" ), quote=F )
thiscoeff <- thiscoeff + 1
}
print( "", quote=F )
}
}
##"gdm.getwordsize" <-
##function()
##{
## p1 <- 0
## z <- .C( "GetWordSize", wsize = as.integer(p1), PACKAGE = "gdm" )
## return(z$wsize)
##}
gdm.formatsitepair <- function(bioData, bioFormat, dist="bray", abundance=F, siteColumn=NULL, XColumn, YColumn, sppColumn=NULL, abundColumn=NULL, sppFilter=0, predData, distPreds=NULL, weightType="def", custWeightVect=NULL, maxPairs=NULL){
##Takes species data from a variety of commonly used formats and transforms it into the
##required format for GDM
##Input Variables:
##bioData = the input species data for the gdm
##bioFormat = the format in which the species data are supplied to the function
##dist = the dissimilarity index, as defined by the vegdist function in the vegan package
##abundance = whether the species data represent abundance or presence-absence
##XColumn = the x coordinates of the sample sites
##YColumn = the y coordinates of the sample sites
##siteColumn = the name of the column in the species data which holds the site identification, needed for bioFormat 1
##abundColumn = the name of the column which holds the abundance values, needed for bioFormat 2
##sppColumn = the name of the column which holds the species code or name, needed for bioFormat 2
##sppFilter = the minimum number of species a site must contain to be retained
##predData = environmental data to be tacked onto the species data
##distPreds = a list of prediction matrices
##weightType = how the weights of the site-pair table are calculated
##custWeightVect = custom weights vector
##Output Variables:
##outTable = the fully calculated site-pair table for GDM
###########################
#bioData <- sitePair15
#bioFormat <- 4
#dist <- "bray"
#abundance <- F
#siteColumn <- NULL
#XColumn <- "xCoord.S1"
#YColumn <- "yCorrd.S1"
#sppColumn <- NULL
#sppFilter <- 0
#abundColumn <- NULL
#predData <- envDat
#distPreds <- predMats
#weightType <- "def"
#custWeightVect <- NULL
#maxPairs <- 360
###########################
##required libraries
#require(raster)
#require(reshape2)
#require(vegan)
#print(class(predData))
##if bioFormat is not a number, exit function
if(is.numeric(bioFormat)==FALSE){
stop("bioFormat variable is not a number")
}
##if maxPairs is not a number, then exit function
if(is.numeric(maxPairs)==FALSE & is.null(maxPairs)==FALSE){
stop("maxPairs variable is not a number")
}
##makes sure that sppFilter is a number, if not exit function
if(is.numeric(sppFilter)==FALSE){
stop("sppFilter variable is not a number")
}
if(weightType!="def" & weightType!="cust" & weightType!="rich"){
stop("weightType variable must be one of 'def', 'cust', or 'rich'")
}
if(bioFormat==3 & weightType=="rich"){
stop("cannot calculate richness weights without species data")
}
##warns if distPreds are not matrices
for(mat in distPreds){
if(!is.matrix(mat)){
warning("Not all of the supplied dissimilarity matrices are of class 'matrix'. Even if the data are incorporated without error, the results may be incorrect.")
}
}
##checks input data format
##species data as site-species matrix
if(bioFormat==1){
##processes site-species matrix
siteNum <- which(colnames(bioData)==siteColumn)
##point locations
x <- which(colnames(bioData)==XColumn)
##checks to see if coordinates are found with bioData, if not extracts them from envData
##coordinates need to be in bioData if envData is raster
if(is.na(x[1])==T){
x <- which(colnames(predData)==XColumn)
y <- which(colnames(predData)==YColumn)
locs <- predData[c(x,y)]
sppDat <- bioData[,-c(siteNum)]
}else{
y <- which(colnames(bioData)==YColumn)
locs <- bioData[c(x,y)]
sppDat <- bioData[,-c(siteNum, x, y)]
}
##filters out sites with low species count
##prep for filtering
headDat <- bioData[,c(siteNum)]
#sppDat[as.numeric(sppDat)>=1]=1
#sppDat[as.numeric(sppDat)==0]=0
sppDat[sppDat>=1]=1
sppDat[sppDat==0]=0
sppTotals <- cbind(as.data.frame(headDat), apply(sppDat, 1, function(m){sum(as.numeric(m))}))
##filters out data
filterBioDat <- subset(sppTotals, sppTotals[colnames(sppTotals)[2]] >= sppFilter)
spSiteCol <- filterBioDat["headDat"]
colnames(spSiteCol) <- siteColumn
##reassembles data after filtering
bioData <- merge(spSiteCol, bioData, by=siteColumn)
##checks unique sites against rasters
if(class(predData)=="RasterStack" | class(predData)=="RasterLayer" | class(predData)=="RasterBrick"){
#spSiteCol = bioData[siteColumn]
siteRaster <- predData[[1]]
##raster based site
#x = which(names(bioData)==XColumn)
#y = which(names(bioData)==YColumn)
#locs = bioData[c(x,y)]
cellID <- as.data.frame(cellFromXY(siteRaster, locs))
names(cellID)[names(cellID)=="cellFromXY(siteRaster, locs)"] <- "cellName"
names(spSiteCol)[names(spSiteCol)==siteColumn] <- "siteY"
##checks size, correcting if needed
##if cellID less than siteColumn
if(length(unique(cellID$cellName))<length(unique(spSiteCol$siteY))){
warning("fewer unique cells identified than unique sites in the site column, using cell locations as sites")
}else if(length(unique(cellID$cellName))>length(unique(spSiteCol$siteY))){
##if siteColumn less than cellID
warning("more unique cells identified than unique sites in the site column, defaulting sites to cells")
}
##aggregates bioData data into classes by raster cells
newDataTable <- bioData[-c(siteNum, x, y)]
modDataTable <- cbind(cellID, newDataTable)
#uniqueCells = unique(cellID$cellName)
if(abundance==T){
##compress by cell
byCell <- aggregate(modDataTable, modDataTable[1], FUN=mean)
##removes unneeded columns
inDataTable <- byCell[-c(1, 2)]
}else{
##compress by cell
byCell <- aggregate(modDataTable, modDataTable[1], FUN=sum)
##transforms data to binary
byCell <- byCell[-c(1, 2)]
byCell[byCell>=1] <- 1
inDataTable <- byCell
}
}else{
predData <- merge(spSiteCol, predData, by=siteColumn)
predSite <- which(names(predData)==siteColumn)
predData <- predData[-predSite]
#x = which(names(bioData)==XColumn)
#y = which(names(bioData)==YColumn)
inDataTable <- bioData[-c(siteNum, x, y)]
}
##creates distance matrix
if(abundance==F){
distData <- vegdist(inDataTable, dist, binary=T)
}else{
distData <- vegdist(inDataTable, dist, binary=F)
}
########################################################################
##species data as x,y,species list
}else if(bioFormat==2){
##insert data if not available
if(is.null(abundColumn)){
warning("No abundance column specified, assuming all data is presence only")
bioData["reallysupercoolawesomedata"] <- 1
abundColumn <- "reallysupercoolawesomedata"
}
##point locations
x <- which(names(bioData)==XColumn)
y <- which(names(bioData)==YColumn)
locs <- bioData[c(x,y)]
##processes coordinates, species list
##if environmental data is a raster, checks size of data to make sure there are no problems with end table binding
if(class(predData)=="RasterStack" | class(predData)=="RasterLayer" | class(predData)=="RasterBrick"){
##raster based site
##site raster
siteRaster <- predData[[1]]
cellID <- as.data.frame(cellFromXY(siteRaster, locs))
names(cellID)[names(cellID)=="cellFromXY(siteRaster, locs)"] <- "cellName"
##if siteColumn has been provided
if(!is.null(siteColumn)){
##species site
spSiteCol <- bioData[siteColumn]
names(spSiteCol)[names(spSiteCol)==siteColumn] <- "siteY"
##checks size, correcting if needed
##if cellID less than siteColumn
if(length(unique(cellID$cellName))<length(unique(spSiteCol$siteY))){
warning("fewer unique cells identified than unique sites in the site column, using cell locations as sites")
}else if(length(unique(cellID$cellName))>length(unique(spSiteCol$siteY))){
##if siteColumn less than cellID
warning("more unique cells identified than unique sites in the site column, defaulting sites to cells")
}
##removes unneeded columns
siteNum <- which(names(bioData)==siteColumn)
newDataTable <- bioData[-c(siteNum, x, y)]
}else{
##removes unneeded columns
newDataTable <- bioData[-c(x, y)]
}
##casts data into site-species matrix
modDataTable <- cbind(cellID, newDataTable)
names(modDataTable)[names(modDataTable)==sppColumn] <- "spcodeUltimateCoolness"
castData <- dcast(modDataTable, cellName~spcodeUltimateCoolness, value.var=abundColumn)
##aggregate data by cellID, and filters data by low spp number
if(abundance==T){
##compress by cell
cellName <- which(names(castData)=="cellName")
byCell <- aggregate(castData, castData[cellName], FUN=mean)
byCell <- byCell[-2]
##filters species data
sppDat <- byCell[-c(cellName)]
headDat <- byCell[c(cellName)]
sppDat[sppDat>=1] <- 1
sppDat[is.na(sppDat)] <- 0
sppTotals <- cbind(headDat, rowSums(sppDat))
##filters out data
filterBioDat <- subset(sppTotals, sppTotals["rowSums(sppDat)"] >= sppFilter)
spSiteCol <- filterBioDat[cellName]
##reassembles data after filtering
cellXY <- xyFromCell(siteRaster, spSiteCol$cellName)
spSiteCol <- cbind(spSiteCol, cellXY)
bioData <- merge(spSiteCol, byCell, by=cellName)
##removes unneeded columns
inDataTable <- byCell[-c(1)]
}else{
##compress by cell
cellName <- which(names(castData)=="cellName")
byCell <- aggregate(castData, castData[cellName], FUN=sum)
##remove redundancy
byCell <- byCell[-2]
##filters species data
sppDat <- byCell[-c(cellName)]
headDat <- byCell[c(cellName)]
sppDat[sppDat>=1] <- 1
sppTotals <- cbind(headDat, rowSums(sppDat))
##filters out data
filterBioDat <- subset(sppTotals, sppTotals["rowSums(sppDat)"] >= sppFilter)
spSiteCol <- filterBioDat[cellName]
##reassembles data after filtering
cellXY <- xyFromCell(siteRaster, spSiteCol$cellName)
spSiteCol <- cbind(spSiteCol, cellXY)
bioData <- merge(spSiteCol, byCell, by=cellName)
##transforms data to binary
byCell <- byCell[-c(1)]
byCell[byCell>=1] <- 1
inDataTable <- byCell
}
}else{
##if environmental data is a table
##remove coords if needed
newDataTable <- bioData[-c(x, y)]
##renames required columns
names(newDataTable)[names(newDataTable)==siteColumn] <- "siteUltimateCoolness"
names(newDataTable)[names(newDataTable)==sppColumn] <- "spcodeUltimateCoolness"
castData <- dcast(newDataTable, siteUltimateCoolness~spcodeUltimateCoolness, value.var=abundColumn)
##filters species data
siteName <- which(names(castData)=="siteUltimateCoolness")
sppDat <- castData[-c(siteName)]
headDat <- castData[c(siteName)]
sppDat[sppDat>=1] <- 1
sppTotals <- cbind(headDat, rowSums(sppDat))
##filters out data
filterBioDat <- subset(sppTotals, sppTotals["rowSums(sppDat)"] >= sppFilter)
spSiteCol <- filterBioDat[siteName]
##reassembles data after filtering
bioSiteCol <- which(names(bioData)==siteColumn)
holdData <- bioData[c(bioSiteCol,x,y)]
holdData <- aggregate(holdData, holdData[1], FUN=mean)
holdData <- holdData[-1]
siteXY <- merge(spSiteCol, holdData, by.x="siteUltimateCoolness", by.y=siteColumn, all=F)
bioData <- merge(siteXY, castData, by="siteUltimateCoolness")
predData <- merge(spSiteCol, predData, by.x="siteUltimateCoolness", by.y=siteColumn)
predSite <- which(names(predData)=="siteUltimateCoolness")
predData <- predData[-predSite]
inDataTable <- bioData[-c(1,2,3)]
}
XColumn <- "x"
YColumn <- "y"
##creates distance matrix
inDataTable[is.na(inDataTable)] <- 0
inDataTable <- inDataTable[apply(inDataTable, 1, function(x){sum(x)>=sppFilter}),]
if(abundance==F){
distData <- vegdist(inDataTable, dist, binary=T)
}else{
distData <- vegdist(inDataTable, dist, binary=F)
}
########################################################################
##species data as site-site distance matrix
}else if(bioFormat==3){
##site-site distance already calculated
#castData = bioData
distData <- lower.tri(as.matrix(bioData), diag=FALSE)
distData <- as.vector(bioData[distData])
########################################################################
##site pair table, already preped
}else if(bioFormat==4){
##site-pair distance value
outTable <- bioData
########################################################################
}else{
##return error, bioFormat argument out of bounds
stop(paste("bioFormat varable of '", as.character(bioFormat), "' is not an accepted input value", sep=""))
}
########################################################################
##With the dissimilarity distances calculated, creates and fills the table in GDM format
if(bioFormat!=4){
##creates base site-pair table
outTable <- as.data.frame(createsitepair(dist=distData, spdata=bioData, envInfo=predData, dXCol=XColumn, dYCol=YColumn, siteCol=siteColumn, weightsType=weightType, custWeights=custWeightVect))
}else{
outTable <- bioData
}
##first checks the size of the dissimilarity matrices, if any were provided,
##then pastes any dissimilarity matrices onto the created site-pair table
if(length(distPreds)>0){
baseMat <- distPreds[[1]]
##checks the size of each dissimilarity matrix, to make sure they are all the same
lapply(distPreds, function(mat, mat1){if(length(mat1)!=length(mat)){stop("The dimensions of your prediction matrices are not all the same.")}}, mat1=baseMat)
##checks the size of the dissimilarity matrices against the size of distData
baseMatDat <- lower.tri(as.matrix(baseMat),diag=FALSE)
baseMatDat <- as.vector(baseMat[baseMatDat])
if(nrow(outTable)!=length(baseMatDat)){
stop("The dimensions of your prediction matrices do not match the created distance data from the provided biological data")
}
#outTableHold = outTable
for(num in 1:length(distPreds)){
#num <- 2
##isolate matrix
matrixDat <- lower.tri(as.matrix(distPreds[[num]]), diag=FALSE)
sweetFreakenPredMatrix <- as.vector(distPreds[[num]][matrixDat])
##add matrix to table
if(ncol(outTable)>6){
##break table up to insert matrix data columns
baseSitePair <- outTable[,1:6]
otherSitePair <- outTable[,7:ncol(outTable)]
otherNames <- colnames(otherSitePair)
s1SitePair <- as.data.frame(otherSitePair[,1:(ncol(otherSitePair)/2)])
colnames(s1SitePair) <- otherNames[1:(ncol(otherSitePair)/2)]
s2SitePair <- as.data.frame(otherSitePair[,(ncol(otherSitePair)/2+1):ncol(otherSitePair)])
colnames(s2SitePair) <- otherNames[(ncol(otherSitePair)/2+1):ncol(otherSitePair)]
##formats data from dissimilarity matrices
s1Zeros <- as.data.frame(rep(0,length(sweetFreakenPredMatrix)))
colnames(s1Zeros) <- paste("s1.matrix_", num, sep="")
s2Mat <- as.data.frame(sweetFreakenPredMatrix)
colnames(s2Mat) <- paste("s2.matrix_", num, sep="")
##restructures the output table
outTable <- cbind(baseSitePair, s1SitePair, s1Zeros, s2SitePair, s2Mat)
}else{
##formats data from dissimilarity matrices
s1Zeros <- as.data.frame(rep(0,length(sweetFreakenPredMatrix)))
colnames(s1Zeros) <- paste("s1.matrix_", num, sep="")
s2Mat <- as.data.frame(sweetFreakenPredMatrix)
colnames(s2Mat) <- paste("s2.matrix_", num, sep="")
##restructures the output table
outTable <- cbind(outTable, s1Zeros, s2Mat)
}
}
}
##if maxPairs has a numeric value, then randomly samples the site-pair table for that number of site-pairs
if(is.null(maxPairs)==FALSE){
randRows <- sample(1:nrow(outTable), maxPairs)
outTable <- outTable[c(randRows),]
}
##return output table
class(outTable) <- c("gdmData", "data.frame")
return(outTable)
}
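## Example usage (a sketch, not run; 'sppTab' and 'envTab' are hypothetical --
## a site-by-species table with a "site" column plus "x"/"y" coordinate columns,
## and a matching table of environmental predictors that also contains the "site" column):
##
## sitePairTab <- gdm.formatsitepair(bioData=sppTab, bioFormat=1, dist="bray",
##                                   abundance=FALSE, siteColumn="site",
##                                   XColumn="x", YColumn="y", predData=envTab)
## mod <- gdm(sitePairTab, geo=TRUE)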
##########################################################################
##########################################################################
createsitepair <- function(dist, spdata, envInfo, dXCol, dYCol, siteCol, weightsType, custWeights){
##Used in gdm.formatsitepair to transform data from a site-site distance matrix into
##a site-pair format
##Input Variables:
##dist = the distance object representing the pair-wise dissimilarity of the species data
##envInfo = environmental data
##spdata = the species data, needed as it may hold the X,Y coordinates
##dXCol = x coordinate
##dYCol = y coordinate
##siteCol = site column, taken from either the species or environmental tables
##weightsType = the method of determining the site-pair weights
##custWeights = custom weights, as a vector, if given
##Output Variables:
##gdmTableFill = the complete site-pair table
###########################
#dist = distData
#spdata = bioData
#envInfo = predData
#dXCol = XColumn
#dYCol = YColumn
#siteCol = siteColumn
#weightsType = weightType
#custWeights = custWeightVect
###########################
##required libraries
#require(raster)
##Create gdm ready table
weightsType <- as.character(weightsType)
distance <- as.vector(dist)
##calculates the richness total: the sum of species richness across the two most species-rich sites
if(weightsType[1]=="rich"){
sppOnly <- spdata[-c(1,2,3)]
sppSums <- rowSums(sppOnly)
sppSiteSums <- cbind(spdata[1], sppSums)
orderedSums <- sppSiteSums[order(-sppSiteSums[,2]),]
richTotal <- orderedSums[1,2]+orderedSums[2,2]
}
##Builds index needed for output gdm table format
xCoord.S1 <- yCoord.S1 <- xCoord.S2 <- yCoord.S2 <- NULL
s1 <- s2 <- NULL
##get coordinates for each site
if(class(envInfo)=="RasterStack" | class(envInfo)=="RasterLayer" | class(envInfo)=="RasterBrick"){
#print("in raster option")
##collects xy data from the original species table
rlocs <- spdata[c(dXCol, dYCol)]
getCell <- as.data.frame(cellFromXY(envInfo[[1]], rlocs))
names(getCell)[names(getCell)=="cellFromXY(envInfo[[1]], rlocs)"] <- "site"
##get information from raster
rastXY <- xyFromCell(envInfo[[1]], unique(getCell$site))
exData <- as.data.frame(extract(envInfo, rastXY))
if(nlayers(envInfo)==1){
colnames(exData) <- names(envInfo)
}
creatRastDat <- cbind(rastXY, exData)
##Builds index needed for output gdm table format
count <- seq((nrow(as.matrix(creatRastDat))-1),1,-1)
s1 <- unlist(sapply(seq(length(count),1), function(y){c(s1, rep((max(count)-y)+1, times=y))}))
s2 <- unlist(sapply(seq(length(count),1), function(y){c(s2, (max(count)-y+2):(max(count)+1))}))
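## e.g. for 4 sites the indices are s1 = 1,1,1,2,2,3 and s2 = 2,3,4,3,4,4, matching
## the ordering of the lower-triangle distance vector returned by vegdist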
##create gdm table and weights
if(weightsType[1]=="def"){
weights <- rep(1, times=length(distance))
}else if(weightsType[1]=="cust"){
weights <- custWeights
}else{
weights <- (sppSiteSums[s1, "sppSums"] + sppSiteSums[s2, "sppSums"]) / richTotal
}
gdmTable <- cbind(distance, weights)
##from environmental table, copy coordinates
xCoord.S1 <- creatRastDat[s1, "x"]
xCoord.S2 <- creatRastDat[s2, "x"]
yCoord.S1 <- creatRastDat[s1, "y"]
yCoord.S2 = creatRastDat[s2, "y"]
if(length(xCoord.S1)!=nrow(gdmTable)){
stop("error with number of records of distances and records of coordinates")
}
##sets up output table
gdmForm <- cbind(gdmTable, xCoord.S1, yCoord.S1, xCoord.S2, yCoord.S2)
##fills output table
gdmTableFill <- as.data.frame(cbind(gdmForm, exData[s1,1:ncol(exData)], exData[s2,1:ncol(exData)]))
names.s1 <- paste("s1.",names(as.data.frame(exData)[1:ncol(exData)]), sep="")
names.s2 <- paste("s2.",names(as.data.frame(exData)[1:ncol(exData)]), sep="")
names(gdmTableFill) <- c(names(gdmTableFill)[1:6], names.s1, names.s2)
}else{
#print("in table option")
##Builds index needed for output gdm table format
count <- seq((nrow(as.matrix(envInfo))-1),1,-1)
s1 <- unlist(sapply(seq(length(count),1), function(y){c(s1, rep((max(count)-y)+1, times=y))}))
s2 <- unlist(sapply(seq(length(count),1), function(y){c(s2, (max(count)-y+2):(max(count)+1))}))
if(length(s1)!=length(distance)){
stop("error with number of records of distances and records of coordinates")
}
##create gdm table and weights
##Error caused here, not sure why yet, indexing?????
#if(weightsT[1]=="rich"){
# weights = (sppSiteSums[s1, "sppSums"] + sppSiteSums[s2, "sppSums"]) / richTotal
#}else if(weightsT[1]=="cust"){
# weights = custWeights
#}else{
# weights <- rep(1, times=length(distance))
#}
if(weightsType[1]=="def"){
weights <- rep(1, times=length(distance))
}else if(weightsType[1]=="cust"){
weights <- custWeights
}else{
weights <- (sppSiteSums[s1, "sppSums"] + sppSiteSums[s2, "sppSums"]) / richTotal
}
gdmTable <- cbind(distance, weights)
##from environmental table, copy coordinates
xCoord.S1 <- envInfo[s1, dXCol]
xCoord.S2 <- envInfo[s2, dXCol]
yCoord.S1 <- envInfo[s1, dYCol]
yCoord.S2 = envInfo[s2, dYCol]
##sets up output table
gdmForm <- cbind(gdmTable, xCoord.S1, yCoord.S1, xCoord.S2, yCoord.S2)
xhold <- which(names(envInfo)==dXCol)
yhold <- which(names(envInfo)==dYCol)
sitehold <- which(names(envInfo)==siteCol)
envInfo <- envInfo[-c(xhold, yhold, sitehold)]
##fills output table
if(ncol(envInfo)>0){
gdmTableFill <- cbind(gdmForm, envInfo[s1,1:ncol(envInfo)], envInfo[s2,1:ncol(envInfo)])
names.s1 <- paste("s1.",names(envInfo[1:ncol(envInfo)]), sep="")
names.s2 <- paste("s2.",names(envInfo[1:ncol(envInfo)]), sep="")
colnames(gdmTableFill) <- c(colnames(gdmTableFill)[1:6], names.s1, names.s2)
}else{
gdmTableFill <- gdmForm
}
}
##returns results
return(gdmTableFill)
}
##########################################################################