app.genome <- function(bed, FUN, relatedness, windows, sliding, unit=c("bases", "indices"), map='B37', centromere=NULL, LD.thin=NULL, thread=22)
{
## Verif
if (!is.function(FUN) & !is.list(FUN)) stop('"FUN" argument must be a function of a list of functions')
if (is.list(FUN)) for (i in 1:length(FUN)) if (!is.function(FUN[[i]])) stop('"FUN" argument must be a function of a list of functions')
if (is.function(FUN)) { f <- as.character(substitute(FUN)); FUN <- list(FUN); names(FUN) <- ifelse(length(f)>1, 'function1', f); }
if (!is.function(relatedness)) stop('"relatedness" argument must be a function')
if (is.list(relatedness)) for (i in 1:length(relatedness)) if (!is.function(relatedness[[i]])) stop(relatedness)
if (is.function(relatedness)) { f <- as.character(substitute(relatedness)); relatedness <- list(relatedness); names(relatedness) <- ifelse(length(f)>1, 'relatedness1', f); }
# load of genetic map
if (map=='B36') {
data('genmapB36')
map <- genmapB36$genmap
if (is.null(centromere)) centromere <- genmapB36$centromere else {
if (!is.data.frame(centromere)) stop("'centromere' argument must be NULL or a data frame")
if (names(centromere)!=c("chr", "start", "end")) stop("'centromere' must contains 'chr', 'start', 'end' variables, one line by chromosom")
}
} else if (map=='B37') {
data('genmapB37')
map <- genmapB37$genmap
if (is.null(centromere)) centromere <- genmapB37$centromere else {
if (!is.data.frame(centromere)) stop("'centromere' argument must be NULL or a data frame")
if (names(centromere)!=c("chr", "start", "end")) stop("'centromere' must contains 'chr', 'start', 'end' variables, one line by chromosom")
}
} else if (is.data.frame(map)) {
if (names(map)!=c("chr", "base", "rate.cM.Mb", "cM")) stop("'map' must contains 'chr', 'base', 'rate.cM.Mb' and 'cM' variables")
if (is.null(centromere)) stop("If 'map' is a data frame, 'centromere' argument must be given")
if (!is.data.frame(centromere)) stop("'centromere' argument must be a data frame")
if (names(centromere)!=c("chr", "start", "end")) stop("'centromere' must contains 'chr', 'start', 'end' variables, one line by chromosom")
} else stop("'map' must be equal to 'B36' or 'B37' or a data frame")
if (is.character(bed))
{
snps <- read.table(paste(bed, '.bim', sep=''))
} else if (class(bed) == "bed.matrix")
{
snps <- bed@snps
} else {
stopCluster(cl)
stop('"bed" must be a string or a bed.matrix.')
}
temp <- order(snps$chr, snps$pos)
if (sum(temp!=1:nrow(snps))>0)
{
warning('SNPs are not ordered')
snps <- snps[temp,]
if (is.character(bed)) bed <- read.bed.matrix(bed)[,temp] else bed <- bed[,temp]
}
snps$index <- 1:nrow(snps)
result <- NULL
for (i in unique(snps$chr))
{
w <- snps[snps$chr==i,]
if (unit=='indices')
{
temp <- data.frame(chr=i, start=NA, end=NA, index.start= w$index[seq(1, floor((nrow(w)-1)/sliding)*sliding, by=sliding)], index.end=NA)
temp$index.end <- sapply(temp$index.start, function(x) min(x + windows, w$index[nrow(w)]) )
temp$start <- snps$pos[temp$index.start]
temp$end <- snps$pos[temp$index.end]
} else if (unit=='bases')
{
temp <- data.frame(chr=i, start=seq(0, as.integer(ceiling( max(w$pos)/sliding )*sliding), by=sliding), end=NA, index.start=NA, index.end=NA)
temp$end <- temp$start + windows
tt <- apply(cbind(temp$start,temp$end), 1, function(x) {
ww <- ifelse( w$pos>=x[1] & w$pos<x[2], w$pos, NA )
if (sum(!is.na(ww))>0) return( c(which.min(ww), which.max(ww)) )
else return(rep(NA,2)) } )
temp$index.start <- w$index[tt[1,]]
temp$index.end <- w$index[tt[2,]]
} else stop("'unit' must be equal to 'base' or 'indices'")
temp$recombi_snps <- apply(cbind(temp$start, temp$end), 1, function(x) mean( map$rate.cM.Mb[ map$chr==i & map$base>=x[1] & map$base<=x[2] ] ) )
temp$recombi_region <- apply(cbind(temp$start, temp$end), 1, function(x) {
w <- which(map$chr==i & map$base>=x[1] & map$base<=x[2]);
if (length(w)==0) return(NA)
else if (length(w)==1) return(map$rate.cM.Mb[w])
else return( diff(range( map$cM[ w ] ))/diff(range( map$base[ w ]/1e6 )) ) })
result <- rbind(result, temp)
}
rm(temp, w)
result$centro <- ( (result$start > centromere$start[result$chr] & result$start < centromere$end[result$chr]) |
(result$end > centromere$start[result$chr] & result$end < centromere$end[result$chr]) |
(result$end > centromere$end[result$chr] & result$start < centromere$start[result$chr]) )
#if(!file.exists(paste(bed, 'bed',sep='.')))
result$num <- NA
result$LD <- NA
result$LD_sd <- NA
result$maf <- NA
result$maf_sd <- NA
result$H <- NA
result$H_sd <- NA
result$Hz <- NA
result$Hz_sd <- NA
w <- names(relatedness)
ww <- names(FUN)
for (i in w) for (j in ww) result[paste(i, j, sep='_')] <- NA
if (thread>1)
{
if (class(bed) == "bed.matrix") {
time <- paste(c('temp_gaston.pop', strsplit(date(), ' ')[[1]][strsplit(date(), ' ')[[1]]!='']), collapse='_')
write.bed.matrix(bed, time, rds=NULL)
bed <- time
}
RNGkind("L'Ecuyer-CMRG")
cl <- makeForkCluster(nnodes=thread)
#cl <- makeCluster(thread)
#lib <- eval(.libPaths(), envir=.GlobalEnv)
#lib <- lib[length(lib)]
#clusterExport(cl, c("result", "relatedness", "FUN", "LD", "bed", "couple", "lib"), envir=environment())
#clusterCall(cl, function() .libPaths(lib))
#clusterCall(cl, function() library(gaston))
#clusterCall(cl, function() library(GPop))
result[,-(1:8)] <- matrix( parRapply(cl, cbind(result$chr, result$start, result$end, result$index.start, result$index.end), function(xx) {
if (is.na(xx[4]) | is.na(xx[5])) return(rep(NA, 9+length(relatedness)*length(FUN)))
if (is.character(bed)) x <- gaston.utils:::read.bed.matrix.part(bed, beg=xx[4], end=xx[5])
else if (class(bed) == "bed.matrix") x <- select.snps(bed, chr==xx[1] & pos>=xx[2] & pos<xx[3])
if(ncol(x)==0) return(rep(NA, 9+length(relatedness)*length(FUN)))
if (ncol(x)>0) {
standardize(x) <- "mu_sigma"
if (!is.null(LD.thin) & ncol(x)>1) x <- LD.thin(x, LD.thin, extract=TRUE)
r <- ncol(x)
#if (sum(map[[paste('chr', unique(x@snps$chr), sep='')]]$base %in% x@snps$pos)>0) t$recombi <- map[[paste('chr', unique(x@snps$chr), sep='')]]( map[[paste('chr', unique(x@snps$chr), sep='')]]$rate.cM.Mb[ map[[paste('chr', unique(x@snps$chr), sep='')]]$base %in% x@snps$pos ], na.rm=TRUE)
}
# LD
L <- LD(x, lim=c(1,ncol(x)))
L <- L[lower.tri(L, diag=FALSE)]
r <- c(r, mean(L) )
r <- c(r, sd(L) )
r <- c(r, mean(x@snps$maf) )
r <- c(r, sd(x@snps$maf) )
r <- c(r, mean(2*x@snps$maf*(1-x@snps$maf)) )
r <- c(r, sd(2*x@snps$maf*(1-x@snps$maf)) )
r <- c(r, mean(x@snps$hz) )
r <- c(r, sd(x@snps$hz) )
standardize(x) <- 'p'
for (i in 1:length(relatedness))
{
app <- relatedness[[i]](x)
for (j in 1:length(FUN)) {
t <- FUN[[j]](app)
if (length(t)>1) warning("FUN element(s) must be function with result value of length 1")
else r <- c(r, t)
}
}
return(r) }), ncol=9+length(relatedness)*length(FUN), byrow=TRUE)
stopCluster(cl)
file.remove(paste(time, 'bed', sep='.'))
file.remove(paste(time, 'bim', sep='.'))
file.remove(paste(time, 'fam', sep='.'))
} else {
for (k in 1:nrow(result))
{
if (is.character(bed)) x <- gaston.utils:::read.bed.matrix.part(bed, beg=result$index.start[k], end=result$index.end[k])
else if (class(bed) == "bed.matrix") x <- select.snps(bed, chr==result$chr[k] & pos>=result$start[k] & pos<result$end[k])
if(ncol(x)==0) next
if (ncol(x)>0) {
standardize(x) <- 'mu_sigma'
if (!is.null(LD.thin) & ncol(x)>1) x <- LD.thin(x, LD.thin, extract=TRUE)
result$num[k] <- ncol(x)
#if (sum(map[[paste('chr', unique(x@snps$chr), sep='')]]$base %in% x@snps$pos)>0) t$recombi <- map[[paste('chr', unique(x@snps$chr), sep='')]]( map[[paste('chr', unique(x@snps$chr), sep='')]]$rate.cM.Mb[ map[[paste('chr', unique(x@snps$chr), sep='')]]$base %in% x@snps$pos ], na.rm=TRUE)
}
# LD
L <- LD(x, lim=c(1,ncol(x)))
L <- L[lower.tri(L, diag=FALSE)]
result$LD[k] <- mean(L)
result$LD_sd[k] <- sd(L)
result$maf[k] <- mean(x@snps$maf)
result$maf_sd[k] <- sd(x@snps$maf)
result$H[k] <- mean(mean(2*x@snps$maf*(1-x@snps$maf)))
result$H_sd[k] <- sd(2*x@snps$maf*(1-x@snps$maf))
result$Hz[k] <- mean(x@snps$hz)
result$Hz_sd[k] <- sd(x@snps$hz)
standardize(x) <- 'p'
for (i in 1:length(relatedness))
{
app <- relatedness[[i]](x)
for (j in 1:length(FUN)) {
t <- FUN[[j]](app)
if (length(t)>1) warning("FUN element(s) must be function with result value of length 1")
else result[k, paste(w[i], ww[j], sep='_')] <- t
}
}
}
}
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.