R/PrepRISA.R
In prepRISA: prepRISA

.First.lib <- function(lib, pkg) {
  library.dynam("prepRISA", pkg, lib)
}
################################
# prepRISA - 9 March 2007
#
# Jean Thioulouse - Labo Biometrie et Biologie Evolutive,  UMR CNRS 5558
# Universite Lyon 1, Batiment Mendel,   43 Boulevard du 11 Novembre 1918
# 69622 Villeurbanne Cedex - France    http://pbil.univ-lyon1.fr/JTHome/
# Tel : (33) 4 72 43 27 56                      Fax : (33) 4 72 43 13 88
#  msn: thioulouse@hotmail.com  #  jabber: jt69 #  .mac: j.thioulouse #
#
################################
"prepRISA" <- function ()
{
#
# user dialogs
#
	cat("Data file (no def.) ? ")
	filename <- readLines(n=1)
	if (filename == "") stop("Filename needed")
	
	cat("Number of groups (def. = 1) ? ")
	ngr <- as.integer(readLines(n=1))
	if (is.na(ngr)) ngr <- 1
	
	cat("Sum (def. = 1) or Max (2) of peaks ? ")
	ioptsm <- as.integer(readLines(n=1))
	if (is.na(ioptsm)) ioptsm <- 1
	
	cat("Row percentage tranformation (def. = yes = 1) ? ")
	ioptpt <- as.integer(readLines(n=1))
	if (is.na(ioptpt)) ioptpt <- 1
	
	cat("Max. number of peaks (def. = All = 0) ? ")
	maxnp <- as.integer(readLines(n=1))
	if (is.na(maxnp)) maxnp <- 0
	
	cat("Start (def. = mini = 0) ? ")
	start <- as.integer(readLines(n=1))
	if (is.na(start)) start <- 0
#
# reading input file
#
#	tab <- read.table(filename)
	tab <- read.table(filename, colClasses=c("factor", "numeric", "numeric", "numeric"))
	names(tab) <- c("nech","np","pb","hp")
#
# computing min/max plus a few declarations
#
	if (start != 0) mini  <-  start else mini <- min(tab$pb)
	maxi <- max(tab$pb)	
	claw <- vector(mode="numeric",length=ngr)
	ncla <- vector(mode="numeric",length=ngr)
	maxl <- rep(maxi, ngr-1)
	lim <- vector(mode="numeric",length=ngr+1)
	nech <- rep(c(1:nlevels(tab$nech)),table(tab$nech)[unique(tab$nech)])
	nlig <- nrow(tab)
	pb <- tab$pb
	hp <- tab$hp

	cat("--- Min. length =", mini, "--- Max. length =", maxi,"---\n")
#
# follow up on user dialogs
#
	for (i in 1:ngr) {
		cat("Group number",i,":\n")
		cat("Class width  (def. = 2) ? ")
		claw[i] <- as.integer(readLines(n=1))
		if (is.na(claw[i])) claw[i] <- 2
	}

	if (ngr>1) {
		for (i in 1:(ngr-1)) {
			cat("Group number",i,":\n")
			cat("Upper limit  ? ")
			maxl[i] <- as.integer(readLines(n=1))
		}
	}

	for (i in 1:ngr) {
		cat("Group number",i,"class width:", claw[i],"\n")
	}

	cat("Class limits:\n")
	cat("Mini =",mini,"\n")
	if (ngr>1) {
		for (i in 1:(ngr-1)) {
			cat(maxl[i],"\n")
		}
	}
	cat("Maxi =",maxi,"\n")

#
# computing the table of class limits
#
	lim[1] <- mini
	if (ngr>1) lim[2:ngr] <- maxl
	lim[ngr+1] <- maxi
	nbech <- length(levels(tab$nech))
	cat("Number of samples =",nbech,"\n")
	npi <- by(tab$np, tab$nech, max)
	for (i in 1:nbech) {
		cat("Sample number :",i, "number of peaks =", npi[i],"\n")
	}

	tablim1 <- mini
	i <- 1
	while (i <= ngr) {
		while (tablim1 <= lim[i+1]) {
			ncla[i] <- ncla[i] + 1
			tablim1 <- tablim1 + claw[i]
		}
		i <- i + 1
	}

	for (i in 1:ngr)  {
		cat("Group number : ",i,"number of classes =",ncla[i],"\n")
	}

	nclatot=sum(ncla)
	cat("Total number of classes =", nclatot, "\n")

	maxnlim <- max(ncla)
	cat("Max number of classes =", maxnlim, "\n")
	
	tablim <- matrix(0, nrow=maxnlim+1, ncol=ngr)
	tablim[1,] <- lim[1:ngr]
	for (i in 1:ngr)  {
		for (j in 2:(ncla[i]+1))  {
			tablim[j, i] <- tablim[j-1, i] + claw[i]
		}
		tablim[ncla[i]+1, i] <- lim[i+1]
	}
#
# Call of the C function to compute the distribution of peaks
#
	classe <- function()
		.C("repClass",
			as.integer(ioptsm),
			as.integer(ngr),
			as.integer(nbech),
			as.integer(nlig),
			as.integer(nclatot),
			as.integer(maxnlim),
			as.integer(pb),
			as.integer(ncla),
			as.integer(t(tablim)),
			as.double(hp),
			as.integer(nech),
			tabh = double(nbech*nclatot),
			PACKAGE = "prepRISA")$tabh
#
# recovering C function output
#
	tabh <- matrix(classe(), nrow=nclatot, ncol=nbech)
	tabh <- t(tabh)
#
# truncate the number of peaks
#
	if ( (maxnp > 0) && ((maxnp < nclatot)) ) {
		for (j in 1:nbech)  {
			listpics <- sort(tabh[j,], decreasing=TRUE)
			tabh[j, tabh[j,] < listpics[maxnp]] <- 0
		}
	}
#
# setting dataframe row names
#
	row.names(tabh) <- unique(tab$nech)
	tabh <- as.data.frame(tabh)
	names(tabh) <- as.character(tablim[1:nclatot])
#
# remove columns of 0
#
	tabhsz <- tabh[,apply(tabh,2,sum)!=0]
#
# row percentages
#
	if (ioptpt == 1) {
		tabh <- tabh/apply(tabh,1,sum)
		tabhsz <- tabhsz/apply(tabhsz,1,sum)
	}
#
# build output file names
#
	extTot <- "_Tot"
	extSZ <- "_NoZ"
	if (ioptsm == 1) {
		extTot <- paste(extTot,"S",sep="")
		extSZ <- paste(extSZ,"S",sep="")
	} else {
		extTot <- paste(extTot,"M",sep="")
		extSZ <- paste(extSZ,"M",sep="")
	}
	if (ioptpt == 1) {
		extTot <- paste(extTot,"P",sep="")
		extSZ <- paste(extSZ,"P",sep="")
	}
#
# take care of filename extension(s)
#
	spl1 <- strsplit(filename, ".", fixed=TRUE)[[1]]
	if (length(spl1) > 1) {
		spl2 <- spl1[2:length(spl1)]
		TotOF <- paste(spl1[1],extTot,".",paste(spl2,collapse="."), sep="")
		NZOF <- paste(spl1[1],extSZ,".",paste(spl2,collapse="."),sep="")
	} else {
		TotOF <- paste(spl1[1],extTot,sep="")
		NZOF <- paste(spl1[1],extSZ,sep="")
	}
#
# write output files
#
	write.table(tabh, file=TotOF, quote=FALSE)
	cat("Full table :",TotOF,"Number of rows =", nrow(tabh), "Number of columns =",ncol(tabh),"\n")
	
	write.table(tabhsz, file=NZOF, quote=FALSE)
	cat("Table without columns of 0 :",NZOF,"Number of rows =", nrow(tabhsz), "Number of columns =",ncol(tabhsz),"\n")

	invisible(as.data.frame(tabhsz))
}