Nothing
#' Align-and-Count Method comparisons of RFLP data
#'
#' Fragment lengths or molecular weights from pairs of lanes are compared, and
#' a number of matching bands are calculated using the Align-and-Count Method.
#' Band intensities are not used in this method.This version of acm will perform
#' all distinct pairwise comparisons between lanes. The first lane will be compared
#' to all following lanes. The second lane will be compared to the third, fourth, etc..
#'
#' @param input The input file format for fragment and lane information are as the file
#' example.in shows. Each line (record) should define a band. The file must be
#' sorted so that the bands from a lane are together; the file must be sorted
#' by unique lane identifier (ULI). There must be three fields (separated by
#' white space -- that is, seperated by any combination of spaces and tabs).
#' The first field must be the ULI. The second field is the gel image
#' identifier (GII). This is only critical for the first fragment read in for
#' the lane; for subsequent fragments, the second field must simply be present.
#' ULI and GII names must not contain any spaces; I strongly suggest that these
#' fields contain only letters, numbers, and underscore characters. The third
#' field should contain the fragment length or molecular weight data for the
#' fragment. Lines that cannot be parsed into these three fields will be
#' recored in acm.log. Check acm.log after each run to see the number of lanes
#' read in and to check for lines from the input file that could not be parsed.
#' Lines must be no longer than 199 characters.
#' @param work_dir This is where the work is being done
#' @param dnum is the file number
#' @return The output from acm or acmone gives a comparison of two lanes on each line
#' of output in the form:
#' ULI ULI numbands numbands num_matches
#' The first field is identifier for the first lane in the comparison. The
#' third field is the number of bands read in for this first lane. The second
#' field is the identifier for the second lane in the comparison. The fourth
#' field is the number of band read in for this second lane. The fifth field is
#' the number of fragments found to match between the two lanes. The number of
#' matches found may be different depending on whether the lanes came from the
#' same gel image or from different gel images (see the file acm.par).
#' @references
#' Salamon et. al (1998) Accommodating Error Analysis in Comparison and Clustering of Molecular Fingerprints. Emerging Infectious Diseases Vol. 4, No. 2, April-June 1998
#' @references Abasci LLC. JAMES v1.0 User Documentation. 2002.
#' @author Andrea Benedetti \email{andrea.benedetti@@mcgill.ca}
#' @author Sahir Rai Bhatnagar
#' @author XiaoFei Zhao
#' @note Requires a parameter file generated by \code{\link{call_erra}}
#' @examples
#' \dontshow{
#' (WD <- getwd())
#' if (!is.null(WD)) setwd(WD)
#' data(replicates.in)
#' write.table(replicates.in, "replicates.in", quote=FALSE, row.names=FALSE)
#' data(experiments.in)
#' write.table(experiments.in, "experiments.in", quote=FALSE, row.names=FALSE)
#' call_erra("replicates.in", dnum=1, sd=1, delete=TRUE)
#' }
#' #perform the basic pairwise comparison via the align and count algorithm
#' res1<-call_acm("experiments.in")
#' @export
call_acm <- function(input, work_dir=".", dnum=1)
{
call_acm2 <- function()
{
if (!file.exists(work_dir)) {
if (!dir.create(work_dir)) {
print(paste("In directory ", getwd(), " ", sep = "'"))
print(paste(" - directory ", work_dir, " does not exist ", sep = "'"))
print(paste(" - and the program cannot create the directory ", work_dir, " ", sep = "'"))
stop()
}
}
acm.file.exists(input)
input = tools::file_path_as_absolute(input)
setwd(work_dir)
#exe.dir <- system.file(package="acm4r", "executables", "win32")
#file.copy(from = input, to = paste(exe.dir, "/acm.in", sep = ""), overwrite = TRUE)
#setwd(exe.dir)
#cmd = paste("./acm", input, "acm.out", sep = " ")
outputname = paste("acm.out", ".", dnum, sep = "")
outtable = acm(input, outputname)
#print (paste("executing command:", cmd, sep = " "))
#system(cmd, intern = TRUE, ignore.stdout = FALSE, ignore.stderr = TRUE)
file.show(outputname, title = "Result of all pairwise matches by acm (align and count with exact matching)")
#file.copy(from = "acm.out", to = paste("acm.out", dnum, sep = "."), overwrite = delete);
#print(paste("call_acm finished, data is stored at ", exe.dir, ".", sep = "'"))
print("call_acm finished.")
return(outtable)
}
old.dir <- getwd()
tryCatch((outtable = call_acm2()), finally = setwd(old.dir))
return(outtable);
}
# The code below is translated from C code for Align-and-Count-Method by XiaoFei Zhao
VERSION1 = "acm alpha version 0.6 written by Hugh Salamon"
VERSION2 = "Copyright 1996, 1997 The Regents of the University of California."
VERSION3 = "All Rights Reserved. acm.0.6.1 7-Apr-1998"
fprintf = function(filename = "", format = "", ...) {
write(sprintf(format, ...), file = filename, append = TRUE)
}
read_pars = function(logfile, fp)
# /* returns 1 if all parameters cannot be read and converted, 0 otherwise) */
{
params = list(s_increments="", s_slide="", s_deviation="", s_hmw="", s_hmw_rate="",
d_increments="", d_slide="", d_deviation="", d_hmw="", d_hmw_rate="", retval=0)
s_inc_tag = "SAME_INCREMENTS";
d_inc_tag = "DIFF_INCREMENTS";
s_sli_tag = "SAME_SLIDE";
d_sli_tag = "DIFF_SLIDE";
s_dev_tag = "SAME_DEVIATION";
d_dev_tag = "DIFF_DEVIATION";
s_hmw_tag = "SAME_HMW";
d_hmw_tag = "DIFF_HMW";
s_hmwr_tag = "SAME_HMW_RATE";
d_hmwr_tag = "DIFF_HMW_RATE";
filecontent = readLines(fp);
# print(filecontent);
for (i in 1:length(filecontent))
{
aline = unlist(strsplit(filecontent[i], "( )+"))
tag = aline[1]
options(warn = -1)
value = as.numeric(aline[2])
options(warn = 0)
if(length(aline) >= 2 && is.finite(value))
{
# print(paste("value ((", value, ")) is finite = "))
#write("S_INC_TAG found", file = stderr())
#print(value)
if(s_inc_tag == tag)
{
params$s_increments = floor(value);
}
else if(d_inc_tag == tag)
{
params$d_increments = floor(value);
}
else if(s_sli_tag == tag)
{
params$s_slide = value;
}
else if(d_sli_tag == tag)
{
params$d_slide = value;
}
else if(s_dev_tag == tag)
{
params$s_deviation = value;
}
else if(d_dev_tag == tag)
{
params$d_deviation = value;
}
else if(s_hmw_tag == tag)
{
params$s_hmw = value;
}
else if(d_hmw_tag == tag)
{
params$d_hmw = value;
}
else if(s_hmwr_tag == tag)
{
params$s_hmw_rate = value;
}
else if(d_hmwr_tag == tag)
{
params$d_hmw_rate = value;
}
}
}
if ("" == params$s_increments)
{
params$retval = 1;
fprintf(logfile, "SAME_INCREMENTS value not found in acm.par");
}
if ("" == params$d_increments)
{
params$retval = 1;
fprintf(logfile, "DIFF_INCREMENTS value not found in acm.par");
}
if ("" == params$s_slide)
{
params$retval = 1;
fprintf(logfile, "SAME_SLIDE value not found in acm.par");
}
if("" == params$d_slide)
{
params$retval = 1;
fprintf(logfile, "DIFF_SLIDE value not found in acm.par");
}
if("" == params$s_deviation)
{
params$retval = 1;
fprintf(logfile, "SAME_DEVIATION value not found in acm.par");
}
if("" == params$d_deviation)
{
params$retval = 1;
fprintf(logfile, "DIFF_DEVIATION value not found in acm.par");
}
if("" == params$s_hmw)
{
params$retval = 1;
fprintf(logfile, "SAME_HMW value not found in acm.par");
}
if("" == params$d_hmw)
{
params$retval = 1;
fprintf(logfile, "DIFF_HMW value not found in acm.par");
}
if("" == params$s_hmw_rate)
{
retval = 1;
fprintf(logfile, "SAME_HMW_RATE value not found in acm.par");
}
if("" == params$d_hmw_rate)
{
retval = 1;
fprintf(logfile, "DIFF_HMW_RATE value not found in acm.par");
}
return(params);
}
showpar_samediff = function(fp, deviation, slide, increments, hmw, hmw_rate) {
fprintf(fp, VERSION1);
fprintf(fp, VERSION2);
fprintf(fp, VERSION3);
# fprintf(fp, "LANES = %d", LANES);
# fprintf(fp, "MAXBANDS = %d", MAXBANDS);
# fprintf(fp, "IDENTIFIER_LENGTH = %d", IDENTIFIER_LENGTH);
# fprintf(fp, "deviation = %f", deviation);
# fprintf(fp, "slide = %f", slide);
# fprintf(fp, "increments = %d", increments);
# fprintf(fp, "hmw = %f", hmw);
# fprintf(fp, "hmw_rate = %f", hmw_rate);
return(0);
}
slidefun_samediff = function(array1, array2, bands1, bands2, increments, slide, deviation, hmw, hmw_rate)
{
# /*array1: first lane, array of band sizes -- e.g. in kilobases */
# /*array2: second lane, array of band sizes */
# double new1[MAXBANDS]; /* array to hold shifted values of first lane */
# double new2[MAXBANDS]; /* array to hold shifted values of second lane */
# double adjust1, adjust2;
# int j, k, maxscore, tmps;
UNINITIALIZED = 0
new1 = rep(0, bands1)
new2 = rep(0, bands2)
# /* should check that bands1 and bands2 are not > MAXBANDS */
maxscore = 0;
# /* Lanes -- arrays -- are shifted relative to each other by multiplying */
# /* bands in each lane by a single constant for that lane (e.g., 1.035). */
# /* The new, shifted, lanes are then compared by function score. This */
# /* shifting and scoring is performed increments number of times in the */
# /* following loop. The maximum score calculated is reported. */
j = -(increments - 1) / 2;
while (j <= (increments - 1) / 2)
{
adjust1 = 1 + as.numeric(j) * as.numeric(slide) / as.numeric(increments - 1);
adjust2 = 1 - as.numeric(j) * as.numeric(slide) / as.numeric(increments - 1);
for (k in 1:bands1)
{
new1[k] = array1[k] * adjust1;
}
for (k in 1:bands2)
{
new2[k] = array2[k] * adjust2;
}
tmps = score_samediff(new1,new2, bands1, bands2, deviation, hmw, hmw_rate);
if(tmps > maxscore)
{
maxscore = tmps;
}
j = j + 1;
}
return(maxscore);
}
score_samediff = function(bands_1, bands_2, numx, numy, deviation, hmw, hmw_rate) {
# /* The returned score is the number of bands determined to match between the */
# /* two arrays. Mutually closest bands whose distance apart is less than */
# /* deviation (or dev) multiplied by the mean size of the pair of mutually closest */
# /* bands contribute +1 to the score returned. */
UNINITIALIZED = 0;
closestx = rep(0, numx);
closesty = rep(0, numy);
matches = UNINITIALIZED;
#x = UNINITIALIZED;
#y = UNINITIALIZED;
best = UNINITIALIZED;
closest_mean = UNINITIALIZED;
dev = UNINITIALIZED;
test = UNINITIALIZED;
matches = 0;
for (x in 1:numx)
{
#best = abs(bands_1[x] - bands_2[1]);
#closestx[x] = 1;
#for(y in 2:numy)
#{
# test = abs(bands_1[x] - bands_2[y]);
# if (test < best)
# {
# best = test;
# closestx[x] = y;
# }
#}
closestx[x] = which.min(abs(bands_2 - bands_1[x]))
}
for (y in 1:numy)
{
#best = abs(bands_2[y] - bands_1[1]);
#closesty[y] = 1;
#for(x in 2:numx)
#{
# test = abs(bands_2[y] - bands_1[x]);
# if (test < best)
# {
# best = test;
# closesty[y] = x;
# }
#}
closesty[y] = which.min(abs(bands_1 - bands_2[y]))
}
# print(closestx); print(closesty);
for (x in 1:numx)
{
if (closesty[closestx[x]] == x)
{
closest_mean = (bands_1[x] + bands_2[closestx[x]]) / 2;
if (closest_mean > hmw)
{
dev = deviation + (closest_mean - hmw) * hmw_rate;
if (abs(bands_1[x] - bands_2[closestx[x]])/(0.5 * (bands_1[x] + bands_2[closestx[x]])) <= dev)
{
matches = matches + 1;
}
}
else
{
if (abs(bands_1[x] - bands_2[closestx[x]])/(0.5 * (bands_1[x] + bands_2[closestx[x]])) <= deviation)
{
matches = matches + 1;
}
}
}
}
return(matches);
}
acm <- function(infilename, outfilename)
{
stderr = stderr()
stdout = stdout()
stdin = stdin()
# /* parameters for matching functions: */
# /* s_ for same gel, d_ for different gel */
UNINITIALIZED = 0;
s_increments = UNINITIALIZED;
s_slide = UNINITIALIZED;
s_deviation = UNINITIALIZED;
s_hmw = UNINITIALIZED;
s_hmw_rate = UNINITIALIZED;
d_increments = UNINITIALIZED;
d_slide = UNINITIALIZED;
d_deviation = UNINITIALIZED;
d_hmw = UNINITIALIZED;
d_hmw_rate = UNINITIALIZED;
acmpar = "acm.par"; # /* parameter_file */
logfile = "acm.log"; # /* log file for errors */
write("", file = logfile)
infile = infilename; # /* fragment data file */
outfile = outfilename; # /* output file */
write("", file = outfile)
par_error = UNINITIALIZED;
array1 = UNINITIALIZED; #/* first lane for comparison, array of band sizes -- e.g. in kilobases */
array2 = UNINITIALIZED; #/* second lane for comparison, array of band sizes */
bands1 = UNINITIALIZED;
bands2 = UNINITIALIZED;
# i; j; k;
num = UNINITIALIZED;
comparisons = UNINITIALIZED;
total_comparisons = UNINITIALIZED;
progress = UNINITIALIZED;
storescore = UNINITIALIZED;
# indicator = UNINITIALIZED;
# scanreturn;
# newbandnum;
numbands = UNINITIALIZED;
lane = UNINITIALIZED;
gel = UNINITIALIZED;
bands = UNINITIALIZED;
laneident = UNINITIALIZED;
line = UNINITIALIZED;
#/* storage for reading in data */
# scan_uniqueid;
# scan_gelid;
# scan_length;
comparisons = 0;
# newbandnum = 0;
params = read_pars(logfile, acmpar);
par_error = params$retval;
if (par_error != 0) {
fprintf(stderr, "Error(s) reading acm.par, see acm.log.\n");
stop(1);
}
s_increments = params$s_increments;
s_slide = params$s_slide;
s_deviation = params$s_deviation;
s_hmw = params$s_hmw;
s_hmw_rate = params$s_hmw_rate;
d_increments = params$d_increments;
d_slide = params$d_slide;
d_deviation = params$d_deviation;
d_hmw = params$d_hmw;
d_hmw_rate = params$d_hmw_rate;
#print(params)
#print(paste("s_slide ============ ", s_slide))
if (infile == "-p") {
fprintf(stdout(), " same gel comparisons:");
showpar_samediff(stdout(), s_deviation, s_slide, s_increments, s_hmw, s_hmw_rate);
showpar_samediff(logfile, s_deviation, s_slide, s_increments, s_hmw, s_hmw_rate);
fprintf(stdout(), "\n different gel comparisons:");
showpar_samediff(stdout(), d_deviation, d_slide, d_increments, d_hmw, d_hmw_rate);
showpar_samediff(logfile, d_deviation, d_slide, d_increments, d_hmw, d_hmw_rate);
stop(1);
}
fprintf(logfile, "begin data read: any data which did not scan?");
# START reading data
intable = read.table(infile, skip = 1, sep = "", fill = TRUE, header = FALSE, col.names = 1:5)
# if (nrow(intable) < 3) {
# print(paste("Error, the file ", infile, " is not a good input file."))
# stop(-1)
# }
laneCount = 0
fragCount = 0
laneName = ""
laneNames = "";
gel = UNINITIALIZED;
for (i in 1:nrow(intable)) {
if (intable[i, 1] != "" & intable[i, 2] != "" & is.numeric(intable[i, 3])) {
fragCount = fragCount + 1
if (laneName != intable[i, 1]) {
laneCount = laneCount + 1
laneName = as.character(intable[i, 1])
gel[laneCount] = intable[i, 2]
laneNames[laneCount] = as.character(laneName);
}
} else {
# print(paste("lane ", i, " is incorrect!"))
}
}
laneVec = matrix(-1, nrow = laneCount, ncol = 50);
laneCount = 0;
bandCount = 0;
laneName = "";
fragCountAtLane = UNINITIALIZED;
for (i in 1:nrow(intable)) {
if (intable[i, 1] != "" & intable[i, 2] != "" & is.numeric(intable[i, 3])) {
if (laneName != intable[i, 1]) {
laneName = as.character(intable[i, 1]);
laneCount = laneCount + 1;
bandCount = 1;
laneVec[laneCount, 1] = as.numeric(intable[i,3]);
# laneNames[laneCount] = laneName;
} else {
bandCount = bandCount + 1;
fragCountAtLane[laneCount] = bandCount;
laneVec[laneCount, bandCount] = as.numeric(intable[i,3]);
}
} else {
fprintf(logfile, "%s", intable[i,]);
}
}
# END reading data
num = laneCount
fprintf(logfile, "end data read: %d lanes read", num);
fprintf(logfile, " acm starting...");
total_comparisons = num * (num - 1L) / 2L;
fprintf(stderr,VERSION1);
fprintf(stderr,VERSION2);
fprintf(stderr,VERSION3);
fprintf(stderr,"\n acm starting, please wait...");
matrixIndex = 1
outtable = matrix(data = NA, nrow = (num-1) * (num) / 2, ncol = 5)
for(i in 1:(num-1)) {
bands1 = fragCountAtLane[i];
#print(bands1)
for(k in 1:bands1) {
array1[k] = laneVec[i,k];
}
# print(paste("num = ", num))
for (j in (i + 1):num) {
comparisons = comparisons + 1;
if ((comparisons %% 100) == 0) {
progress = comparisons * 100 / total_comparisons;
fprintf(stderr," acm: %d of %d comparisons (approximately %5.1f percent complete)",
as.integer(comparisons), as.integer(total_comparisons), progress);
}
bands2 = fragCountAtLane[j];
for (k in 1:bands2) {
array2[k] = laneVec[j,k];
}
if(identical(gel[i], gel[j])) {
storescore = slidefun_samediff(array1,array2,bands1,bands2,s_increments,s_slide,s_deviation,s_hmw,s_hmw_rate);
} else {
storescore = slidefun_samediff(array1,array2,bands1,bands2,d_increments,d_slide,d_deviation,d_hmw,d_hmw_rate);
}
outtable[matrixIndex, 1] = as.character(laneNames[i])
outtable[matrixIndex, 2] = as.character(laneNames[j])
outtable[matrixIndex, 3] = as.integer(fragCountAtLane[i])
outtable[matrixIndex, 4] = as.integer(fragCountAtLane[j])
outtable[matrixIndex, 5] = as.numeric(storescore)
matrixIndex = matrixIndex + 1
}
}
fprintf(stderr, "\n acm: %d comparisons completed!", as.integer(total_comparisons));
fprintf(logfile, "\n%d comparisons\n acm completed!\n", as.integer(total_comparisons));
fprintf(logfile, "infile: \"%s\"", infile);
fprintf(logfile, "outfile: \"%s\"", outfile);
fprintf(logfile, "\n same gel comparisons:");
showpar_samediff(logfile, s_deviation, s_slide, s_increments, s_hmw, s_hmw_rate);
fprintf(logfile, "\n different gel comparisons:\n");
showpar_samediff(logfile, d_deviation, d_slide, d_increments, d_hmw, d_hmw_rate);
return(outtable)
#return(0);
}
#x = read_pars(stdout(), "acm.par")
#print(x)
#acm("replicates.in", "acm_test.out")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.