Nothing
#' Error Analysis for 1D Electrophoresis Gels
#'
#' This function analyzes fingerprint replicate data. Replicates are repeated measurements of the same sample in one or more gels, which allows the reproducibility of the measurement to be ascertained.
#'
#' @param rep.file is the full name and path of the replicate file
#' @param work_dir is where the datasets should be stored
#' @param dnum is the file number
#' @param sd is the standard deviation needed to calculate all things, Salamon et al. (1998) suggested a value of 4 for sd
#' @param delete logical value indicating if you want to delete any pre-existing acm.par files. Default is FALSE
#' @return the parameter filed called acm.par, needed by call_acm. This file contains information on about the measurement error present in the data.
#' @note \code{call_erra} doesn't work for unequal number of trials. This function needs to be run before call_acm.
#' @references
#' Salamon et al. (1998) Accommodating Error Analysis in Comparison and Clustering of Molecular Fingerprints. Emerging Infectious Diseases Vol. 4, No. 2, April-June 1998
#' @references Abasci LLC. JAMES v1.0 User Documentation. 2002.
#' @author Andrea Benedetti \email{andrea.benedetti@@mcgill.ca}
#' @author Sahir Rai Bhatnagar
#' @author XiaoFei Zhao
#' @examples
#' \dontshow{
#' (WD <- getwd())
#' if (!is.null(WD)) setwd(WD)
#' data(replicates.in)
#' write.table(replicates.in, "replicates.in", quote=FALSE, row.names=FALSE)
#' }
#' #generate the error parameter file needed by call_acm
#' call_erra("replicates.in", dnum=1, sd=1, delete=TRUE)
#' @export
call_erra <- function(rep.file, work_dir=".", dnum=1, sd=4, delete=FALSE)
{
# Only rep.file is the real input
# workdir: incorporated in rep.file
# dnum: always a constant
# sd: recommended to be 4 in the original article
# delete: always delete the old parameter file
call_erra2 <- function()
{
if (!file.exists(work_dir)) {
if (!dir.create(work_dir)) {
print(paste("In directory ", getwd(), " ", sep = "'"))
print(paste(" - directory ", work_dir, " does not exist ", sep = "'"))
print(paste(" - and the program cannot create the directory ", work_dir, " ", sep = "'"))
stop()
}
}
acm.file.exists(rep.file)
rep.file = tools::file_path_as_absolute(rep.file)
setwd(work_dir)
#exe.dir <- system.file(package="acm4r", "executables", "win32")
#file.copy(from = rep.file, to = paste(exe.dir, "/erra.in", sep = ""), overwrite = TRUE)
#setwd(exe.dir)
#acm.par is automatically generated
#cmd <- paste("./erra", "erra.in", "erra.out", sd, sep = " ")
#print (paste("executing command:", cmd, sep = " "))
outputfilename = paste("erra.out", ".", dnum, sep = "")
if (!delete)
{
acm.file.checkdelete(outputfilename)
}
erra(rep.file, outputfilename, sd)
#system(cmd, intern = TRUE, ignore.stdout = FALSE, ignore.stderr = TRUE)
file.copy(from = paste(outputfilename, ".acm", sep=""), to = "acm.par", overwrite = TRUE)
#file.show("acm.par", title = "parameter file generated by acm/erra")
#file.show("erra.out", title = "error analysis file generated by acm/erra")
#acm.file.copy(from = "acm.par", to = paste("acm.par", dnum, sep = "."), overwrite = delete)
#acm.file.copy(from = "erra.out", to = paste("erra.out", dnum, sep = "."), overwrite = delete)
#print(paste("call_erra finished, data is stored at ", exe.dir, ".", sep = "'"))
print("call_erra finished.")
}
old.dir <- getwd()
tryCatch(call_erra2(), finally = setwd(old.dir))
}
# The code below is translated from C code for Align-and-Count-Method by XiaoFei Zhao
fprintf = function(filename = "", format = "", ...) {
write(sprintf(format, ...), file = filename, append = TRUE)
}
VERSION1 <- "erra alpha version 0.2.1 written by Hugh Salamon\n"
VERSION2 <- "Copyright ?1998 The Regents of the University of California.\nAll Rights Reserved.\n"
VERSION3 <- "erra.0.2.1 15-Apr-1998: Error Analysis for 1D Electrophoresis Gels\n"
version = function(fp) {
fprintf(fp, VERSION1);
fprintf(fp, VERSION2);
fprintf(fp, VERSION3);
### R does not have the limitations that C has, so the next three lines are not needed.
# fprintf(fp, "LANES = %d\n", LANES);
# fprintf(fp, "MAXBANDS = %d\n", MAXBANDS);
# fprintf(fp, "IDENTIFIER_LENGTH = %d\n", IDENTIFIER_LENGTH);
return (0)
}
align = function (first, second, numbands) {
pro_err_array = 1:numbands
sum = 0
for (i in 1:numbands) {
if (second[i] <= 0 || first[i] <= 0) {
return (c(-1, pro_err_array))
} else {
sum = sum + log(first[i] / second[i])
}
}
s = exp(sum / numbands)
for (i in 1:numbands) {
new = s * second[i]
pro_err_array[i] = abs(new - first[i]) / ((new + first[i]) / 2)
}
c(s, pro_err_array)
}
erra = function(infilename, outfilename, sd) {
stdin <- stdin()
stdout <- stdout()
stderr <- stderr()
# table of 0.01 significance level cutoff for
# proportional error reduction (s_reduction or d_reduction)
# empirically defined from simulationse
table01 = c(
0
,0
,0.545853
,0.707217
,0.786821
,0.829467
,0.859871
,0.878989
,0.895801
,0.907533
,0.917482
,0.923378
,0.930336
,0.936066
,0.940792
,0.944424
,0.948582
,0.950775
,0.953878
,0.956662
,0.958364
)
# table of 0.05 significance level cutoff for
# proportional error reduction (s_reduction or d_reduction)
# empirically defined from simulations
table05 = c(
0
,0
,0.584046
,0.73282
,0.805951
,0.844601
,0.872386
,0.890113
,0.90525
,0.915821
,0.925033
,0.930465
,0.936637
,0.941961
,0.946195
,0.949619
,0.953305
,0.955293
,0.958113
,0.960667
,0.96228
)
UNINITIALIZED = 0
logfile = "erra.log"
# log file for errors # FILE*
rfile = "erra.rcm"
# recommendation file for MFA software # FILE*
infile = infilename
# fragment data file # FILE*
outfile = outfilename
# output file # FILE*
acmpar = paste(outfile, ".acm", sep="")
# acm parameter output file # FILE*
sitepar = paste(outfile, "_s.ite", sep="")
# interr parameter output file # FILE*
ditepar = paste(outfile, "_d.ite", sep="")
# interr parameter output file # FILE*
write("", file = logfile, append = FALSE)
write("", file = rfile, append = FALSE)
write("", file = outfile, append = FALSE)
write("", file = acmpar, append = FALSE)
write("", file = sitepar, append = FALSE)
write("", file = ditepar, append = FALSE)
acmpar_name = UNINITIALIZED
# char
itepar_s_name = UNINITIALIZED
# char
itepar_d_name = UNINITIALIZED
# char[256]
infile_name = UNINITIALIZED
# char[256]
i = UNINITIALIZED
# int
j = UNINITIALIZED
# int
k = UNINITIALIZED
# int
use_interr = UNINITIALIZED
# int
num = UNINITIALIZED
# int
testnum = UNINITIALIZED
# int
indicator = UNINITIALIZED
# int
scanreturn = UNINITIALIZED
# int
freturn = UNINITIALIZED
# int
newbandnum = 0
# int
numbands = UNINITIALIZED
# int[LANES]
lane = UNINITIALIZED
# char[LANES][IDENTIFIER_LENGTH]
gel = UNINITIALIZED
# char[LANES][IDENTIFIER_LENGTH]
bands = UNINITIALIZED
# double[LANES][MAXBANDS + 1]
laneident = UNINITIALIZED
# char[IDENTIFIER_LENGTH]
line = UNINITIALIZED
# char[200]
## storage for reading in data
scan_uniqueid = UNINITIALIZED
# char[IDENTIFIER_LENGTH]
scan_gelid = UNINITIALIZED
# char[IDENTIFIER_LENGTH]
scan_length = UNINITIALIZED
# double
## sd is the number of standard deviations used to control sensitivity
## in acm and interr parmeters output by this program
# sd = UNINITIALIZED
# double
s_count_sums = 0
# int
d_count_sums = 0
# int
array1 = UNINITIALIZED
# double[MAXBANDS]
array2 = UNINITIALIZED
# double[MAXBANDS]
pro_error_array = UNINITIALIZED
# double[MAXBANDS]
s_sums = UNINITIALIZED
# double[MAXBANDS]
d_sums = UNINITIALIZED
# double[MAXBANDS]
s_mean = UNINITIALIZED
# double[MAXBANDS]
d_mean = UNINITIALIZED
# double[MAXBANDS]
s_v_sum = UNINITIALIZED
# double[MAXBANDS]
d_v_sum = UNINITIALIZED
# double[MAXBANDS]
s_v = UNINITIALIZED
# double[MAXBANDS]
d_v = UNINITIALIZED
# double[MAXBANDS]
s_within_v = UNINITIALIZED
# double
d_within_v = UNINITIALIZED
# double
s_between_v = 0
# double
d_between_v = 0
# double
s_corrected_sums = UNINITIALIZED
# double
d_corrected_sums = UNINITIALIZED
# double
s_df_1 = UNINITIALIZED
# double
d_df_1 = UNINITIALIZED
# double
s_df_2 = UNINITIALIZED
# int
d_df_2 = UNINITIALIZED
# int
s_overall_mean = UNINITIALIZED
# double
d_overall_mean = UNINITIALIZED
# double
s_aligned_error_sum = 0
# double
d_aligned_error_sum = 0
# double
s_aligned_error_sum_sq = 0
# double
d_aligned_error_sum_sq = 0
# double
s_overall_aligned_v = 0
# double
d_overall_aligned_v = 0
# double
sa_count = 0
# int
da_count = 0
# int
mean_frag = UNINITIALIZED
# double[MAXBANDS]
s_F = UNINITIALIZED
# double
d_F = UNINITIALIZED
# double
s_ssd = UNINITIALIZED
# double
d_ssd = UNINITIALIZED
# double
prob = UNINITIALIZED
# double
dummy = UNINITIALIZED
# double
scale_factor = UNINITIALIZED
# double
s_scale_factor = 0
# double
d_scale_factor = 0
# double
s_scale_factor_sq = 0
# double
d_scale_factor_sq = 0
# double
s_scale_factor_v = 0
# double
d_scale_factor_v = 0
# double
s_aligned_error = UNINITIALIZED
# double[MAXBANDS]
d_aligned_error = UNINITIALIZED
# double{MAXBANDS]
s_overall_aligned = 0
# double
d_overall_aligned = 0
# double
s_reduction = UNINITIALIZED
# double
d_reduction = UNINITIALIZED
# double
s_red_flag = UNINITIALIZED
# int
d_red_flag = UNINITIALIZED
# int
entry = UNINITIALIZED
# int
s_repeat = 0
# int
d_repeat = 0
# int
s_flag = UNINITIALIZED
# int[LANES]
d_flag = UNINITIALIZED
# int[LANES]
p_flag = 0
# int
s_scale_count = 0
# int
d_scale_count = 0
# int
maxfrag = 0
# double
diff_slide = UNINITIALIZED
# double
diff_deviation = UNINITIALIZED
# double
diff_increments = UNINITIALIZED
# int
diff_hmw = UNINITIALIZED
# double
diff_hmw_rate = UNINITIALIZED
# double
same_slide = UNINITIALIZED
# double
same_deviation = UNINITIALIZED
# double
same_increments = UNINITIALIZED
# int
same_hmw = UNINITIALIZED
# double
same_hmw_rate = UNINITIALIZED
# double
version(logfile);
fprintf(logfile, "infile: \"%s\"\n", infile);
fprintf(logfile, "outfile: \"%s\"\n", outfile);
fprintf(logfile, "begin data read: any data which did not scan?\n");
########## START ERROR HANDLIGN ##########
########## END ERROR HANDLIGN ##########
repTable = read.table(infile, skip = 1, sep = "", fill = TRUE, header = FALSE, col.names = 1:5)
#if (nrow(repTable) < 3) {
# print(paste("Error, the file ", infile, " is not a good replicate file."))
# stop(-1)
#}
laneCount = 0
fragCount = 0
laneName = ""
testnum = -1;
numbands = 0;
for (i in 1:nrow(repTable)) {
if (repTable[i, 1] != "" & repTable[i, 2] != "" & is.numeric(repTable[i, 3])) {
fragCount = fragCount + 1
}
if (laneName != repTable[i, 1]) {
laneCount = laneCount + 1
laneName = repTable[i, 1]
if (laneCount == 2) {
testnum = numbands
}
if (laneCount > 1 & testnum != numbands) {
fprintf(logfile, "Error: all lanes must have the same number of bands!\n");
fprintf(logfile, "Lanes 1 has %d bands while lane %d has %d bands.\n", testnum, laneCount-1, numbands);
fprintf(stdout, "Error: all lanes must have the same number of bands!\n");
fprintf(stdout, "Lanes 1 has %d bands while lane %d has %d bands.\n", testnum, laneCount-1, numbands);
fprintf(logfile, "EXITING DUE TO ERROR\n");
fprintf(stderr, "EXITING DUE TO ERROR (see erra.log)\n");
stop(8);
}
numbands = 0
}
numbands = numbands + 1
}
if (laneCount < 2) {
fprintf(logfile, "There were fewer than two lanes found.\n");
fprintf(logfile, "EXITING DUE TO ERROR\n");
fprintf(stderr, "EXITING DUE TO ERROR (see erra.log)\n");
stop(7);
}
workTable = matrix(0, ncol=laneCount, nrow=testnum)
k = 1
for (j in 1:ncol(workTable)) {
gel[j] = as.character(repTable[k,2])
for (i in 1:nrow(workTable)) {
if (repTable[k,1] != "" & repTable[k,2] != "" & is.numeric(repTable[i, 3])) {
workTable[i,j] = repTable[k,3]
k = k + 1
} else {
fprintf(logfile, "%s", as.character(repTable[i,]))
}
}
}
fprintf(logfile, "end data read: %d lanes read\n", num);
fprintf(logfile, " erra error analysis starting...\n");
fprintf(stderr,VERSION1);
fprintf(stderr,VERSION2);
fprintf(stderr,VERSION3);
fprintf(stderr,"\n erra error analysis starting, please wait...");
fprintf(logfile," erra error analysis: calculating unaligned proportional errors ");
fprintf(stderr," erra error analysis: calculating unaligned proportional errors ");
# <aligned_error description="initialization of aligned_error">
for (i in 1:nrow(workTable)) {
s_aligned_error[i] = 0
d_aligned_error[i] = 0
}
# </aligned_error>
for (i in 1:ncol(workTable)) {
s_flag[i] = 0
d_flag[i] = 0
}
for (i in 1:nrow(workTable)) {
mean_frag[i] = mean(workTable[i,])
}
s_ssd = 0
d_ssd = 0
for (i in 1:nrow(workTable)) {
s_count_sums = 0
d_count_sums = 0
s_sums[i] = 0
d_sums[i] = 0
# calculate unaligned proportional error sums
for (j in 1:ncol(workTable)) {
for (k in (j + 1):ncol(workTable)) {
if (j + 1 > ncol(workTable)) {
next
}
if (identical(gel[j], gel[k])) {
# print(paste("Those gels are identical: ", gel[j], ", ", gel[k]))
if (i == 1) {
if (s_flag[j] == 1) {
s_repeat = s_repeat + 1
} else {
s_flag[j] = 1
}
if (identical(s_flag[k], 1)) {
s_repeat = s_repeat + 1
} else {
s_flag[k] = 1
}
}
s_sums[i] = s_sums[i] + abs(workTable[i,j] - workTable[i,k]) / ((workTable[i,j] + workTable[i,k]) / 2.0)
s_count_sums = s_count_sums + 1
} else {
if (i == 1) {
if (d_flag[j] == 1) {
d_repeat = d_repeat + 1
} else {
d_flag[j] = 1
}
if (d_flag[k] == 1) {
d_repeat = d_repeat + 1
} else {
d_flag[k] = 1
}
}
d_sums[i] = d_sums[i] + abs(workTable[i,j] - workTable[i,k]) / ((workTable[i,j] + workTable[i,k]) / 2.0)
d_count_sums = d_count_sums + 1
}
}
}
# calculate unaligned proportional error means
s_mean[i] = s_sums[i] / s_count_sums
d_mean[i] = d_sums[i] / d_count_sums
s_v_sum[i] = 0
d_v_sum[i] = 0
# calculate unaligned proportional error standard deviation
for (j in 1:ncol(workTable)) {
for (k in (j+1):ncol(workTable)) {
if (k > ncol(workTable)) {
next
}
if (identical(gel[j], gel[k])) {
dummy = (abs(workTable[i,j] - workTable[i,k]) / ((workTable[i,j] + workTable[i,k]) / 2)) - s_mean[i]
s_v_sum[i] = s_v_sum[i] + dummy * dummy
} else {
dummy = (abs(workTable[i,j] - workTable[i,k]) / ((workTable[i,j] + workTable[i,k]) / 2)) - d_mean[i]
d_v_sum[i] = d_v_sum[i] + dummy * dummy
}
}
}
maxfrag = max(workTable)
s_ssd = s_ssd + s_v_sum[i]
d_ssd = d_ssd + d_v_sum[i]
s_v[i] = s_v_sum[i] / (s_count_sums - 1)
d_v[i] = d_v_sum[i] / (d_count_sums - 1)
}
s_within_v = s_ssd / (nrow(workTable) * (s_count_sums - 1))
d_within_v = d_ssd / (nrow(workTable) * (d_count_sums - 1))
s_overall_mean = mean(s_mean)
d_overall_mean = mean(d_mean)
s_between_v = sum((s_mean - s_overall_mean)^2)
d_between_v = sum((d_mean - d_overall_mean)^2)
# s_between_v = s_between_v * (double)s_count_sums / ((double)testnum - 1)
# d_between_v = d_between_v * (double)d_count_sums / ((double)testnum - 1)
# subtract 0.5 degrees of freedom for every repeated use of lane in a
# comparison used to calculate between-group (band) variances
s_corrected_sums = s_count_sums - s_repeat / 2
d_corrected_sums = d_count_sums - d_repeat / 2
s_between_v = s_between_v * s_corrected_sums / (nrow(workTable) - 1)
d_between_v = d_between_v * d_corrected_sums / (nrow(workTable) - 1)
s_F = s_between_v / s_within_v
d_F = d_between_v / d_within_v
s_df_2 = nrow(workTable) - 1
d_df_2 = nrow(workTable) - 1
# /*s_df_1 = testnum * (s_count_sums - 1)
# d_df_1 = testnum * (d_count_sums - 1)*/
s_df_1 = nrow(workTable) * (s_corrected_sums - 1)
d_df_1 = nrow(workTable) * (s_corrected_sums - 1)
### <hypothesisTesting>
version(outfile);
fprintf(outfile, "\n band mean fragment size\n");
for(i in 1:nrow(workTable)) {
fprintf(outfile, " %2d %12.3f\n", i, mean_frag[i]);
}
fprintf(outfile, "\nUnaligned Proportional Error Analysis\n\n");
fprintf(outfile, " %d pairwise same gel lane comparisons\n", s_count_sums);
fprintf(outfile, " %d repeated lane uses in same gel comparisons\n", s_repeat);
fprintf(outfile, " %d pairwise different gel lane comparisons\n", d_count_sums);
fprintf(outfile, " %d repeated lane uses in different gel comparisons\n\n", d_repeat);
fprintf(outfile, "\n Proportional error:\n");
fprintf(outfile, " band same gel different gel\n");
for (i in 1:nrow(workTable)) {
fprintf(outfile, " sample means (variance) %2d %10.8f (%10.8f) %10.8f (%10.8f)\n",
i, s_mean[i], s_v[i], d_mean[i], d_v[i]);
}
fprintf(outfile, " overall means (variance) %10.8f (%10.8f) %10.8f (%10.8f)\n\n",
s_overall_mean, s_within_v, d_overall_mean, d_within_v);
if(s_count_sums > 1)
{
fprintf(outfile, " Test Hypothesis H_o: Error is proportional to fragment size.\n\n");
fprintf(outfile, " one-way ANOVA on proportional errors (with df corrections for non-orthogonal comparisons)\n");
fprintf(outfile, " same gel variances: between = %10.8f within = %10.8f\n", s_between_v, s_within_v);
fprintf(outfile, " same gel analysis of variance: F = %f, df1 = %f, df2 = %d\n", s_F, s_df_1, s_df_2);
prob = pbeta(s_df_2 / (s_df_2 + s_df_1 * s_F), 0.5 * s_df_2, 0.5 * s_df_1);
fprintf(outfile, " probability = %f\n", prob);
if (prob < 0.05) {
fprintf(outfile, " H_o rejected, error is not proportional to fragment size\n\n");
p_flag = 1;
} else {
fprintf(outfile, " H_o not rejected.\n\n");
}
}
if(d_count_sums > 1) {
fprintf(outfile, " Test Hypothesis H_o: Error is proportional to fragment size.\n\n");
fprintf(outfile, " one-way ANOVA on proportional errors (with df corrections for non-orthogonal comparisons)\n");
fprintf(outfile, " different gel variances: between = %10.8f within = %10.8f\n", d_between_v, d_within_v);
fprintf(outfile, " different gel analysis of variance: F = %f, df1 = %f, df2 = %d\n", d_F, d_df_1, d_df_2);
prob = pbeta(d_df_2 / (d_df_2 + d_df_1 * d_F), 0.5 * d_df_2, 0.5 * d_df_1);
fprintf(outfile, " probability = %f\n", prob);
if(prob < 0.05) {
fprintf(outfile, " H_o rejected, error is not proportional to fragment size\n\n");
p_flag = 1;
} else {
fprintf(outfile, " H_o not rejected.\n\n");
}
}
### </hypothesisTesting>
# append to logfile
write(" erra error analysis: calculating aligned proportional errors ", stderr)
for (j in 1:(ncol(workTable)-1)) {
for (k in (j+1):ncol(workTable)) {
for (i in 1:nrow(workTable)) {
array1[i] = workTable[i,j]
array2[i] = workTable[i,k]
}
freturn = align(array1, array2, nrow(workTable))
scale_factor = freturn[1]
pro_error_array = freturn[2:17]
if (scale_factor < 0) {
fprintf(logfile, "function align error: possibly because alignment can be performed")
fprintf(logfile, " only with non-zero fragment sizes")
fprintf(logfile, "EXITING DUE TO ERROR")
fprintf(stderr, "EXITING DUE TO ERROR (see erra.log)")
stop(7)
}
if (scale_factor < 1) {
scale_factor = 1 / scale_factor
}
if (identical(gel[j], gel[k])) {
s_scale_count = s_scale_count + 1
s_scale_factor = s_scale_factor + scale_factor
s_scale_factor_sq = s_scale_factor_sq + scale_factor * scale_factor
for (i in 1:nrow(workTable)) {
s_aligned_error[i] = s_aligned_error[i] + pro_error_array[i]
s_aligned_error_sum = s_aligned_error_sum + pro_error_array[i]
s_aligned_error_sum_sq = s_aligned_error_sum_sq + pro_error_array[i] * pro_error_array[i]
sa_count = sa_count + 1
}
} else {
d_scale_count = d_scale_count + 1
d_scale_factor = d_scale_factor + scale_factor
d_scale_factor_sq = d_scale_factor_sq + scale_factor * scale_factor
for (i in 1:nrow(workTable)) {
d_aligned_error[i] = d_aligned_error[i] + pro_error_array[i]
d_aligned_error_sum = d_aligned_error_sum + pro_error_array[i]
d_aligned_error_sum_sq = d_aligned_error_sum_sq + pro_error_array[i] * pro_error_array[i]
da_count = da_count + 1
}
}
}
}
# s_scale_factor has sum of values, s_scale_factor_sq has sum of squared values
# for s_scale_count values -- analagous situation for d_scale_factor, etc.
# /* calculate variance in scale_factors */
if (s_scale_count > 1) {
s_scale_factor_v = (s_scale_factor_sq - (s_scale_factor / s_scale_count) * s_scale_factor) /
(s_scale_count - 1)
}
if (d_scale_count > 1) {
d_scale_factor_v = (d_scale_factor_sq - (d_scale_factor / d_scale_count) * d_scale_factor) /
(d_scale_count - 1)
}
# /* calcuate mean scale factors */
s_scale_factor = s_scale_factor / s_scale_count
d_scale_factor = d_scale_factor / d_scale_count
# /* calculate the aligned error and variance in error for all band comparisons */
s_overall_aligned_v = (s_aligned_error_sum_sq - (s_aligned_error_sum / sa_count) * s_aligned_error_sum) /
(sa_count - 1)
d_overall_aligned_v = (d_aligned_error_sum_sq - (d_aligned_error_sum / da_count) * d_aligned_error_sum) /
(da_count - 1)
s_overall_aligned = s_aligned_error_sum / sa_count
d_overall_aligned = d_aligned_error_sum / da_count
fprintf(outfile, "Mean Same Gel Scaling Factor = %6.4f (%6.4f%%)\n", s_scale_factor, 100 * (s_scale_factor - 1.0));
fprintf(outfile, " variance = %10.8f\n\n", s_scale_factor_v);
fprintf(outfile, "Mean Different Gel Scaling Factor = %6.4f (%6.4f%%)\n", d_scale_factor, 100 * (d_scale_factor - 1.0));
fprintf(outfile, " variance = %10.8f\n\n", d_scale_factor_v);
fprintf(outfile, "Aligned Proportional Error Analysis\n\n");
fprintf(outfile, " band same gel different gel\n");
for (i in 1:nrow(workTable)) {
#fprintf(outfile, paste("s_aligned_scale_count: ", s_aligned_error[i]))
s_aligned_error[i] = s_aligned_error[i] / s_scale_count
d_aligned_error[i] = d_aligned_error[i] / d_scale_count
fprintf(outfile, " aligned sample means %2d %10.8f %10.8f\n", i, s_aligned_error[i], d_aligned_error[i]);
}
fprintf(outfile, " overall means %10.8f %10.8f\n", s_overall_aligned, d_overall_aligned);
s_reduction = s_overall_aligned / s_overall_mean
d_reduction = d_overall_aligned / d_overall_mean
if(ncol(workTable) > 20) {
entry = 20
} else {
entry = ncol(workTable)
}
fprintf(outfile, "\n same gel reduction in error due to alignment %5.1f%%\n", 100 * (1.0 - s_reduction));
if (s_reduction < table01[entry]) {
fprintf(outfile," which represents a significant reduction in proportional error (p <= 0.01)\n");
s_red_flag = 0
} else if (s_reduction < table05[entry]) {
fprintf(outfile," which represents a significant reduction in proportional error (p <= 0.05)\n");
s_red_flag = 1
} else {
fprintf(outfile," which is not a clearly significant reduction in proportional error\n");
s_red_flag = 2
if (ncol(workTable) < 9) {
fprintf(outfile, "\n The number of bands in the replicate strains is not large enough to reject that this\n");
fprintf(outfile, " reduction in proportional error would result even in the absence of scaling error.\n");
fprintf(outfile, " Increasing the number of fragments in replicate genotypes is recommended.\n\n");
}
}
fprintf(outfile, "\n different gel reduction in error due to alignment %5.1f%%\n", 100 * (1 - d_reduction));
if (d_reduction < table01[entry]) {
fprintf(outfile," which represents a significant reduction in proportional error (p <= 0.01)\n");
d_red_flag = 0;
} else if (d_reduction < table05[entry]) {
fprintf(outfile," which represents a significant reduction in proportional error (p <= 0.05)\n");
d_red_flag = 1;
} else {
fprintf(outfile, " which is not a clearly significant reduction in proportional error")
d_red_flag = 2
if (nrow(workTable) < 9) {
fprintf(outfile, "\n The number of bands in the replicate strains is not large enough to reject that this\n");
fprintf(outfile, " reduction in proportional error would result even in the absence of scaling error.\n");
fprintf(outfile, " Increasing the number of fragments in replicate genotypes is recommended.\n\n");
}
}
fprintf(outfile,"\n");
if(s_count_sums < 45 && d_count_sums < 45) {
fprintf(outfile, "The small number of comparisons may make the following recommendations unreliable.\n");
fprintf(outfile, "Increasing the number of replicate genotypes is recommended.\n\n");
}
if (p_flag == 1) {
# /* significant evidence that error is not proportional to fragment sizes was found */
fprintf(outfile, " ---------------------------------------------------------------------------\n");
fprintf(outfile, "| The Align-and-Count Method is not recommended for comparison of this data.|\n");
fprintf(outfile, "| Use Interpolated Error Method instead. |\n");
fprintf(outfile, " ---------------------------------------------------------------------------\n");
use_interr = 1;
} else if ((s_red_flag == 2 && d_red_flag == 2)) {
# /* no significant evidence for reduction of error with alignment was found */
if ((s_scale_factor - 1) / s_overall_mean < 0.5 && (d_scale_factor - 1) / d_overall_mean < 0.5) {
# /* and the scaling factors found were less than half the pre-alignment error levels */
fprintf(outfile, " ---------------------------------------------------------------------------------\n");
fprintf(outfile, "| The Align-and-Count Method is unlikely to be useful for comparison of this data.|\n");
fprintf(outfile, "| Use Interpolated Error Method instead. |\n");
fprintf(outfile, " ---------------------------------------------------------------------------------\n");
use_interr = 1;
} else {
# /* but at least one of the scaling factors was found to be at least half as large as the error levels */
fprintf(outfile, " ----------------------------------------------------------------------\n");
fprintf(outfile, "| The Align-and-Count Method may be useful for comparison of this data.|\n");
fprintf(outfile, " ----------------------------------------------------------------------\n");
use_interr = 1;
}
} else {
# /* proportional error and */
# /* significant reduction in error with alignment means the Align-and-Count Method should be useful */
fprintf(outfile, " -------------------------------------------------------------------------------\n");
fprintf(outfile, "| The Align-and-Count Method is likely to be useful for comparison of this data.|\n");
fprintf(outfile, " -------------------------------------------------------------------------------\n");
use_interr = 0;
}
fprintf(outfile,"\n");
fprintf(logfile, " erra error analysis: errors calculated, writing files\n\n");
fprintf(stderr, " erra error analysis: errors calculated, writing files\n\n");
if (use_interr == 1) {
fprintf(rfile, "Interpolated Error\ninterr\n");
} else {
fprintf(rfile, "ACM\nacm\n");
}
# /* calculate parameters for the acm parameter file */
diff_slide = d_scale_factor - 1.0 + sd * sqrt(d_scale_factor_v)
diff_deviation = d_overall_aligned + sd * sqrt(d_overall_aligned_v)
diff_increments = 2 * floor((d_scale_factor - 1) * 25 / sqrt(d_overall_aligned_v)) + 1
diff_hmw = maxfrag * 3
diff_hmw_rate = 0
same_slide = s_scale_factor - 1 + sd * sqrt(s_scale_factor_v)
same_deviation = s_overall_aligned + sd * sqrt(s_overall_aligned_v)
same_increments = 2 * floor((s_scale_factor - 1) * 25 / sqrt(s_overall_aligned_v)) + 1
same_hmw = maxfrag * 3
same_hmw_rate = 0
fprintf(acmpar, "This Align-and-Count Method (ACM) parameter file was automatically generated by\n");
fprintf(acmpar, VERSION3);
fprintf(acmpar, "The file %s was analyzed\n", infile_name);
fprintf(acmpar, "Comments can go anywhere as long as the parameter label and value are\n");
fprintf(acmpar, "the first two tokens, respectively, found in a line using sscanf in C.\n");
fprintf(acmpar, "The order of parameter declarations is not restricted. If an expected\n");
fprintf(acmpar, "parameter is not defined, acm will exit, and errors will be recorded\n");
fprintf(acmpar, "in acm.log.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "DIFF_SLIDE %10.8f This is the maximum scaling factor\n", diff_slide);
fprintf(acmpar, "for relative scaling of lanes from different gels or images.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "DIFF_DEVIATION %10.8f This is the cutoff proportional deviation\n", diff_deviation);
fprintf(acmpar, "factor for mutually closest bands to be considered a match.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "DIFF_INCREMENTS %10d This is the number of increments used in\n", diff_increments);
fprintf(acmpar, "searching through the range given by DIFF_SLIDE. An odd number\n");
fprintf(acmpar, "is recommended.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "DIFF_HMW %10.8f This is the value above which a larger\n", diff_hmw);
fprintf(acmpar, "deviation tolerance may be allowed. (Three times the largest fragment analyzed\n");
fprintf(acmpar, "is used in the automated generation of this file.)\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "DIFF_HMW_RATE %10.8f The per unit increase in DIFF_DEVIATION\n", diff_hmw_rate);
fprintf(acmpar, "applied for fragments larger than DIFF_HMW. (The default is zero for this automatically\n");
fprintf(acmpar, "generated file.)\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "SAME_SLIDE %10.8f This is the maximum scaling factor\n", same_slide);
fprintf(acmpar, "for relative scaling of lanes from samegels or images.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "SAME_DEVIATION %10.8f This is the cutoff proportional deviation\n", same_deviation);
fprintf(acmpar, "factor for mutually closest bands to be considered a match.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "SAME_INCREMENTS %10d This is the number of increments used in\n", same_increments);
fprintf(acmpar, "searching through the range given by SAME_SLIDE. An odd number\n");
fprintf(acmpar, "is recommended.\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "SAME_HMW %10.8f This is the value above which a larger\n", same_hmw);
fprintf(acmpar, "deviation tolerance may be allowed. (Three times the largest fragment analyzed\n");
fprintf(acmpar, "is used in the automated generation of this file.)\n");
fprintf(acmpar, "\n");
fprintf(acmpar, "SAME_HMW_RATE %10.8f The per unit increase in SAME_DEVIATION\n", same_hmw_rate);
fprintf(acmpar, "applied for fragments larger than SAME_HMW. (The default is zero for this automatically\n");
fprintf(acmpar, "generated file.)\n");
# /* write parameter files for interr (Interpolated Error Method) */
for (i in 1:nrow(workTable)) {
fprintf(sitepar,"%12.8f %10.8f\n",mean_frag[i], (s_mean[i] + sd * sqrt(s_v[i])));
fprintf(ditepar,"%12.8f %10.8f\n",mean_frag[i], (d_mean[i] + sd * sqrt(d_v[i])));
}
}
# erra("replicates.in", "test.out", sd = 1)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.