Spot_check_data<- function(Well_key, FC_data){
#Asks if user wants to remove samples with standard errors higher than a defined threshold
remove_samp<- as.logical(readline(prompt="Eliminate poor samples? (TRUE/FALSE): "))
if (remove_samp){
print("run adjust_well_key() to update well key for deleted samples")
cutoff<-as.numeric(readline(prompt="Insert cutoff threshold: "))
}
#Asks user if they want to remove natural log values outside a defined range
define_limits<- as.logical(readline(prompt="Remove extreneous natural log values? (TRUE/FALSE): "))
if (define_limits){
prompt <- "enter upper bound, followed by lower bound (+y,-y): "
y_limits<-as.integer(strsplit(readline(prompt), ",")[[1]])
upper_bound<- y_limits[1]
lower_bound<- y_limits[2]
}
#Katie added some code to remove mean and SD rows from flowjo data and to ensure that the well key matches the FC data
if ("Mean" %in% FC_data[,1]){
FC_data<-FC_data[-(which(FC_data[,1]=="Mean")), ]
}
if ("SD" %in% FC_data[,1]){
FC_data<-FC_data[-(which(FC_data[,1]=="SD")), ]
}
if (!identical(Well_key[,1], FC_data[,1])){
print("ERROR - first columns of well key and FC data must match")
}
FC_data$"Std.Error" <- NA #Creates a new column
c <- ncol(FC_data)
time_points<- (c(0:((ncol(FC_data)-4)/2)))*10 #finds the number of time points in dataset
#Goes through dataframe and calculates Std. Error for each sample
list_for_plots<- list()
for (i in 1:nrow(FC_data)) {
count<- FC_data[i, seq(2,ncol(FC_data)-1,2)] #-1 so we don't count Std. Error col
if (Well_key[i,4]==Well_key[i,2]){
#if data is gated on experimental strain
diff<- FC_data[i, seq(3,ncol(FC_data)-1,2)] #if data gated on experimental then diff will simply be the exp counts
refcount<- count-diff
} else { #assumes data gated on reference
refcount<- FC_data[i, seq(3,ncol(FC_data)-1,2)]
diff<- count-refcount
}
natlog<- log(diff/refcount)
#finds and removes infinate values (values where either reference or exp count are equal to 0)
natlog[which(is.infinite(as.matrix(natlog)))]<-NA
natlog<-as.numeric(natlog[1,])
if(define_limits){
natlog[natlog>upper_bound | natlog<lower_bound] <- NA; #natlog_for_plots ((not sure why this is here?))
}
#storing natlog values in a vector for later use in plotting
list_for_plots<- append(list_for_plots, natlog) # storing the natlog data for later
regress<- lm(na.exclude(natlog ~ time_points))
#slope<- as.numeric(na.exclude(coef(regress)[2]))
stderror<- as.numeric(coef(summary(regress))[2,2])
FC_data[i,c] <- stderror
}
# we don't appear to be doing anything with slope. why are we calculating?
## this section plots raw data and stores it to a pdf file in the directory the script is run in
min.y<- min(na.omit(as.numeric(list_for_plots)))
max.y<- max(na.omit(as.numeric(list_for_plots)))
if (abs(min.y)>abs(max.y)){
max.y<- abs(min.y)
} else {
min.y<- -(max.y)
}
samples<- seq(from=1, to=length(list_for_plots), by = length(time_points)) #a list to interate by
pdf("raw_data_plots.pdf", height = (nrow(FC_data)), width = 16)
par(mfrow=c(nrow(FC_data)/4, 4)) #control the margins of the plots
par(mar=c(4,4,4,4))
for (i in 1:length(samples)) {
start<- samples[i]
end<- samples[i]+(length(time_points)-1)
plot(time_points, list_for_plots[start:end], pch= 16, cex= 2, type = "o",
col= sample(1:600,1,replace = T), ylim = c(min.y, max.y), xlab=NA, ylab=NA)#generates plots
legend("topleft", legend=paste("std err =", signif(FC_data[i, ncol(FC_data)], 4)))
title(main = FC_data[i,1], xlab = "Generation", ylab = "ln(exp/ref)")
}
dev.off()
pdf("std_error_histogram.pdf")
hist(FC_data[,ncol(FC_data)], xlab="standard error", ylab="# samples", main="Histogram of standard errors", col="royalblue")
dev.off()
# this section asks a user for a standard error cuttoff value
# and removes samples that don't meet the cuttoff
good_samples.frame<- FC_data #copies FC dataframe to new frame
for (i in 1:ncol(good_samples.frame)){
good_samples.frame[,i]=NA
}
good_samples.frame<-na.omit(good_samples.frame)
if (remove_samp){
for (i in 1:nrow(FC_data)){
if(FC_data[i,c] < cutoff){
good_samples.frame <- rbind(good_samples.frame, FC_data[i, ])
}
}
FC_data<- good_samples.frame
}
if(define_limits){
print("Standard error estimates based on regressions with user specified outliers removed!")
}
return(FC_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.