Analyze_tails_tailseq4_new.R

#######################################################################################
###                                                                                 ###
###     Copyright (C) 2017  Pawel Krawczyk (p.krawczyk@ibb.waw.pl)                  ###
###                                                                                 ###
###     This program is free software: you can redistribute it and/or modify        ###
###     it under the terms of the GNU General Public License as published by        ###
###     the Free Software Foundation, either version 3 of the License, or           ###
###     (at your option) any later version.                                         ###
###                                                                                 ###
###     This program is distributed in the hope that it will be useful,             ###
###     but WITHOUT ANY WARRANTY; without even the implied warranty of              ###
###     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               ###
###     GNU General Public License for more details.                                ###
###                                                                                 ###
###     You should have received a copy of the GNU General Public License           ###
###     along with this program. If not, see <http://www.gnu.org/licenses/>.        ###
###                                                                                 ###
#######################################################################################



# load libraries
library(data.table)
library(ggplot2)
library(plyr)
library(dplyr)
library(ggseqlogo)
library(RColorBrewer)

#melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_3/sabre_2mism/processing_out_sabre/tailseq4_2.tsv"
#melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_2_no_spk_128/sabre_2_mism/processing_out_sabre/tailseq4_ovr.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_3/sabre_2mism/processing_out_sabre/tailseq4_5_all_with_rmask.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_2_no_spk_128/sabre_2_mism/processing_out_sabre/tailseq4_6.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_2_no_spk_128/sabre_2_mism/processing_out_sabre/test_new_script.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_2_no_spk_128/sabre_2_mism/processing_out_sabre/all_old_script.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/SSDNA_2_no_spk_128/sabre_2_mism/processing_out_sabre/all_new_script_urid.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_4/all/processing_out/all_flowcells_analysis_new_script_new_fixed_repliates_fixed_shortR5.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_5/SSDNA_3/sabre_2_mism/processing_out_sabre/initial_test_switched_reporters.tsv"
melt_data_localization <- "/home/smaegol/storage/analyses/tail_seq_5/ALL/processing_out/all_samples_2_3_4_5_anal.tsv"
# read tailing information to data.frame using data.table
tails_data_melt <- fread(melt_data_localization, sep = "\t", header = T, stringsAsFactors = T,
                         data.table = F, showProgress = TRUE)  # read data





#tails_data_melt[tails_data_melt$localization=='JADRO',]$localization<-"TOTAL"
tails_data_melt$tailed <- tails_data_melt$tail_length > 0  #mark tailed reads
tails_data_melt$mapped <- tails_data_melt$mapping_position != -1  #mark mapped reads
tails_data_mapped <- tails_data_melt[tails_data_melt$mapped != 0, ]  #discard unmapped reads

# fix of flase_no_tail - should be assigned no tail cause CTGAC was found in the
# clipped fragment - to be fixed in python script
#tails_data_mapped$tail_type <- as.character(tails_data_mapped$tail_type)
#tails_data_mapped[tails_data_mapped$tail_type == "false_no_tail_no_CTGAC", ]$tail_type <- "no_tail"

tails_data_mapped$ref_name_R5 <- as.character(tails_data_mapped$ref_name_R5)
tails_data_mapped$ref_name_R3 <- as.character(tails_data_mapped$ref_name_R3)
# tails_data_mapped_same_ref<-tails_data_mapped[tails_data_mapped$ref_name_R5==tails_data_mapped$ref_name_R3,]

# mark uridylated reads
tails_data_mapped$uridylated2 <- FALSE
tails_data_mapped[tails_data_mapped$Utail_length > 0, ]$uridylated2 = TRUE

#convert T to U in terminal nucleotides (for seqlogo)
tails_data_mapped$terminal_nucleotides<-as.character(tails_data_mapped$terminal_nucleotides)
tails_data_mapped$terminal_nucleotides<-gsub("T","U",tails_data_mapped$terminal_nucleotides)

summary(as.factor(tails_data_mapped$CTGAC_R5))

colnames(tails_data_mapped)
head(tails_data_mapped[,c(1,2,3,12,24,25,26,27,33,34,37,38)])

# in further analyses use only those read which got CTGAC delimiter identified in
# the clipped fragment
# for reporter analyses all mapped reads got CTGAC_R5 variable = 1 (because of short reads) so they will be included in the analysis
tails_data_mapped_true <- tails_data_mapped[tails_data_mapped$CTGAC_R5 > 0, ]
tails_data_mapped_true$ref_name = tails_data_mapped_true$ref_name_R5  #use ref_name_R5 as ref_name

head(tails_data_mapped_true[,c(1,2,3,12,24,25,26,27,33,34,37,38)])

head(tails_data_mapped[tails_data_mapped$tail_source=='tailseq_clip_clip_R3_shorter_than_tailseq',c(1,2,26,27,40)],30)

tails_data_mapped_true$tail_type = as.character(tails_data_mapped_true$tail_type)

head(tails_data_mapped_true[grep("A_mix",tails_data_mapped_true$tail_type_mixed),])

#treat all mixed (heterogenous) as true tails
#tails_data_mapped_true[grep("AU_mixed",tails_data_mapped_true$tail_type_mixed),]$tail_type = 'AU'
#tails_data_mapped_true[grep("^A_mixed",tails_data_mapped_true$tail_type_mixed),]$tail_type = 'A_only'
#tails_data_mapped_true[grep("^U_mixed",tails_data_mapped_true$tail_type_mixed),]$tail_type = 'U_only'
tails_data_mapped_true[tails_data_mapped_true$tail_type=='AU',]$uridylated2 = TRUE
tails_data_mapped_true[tails_data_mapped_true$tail_type=='U_only',]$uridylated2 = TRUE
#if clipping was shorter than tailseq - treat as other, as it is probably stretch of As in the reference seq
#tails_data_mapped_true[tails_data_mapped_true$tail_source=='tailseq_clip_clip_R3_shorter_than_tailseq',]$tail_type = 'no_tail'
#tails_data_mapped_true[tails_data_mapped_true$tail_source=='tailseq_clip_clip_R3_shorter_than_tailseq',]$uridylated = FALSE


# remove heterogenous tails from analysis
tails_data_mapped_true_no_hetero = tails_data_mapped_true[-grep("hetero", tails_data_mapped_true$tail_type),
                                                          ]

head(tails_data_mapped_true_no_hetero,20)
# remove other type tails (for which we can suspect they are not tails but rather origin from improper mapping/repeatmasker) from the analysis
tails_data_mapped_true_no_hetero_no_other = tails_data_mapped_true_no_hetero[-grep("other",
                                                                                   tails_data_mapped_true_no_hetero$tail_type), ]


# treat all AG,UG or UA tails as other_no_tail

tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$tail_type ==
                                            "AG", ]$tail_type <- "other_no_tail"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$tail_type ==
                                            "UG", ]$tail_type <- "other_no_tail"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$tail_type ==
                                            "UA", ]$tail_type <- "other_no_tail"
tails_data_mapped_true_no_hetero_no_other <- tails_data_mapped_true_no_hetero_no_other[-grep("other",
                                                                                             tails_data_mapped_true_no_hetero_no_other$tail_type), ] #remove all other from analysis


# create classes for A-tail lengths (0,1,2-5,6-10,11-20,21-30,30+)
tails_data_mapped_true_no_hetero_no_other$A_length = ""
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length ==
                                            0, ]$A_length = "0"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length ==
                                            1, ]$A_length = "1"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length %in%
                                            seq(2, 5, 1), ]$A_length = "2-5"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length %in%
                                            seq(6, 10, 1), ]$A_length = "6-10"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length %in%
                                            seq(11, 20, 1), ]$A_length = "11-20"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length %in%
                                            seq(21, 30, 1), ]$A_length = "21-30"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Atail_length >
                                            30, ]$A_length = "30+"
tails_data_mapped_true_no_hetero_no_other$A_length <- factor(tails_data_mapped_true_no_hetero_no_other$A_length,
                                                             levels = c("0", "1", "2-5", "6-10", "11-20", "21-30", "30+"))

# create classes for U-tail lengths (0,1,2,3-5,6-10,10+)
tails_data_mapped_true_no_hetero_no_other$U_length = ""
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Utail_length ==
                                            0, ]$U_length = "0"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Utail_length ==
                                            1, ]$U_length = "1"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Utail_length ==
                                            2, ]$U_length = "2"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Utail_length %in%
                                            seq(3, 5, 1), ]$U_length = "3-5"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Utail_length %in%
                                            seq(6, 10, 1), ]$U_length = "6-10"
tails_data_mapped_true_no_hetero_no_other[tails_data_mapped_true_no_hetero_no_other$Utail_length >
                                            10, ]$U_length = "10+"
tails_data_mapped_true_no_hetero_no_other$U_length <- factor(tails_data_mapped_true_no_hetero_no_other$U_length,
                                                             levels = c("10+", "6-10", "3-5", "2", "1", "0"))


# modify levels of tail_types to have U_only,A-only,AU or no_tail
tails_data_mapped_true_no_hetero_no_other_tails <- tails_data_mapped_true_no_hetero_no_other
tails_data_mapped_true_no_hetero_no_other_tails$tail_type <- as.character(tails_data_mapped_true_no_hetero_no_other_tails$tail_type)
tails_data_mapped_true_no_hetero_no_other_tails$tail_type <- factor(tails_data_mapped_true_no_hetero_no_other_tails$tail_type,
                                                                    levels = c("U_only", "AU", "no_tail", "A_only"))




# create dataframe with PA1 cell_line data
tails_data_mapped_true_no_hetero_no_other_PA1 <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$cell_line ==
                                                                                                   "PA1", ]
# create dataframe with PA1 cell_line data for knockdown conditions
tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD <- tails_data_mapped_true_no_hetero_no_other_PA1[tails_data_mapped_true_no_hetero_no_other_PA1$condition !=
                                                                                                          "NT", ]
# filter out tails longer than 64nt - due to a limit of spike-ins used
tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD <- tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD[tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$tail_length <=
                                                                                                                   64, ]

# create dataframe with PA1 cell_line data for untreated
tails_data_mapped_true_no_hetero_no_other_PA1_tails_NT <- tails_data_mapped_true_no_hetero_no_other_PA1[tails_data_mapped_true_no_hetero_no_other_PA1$condition ==
                                                                                                          "NT" & tails_data_mapped_true_no_hetero_no_other_PA1$localization %in% c("CYTO","NUC") & tails_data_mapped_true_no_hetero_no_other_PA1$primer_name %in% c("L1NGS0","GAPDH"), ]
# filter out tails longer than 64nt - due to a limit of spike-ins used
tails_data_mapped_true_no_hetero_no_other_PA1_tails_NT <- tails_data_mapped_true_no_hetero_no_other_PA1_tails_NT[tails_data_mapped_true_no_hetero_no_other_PA1_tails_NT$tail_length <=
                                                                                                                   64, ]




tails_data_mapped_true_no_hetero_no_other_tails_NT1 <- tails_data_mapped_true_no_hetero_no_other_tails[(tails_data_mapped_true_no_hetero_no_other_tails$condition ==
                                                                                                          "NT" & tails_data_mapped_true_no_hetero_no_other_tails$localization %in% c("TOTAL","JADRO")),
                                                                                                       ]
# filter out tails longer than 64nt - due to a limit of spike-ins used
tails_data_mapped_true_no_hetero_no_other_tails_NT1 <- tails_data_mapped_true_no_hetero_no_other_tails_NT1[tails_data_mapped_true_no_hetero_no_other_tails_NT1$tail_length <=
                                                                                                             64, ]



tails_data_for_analysis_mysz <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$cell_line=="MYSZ",]


tails_data_for_analysis_H9 <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$cell_line=="H9",]

tails_data_for_analysis_LEAP147 <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$cell_line=="LEAP147",]

tails_data_for_analysis_LEAP317 <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$cell_line=="LEAP317",]


tails_data_for_analysis_reporter_overexp <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript=="REPORTERL1_overexp",]
tails_data_for_analysis_reporter_overexp <- tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$tail_length <=
                                                                                       64, ]

tails_data_for_analysis_reporter_UTR_all_mapped <- tails_data_mapped_true[tails_data_mapped_true$mapping_position>8950 & tails_data_mapped_true$mapping_position<9010,]



tails_data_for_analysis_reporter_UTR <- tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$mapping_position>8950 & tails_data_for_analysis_reporter_overexp$mapping_position<9010,]
tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$uridylated==1,]$uridylated=TRUE

tails_data_for_analysis_reporter_SV40body <- tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$mapping_position<9140 & tails_data_for_analysis_reporter_overexp$mapping_position>=9010,]

tails_data_for_analysis_reporter_SV40end <- tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$mapping_position>9140,]


tails_data_for_analysis_reporter_KD <- tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript=="REPORTERL1",]
tails_data_for_analysis_reporter_KD <- tails_data_for_analysis_reporter_KD[tails_data_for_analysis_reporter_KD$tail_length <=
                                                                             64, ]


### functions for summarizing data ###

summarize_tails_by_tail_type <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             localization, primer_name, tail_type), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, localization, primer_name), transform, freq = N/sum(N))
  return(tail_data_summarized)
}


summarize_tails_by_tail_type_replicates <- function(tail_data2) {
  #function summarizing tail length data for given dataset
  tail_data<-tail_data2
  #tail_data[tail_data$Utail_length>=10,]$Utail_length<-'10+'
  #tail_data$Utail_length<-as.factor(tail_data$Utail_length)
  #tail_data$Utail_length<-factor(tail_data$Utail_length,levels=c('0','1','2','3','4','5','6','7','8','9','10+'))
  tail_data_summarized<-ddply(tail_data,.(transcript,condition,cell_line,localization,primer_name,tail_type,replicate),summarise,N=length(primer_name))
  tail_data_summarized<-ddply(tail_data_summarized,.(transcript,condition,cell_line,localization,primer_name,replicate),transform, freq=N/sum(N))
  return(tail_data_summarized)
}


summarize_tails_by_tail_type_replicates <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             localization, primer_name, tail_type), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, localization, primer_name), transform, freq = N/sum(N))
  return(tail_data_summarized)
}



summarize_tails_by_tail_type_loc <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             localization, primer_name, tail_type), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, primer_name), transform, freq = N/sum(N))
  return(tail_data_summarized)
}

summarize_Utails_calculate_means <- function(tail_data2) {
  # function summarizing Utail length data for given dataset
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             primer_name), summarise, N = length(Utail_length), mean_Utail = mean(Utail_length,
                                                                                                                  na.rm = T), sd_utail = sd(Utail_length))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, primer_name), transform, se_utail = sd_utail/sqrt(N))
  return(tail_data_summarized)
}

summarize_Utails_calculate_means_repl <- function(tail_data2) {
  # function summarizing Utail length data for given dataset
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,replicate,
                                             primer_name), summarise, N = length(Utail_length), mean_Utail_rep = mean(Utail_length,
                                                                                                                      na.rm = T), sd_utail_rep = sd(Utail_length))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, primer_name,replicate), transform, se_utail_rep = sd_utail_rep/sqrt(N))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, primer_name), transform, mean_Utail = mean(mean_Utail_rep),sd_utail = sd(mean_Utail_rep))
  return(tail_data_summarized)
}

summarize_Atails_calculate_means <- function(tail_data2) {
  # function summarizing Atail length data for given dataset
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             primer_name), summarise, N = length(Atail_length), mean_Atail = mean(Atail_length,
                                                                                                                  na.rm = T), sd_Atail = sd(Atail_length))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, primer_name), transform, se_Atail = sd_Atail/sqrt(N))
  return(tail_data_summarized)
}

summarize_Atails_calculate_medians <- function(tail_data2) {
  # function summarizing Atail length data for given dataset
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             primer_name), summarise, N = length(Atail_length), mean_Atail = median(Atail_length,
                                                                                                                    na.rm = T), sd_Atail = sd(Atail_length))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, primer_name), transform, se_Atail = sd_Atail/sqrt(N))
  return(tail_data_summarized)
}


summarize_Utails_lengths <- function(tail_data2) {
  # function summarizing tail length data for given dataset
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(transcript, condition, cell_line,
                                             localization, primer_name, U_length), summarise, N = length(U_length))
  tail_data_summarized <- ddply(tail_data_summarized, .(transcript, condition,
                                                        cell_line, localization, primer_name), transform, freq = N/sum(N))
  return(tail_data_summarized)
}

summarize_tails_by_tail_type_collapse_short_relative <- function(tail_data2) {
  #function summarizing tails based on their length and type
  tail_data<-tail_data2
  tail_data$tail_length<-as.numeric(tail_data$tail_length)
  #calculate maximum possible tail length in given dataset
  max_tail_length<-summary(tail_data2$tail_length)[6]
  #bin tails based on their length
  if (max_tail_length>=60) {
    tail_data[tail_data$tail_length>=60,]$tail_length<-'60+'
  }
  if (max_tail_length>=50) {
    for (temp_len in seq(50,59,1)) {
      if(any(tail_data$tail_length==temp_len)) {
        tail_data[tail_data$tail_length==temp_len,]$tail_length<-'50-59'

      }
    }
  }
  if (max_tail_length>=40) {
    for (temp_len in seq(40,49,1)) {
      if(any(tail_data$tail_length==temp_len)) {
        tail_data[tail_data$tail_length==temp_len,]$tail_length<-'40-49'
      }
    }
  }
  if (max_tail_length>=30) {
    for (temp_len in seq(30,39,1)) {
      if(any(tail_data$tail_length==temp_len)) {
        tail_data[tail_data$tail_length==temp_len,]$tail_length<-'30-39'
      }
    }
  }
  if (max_tail_length>=20) {
    for (temp_len in seq(20,29,1)) {
      if(any(tail_data$tail_length==temp_len)) {
        tail_data[tail_data$tail_length==temp_len,]$tail_length<-'20-29'
      }
    }
  }
  for (temp_len in seq(10,19,1)) {
    if(any(tail_data$tail_length==temp_len)) {
      tail_data[tail_data$tail_length==temp_len,]$tail_length<-'10-19'
    }
  }

  for (temp_len in seq(1,9,1)) {
    if(any(tail_data$tail_length==temp_len)) {
      tail_data[tail_data$tail_length==temp_len,]$tail_length<-'1-9'
    }
  }
  #reorder tail length factor
  tail_data$tail_length<-as.factor(tail_data$tail_length)
  tail_data$tail_length<-factor(tail_data$tail_length,levels=c('0','1-9','10-19','20-29','30-39','40-49','50-59','60+'))
  #summarize:
  tail_data_summarized<-ddply(tail_data,.(transcript,condition,tail_length,cell_line,localization,primer_name,tail_type),summarise,N=length(tail_length))
  tail_data_summarized<-ddply(tail_data_summarized,.(transcript,condition,cell_line,localization,primer_name,tail_length),transform, freq=N/sum(N))
  return(tail_data_summarized)
}


summarize_uridylation_replicates2 <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(condition, primer_name, uridylated,replicate), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition, primer_name,replicate), transform, freq = N/sum(N))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition, primer_name, uridylated), transform, mean_freq = mean(freq), sd = sd(freq))
  return(tail_data_summarized)
}

summarize_uridylation_replicates3 <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(condition,  uridylated,replicate), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition,replicate), transform, freq = N/sum(N))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition,uridylated), transform, mean_freq = mean(freq), sd = sd(freq))
  return(tail_data_summarized)
}


summarize_uridylation_replicates4 <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(condition,  uridylated2,replicate), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition,replicate), transform, freq = N/sum(N))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition,uridylated2), transform, mean_freq = mean(freq), sd = sd(freq))
  return(tail_data_summarized)
}


summarize_uridylation_replicates5 <- function(tail_data2) {
  # function summarizing tails by tail type
  tail_data <- tail_data2
  tail_data_summarized <- ddply(tail_data, .(project_name,condition,  uridylated2,replicate), summarise, N = length(primer_name))
  tail_data_summarized <- ddply(tail_data_summarized, .(project_name,condition,replicate), transform, freq = N/sum(N))
  tail_data_summarized <- ddply(tail_data_summarized, .(condition,uridylated2), transform, mean_freq = mean(freq), sd = sd(freq))
  return(tail_data_summarized)
}


## FIGURES ##


tails_data_mapped_reporter_overexp_for_terminal <- tails_data_mapped[tails_data_mapped$transcript ==
                                                                       "REPORTERL1_overexp", ]
tails_data_mapped_reporter_overexp_for_terminal$condition <- as.character(tails_data_mapped_reporter_overexp_for_terminal$condition)
tails_data_mapped_reporter_overexp_for_terminal$condition <- as.factor(tails_data_mapped_reporter_overexp_for_terminal$condition)


tails_data_mapped_gapdh_overexp_for_terminal <- tails_data_mapped[tails_data_mapped$transcript ==
                                                                    "GAPDH" & tails_data_mapped$condition %in% c("CNTRL", "TUT7WT", "MOV10"), ]
tails_data_mapped_gapdh_overexp_for_terminal$condition <- as.character(tails_data_mapped_gapdh_overexp_for_terminal$condition)
tails_data_mapped_gapdh_overexp_for_terminal$condition <- as.factor(tails_data_mapped_gapdh_overexp_for_terminal$condition)


for (cond in levels(tails_data_mapped_reporter_overexp_for_terminal$condition)) {
  print(cond)
  terminal_nucleotides_cond_name = paste("terminal_nucleotides_reporter_overexp",
                                         cond, sep = "_")
  # temp2=eval(as.symbol(summary_tail_lengths_table_name))
  assign(terminal_nucleotides_cond_name, tails_data_mapped_reporter_overexp_for_terminal[tails_data_mapped_reporter_overexp_for_terminal$condition ==
                                                                                           cond, ]$terminal_nucleotides)
}

terminal_nucleotides_reporter_overexp = list(CTRL = terminal_nucleotides_reporter_overexp_MBP,
                                             TUT7WT = terminal_nucleotides_reporter_overexp_TUT7WT, MOV10 = terminal_nucleotides_reporter_overexp_MOV10,
                                             TUT4WT = terminal_nucleotides_reporter_overexp_TUT4WT, TUT7MUT = terminal_nucleotides_reporter_overexp_TUT7MT,
                                             TUT4MUT = terminal_nucleotides_reporter_overexp_TUT4MT, FAM46CMT = terminal_nucleotides_reporter_overexp_FAM46CMT, FAM46CWT = terminal_nucleotides_reporter_overexp_FAM46CWT,
                                             FTUT7WT = terminal_nucleotides_reporter_overexp_FTUT7WT,
                                             GLD2 = terminal_nucleotides_reporter_overexp_GLD2,
                                             PAPD5 = terminal_nucleotides_reporter_overexp_PAPD5,
                                             U6TUT = terminal_nucleotides_reporter_overexp_U6TUT)


terminal_nucleotides_reporter_overexp = list(CTRL = terminal_nucleotides_reporter_overexp_CNTRL,
                                             TUT7WT = terminal_nucleotides_reporter_overexp_TUT7WT, MOV10 = terminal_nucleotides_reporter_overexp_MOV10,
                                             TUT4WT = terminal_nucleotides_reporter_overexp_TUT4WT, TUT7MUT = terminal_nucleotides_reporter_overexp_TUT7MUT,
                                             TUT4MUT = terminal_nucleotides_reporter_overexp_TUT4MUT)

for (cond in levels(tails_data_mapped_gapdh_overexp_for_terminal$condition)) {
  print(cond)
  terminal_nucleotides_cond_name = paste("terminal_nucleotides_gapdh_overexp",
                                         cond, sep = "_")
  # temp2=eval(as.symbol(summary_tail_lengths_table_name))
  assign(terminal_nucleotides_cond_name, tails_data_mapped_gapdh_overexp_for_terminal[tails_data_mapped_gapdh_overexp_for_terminal$condition ==
                                                                                        cond, ]$terminal_nucleotides)
}

terminal_nucleotides_gapdh_overexp = list(CTRL = terminal_nucleotides_gapdh_overexp_CNTRL,
                                          TUT7WT = terminal_nucleotides_gapdh_overexp_TUT7WT, MOV10 = terminal_nucleotides_gapdh_overexp_MOV10)


pdf("fig_4a_reporter.pdf")
print(ggseqlogo(terminal_nucleotides_reporter_overexp, ncol = 2, method = "bits"))
dev.off()

pdf("fig_4a_gapdh.pdf")
print(ggseqlogo(terminal_nucleotides_gapdh_overexp, ncol = 2, method = "prob"))
dev.off()


pdf("fig_4B_old_style_all.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                              c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "FTUT7WT","TUT7WT", "TUT7MT", "TUT4WT", "TUT4MT", "MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(condition),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION"))
print(plot_tail_lengths)
dev.off()

pdf("fig_4B_old_style_TUTMOV.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                              c("MBP", "TUT7WT", "TUT4WT", "MOV10","TUT7MT","TUT4MT","FTUT7WT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "FTUT7WT","TUT7WT", "TUT7MT", "TUT4WT", "TUT4MT", "MOV10"))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(condition),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION"))
print(plot_tail_lengths)
dev.off()

pdf("fig_4B_reporter_all.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                   c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_UTR_less_than_9010.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                               c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),y=mean_freq,
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()
ggsave(filename = "fig_4B_reporter_UTR_less_than_9010_new.pdf", plot = plot_tail_lengths)


pdf("fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                               c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CWT","FTUT7WT","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CWT","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()
ggsave(filename = "fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD.pdf", plot = plot_tail_lengths)


#Create fig for UTR only LINE reads for all conditions except FTUT7, GLD2, FAM46MT

summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                               c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CWT","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CWT","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==1 & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()
ggsave(filename = "fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD_no_FTUT7WT.pdf", plot = plot_tail_lengths)



pdf("fig_4B_old_style_reporter_UTR_less_than_9010_new.pdf",width=2048,height=2048)
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name,summarize_tails_by_tail_type(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                         c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))

plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(condition),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, UTR only LINE"))
print(plot_tail_lengths)
dev.off()
ggsave(filename = "fig_4B_old_style_reporter_UTR_less_than_9010_new.pdf", plot = plot_tail_lengths)

pdf("fig_4B_reporter_SV40bod_9010_9140.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_SV40body[tails_data_for_analysis_reporter_SV40body$condition %in%
                                                                                                                    c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_SV40end_more_than9140.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_SV40end[tails_data_for_analysis_reporter_SV40end$condition %in%
                                                                                                                   c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()



pdf("fig_4B_reporter_all.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                   c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_all_test.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_test2))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_all_hetero.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_test_reporter_all_tails[tails_test_reporter_all_tails$condition %in%
                                                                                                        c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_MYSZ.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_mysz))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
#summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
#                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(primer_name),
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_4B_H9.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_H9))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
#summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
#                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(primer_name),
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()



pdf("fig_4B_reporter_all_all_mapped.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true[tails_data_mapped$condition %in%
                                                                                                 c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true$transcript=="REPORTERL1_overexp", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$primer=='L1NGS1' & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                                                            fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                                                 stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_all_all_mapped_ACTB.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="ACTB", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_4B_reporter_all_all_mapped_GAPDH.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="GAPDH", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


#fig_4B_reporter_all_all_mapped_GAPDH.pdf - no GLD2 no FTUT7WT no FAM46CMT
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CWT","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="GAPDH", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("GAPDH OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
ggsave(filename = "fig_4B_GAPDH_noGLD2_noFTUT7WT_noFAM46CMT.pdf", plot = plot_tail_lengths)


#fig_4B_reporter_all_all_mapped_ACTB.pdf - no GLD2 no FTUT7WT no FAM46CMT
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CWT","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="ACTB", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("GAPDH OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
ggsave(filename = "fig_4B_ACTB_noGLD2_noFTUT7WT_noFAM46CMT.pdf", plot = plot_tail_lengths)



#fig_4B_reporter_all_all_mapped_PABPC4.pdf - no GLD2 no FTUT7WT no FAM46CMT
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CWT","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="PABPC4", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("GAPDH OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
ggsave(filename = "fig_4B_PABPC4_noGLD2_noFTUT7WT_noFAM46CMT.pdf", plot = plot_tail_lengths)





pdf("fig_4B_reporter_all_all_mapped_ETV4.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="ETV4", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_all_all_mapped_SOGA2.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="SOGA2", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_4B_reporter_all_all_mapped_PABPC4.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_mapped_true_no_hetero_no_other_tails$transcript=="PABPC4", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_4B_reporter_TUTMOV.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                   c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_4B_rebut2.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_mapped_true_no_hetero_no_other_tails[(tails_data_mapped_true_no_hetero_no_other_tails$transcript=='REPORTERL1_overexp') & tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                          c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT", "FTUT7WT", "TUT7MT","TUT4WT","TUT4MT", "MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate=='1',], aes(x = as.factor(condition),
                                                                                                                                                fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                     stat = "identity") + scale_y_continuous() + xlab("condition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_4C.pdf")
summary_tail_types_table_name <- paste("repo_KD_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type(tails_data_for_analysis_reporter_KD[tails_data_for_analysis_reporter_KD$condition %in%
                                                                                                         c("CNTRLKD", "TUT7KD", "TUT4KD", "TUT4TUT7KD"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("CNTRLKD", "TUT7KD", "TUT4KD", "TUT4TUT7KD"))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(condition),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("DEPLETION"))
print(plot_tail_lengths)
dev.off()


pdf("fig_4D.pdf")
summary_tail_types_table_name <- paste("genomic_L1_summarized_tails_types_by_cell_lines")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type(tails_data_mapped_true_no_hetero_no_other_tails_NT1[tails_data_mapped_true_no_hetero_no_other_tails_NT1$transcript ==
                                                                                                                         "ENDOL1" & tails_data_mapped_true_no_hetero_no_other_tails_NT1$cell_line %in%
                                                                                                                         c("293T", "H9", "HELAHA", "NPC", "PA1", "MYSZ"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$cell_line <- as.character(summary_tail_types_table$cell_line)
summary_tail_types_table[summary_tail_types_table$cell_line == "MYSZ", ]$cell_line = "MOUSE_TESTIS"
summary_tail_types_table$cell_line <- factor(summary_tail_types_table$cell_line,
                                             levels = c("293T", "HELAHA", "NPC", "PA1", "H9", "MOUSE_TESTIS"))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(cell_line),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("cell_line") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("DEPLETION"))
print(plot_tail_lengths)
dev.off()

pdf("fig_4F.pdf")
summary_tail_types_table_name <- paste("genomic_L1_summarized_tails_types_by_condition_KD")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type(tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD[tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$condition %in%
                                                                                                                            c("CNTRLKD", "MOV10KD", "TUT4TUT7KD") & tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$transcript ==
                                                                                                                            "ENDOL1", ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("CNTRLKD", "TUT4TUT7KD", "MOV10KD"))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(condition),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("condition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("DEPLETION (PA-1)"))
print(plot_tail_lengths)
dev.off()


pdf("fig_4G.pdf")
summary_tail_types_table_name <- paste("genomic_L1_and_GAPDH_summarized_tails_types_by_subcellular_localization")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type_loc(tails_data_mapped_true_no_hetero_no_other_PA1_tails_NT))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(localization),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("localization") + ylab("fraction of transcripts") +
  scale_fill_grey() + facet_grid(. ~ transcript)
print(plot_tail_lengths)
dev.off()


pdf("fig_S5A.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                  c("MBP", "FTUT7WT","TUT7WT", "TUT7MT", "MOV10","TUT4WT","TUT4MT") & tails_data_for_analysis_reporter_overexp$uridylated ==
                                                                                                                  TRUE, ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "FTUT7WT", "TUT7WT", "TUT7MT", "MOV10","TUT4WT","TUT4MT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          se_utail, ymax = mean_Utail + se_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_S5A.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                       c("MBP", "FTUT7WT","TUT7WT", "TUT7MT", "MOV10") & tails_data_for_analysis_reporter_overexp$uridylated ==
                                                                                                                       TRUE, ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "FTUT7WT", "TUT7WT", "TUT7MT", "MOV10"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          se_utail, ymax = mean_Utail + se_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_S5A_2.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                  c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_overexp$uridylated ==
                                                                                                                  TRUE, ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          se_utail, ymax = mean_Utail + se_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                              c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_UTR$uridylated ==
                                                                                                              TRUE, ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          se_utail, ymax = mean_Utail + se_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)


pdf("fig_S5A_ACTB.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                              c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_mapped_true_no_hetero_no_other_tails$uridylated == TRUE & tails_data_mapped_true_no_hetero_no_other_tails$transcript=='ACTB',]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
#summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- as.factor(summary_mean_Utail_table$condition)
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          sd_utail, ymax = mean_Utail + sd_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_S5A_GAPDH.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                              c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_mapped_true_no_hetero_no_other_tails$uridylated == TRUE & tails_data_mapped_true_no_hetero_no_other_tails$transcript=='GAPDH',]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
#summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- as.factor(summary_mean_Utail_table$condition)
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          sd_utail, ymax = mean_Utail + sd_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_S5A_PABPC4.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                              c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_mapped_true_no_hetero_no_other_tails$uridylated == TRUE & tails_data_mapped_true_no_hetero_no_other_tails$transcript=='PABPC4',]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
#summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- as.factor(summary_mean_Utail_table$condition)
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          sd_utail, ymax = mean_Utail + sd_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_S5A_SOGA2.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                              c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_mapped_true_no_hetero_no_other_tails$uridylated == TRUE & tails_data_mapped_true_no_hetero_no_other_tails$transcript=='SOGA2',]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
#summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- as.factor(summary_mean_Utail_table$condition)
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          sd_utail, ymax = mean_Utail + sd_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_S5A_ETV4.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$condition %in%
                                                                                                                              c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_mapped_true_no_hetero_no_other_tails$uridylated == TRUE & tails_data_mapped_true_no_hetero_no_other_tails$transcript=='ETV4',]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
#summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- as.factor(summary_mean_Utail_table$condition)
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          sd_utail, ymax = mean_Utail + sd_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_S5A_repor_UTR.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Utails_calculate_means_repl(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                                   c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_UTR$uridylated == TRUE,]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
#summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- as.factor(summary_mean_Utail_table$condition)
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Utail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Utail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Utail -
                                                                                          sd_utail, ymax = mean_Utail + sd_utail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_SJ.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Atails_calculate_means(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                  c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") , ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Atail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Atail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Atail -
                                                                                          se_Atail, ymax = mean_Atail + se_Atail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_SJ_Aonly.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Atails_calculate_means(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                  c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_overexp$Atail_length>0 , ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Atail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Atail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Atail -
                                                                                          se_Atail, ymax = mean_Atail + se_Atail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_SJ_Aonly_median.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Atails_calculate_medians(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                    c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_overexp$Atail_length>0 , ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Atail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Atail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Atail -
                                                                                          se_Atail, ymax = mean_Atail + se_Atail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()


pdf("fig_SJ_Aonly_median_UTR_only.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Atails_calculate_medians(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                                c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_UTR$Atail_length>0 , ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Atail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Atail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Atail -
                                                                                          se_Atail, ymax = mean_Atail + se_Atail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()



pdf("fig_SJ_uridylated_only.pdf")
summary_mean_Utail_table_name <- paste("reporter_mean_Utails")
assign(summary_mean_Utail_table_name, summarize_Atails_calculate_means(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                  c("MBP", "TUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT") & tails_data_for_analysis_reporter_overexp$uridylated == TRUE , ]))
summary_mean_Utail_table <- eval(as.symbol(summary_mean_Utail_table_name))
summary_mean_Utail_table$condition <- as.character(summary_mean_Utail_table$condition)
summary_mean_Utail_table$condition <- factor(summary_mean_Utail_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT", "TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))
plot_tail_lengths <- ggplot(summary_mean_Utail_table, aes(x = as.factor(condition))) +
  geom_bar(aes(y = mean_Atail), position = position_dodge(), stat = "identity") +
  xlab("condition") + ylab("Atail mean length") + scale_fill_grey() + geom_errorbar(aes(ymin = mean_Atail -
                                                                                          se_Atail, ymax = mean_Atail + se_Atail), colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()

pdf("fig_S5B.pdf")
summary_Utail_lengths_table_name <- paste("reporter_overexp_summarized_Utails_lengths_by_condition")
assign(summary_Utail_lengths_table_name, summarize_Utails_lengths(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in% c("MBP", "MOV10", "TUT7WT", "TUT7MT", "TUT4WT", "TUT4MT","FTUT7WT"),]))
summary_Utail_lengths_table <- eval(as.symbol(summary_Utail_lengths_table_name))
summary_Utail_lengths_table$condition <- as.character(summary_Utail_lengths_table$condition)
summary_Utail_lengths_table$condition <- factor(summary_Utail_lengths_table$condition,
                                                levels = c("MBP", "FTUT7WT", "MOV10", "TUT7WT", "TUT7MT", "TUT4WT", "TUT4MT"))
plot_tail_lengths <- ggplot(summary_Utail_lengths_table, aes(x = as.factor(condition),
                                                             fill = U_length, colours = U_length)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                              stat = "identity") + xlab("cell line") + ylab("fraction of transcripts") + scale_fill_grey()
print(plot_tail_lengths)
dev.off()


pdf("fig_S5C-G.pdf")
for (transcript in c("ENDOL1")) {
  for (cell_line in c("H9", "HELAHA", "293T", "NPC", "PA1")) {
    summary_tail_lengths_table_name <- paste("_summarized_tails_lengths_by_type")
    assign(summary_tail_lengths_table_name, summarize_tails_by_tail_type_collapse_short_relative(tails_data_mapped_true_no_hetero_no_other_tails_NT1[tails_data_mapped_true_no_hetero_no_other_tails_NT1$transcript ==
                                                                                                                                                       transcript & tails_data_mapped_true_no_hetero_no_other_tails_NT1$cell_line ==
                                                                                                                                                       cell_line & tails_data_mapped_true_no_hetero_no_other_tails_NT1$primer_name ==
                                                                                                                                                       "L1NGS0" & tails_data_mapped_true_no_hetero_no_other_tails_NT1$tail_length >
                                                                                                                                                       0, ]))
    summary_tail_lengths_table <- eval(as.symbol(summary_tail_lengths_table_name))
    summary_tail_lengths_table$tail_type<-as.character(summary_tail_lengths_table$tail_type)
    summary_tail_lengths_table$tail_type<-factor(summary_tail_lengths_table$tail_type,levels=c("U_only","AU","A_only"))
    plot_tail_lengths <- ggplot(summary_tail_lengths_table, aes(x = as.factor(tail_length),
                                                                fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("Atail_length") + ylab("% of 3' ends") +
      facet_grid(. ~ cell_line) + scale_fill_manual(values = brewer.pal(11, "Spectral")[c(2,4,10)])
    print(plot_tail_lengths)
  }
}
dev.off()



pdf("fig_S5I.pdf")
for (condition in c("CNTRLKD", "TUT4TUT7KD", "MOV10KD")) {
  summary_tail_lengths_table_name <- paste("_summarized_tails_lengths_by_type")
  assign(summary_tail_lengths_table_name, summarize_tails_by_tail_type_collapse_short_relative(tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD[tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$condition ==
                                                                                                                                                        condition & tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$primer_name ==
                                                                                                                                                        "L1NGS0" & tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$tail_length >
                                                                                                                                                        0, ]))
  summary_tail_lengths_table <- eval(as.symbol(summary_tail_lengths_table_name))
  summary_tail_lengths_table$tail_type <- as.character(summary_tail_lengths_table$tail_type)
  summary_tail_lengths_table$tail_type <- factor(summary_tail_lengths_table$tail_type,
                                                 levels = c("U_only", "AU", "A_only"))
  plot_tail_lengths <- ggplot(summary_tail_lengths_table, aes(x = as.factor(tail_length),
                                                              fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                                 stat = "identity") + scale_y_continuous() + xlab("Atail_length") + ylab("% of 3' ends") +
    facet_grid(. ~ condition) + scale_fill_manual(values = brewer.pal(11, "Spectral")[c(2,
                                                                                        4, 10)])
  print(plot_tail_lengths)
}

dev.off()



uridylated_TUT7WT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT7WT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_CNTRL<-summary_tail_types_table[summary_tail_types_table$condition=='MBP' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_TUT4WT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT4WT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_MOV10<-summary_tail_types_table[summary_tail_types_table$condition=='MOV10' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_TUT7MT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT7MT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_TUT4MT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT4MT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_FAM46CWT<-summary_tail_types_table[summary_tail_types_table$condition=='FAM46CWT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_FAM46CMT<-summary_tail_types_table[summary_tail_types_table$condition=='FAM46CMT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_PAPD5<-summary_tail_types_table[summary_tail_types_table$condition=='PAPD5' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_U6TUT<-summary_tail_types_table[summary_tail_types_table$condition=='U6TUT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_GLD2<-summary_tail_types_table[summary_tail_types_table$condition=='GLD2' & summary_tail_types_table$uridylated==TRUE,6]
#uridylated_TUT4TUT7KD<-summary_tail_types_table[summary_tail_types_table$condition=='TUT4TUT7KD' & summary_tail_types_table$uridylated==TRUE,5]
#uridylated_OE_df<-list(CTRL=uridylated_CNTRL,TUT4WT=uridylated_TUT4WT,TUT7WT=uridylated_TUT7WT,MOV10=uridylated_MOV10,TUT7MT=uridylated_TUT7MT,TUT4MT=uridylated_TUT4MT,GLD2=uridylated_GLD2,PAPD5=uridylated_PAPD5,FAM46CWT=uridylated_FAM46CWT,FAM46CMT=uridylated_FAM46CMT,U6TUT=uridylated_U6TUT,check.rows=F)
uridylated_OE_df<-list(CTRL=uridylated_CNTRL,TUT4WT=uridylated_TUT4WT,TUT7WT=uridylated_TUT7WT,MOV10=uridylated_MOV10,TUT7MT=uridylated_TUT7MT,TUT4MT=uridylated_TUT4MT,GLD2=uridylated_GLD2,PAPD5=uridylated_PAPD5,FAM46CWT=uridylated_FAM46CWT,U6TUT=uridylated_U6TUT)
attributes(uridylated_OE_df) = list(names = names(uridylated_OE_df),
                                    row.names=1:max(length(uridylated_TUT4WT), length(uridylated_CNTRL)), class='data.frame')
uridylated_OE_df_melt<-melt(uridylated_OE_df)
aov_test<-aov(value ~ variable,uridylated_OE_df_melt)
TukeyHSD(aov_test)
tukey_4C<-TukeyHSD(aov_test)


#reporter parts analysis
tails_data_for_analysis_reporter_UTR <- tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$mapping_position>8950 & tails_data_for_analysis_reporter_overexp$mapping_position<9010,]


reporter_start=8800
reporter_end=9200
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_overexp[tails_data_for_analysis_reporter_overexp$condition %in%
                                                                                                                   c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CMT","FAM46CWT","FTUT7WT","GLD2","PAPD5","U6TUT","TUT7MT","TUT4MT") & tails_data_for_analysis_reporter_overexp$mapping_position>reporter_start & tails_data_for_analysis_reporter_overexp$mapping_position<reporter_end & tails_data_for_analysis_reporter_overexp$PCR_duplicates<2, ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CMT","FAM46CWT","GLD2","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
uridylated_TUT7WT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT7WT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_CNTRL<-summary_tail_types_table[summary_tail_types_table$condition=='MBP' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_TUT4WT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT4WT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_MOV10<-summary_tail_types_table[summary_tail_types_table$condition=='MOV10' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_TUT7MT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT7MT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_TUT4MT<-summary_tail_types_table[summary_tail_types_table$condition=='TUT4MT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_FAM46CWT<-summary_tail_types_table[summary_tail_types_table$condition=='FAM46CWT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_FAM46CMT<-summary_tail_types_table[summary_tail_types_table$condition=='FAM46CMT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_PAPD5<-summary_tail_types_table[summary_tail_types_table$condition=='PAPD5' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_U6TUT<-summary_tail_types_table[summary_tail_types_table$condition=='U6TUT' & summary_tail_types_table$uridylated==TRUE,6]
uridylated_GLD2<-summary_tail_types_table[summary_tail_types_table$condition=='GLD2' & summary_tail_types_table$uridylated==TRUE,6]
#uridylated_TUT4TUT7KD<-summary_tail_types_table[summary_tail_types_table$condition=='TUT4TUT7KD' & summary_tail_types_table$uridylated==TRUE,5]
#uridylated_OE_df<-list(CTRL=uridylated_CNTRL,TUT4WT=uridylated_TUT4WT,TUT7WT=uridylated_TUT7WT,MOV10=uridylated_MOV10,TUT7MT=uridylated_TUT7MT,TUT4MT=uridylated_TUT4MT,GLD2=uridylated_GLD2,PAPD5=uridylated_PAPD5,FAM46CWT=uridylated_FAM46CWT,FAM46CMT=uridylated_FAM46CMT,U6TUT=uridylated_U6TUT,check.rows=F)
uridylated_OE_df<-list(CTRL=uridylated_CNTRL,TUT4WT=uridylated_TUT4WT,TUT7WT=uridylated_TUT7WT,MOV10=uridylated_MOV10,TUT7MT=uridylated_TUT7MT,TUT4MT=uridylated_TUT4MT,GLD2=uridylated_GLD2,PAPD5=uridylated_PAPD5,FAM46CWT=uridylated_FAM46CWT,FAM46CMT=uridylated_FAM46CMT,U6TUT=uridylated_U6TUT)
attributes(uridylated_OE_df) = list(names = names(uridylated_OE_df),
                                    row.names=1:max(length(uridylated_TUT4WT), length(uridylated_CNTRL)), class='data.frame')
uridylated_OE_df_melt<-melt(uridylated_OE_df)
aov_test<-aov(value ~ variable,uridylated_OE_df_melt)
TukeyHSD(aov_test)
tukey_4C<-TukeyHSD(aov_test)




tails_dplyr_test <- tails_data_for_analysis_reporter_overexp %>% group_by(condition,tail_type)
tails_dplyr_aaa <- tails_dplyr_test %>% summarize(ile = n()) %>% mutate(freq = ile/sum(ile))
ggplot(tails_dplyr_aaa %>% filter(tail_type=='U_only'),aes(x=condition,y=freq,fill=tail_type)) + geom_bar(stat="identity")

tails_dplyr_test <- tails_data_for_analysis_reporter_overexp %>% group_by(condition,tail_sequence)
tails_dplyr_aaa <- tails_dplyr_test %>% summarize(ile = n()) %>% mutate(freq = ile/sum(ile))
ggplot(tails_dplyr_aaa %>% filter(tail_sequence=='AT'),aes(x=condition,y=freq)) + geom_bar(stat="identity")

tails_dplyr_test <- tails_data_for_analysis_reporter_UTR %>% group_by(condition,tail_type)
tails_dplyr_aaa <- tails_dplyr_test %>% summarize(ile = n()) %>% mutate(freq = ile/sum(ile))
ggplot(tails_dplyr_aaa %>% filter(tail_type=='no_tail'),aes(x=condition,y=freq,fill=tail_type)) + geom_bar(stat="identity")


for (cond in levels(tails_data_for_analysis_reporter_UTR$condition)) {
  print(cond)
  terminal_nucleotides_cond_name = paste("tails_data_for_analysis_reporter_UTR",
                                         cond, sep = "_")
  # temp2=eval(as.symbol(summary_tail_lengths_table_name))
  assign(terminal_nucleotides_cond_name, tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition ==
                                                                                           cond, ])
}



pdf("fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates2(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$condition %in%
                                                                                                               c("MBP", "TUT7WT", "TUT4WT", "MOV10","FAM46CWT","FTUT7WT","PAPD5","U6TUT","TUT7MT","TUT4MT"), ]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CWT","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = uridylated, colours = uridylated)) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()
ggsave(filename = "fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD.pdf", plot = plot_tail_lengths)

pdf("fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD.pdf")
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates3(tails_data_for_analysis_reporter_UTR))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
#summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
#                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CWT","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated), colours = as.factor(uridylated))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                   stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)
dev.off()
ggsave(filename = "fig_4B_reporter_UTR_less_than_9010_no_fam_mut_no_GLD.pdf", plot = plot_tail_lengths)

#old style
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_tails_by_tail_type(tails_data_for_analysis_reporter_UTR))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
#summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
#                                             levels = c("MBP", "FTUT7WT","TUT7WT", "TUT7MT", "TUT4WT", "TUT4MT", "MOV10"))
plot_tail_lengths <- ggplot(summary_tail_types_table, aes(x = as.factor(condition),
                                                          fill = tail_type, colours = tail_type)) + geom_bar(aes(y = freq), position = position_stack(),
                                                                                                             stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION"))
print(plot_tail_lengths)


tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='U_heterogenous',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='AU',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='U_only',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='UA',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='A_only',]$uridylated=FALSE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='A_heterogenous',]$uridylated=FALSE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='other',]$uridylated=FALSE
tails_data_for_analysis_reporter_UTR_all_mapped[tails_data_for_analysis_reporter_UTR_all_mapped$tail_type=='no_tail',]$uridylated=FALSE

#tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='U_heterogenous',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='AU',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='U_only',]$uridylated=TRUE
#tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='UA',]$uridylated=TRUE
tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='A_only',]$uridylated=FALSE
#tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='A_heterogenous',]$uridylated=FALSE
#tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='other',]$uridylated=FALSE
tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$tail_type=='no_tail',]$uridylated=FALSE



summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates3(tails_data_for_analysis_reporter_overexp))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table$condition <- as.character(summary_tail_types_table$condition)
#summary_tail_types_table$condition <- factor(summary_tail_types_table$condition,
#                                             levels = c("MBP", "TUT7WT","FTUT7WT","TUT7MT","TUT4WT", "TUT4MT","MOV10","FAM46CWT","PAPD5","U6TUT"))



plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated), colours = as.factor(uridylated))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                         stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("OVEREXPRESSION, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)


tails_data_for_analysis_reporter_KD2<-tails_data_for_analysis_reporter_KD
tails_data_for_analysis_reporter_KD2[tails_data_for_analysis_reporter_KD2$tail_length==1,]$tail_length<-0
tails_data_for_analysis_reporter_KD2[tails_data_for_analysis_reporter_KD2$Utail_length<2 & tails_data_for_analysis_reporter_KD2$tail_type=='U_only',]$uridylated<-FALSE
tails_data_for_analysis_reporter_KD2[tails_data_for_analysis_reporter_KD2$tail_length==0,]$uridylated<-FALSE


summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates3(tails_data_for_analysis_reporter_KD2))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))

plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated), colours = as.factor(uridylated))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                         stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)

summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates4(tails_data_mapped_true[tails_data_mapped_true$transcript=='REPORTERL1KD',]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))

plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                         stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)

summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates4(tails_data_for_analysis_reporter_KD))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))

plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                         stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)


tails_data_for_analysis_reporter_KD3<-tails_data_for_analysis_reporter_KD
tails_data_for_analysis_reporter_KD3[tails_data_for_analysis_reporter_KD3$condition=='CNTRLKD_HGC',]$condition<-'CNTRLKD'


summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates4(tails_data_for_analysis_reporter_KD[tails_data_for_analysis_reporter_KD$project_name=='Tailsq_2',]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))

plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated==TRUE & summary_tail_types_table$replicate==1,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                         stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
print(plot_tail_lengths)

test <- tails_data_for_analysis_reporter_KD  %>% select(V1,tail_sequence,tail_type,Atail,Atail_length,Utail_length,tail_length,tail_source,transcript,cell_line,localization,condition,replicate,primer_name,project_name,mapping_position,exp_type,CTGAC_R5,terminal_nucleotides,uridylated,uridylated2,tailed,mapped) %>% group_by(project_name,condition,replicate,transcript,primer_name)

test <- test %>% filter(mapping_position>8500 & mapping_position<8700)



test2 <- ggplot(test,aes(x=mapping_position,group=condition,colour=condition)) + stat_bin(binwidth=1,aes(x=mapping_position,y=..ncount..),geom="line")
test2



test_ovr <- tails_data_for_analysis_reporter_UTR  %>% select(V1,tail_sequence,tail_type,Atail,Atail_length,Utail_length,tail_length,tail_source,transcript,cell_line,localization,condition,replicate,primer_name,project_name,mapping_position,exp_type,CTGAC_R5,terminal_nucleotides,uridylated,uridylated2,tailed,mapped) %>% group_by(project_name,condition,replicate,transcript,primer_name)

test_ovr <- test_ovr %>% filter(mapping_position>8900 & mapping_position<9100)



test_ovr2 <- tails_data_for_analysis_reporter_overexp  %>% select(tail_type,Utail_length,tail_length,transcript,replicate,condition,replicate,primer_name,project_name,mapping_position,uridylated,uridylated2) %>% group_by(project_name,condition,transcript,primer_name,uridylated,replicate)
test_ovr2 <- test_ovr2 %>% filter(mapping_position>8900 & mapping_position<9200)
test_ovr2 <- test_ovr2 %>% filter(condition == 'CNTRL')
#%>% filter(project_name=='Tailseq_2')

#test_ovr2 <- test_ovr2 %>% summarize(uridyl = n()) %>% ungroup() %>% group_by(condition,replicate) %>% mutate(freq_repl = uridyl/sum(uridyl))
#test_ovr2 <- test_ovr2 %>%  group_by(condition,uridylated) %>% summarize(mean_freq=mean(freq_repl),sd=sd(freq_repl))
test_ovr2

test_ovr_plot <- ggplot(test_ovr2[test_ovr2$uridylated==1,],aes(x=condition,y=mean_freq)) + geom_bar(stat="identity") + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
test_ovr_plot



test2ovr <- ggplot(test_ovr2,aes(x=mapping_position,group=condition,colour=condition)) + stat_bin(binwidth=1,aes(x=mapping_position,y=..count..),geom="line")
test2ovr

test2ovr + facet_grid(project_name ~ .)


test <- tails_data_for_analysis_reporter_KD  %>% select(V1,tail_sequence,tail_type,Atail,Atail_length,Utail_length,tail_length,tail_source,transcript,cell_line,localization,condition,replicate,primer_name,project_name,mapping_position,exp_type,CTGAC_R5,terminal_nucleotides,uridylated,uridylated2,tailed,mapped) %>% group_by(project_name,condition,replicate,transcript,primer_name)

test <- tails_data_for_analysis_reporter_KD_filtr  %>% select(V1,tail_sequence,tail_type,Atail,Atail_length,Utail_length,tail_length,tail_source,transcript,cell_line,localization,condition,replicate,primer_name,project_name,R3_mapping_position,mapping_position,exp_type,CTGAC_R5,terminal_nucleotides,uridylated,uridylated2,tailed,mapped) %>% group_by(project_name,condition,replicate,transcript,primer_name)



test <- test %>% filter(R3_mapping_position>8500 & R3_mapping_position<8700)
test <- test %>% filter(condition=='TUT4KD')

test2 <- ggplot(test,aes(group=condition,colour=condition)) + stat_bin(binwidth=1,aes(x=R3_mapping_position,y=..ncount..),geom="line")
test2

test2 + facet_grid(project_name ~ .)



ovr_plot <- ggplot(test_ovr,aes(x=))

summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates4(tails_data_for_analysis_reporter_UTR[tails_data_for_analysis_reporter_UTR$project_name=='Tailseq_5',]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                                                              fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                                                           stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
#print(plot_tail_lengths)
plot_tail_lengths


summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_for_analysis_reporter_UTR))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table <- summary_tail_types_table %>% group_by(project_name,condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                       fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                    stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
#print(plot_tail_lengths)
plot_tail_lengths


tails_data_for_analysis_reporter_KD_filtr <- tails_data_for_analysis_reporter_KD[tails_data_for_analysis_reporter_KD$project_name=='Tailseq_5' & (tails_data_for_analysis_reporter_KD$R3_mapping_position>8660 | is.na(tails_data_for_analysis_reporter_KD$R3_mapping_position)),]

tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_MGC") & tails_data_for_analysis_reporter_KD_filtr$replicate==1,]$replicate<-'4'
tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_MGC") & tails_data_for_analysis_reporter_KD_filtr$replicate==2,]$replicate<-'5'
tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_MGC") & tails_data_for_analysis_reporter_KD_filtr$replicate==3,]$replicate<-'6'

tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_HGC") & tails_data_for_analysis_reporter_KD_filtr$replicate==1,]$replicate<-'7'
tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_HGC") & tails_data_for_analysis_reporter_KD_filtr$replicate==2,]$replicate<-'8'
tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_HGC") & tails_data_for_analysis_reporter_KD_filtr$replicate==3,]$replicate<-'9'
tails_data_for_analysis_reporter_KD_filtr[tails_data_for_analysis_reporter_KD_filtr$condition %in% c("CNTRLKD_HGC","CNTRLKD_MGC"),]$condition<-'CNTRLKD'


summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_for_analysis_reporter_KD_filtr))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                       fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                    stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
#print(plot_tail_lengths)
plot_tail_lengths


tails_test <- tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD[tails_data_mapped_true_no_hetero_no_other_PA1_tails_KD$transcript=='ENDOL1',]
summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_test))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
#summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                       fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                    stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN PA1 ENDO")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
#print(plot_tail_lengths)
plot_tail_lengths


summary_tail_types_table


summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript=='ACTB' & tails_data_mapped_true_no_hetero_no_other_tails$exp_type=='OVR',]))
summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                       fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                    stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
  scale_fill_grey() + ggtitle(paste("KNOCKDOWN mod, LINE1 UTR only reporter")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
#print(plot_tail_lengths)
plot_tail_lengths


pdf("transcripts_overexp.pdf",width=12)
for (transcript in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$transcript))) {
  print(transcript)
  summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
  assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript==transcript & tails_data_mapped_true_no_hetero_no_other_tails$exp_type=='OVR',]))
  summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
  if (nrow(summary_tail_types_table)>1) {
    print(summary_tail_types_table)
    summary_test <- summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,] %>% select(condition,freq,replicate,project_name) %>% reshape(timevar="condition",direction="wide",idvar=c("replicate","project_name"))
    write.table(summary_test,file=paste(transcript,"_ovr_uridylation_summarized.tsv",sep=""),row.names=F,sep="\t")
    summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


    plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                           fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                        stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
      scale_fill_grey() + ggtitle(paste("OVR",transcript,sep=",")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
    print(plot_tail_lengths)
    #plot_tail_lengths
  }
  for (project in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$project_name))) {
    summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
    assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript==transcript & tails_data_mapped_true_no_hetero_no_other_tails$exp_type=='OVR' & tails_data_mapped_true_no_hetero_no_other_tails$project_name==project,]))
    summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
    if (nrow(summary_tail_types_table)>1) {
      print(summary_tail_types_table)
      summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


      plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                             fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                          stat = "identity") + scale_y_continuous() + xlab("ondition") + ylab("fraction of transcripts") +
        scale_fill_grey() + ggtitle(paste("OVR",transcript,project,sep=",")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
      print(plot_tail_lengths)
      #plot_tail_lengths
    }

  }

}

dev.off()

pdf("transcripts_kd.pdf",width=12)
for (transcript in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$transcript))) {
  print(transcript)
  summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
  assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript==transcript & tails_data_mapped_true_no_hetero_no_other_tails$exp_type=='KD',]))
  summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
  if (nrow(summary_tail_types_table)>1) {
    print(summary_tail_types_table)
    summary_test <- summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,] %>% select(condition,freq,replicate,project_name) %>% reshape(timevar="condition",direction="wide",idvar=c("replicate","project_name"))
    write.table(summary_test,file=paste(transcript,"_kd_uridylation_summarized.tsv",sep=""),row.names=F,sep="\t")
    summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


    plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                           fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                        stat = "identity") + scale_y_continuous() + xlab("condition") + ylab("fraction of transcripts") +
      scale_fill_grey() + ggtitle(paste("KD",transcript,"all seqs",sep=",")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
    print(plot_tail_lengths)
    #plot_tail_lengths
  }
  for (project in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$project_name))) {
    summary_tail_types_table_name <- paste("repo_over_summarized_tails_types_by_condition")
    assign(summary_tail_types_table_name, summarize_uridylation_replicates5(tails_data_mapped_true_no_hetero_no_other_tails[tails_data_mapped_true_no_hetero_no_other_tails$transcript==transcript & tails_data_mapped_true_no_hetero_no_other_tails$exp_type=='KD' & tails_data_mapped_true_no_hetero_no_other_tails$project_name==project,]))
    summary_tail_types_table <- eval(as.symbol(summary_tail_types_table_name))
    if (nrow(summary_tail_types_table)>1) {
      print(summary_tail_types_table)

      summary_tail_types_table <- summary_tail_types_table %>% group_by(condition,uridylated2) %>% summarise(mean_freq=mean(mean_freq),sd=mean(sd))


      plot_tail_lengths <- ggplot(summary_tail_types_table[summary_tail_types_table$uridylated2==TRUE,], aes(x = as.factor(condition),
                                                                                                             fill = as.factor(uridylated2), colours = as.factor(uridylated2))) + geom_bar(aes(y = mean_freq), position = position_stack(),
                                                                                                                                                                                          stat = "identity") + scale_y_continuous() + xlab("condition") + ylab("fraction of transcripts") +
        scale_fill_grey() + ggtitle(paste("KD",transcript,project,sep=",")) + geom_errorbar(aes(ymin = mean_freq -sd, ymax = mean_freq + sd),colour = "black", width = 0.1, position = position_dodge(0.9))
      print(plot_tail_lengths)
      #plot_tail_lengths
    }

  }

}

dev.off()

pdf("mapping_trans.pdf",width=12)
test_trans_select <- tails_data_mapped_true_no_hetero_no_other_tails  %>% select(tail_type,Utail_length,tail_length,transcript,replicate,condition,replicate,primer_name,project_name,mapping_position,R3_mapping_position,uridylated,uridylated2,exp_type) %>% group_by(project_name,condition,transcript,primer_name,uridylated,replicate)
for (trans in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$transcript))) {
  print(trans)
  test_trans2 <- test_trans_select %>% filter(transcript==trans)
  test_trans2 <- test_trans2 %>% filter(exp_type=="OVR")
  mapping_summary <- summary(test_trans2$R3_mapping_position)
  min_map = mapping_summary[2]
  max_map = mapping_summary[5]
  test_trans2 <- test_trans2  %>% filter(mapping_position>min_map & mapping_position<max_map)
  test_trans2$condition <- as.character(test_trans2$condition)
  test_trans2$condition <- as.factor(test_trans2$condition)
  for (cond in levels(as.factor(test_trans2$condition))) {
    print(cond)
    test_trans2_cond <- test_trans2 %>% filter(condition == cond)
    test_plot <- ggplot(test_trans2_cond,aes(x=mapping_position,group=condition,colour=condition)) + stat_bin(binwidth=1,aes(x=mapping_position,y=..count..),geom="line") +  ggtitle(paste("OVR",trans,cond,sep=","))
    test_plot <- test_plot + facet_grid(project_name ~ .)
    print(test_plot)
  }
}
dev.off()

pdf("mapping_trans_KD.pdf",width=12)
test_trans_select <- tails_data_mapped_true_no_hetero_no_other_tails  %>% select(tail_type,Utail_length,tail_length,transcript,replicate,condition,replicate,primer_name,project_name,mapping_position,R3_mapping_position,uridylated,uridylated2,exp_type) %>% group_by(project_name,condition,transcript,primer_name,uridylated,replicate)
for (trans in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$transcript))) {
  print(trans)
  test_trans2 <- test_trans_select %>% filter(transcript==trans)
  test_trans2 <- test_trans2 %>% filter(exp_type=="KD")
  mapping_summary <- summary(test_trans2$R3_mapping_position)
  min_map = mapping_summary[2]
  max_map = mapping_summary[5]
  test_trans2 <- test_trans2  %>% filter(mapping_position>min_map & mapping_position<max_map)
  test_trans2$condition <- as.character(test_trans2$condition) 
  test_trans2$condition <- as.factor(test_trans2$condition)
  for (cond in levels(as.factor(test_trans2$condition))) {
    print(cond)
    test_trans2_cond <- test_trans2 %>% filter(condition == cond)
    test_plot <- ggplot(test_trans2_cond,aes(x=mapping_position,group=condition,colour=condition)) + stat_bin(binwidth=1,aes(x=mapping_position,y=..count..),geom="line") +  ggtitle(paste("KD",trans,cond,sep=","))
    test_plot <- test_plot + facet_grid(project_name ~ .)
    print(test_plot)
  }
}
dev.off()


all_data <- tails_data_mapped_true_no_hetero_no_other_tails %>% select(V1,tail_sequence,tail_type,Atail,Atail_length,Utail_length,tail_length,tail_source,transcript,cell_line,localization,condition,replicate,primer_name,project_name,mapping_position,exp_type,CTGAC_R5,terminal_nucleotides,uridylated,uridylated2,tailed,mapped) %>% group_by(project_name,condition,replicate,transcript,primer_name,cell_line,uridylated2)

all_data_reporter_overexp <- all_data %>% filter(transcript=='REPORTERL1_overexp',exp_type=="OVR",mapping_position<9010,mapping_position>8950)

all_data_reporter_overexp2 <- all_data_reporter_overexp %>% group_by(condition,replicate,project_name,uridylated2)

all_data_reporter_overexp3 <- all_data_reporter_overexp2 %>% dplyr::summarize(n_urid=n()) %>% ungroup() %>% dplyr::group_by(condition,replicate,project_name) %>% dplyr::mutate(freq_urid = n_urid/sum(n_urid)) %>% dplyr::group_by(condition,uridylated2) %>% dplyr::mutate(mean_freq_urid = mean(freq_urid), sd_urid = sd(freq_urid))
all_data_reporter_overexp3 %>% filter(uridylated2==TRUE) %>% ggplot(aes(x=condition,y=mean_freq_urid)) + geom_bar(stat="identity",position="dodge") + geom_errorbar(aes(ymin =  mean_freq_urid - sd_urid, ymax = mean_freq_urid + sd_urid),colour = "black", width = 0.1, position = position_dodge(0.9)) + geom_jitter(aes(y=freq_urid))


all_data_reporter_overexp_ACTB <- all_data %>% filter(transcript=='ACTB',exp_type=="OVR")

all_data_reporter_overexp2_ACTB <- all_data_reporter_overexp_ACTB %>% group_by(condition,replicate,project_name,uridylated2)

all_data_reporter_overexp3_ACTB <- all_data_reporter_overexp2_ACTB %>% dplyr::summarize(n_urid=n()) %>% ungroup() %>% dplyr::group_by(condition,replicate,project_name) %>% dplyr::mutate(freq_urid = n_urid/sum(n_urid)) %>% dplyr::group_by(condition,uridylated2) %>% dplyr::mutate(mean_freq_urid = mean(freq_urid), sd_urid = sd(freq_urid))
all_data_reporter_overexp3_ACTB %>% filter(uridylated2==TRUE) %>% ggplot(aes(x=condition,y=mean_freq_urid)) + geom_bar(stat="identity",position="dodge") + geom_errorbar(aes(ymin =  mean_freq_urid - sd_urid, ymax = mean_freq_urid + sd_urid),colour = "black", width = 0.1, position = position_dodge(0.9)) + geom_jitter(aes(y=freq_urid))

pdf("OVR_transcripts.pdf")
for (trans in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$transcript))) {
print(trans)
  test_trans <- all_data %>% filter(transcript==trans,exp_type=="OVR")

  test_trans <- test_trans %>% group_by(condition,replicate,project_name,uridylated2)

  test_trans <- test_trans %>% dplyr::summarize(n_urid=n()) %>% ungroup() %>% dplyr::group_by(condition,replicate,project_name) %>% dplyr::mutate(freq_urid = n_urid/sum(n_urid)) %>% dplyr::group_by(condition,uridylated2) %>% dplyr::mutate(mean_freq_urid = mean(freq_urid), sd_urid = sd(freq_urid))
  plot <- test_trans %>% filter(uridylated2==TRUE) %>% ggplot(aes(x=condition,y=mean_freq_urid)) + geom_bar(stat="identity",position="dodge") + geom_errorbar(aes(ymin =  mean_freq_urid - sd_urid, ymax = mean_freq_urid + sd_urid),colour = "black", width = 0.1, position = position_dodge(0.9)) + geom_jitter(aes(y=freq_urid)) + ggtitle(paste("OVR",trans))
  print(plot)
}
dev.off()


pdf("KD_transcripts.pdf")
for (trans in levels(as.factor(tails_data_mapped_true_no_hetero_no_other_tails$transcript))) {
print(trans)
  test_trans <- all_data %>% filter(transcript==trans,exp_type=="KD")

  test_trans <- test_trans %>% group_by(condition,replicate,project_name,uridylated2)

  test_trans <- test_trans %>% dplyr::summarize(n_urid=n()) %>% ungroup() %>% dplyr::group_by(condition,replicate,project_name) %>% dplyr::mutate(freq_urid = n_urid/sum(n_urid)) %>% dplyr::group_by(condition,uridylated2) %>% dplyr::mutate(mean_freq_urid = mean(freq_urid), sd_urid = sd(freq_urid))
  plot <- test_trans %>% filter(uridylated2==TRUE) %>% ggplot(aes(x=condition,y=mean_freq_urid)) + geom_bar(stat="identity",position="dodge") + geom_errorbar(aes(ymin =  mean_freq_urid - sd_urid, ymax = mean_freq_urid + sd_urid),colour = "black", width = 0.1, position = position_dodge(0.9)) + geom_jitter(aes(y=freq_urid)) + ggtitle(paste("KD",trans))
  print(plot)
}
dev.off()
smaegol/raceseq_scripts documentation built on May 28, 2019, 5:40 p.m.