# Define the location for the project
PROJECT="./example_project"
# define the location of the raw HiC reads
HIC_READS="path/to/hic/reads"
# set the path to the assembled contigs (refrence genome)
REF="path/to/assembled/contigs"
REF_NAME=$(basename $REF)
# define output and log directories
OUT="${PROJECT}/001.fastqc"
LOG="${OUT}/logs"
# create directiores
mkdir -p $OUT
mkdir -p $LOG
# retrieve the path to the raw file names
FILES=`ls /workspace/hrpazs/petunia_genome/HiC/007.trim_fastp/*.fastq.gz`
# run FASTQC on the raw data
module load FastQC
module load MultiQC
command="fastqc $FILES -q -t 2 -o ${OUT} && multiqc $OUT -o $OUT"
# Submit the command to lsf for execution
bsub -o ${LOG}/FQC.out -e ${LOG}/FQC.err -n 4 -J FASTQC-fp $command
# define output and log directories
OUT="${PROJECT}/002.trim_fastp"
LOG="${OUT}/logs"
# create directiores
mkdir -p $OUT
mkdir -p $LOG
module load fastp/0.19.4
COMMAND="fastp \
--in1 $RAW/Hic_R1.fastq.gz \
--out1 $OUT/Hic_trimmed_R1.fastq.gz \
--in1 $RAW/Hic_R2.fastq.gz \
--out1 $OUT/Hic_trimmed_R2.fastq.gz \
--trim_tail1 25 \
--trim_tail2 25 \
--disable_trim_poly_g \
--disable_quality_filtering \
--disable_length_filtering \
--thread 16
"
# Submit the command to lsf for execution
bsub -o $LOG/fastp.out -e $LOG/fastp.err -n 16 -J fastp $COMMAND
For further information please visit Phase Genomics documentations on aligning Hi-C reads here.
# define output and log directories
OUT="$PROJECT/003.bwa/index"
LOG="$OUT/logs"
# create directiores
mkdir -p $OUT
mkdir -p $LOG
# load bwa
module load bwa/0.7.17
command="bwa index -p ${OUT}/$REF_NAME -a bwtsw $REF"
# Submit the command to LSF job scheduler for execution on the cluster:
bsub -o $LOG/index-bwa.out -e $LOG/index-bwa.err -n 20 -J bwa_index $COMMAND
# or if running localy on one machine just execute the command by running:
$command > $LOG/index_bwa.out 2> $LOG/index_bwa.err
# define input, output and log directories
IN="${PROJECT}/002.trim_fastp"
OUT="$PROJECT/003.bwa/mem"
LOG="$OUT/logs"
# set the indexed refrence directory
INDEX="$PROJECT/002.bwa/index"
# create directiores
mkdir -p $OUT
mkdir -p $LOG
# get basenames of Hi-C reads and remove trailing read number and file extension
NAME=`basename $(ls ${IN}/*.fastq.gz) | sort -u | uniq | sed 's/_R[1,2].fastq.gz//g'`
module load bwa/0.7.17
module load samtools/1.9
for name in $NAME
do
# define read 2 and read 2 of the Hi-C reads
READ1="${name}_R1.fastq.gz"
READ1="${name}_R2.fastq.gz"
# run bwa to map the reads then samtools to filter out unmapped reads, unmmaped mate, not primary alignment and
# supplementary alignment
command="bwa mem -5SP -t 50 $INDEX $IN/$read1 $IN/$read2 | \
samtools view -S -h -b -F 2316 > $OUT/$name.bam"
bsub -o $LOG/align_bwa.out -e $LOG/align_bwa.err -n 50 -J bwa_align $command
done
For further information please visit LACHESIS User's Manual here.
IN="$PROJECT/003.bwa/mem"
# get basename of bam file
BAM=$(basename $IN | sed 's/.bam//')
for bam in $BAM
do
command="./scripts/PreprocessSAMs.pl $IN/ $REF MBOI"
bsub -o $LOG/PreprocessSAMs.out -e $LOG/PreprocessSAMs.err -J filter $command
done
# generating contact file
command="samtools view $ASSEMBLY_RAGOO_MAP | \
./scripts/filter_reads.awk -v isize=0 q=0 | \
./scripts/bam_to_bins.awk -v w=100 > \
$WORKDIR/contact_map_100_qualFilt.txt"
bsub -J convert -o $WORKDIR/contactMap_100.out -e $WORKDIR/contactMap_100.err $command
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.