#!/bin/bash
# With the decreasing cost of third-generation sequencing (TGS), many
# researchers and enthusiasts are becoming interested in analyzing TGS data.
# This project aims to provide a comprehensive guide to TGS data analysis,
# allowing users to quickly get hands-on experience with the entire TGS
# workflow.
# Quality control of the raw reads.
# Create the report directory first; -p keeps mkdir from failing when the
# directory is already there from an earlier run.
mkdir -p nanoQC
# nanoQC report, written into the nanoQC/ directory (-o).
nanoQC -o nanoQC -f input_data_path.fastq.gz
# Read summary statistics, written under statreports/ (--outdir).
# NOTE(review): NanoStat reads input_data_path.sra.fastq.gz while nanoQC reads
# input_data_path.fastq.gz — confirm both file names are intended.
NanoStat --fastq input_data_path.sra.fastq.gz --outdir statreports
# Filter and trim the raw reads with NanoFilt:
#   -q 7           minimum average read quality
#   -l 1000        minimum read length
#   --headcrop 50  trim 50 bases from the start of each read
#   --tailcrop 50  trim 50 bases from the end of each read
# The reads are decompressed into the filter and recompressed on the way out,
# so no uncompressed intermediate file is written.
gunzip -c input_data_path.fastq.gz \
  | NanoFilt -q 7 -l 1000 --headcrop 50 --tailcrop 50 \
  | gzip > clean.NanoFilt.fastq.gz
# Build the minimap2 index (ref.mmi) from the reference FASTA.
minimap2 -d ref.mmi ref.fa
# Align the reads against the prebuilt index; -a selects SAM output, which is
# redirected into alignment.sam.
minimap2 -a ref.mmi reads.fq > alignment.sam
# Sort the alignment and write it as BAM, using 8 sorting/compression threads.
# -O (capital) selects the output format; -o (lowercase) names the output
# file. Passing "-o bam" would have been treated as an output file name.
# NOTE(review): the input is s1037.sam while every output is s0137.* —
# confirm the input file name is not a transposition typo.
samtools sort -@ 8 -O bam -o s0137.sorted.bam s1037.sam
# Index the sorted BAM.
samtools index s0137.sorted.bam
# Index the reference FASTA.
samtools faidx ref.fa
# Assemble the raw Nanopore reads with Canu.
#   -p / -d         prefix for output files / assembly directory
#   genomeSize=5g   estimated genome size
#   maxThreads=96   thread limit passed to Canu
# Only standard output is redirected into canu.log.
canu \
  -p output_prefix \
  -d output_dir \
  genomeSize=5g \
  maxThreads=96 \
  -nanopore-raw input_data_path \
  > canu.log
# Evaluate the four assemblies against the reference (-r) with QUAST.
# Results go to the quast/ directory (-o).
quast.py -r ref.fa \
  canu.fa \
  miniasm.fa \
  wtdbg2.cns.fa \
  smartdenovo.fa \
  -o quast
# Polish the draft assembly with medaka.
#   -i  reads used for polishing
#   -d  draft assembly to polish
#   -o  output directory
#   -m  medaka model name
#   -t  number of threads
# NOTE(review): confirm that -v is accepted by the installed
# medaka_consensus version.
# Standard output is redirected into medaka.log.
medaka_consensus \
  -i raw_reads.fastq.gz \
  -d assembly.fasta \
  -o medaka_result \
  -m r941_min_high_g360 \
  -v medaka.vcf \
  -t 24 \
  > medaka.log
# Polish the assembly in pilon.fasta with Pilon, using reads mapped by bwa.
# Each step below consumes the file written by the previous one, so they must
# run as separate commands in this order.

# Build the bwa index for the assembly.
bwa index pilon.fasta
# Map the reads and convert the SAM stream to BAM.
bwa mem pilon.fasta input_data_path.fastq.gz \
  | samtools view -Sb - > input_data_path.bam
# Sort and index the BAM before handing it to Pilon.
samtools sort -o input_data_path.sorted.bam input_data_path.bam
samtools index input_data_path.sorted.bam
# Run Pilon with a 32 GB Java heap and 24 threads, applying all fix types.
# NOTE(review): Pilon documents --output as a file-name prefix, so this
# likely produces pilon_polished.fasta.fasta — confirm the intended name.
java -Xmx32G -jar pilon.jar \
  --genome pilon.fasta \
  --bam input_data_path.sorted.bam \
  --output pilon_polished.fasta \
  --threads 24 \
  --fix all \
  --verbose