From 125e23477d9e1adcc3e956ecb3ae45ff48dd7fd8 Mon Sep 17 00:00:00 2001 From: Dong Zhao Date: Sat, 30 Sep 2017 14:09:57 +0800 Subject: [PATCH] liu yongxin's share of 16s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 微信公众号 宏基因组分享 --- 16S-pcr-seq.sh | 233 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 16S-pcr-seq.sh diff --git a/16S-pcr-seq.sh b/16S-pcr-seq.sh new file mode 100644 index 0000000..df8b60b --- /dev/null +++ b/16S-pcr-seq.sh @@ -0,0 +1,233 @@ +# Ŀ¼,-pΪļдڲ +mkdir -p example_PE250 +cd example_PE250 +# ʱļͽĿ¼ +mkdir -p temp result + +# Ҫʮ +source activate qiime2-2017.7 + +# Ƿװɹɹ +qiime --help + +# رչʱرգȻܻ +source deactivate + +# ֤ʵǷд +validate_mapping_file.py -m mappingfile.txt +#˫ݺϲΪļ +join_paired_ends.py -f PE250_1.fq.gz -r PE250_2.fq.gz -m fastq-join -o temp/PE250_join +# ȡbarcode +extract_barcodes.py -f temp/PE250_join/fastqjoin.join.fastq -m mappingfile.txt -o temp/PE250_barcode -c barcode_paired_stitched --bc1_len 0 --bc2_len 6 -a --rev_comp_bc2 + +reads.fastq # ļbarcodeӦη +# ʿؼƷ +split_libraries_fastq.py -i temp/PE250_barcode/reads.fastq -b temp/PE250_barcode/barcodes.fastq -m mappingfile.txt -o temp/PE250_split/ -q 20 --max_bad_run_length 3 --min_per_read_length_fraction 0.75 --max_barcode_errors 0 --barcode_type 6 + +#cutadaptг˫Pȿ +cutadapt -g AACMGGATTAGATACCCKG -a GGAAGGTGGGGATGACGT -e 0.15 -m 300 --discard-untrimmed temp/PE250_split/seqs.fna -o temp/PE250_P5.fa + +# ʽת +sed 's/ .*/;/g;s/>.*/&&/g;s/;>/;barcodelabel=/g;s/_[0-9]*;$/;/g' temp/PE250_P5.fa > temp/seqs_usearch.fa + +# ȥ +./usearch10 -fastx_uniques temp/seqs_usearch.fa -fastaout temp/seqs_unique.fa -minuniquesize 2 -sizeout + +# OTU + ./usearch10.0.240_i86linux32 -cluster_otus temp/seqs_unique.fa -otu + s temp/otus.fa -uparseout temp/uparse.txt -relabel Otu + +# 鿴OTU +grep '>' -c temp/otus.fa +######04:26 85Mb 100.0% 5493 OTUs, 9086 chimeras + +# Usearch8ƼIJοݿRDP +wget http://drive5.com/uchime/rdp_gold.fa +# RDPݿȶȥ֪еǶ +./usearch10 -uchime2_ref temp/otus.fa -db rdp_gold.fa -chimeras temp/otus_chimeras.fa -notmatched temp/otus_rdp.fa -uchimeout temp/otus_rdp.uchime -strand plus -mode sensitive -threads 4 + +# Greengeneݿ⣬320MB +wget -c ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz +# ѹݰС3.4G +tar xvzf gg_13_8_otus.tar.gz +# OTU97%ƾĴжбȶԣԼ8min +time align_seqs.py -i temp/otus_non_chimera.fa -t gg_13_8_otus/rep_set_aligned/97_otus.fasta -o temp/aligned/ +# ޷ȶϸ +grep -c '>' temp/aligned/otus_non_chimera_failures.fasta # 1860 +# òϸOTU ID +grep '>' temp/aligned/otus_non_chimera_failures.fasta|cut -f 1 -d ' '|sed 's/>//g' > temp/aligned/otus_non_chimera_failures.id +# ˷ϸ +filter_fasta.py -f temp/otus_non_chimera.fa -o temp/otus_rdp_align.fa -s temp/aligned/otus_non_chimera_failures.id -n +# ڻжOTU:975 +grep '>' -c temp/otus_rdp_align.fa + +# ǶID +grep '>' temp/otus_chimeras.fa|sed 's/>//g' > temp/otus_chimeras.id +# ޳Ƕ +filter_fasta.py -f temp/otus.fa -o temp/otus_non_chimera.fa -s temp/otus_chimeras.id -n +# ǷΪԤڵ +grep '>' -c temp/otus_non_chimera.fa # 2835 + +# Greengeneݿ⣬320MB +wget -c ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz +# ѹݰС3.4G +tar xvzf gg_13_8_otus.tar.gz +# OTU97%ƾĴжбȶԣԼ8min +time align_seqs.py -i temp/otus_non_chimera.fa -t gg_13_8_otus/rep_set_aligned/97_otus.fasta -o temp/aligned/ +# ޷ȶϸ +grep -c '>' temp/aligned/otus_non_chimera_failures.fasta # 1860 +# òϸOTU ID +grep '>' temp/aligned/otus_non_chimera_failures.fasta|cut -f 1 -d ' '|sed 's/>//g' > temp/aligned/otus_non_chimera_failures.id +# ˷ϸ +filter_fasta.py -f temp/otus_non_chimera.fa -o temp/otus_rdp_align.fa -s temp/aligned/otus_non_chimera_failures.id -n +# ڻжOTU:975 +grep '>' -c temp/otus_rdp_align.fa + +# OTUհĴУReference(ѡϰ) +awk 'BEGIN {n=1}; />/ {print ">OTU_" n; n++} !/>/ {print}' temp/otus_rdp_align.fa > result/rep_seqs.fa +# OTU +./usearch10 -usearch_global temp/seqs_usearch.fa -db result/rep_seqs.fa -otutabout temp/otu_table.txt -biomout temp/otu_table.biom -strand plus -id 0.97 -threads 4 +# Ϣ 01:20 141Mb 100.0% Searching seqs_usearch.fa, 32.3% matched +# Ĭ10̣߳ʱ120룬32.3%ƥ䵽OTUϣ30̷߳ʱ304룬߳ԽԽ죬ַҲǺܷʱ + +#OTUless temp/otu_table.txt鿴һ +#13 # ע +assign_taxonomy.py -i result/rep_seqs.fa \ + -r gg_13_8_otus/rep_set/97_otus.fasta \ + -t gg_13_8_otus/taxonomy/97_otu_taxonomy.txt \ + -m rdp -o result +#14. OTUͳơʽתϢ + +# ıOTUתΪBIOM +biom convert -i temp/otu_table.txt \ + -o result/otu_table.biom \ + --table-type="OTU table" --to-json +# ϢOTUһУΪtaxonomy +biom add-metadata -i result/otu_table.biom \ + --observation-metadata-fp result/rep_seqs_tax_assignments.txt \ + -o result/otu_table_tax.biom \ + --sc-separated taxonomy --observation-header OTUID,taxonomy +# תbiomΪtxtʽעͣɶ +biom convert -i result/otu_table_tax.biom -o result/otu_table_tax.txt --to-tsv --header-key taxonomy + +# 鿴OTUĻϢƷOUTͳ +biom summarize-table -i result/otu_table_tax.biom -o result/otu_table_tax.sum +less result/otu_table_tax.sum#鿴һ +#15. OTUɸѡ + +# Ʒˣѡcounts>3000Ʒ +filter_samples_from_otu_table.py -i result/otu_table_tax.biom -o result/otu_table2.biom -n 3000 +# 鿴˺ֻ25Ʒ975OTU +biom summarize-table -i result/otu_table2.biom +# OTUȹˣѡԷȾֵ֮һOTU +filter_otus_from_otu_table.py --min_count_fraction 0.0001 -i result/otu_table2.biom -o result/otu_table3.biom +# 鿴˺ֻ25Ʒ346OTU +biom summarize-table -i result/otu_table3.biom +# תbiomʽOTUΪıOTU +biom convert -i result/otu_table3.biom -o result/otu_table4.txt --table-type="OTU table" --to-tsv +# OTUʽRȡ +sed -i '/# Const/d;s/#OTU //g;s/ID.//g' result/otu_table4.txt +# ɸѡOTUжӦOTU +filter_fasta.py -f result/rep_seqs.fa -b result/otu_table3.biom -o result/rep_seqs4.fa +#16. + +#ǻڶбȶԵĽչʾḻϢǽRͼϸ˴ֻǽAlpha, BetaԷļ + +# clustaloбȶԣû밲װClustal Omega +clustalo -i result/rep_seqs4.fa -o temp/rep_seqs_align.fa --seqtype=DNA --full --force --threads=4 +# ɸѡбкͱ +filter_alignment.py -i temp/rep_seqs_align.fa -o temp/rep_seqs_align_pfiltered#only very short conserved region saved +# fasttree +make_phylogeny.py -i temp/rep_seqs_align_pfiltered.fa/rep_seqs_align_pfiltered.fasta -o result/rep_seqs.tree +# generate tree by FastTree + + +#17. Alpha + +#AlphaǼƷɣͷάϢͿɼ1ͼAlphaԣϰҲҵĶ + +#AlphaԼǰҪOTUб׼Ϊͬȣ⵽᲻ͬǽOTUسͬԹƽȽϸƷ£ + +# 鿴ƷСֵ +biom summarize-table -i result/otu_table3.biom +# Сֵس׼ +single_rarefaction.py -i result/otu_table3.biom -o temp/otu_table_rare.biom -d 2797 +# 㳣õAlphaָ +alpha_diversity.py -i temp/otu_table_rare.biom -o result/alpha.txt -t result/rep_seqs.tree -m shannon,chao1,observed_otus,PD_whole_tree + + + +#18. Beta + +#BetaǼƷͬͬOTUҲҪ׼سʧϢ̫࣬ͳơ˲ѡCSS׼ + +# CSS׼OTU +normalize_table.py -i result/otu_table3.biom -o temp/otu_table_css.biom -a CSS +# ת׼OTUΪıںڻͼ +biom convert -i temp/otu_table_css.biom -o result/otu_table_css.txt --table-type="OTU table" --to-tsv +# ɾϢRȡ +sed -i '/# Const/d;s/#OTU //g;s/ID.//g' result/otu_table_css.txt +# Beta +beta_diversity.py -i temp/otu_table_css.biom -o result/beta/ -t result/rep_seqs.tree -m bray_curtis,weighted_unifrac,unweighted_unifrac +# BetaԾļRȡ +sed -i 's/^\t//g' result/beta/* + + +#19. ַ༶ + +#OTUҪעϢעϢͨעϢΪ7𣺽硢š١Ŀơ֡Сļ𣬺OTUƵвͬ +#dz˿ԱȽƷOTUˮƽ⣬оͬƼϵIJ죬ǷЩͬı仯ɡ + +#ע͵ļзܣExcelRǺ鷳Ĺ̡ʹQIIMEԴ Ľűsummarize_taxa.py + +# š١ĿơзܣӦL2-L6 +summarize_taxa.py -i result/otu_table3.biom -o result/sum_taxa + # summary each level percentage +# ޸һıͷʺRȡıʽ +sed -i '/# Const/d;s/#OTU ID.//g' result/sum_taxa/* +# format for R read +# Ϊ鿴 +less -S result/sum_taxa/otu_table3_L2.txt + + +#20. ɸѡչʾĽ + +#пƯĽOTUͨɰǧֱչʾǸҲǼġ +#̴һЩͨķɸѡݣóƯĽ + +# ѡOTUзȴ0.1%OTU +filter_otus_from_otu_table.py --min_count_fraction 0.001 -i result/otu_table3.biom -o temp/otu_table_k1.biom +# öӦfasta +filter_fasta.py -f result/rep_seqs.fa -o temp/tax_rep_seqs.fa -b temp/otu_table_k1.biom +# ͳ104һ100ҼдݵBܶ͸ɺϸ +grep -c '>' temp/tax_rep_seqs.fa # 104 +# бȶ +clustalo -i temp/tax_rep_seqs.fa -o temp/tax_rep_seqs_clus.fa --seqtype=DNA --full --force --threads=4 +# +make_phylogeny.py -i temp/tax_rep_seqs_clus.fa -o temp/tax_rep_seqs.tree +# ʽתΪR ggtreeõ +sed "s/'//g" temp/tax_rep_seqs.tree > result/tax_rep_seqs.tree # remove ' +# ID +grep '>' temp/tax_rep_seqs_clus.fa|sed 's/>//g' > temp/tax_rep_seqs_clus.id +# ЩеעͣɫʾͬϢ +awk 'BEGIN{OFS="\t";FS="\t"} NR==FNR {a[$1]=$0} NR>FNR {print a[$1]}' result/rep_seqs_tax_assignments.txt temp/tax_rep_seqs_clus.id|sed 's/; /\t/g'|cut -f 1-5 |sed 's/p__//g;s/c__//g;s/o__//g' > result/tax_rep_seqs.tax + + +#21. + +#һЩ򵥵ĸʽתΪͳƷ׼ļԺʲô⣬ﲹ䣬CSDNҿ޸ģѴҷʹ𰸷ⲿ֡ + +# mappingfileתΪRɶʵ +sed 's/#//' mappingfile.txt > result/design.txt +# תıotu_tableʽΪRɶ +sed '/# Const/d;s/#OTU //g;s/ID.//g' result/otu_table4.txt > result/otu_table.txt +# תעϢΪƱָRȡ +sed 's/;/\t/g;s/ //g' result/rep_seqs_tax_assignments.txt > result/rep_seqs_tax.txt + + + + + + + +