change default output and tidy up repo

AgResearch · Apr 12, 2023 · 68fa6f5 · 68fa6f5
1 parent 245da03
commit 68fa6f5
Show file tree

Hide file tree

Showing 24 changed files with 43 additions and 1,097 deletions.
diff --git a/SelfRelDepth.r b/SelfRelDepth.r
@@ -9,7 +9,7 @@ get_command_args <- function() {
    args=(commandArgs(TRUE))
    if(length(args)!=1 ){
       #quit with error message if wrong number of args supplied
-      print('Usage example : Rscript --vanilla  SelfRelDepth.r kgd_dir=/dataset/hiseq/scratch/postprocessing/180419_D00390_0357_ACCHG7ANXX.gbs/SQ0673.processed_sample/uneak/KGD')
+      print('Usage example : Rscript --vanilla  SelfRelDepth.r kgd_dir=/dataset/2023_illumina_sequencing_a/scratch/postprocessing/180419_D00390_0357_ACCHG7ANXX.gbs/SQ0673.processed_sample/uneak/KGD')
       print('args received were : ')
       for (e in args) {
          print(e)
@@ -30,7 +30,7 @@ get_command_args <- function() {
 
 
 
-#KGDdir <- "/dataset/hiseq/scratch/postprocessing/180419_D00390_0357_ACCHG7ANXX.gbs/SQ0673.processed_sample/uneak/KGD"
+#KGDdir <- "/dataset/2023_illumina_sequencing_a/scratch/postprocessing/180419_D00390_0357_ACCHG7ANXX.gbs/SQ0673.processed_sample/uneak/KGD"
 #windows:
 #KGDdir <- paste0("//isamba",KGDdir)
 

diff --git a/SelfRelDepthtoHTML.py b/SelfRelDepthtoHTML.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 #
 # make an HTML page to view the output of SelfRelDepth.r together with the original plot
-# - the output should be sent to /bifo/scratch/hiseq/postprocessing/ so that the paths
+# - the output should be sent to /bifo/scratch/2023_illumina_sequencing_a/postprocessing/ so that the paths
 # work when opened 
 #
 import sys
@@ -106,18 +106,18 @@ def generate_run_plot():
 
     for record in record_array:
         # these are like
-        # /bifo/scratch/hiseq/postprocessing/180130_D00390_0343_BCBG7MANXX.gbs/SQ0618.processed_sample/uneak/PstI.PstI.cohort/KGD -0.06391309 2.17314e-15
+        # /bifo/scratch/2023_illumina_sequencing_a/postprocessing/180130_D00390_0343_BCBG7MANXX.gbs/SQ0618.processed_sample/uneak/PstI.PstI.cohort/KGD -0.06391309 2.17314e-15
         #/dataset/gseq_processing/scratch/gbs/140624_D00390_0044_BH9PEBADXX/SQ0001.all.PstI.PstI/KGD     -0.05257605     0.1317527       cattle  PstI    PstI    13
         #/dataset/gseq_processing/scratch/gbs/140624_D00390_0044_BH9PEBADXX/SQ0001.all.PstI.PstI/KGD.orig        -0.05257605     0.1317527       cattle  PstI    PstI    13
 
         #print record
-        #['/bifo/scratch/hiseq/postprocessing/180627_D00390_0375_BCCHBJANXX.gbs/SQ2741.processed_sample/uneak/all.ApeKI.ApeKI.cohort/KGD',
+        #['/bifo/scratch/2023_illumina_sequencing_a/postprocessing/180627_D00390_0375_BCCHBJANXX.gbs/SQ2741.processed_sample/uneak/all.ApeKI.ApeKI.cohort/KGD',
         # '-0.5114565', '1.276835e-12', 'white', 'clover', 'ApeKI', 'ApeKI', '96']
         #print "DEBUG processing %s"%record
 
         (path, slope,pval,species, gbs_cohort, enzyme, count) = record
         # need to fix up path - from this
-        # /bifo/scratch/hiseq/postprocessing/180130_D00390_0343_BCBG7MANXX.gbs/SQ0618.processed_sample/uneak/PstI.PstI.cohort/KGD
+        # /bifo/scratch/2023_illumina_sequencing_a/postprocessing/180130_D00390_0343_BCBG7MANXX.gbs/SQ0618.processed_sample/uneak/PstI.PstI.cohort/KGD
         # to this
         # 180816_D00390_0393_ACCRBRANXX.gbs/SQ0782.processed_sample/uneak/all.GOAT.PstI.cohort/KGD/GHWdgm.05diagdepth.png
         #
@@ -129,7 +129,7 @@ def generate_run_plot():
             relpath=os.path.relpath(path, "/dataset/gseq_processing/scratch/gbs")
             image_path=os.path.join(relpath, "GHWdgm.05diagdepth.png")
         else:
-            relpath=os.path.relpath(path, "/dataset/hiseq/scratch/postprocessing")
+            relpath=os.path.relpath(path, "/dataset/2023_illumina_sequencing_a/scratch/postprocessing")
             image_path=os.path.join("old_plots", relpath,"GHWdgm.05diagdepth.png")
 
 
@@ -138,16 +138,16 @@ def generate_run_plot():
 
         run_match=re.search("/dataset/gseq_processing/scratch/gbs/([^\/]+)/",path)
         if run_match is None:
-            run_match=re.search("/dataset/hiseq/scratch/postprocessing/([^\/]+)\.gbs/",path)
+            run_match=re.search("/dataset/2023_illumina_sequencing_a/scratch/postprocessing/([^\/]+)\.gbs/",path)
             if run_match is None:
                 print "Error could not parse run from %s"%path
                 continue
 
         run=run_match.groups()[0]
 
-        if os.path.exists(os.path.join("/dataset/hiseq/scratch/postprocessing/", "%s_plots.html"%run)):
-            plots_page="\\\\isamba\\dataset\\hiseq\\scratch\\postprocessing\\%s_plots.html"%run
-            path="<a href=\"%s\" target=plots_page>%s</a>"%(plots_page, os.path.join("/dataset/hiseq/scratch/postprocessing/", "%s_plots.html"%run))
+        if os.path.exists(os.path.join("/dataset/2023_illumina_sequencing_a/scratch/postprocessing/", "%s_plots.html"%run)):
+            plots_page="\\\\isamba\\dataset\\2023_illumina_sequencing_a\\scratch\\postprocessing\\%s_plots.html"%run
+            path="<a href=\"%s\" target=plots_page>%s</a>"%(plots_page, os.path.join("/dataset/2023_illumina_sequencing_a/scratch/postprocessing/", "%s_plots.html"%run))
         else:
             plots_page="file:///\\\\isamba\\" + path[1:].replace("/","\\")
             path="<a href=\"%s\" target=plots_page>%s</a>"%(plots_page, path)

diff --git a/_run_gbs_qc b/_run_gbs_qc
@@ -279,9 +279,9 @@ function get_run_opts() {
    OUT_ROOT=""
    SNP_ENGINE=tassel        # the only one supported at this point
 
-   NOVASEQ_ROOT=/dataset/hiseq/scratch
-   NOVASEQ_PROCESSING_ROOT=/dataset/hiseq/scratch/postprocessing/gbs
-   NOVASEQ_BCLCONVERT_ROOT=/dataset/hiseq/scratch/postprocessing/illumina/novaseq
+   NOVASEQ_ROOT=/dataset/2023_illumina_sequencing_a/scratch
+   NOVASEQ_PROCESSING_ROOT=/dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs
+   NOVASEQ_BCLCONVERT_ROOT=/dataset/2023_illumina_sequencing_a/scratch/postprocessing/illumina/novaseq
    mkdir -p $NOVASEQ_PROCESSING_ROOT
    mkdir -p $NOVASEQ_BCLCONVERT_ROOT
 

diff --git a/add_sample_sheet_header.py b/add_sample_sheet_header.py
@@ -78,7 +78,7 @@ def get_options():
 
 cat /dataset/hiseq/active/191021_D00390_0510_BCE3UBANXX/SampleSheet.csv | ./add_sample_sheet_header.py --sequencing_platform hiseq -H  /dataset/gseq_processing/active/bin/gbs_prism/etc/sample_sheet_header.csv
 
-cat /dataset/hiseq/scratch/220426_A01439_0069_BHNFW2DRXY/HNFW2DRXY.csv | ./add_sample_sheet_header.py
+cat /dataset/2023_illumina_sequencing_a/scratch/220426_A01439_0069_BHNFW2DRXY/HNFW2DRXY.csv | ./add_sample_sheet_header.py
 
 
 """

diff --git a/add_tags.py b/add_tags.py
diff --git a/ag_gbs_qc_prism.sh b/ag_gbs_qc_prism.sh
@@ -283,21 +283,6 @@ function get_targets() {
          exit 1
       fi
 
-      # check for missing fastq files and bail out if anything missing  - this shouldn't happen and there may have been an unsupported 
-      # keyfile import that will need to be manually patched (e.g. previously importing a future flowcell as well as current flowcell. The 
-      # former future is now current , but fastq link probably not updated )
-      missing_message=`$GBS_PRISM_BIN/list_keyfile.sh -s $libname -f $fcid -e $enzyme -g $gbs_cohort -q $qc_cohort -t missing_files | grep "fastq_link missing"`
-      if [ ! -z "$missing_message" ]; then
-         echo "*** !!!! ERROR !!!! there are missing fastq_links for lib: $libname fcid: $fcid enzyme: $enzyme cohort: $gbs_cohort qccohort: $qc_cohort ***"
-         echo "(was this flowcell previously imported in one or more keyfiles as a future flowcell  ?)"
-         echo "suggest try manual update of fastq location using : 
-
-"
-         for lane in `$GBS_PRISM_BIN/get_lane_from_database.sh $RUN $libname`; do
-            echo "$GBS_PRISM_BIN/updateFastqLocations.sh -s $libname -k $libname -r $RUN -f $fcid -l $lane "
-         done
-      fi
-
       #$GBS_PRISM_BIN/list_keyfile.sh -s $libname -f $fcid -e $enzyme -g $gbs_cohort -q $qc_cohort -t bwa_index_paths > $OUT_ROOT/${cohort_moniker}.bwa_references
       gquery -t gbs_keyfile -b library -p "flowcell=$fcid;enzyme=$enzyme;gbs_cohort=$gbs_cohort;columns=gbs_cohort,refgenome_bwa_indexes;noheading;distinct" $libname > $OUT_ROOT/${cohort_moniker}.bwa_references 
 

diff --git a/annotateSelfRelDepth.py b/annotateSelfRelDepth.py
@@ -20,14 +20,14 @@ def annotate():
     #/dataset/gseq_processing/scratch/gbs/181005_D00390_0407_BCCV91ANXX/SQ0807.all.DEER.PstI/KGD -0.02888578 0.3695895
     #/dataset/gseq_processing/scratch/gbs/181005_D00390_0407_BCCV91ANXX/SQ2766.all.ApeKI.ApeKI/KGD -0.1362936 0.2429048
 
-    #/bifo/scratch/hiseq/postprocessing/180810_D00390_0392_BCCR4LANXX.gbs/SQ0772.processed_sample/uneak/all.DEER.PstI.cohort/KGD -2.523152 5.655549e-05
-    #/bifo/scratch/hiseq/postprocessing/180810_D00390_0392_BCCR4LANXX.gbs/SQ0772.processed_sample/uneak/all.GOAT.PstI.cohort/KGD -0.03751884 0.002106262
-    #/bifo/scratch/hiseq/postprocessing/180810_D00390_0392_BCCR4LANXX.gbs/SQ0775.processed_sample/uneak/all.Cattle.PstI.cohort/KGD 0.007961025 0.7796325
+    #/bifo/scratch/2023_illumina_sequencing_a/postprocessing/180810_D00390_0392_BCCR4LANXX.gbs/SQ0772.processed_sample/uneak/all.DEER.PstI.cohort/KGD -2.523152 5.655549e-05
+    #/bifo/scratch/2023_illumina_sequencing_a/postprocessing/180810_D00390_0392_BCCR4LANXX.gbs/SQ0772.processed_sample/uneak/all.GOAT.PstI.cohort/KGD -0.03751884 0.002106262
+    #/bifo/scratch/2023_illumina_sequencing_a/postprocessing/180810_D00390_0392_BCCR4LANXX.gbs/SQ0775.processed_sample/uneak/all.Cattle.PstI.cohort/KGD 0.007961025 0.7796325
     #
     # and
-    # /bifo/scratch/hiseq/postprocessing/171218_D00390_0337_BCBG3AANXX.gbs/SQ0575.processed_sample/uneak/PstI.PstI.cohort
+    # /bifo/scratch/2023_illumina_sequencing_a/postprocessing/171218_D00390_0337_BCBG3AANXX.gbs/SQ0575.processed_sample/uneak/PstI.PstI.cohort
     # and
-    # /dataset/hiseq/scratch/postprocessing/150224_D00390_0217_AC4UAUACXX.gbs/SQ0056.processed_sample/uneak/all.PstI.PstI.cohort/KGD -0.2113675 2.645513e-25
+    # /dataset/2023_illumina_sequencing_a/scratch/postprocessing/150224_D00390_0217_AC4UAUACXX.gbs/SQ0056.processed_sample/uneak/all.PstI.PstI.cohort/KGD -0.2113675 2.645513e-25
     # from this :
     # flowcell = CCR4LANXX
     # libraryprepid = 772 etc

diff --git a/autostart_gbs_qc b/autostart_gbs_qc
@@ -80,14 +80,14 @@ function send_mail() {
 function get_landmark() {
    RUN=$1
    landmark=""
-   if [ -f /dataset/hiseq/scratch/$RUN/RTAComplete.txt ]; then
-      landmark=/dataset/hiseq/scratch/$RUN/RTAComplete.txt
+   if [ -f /dataset/2023_illumina_sequencing_a/scratch/$RUN/RTAComplete.txt ]; then
+      landmark=/dataset/2023_illumina_sequencing_a/scratch/$RUN/RTAComplete.txt
    fi
 }
 
 function get_digest() {
    RUN=$1
-   digest=`ls -lR /dataset/hiseq/scratch/$RUN/ | md5sum -b `
+   digest=`ls -lR /dataset/2023_illumina_sequencing_a/scratch/$RUN/ | md5sum -b `
    echo $digest
 }
 

diff --git a/collate_tags_reads.py b/collate_tags_reads.py
@@ -100,16 +100,16 @@ def get_options():
 
 examples :
 
-collate_tags_reads.py --run 211217_A01439_0043_BH2TTCDMXY --cohort SQ1744.all.PstI-MspI.PstI-MspI /dataset/hiseq/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/TagCount.csv.blinded
-collate_tags_reads.py --report_name tags_reads_kgdstats  --kgd_stats_file /dataset/hiseq/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/KGD/SampleStats.csv.blinded --run 211217_A01439_0043_BH2TTCDMXY --cohort SQ1744.all.PstI-MspI.PstI-MspI /dataset/hiseq/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/TagCount.csv.blinded
+collate_tags_reads.py --run 211217_A01439_0043_BH2TTCDMXY --cohort SQ1744.all.PstI-MspI.PstI-MspI /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/TagCount.csv.blinded
+collate_tags_reads.py --report_name tags_reads_kgdstats  --kgd_stats_file /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/KGD/SampleStats.csv.blinded --run 211217_A01439_0043_BH2TTCDMXY --cohort SQ1744.all.PstI-MspI.PstI-MspI /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/TagCount.csv.blinded
 
 # for testing 
-./collate_tags_reads.py --run 211020_A01439_0028_AHHYWFDRXY --cohort SQ1705.all.salmon.PstI-MspI --machine hiseq /dataset/hiseq/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1705.all.salmon.PstI-MspI/TagCount.csv.blinded
-./collate_tags_reads.py --run 211020_A01439_0028_AHHYWFDRXY --cohort SQ1706.all.chinook_salmon.PstI-MspI --machine hiseq /dataset/hiseq/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1706.all.chinook_salmon.PstI-MspI/TagCount.csv.blinded
-./collate_tags_reads.py --run 211020_A01439_0028_AHHYWFDRXY --cohort SQ1706.all.salmon.PstI-MspI --machine hiseq /dataset/hiseq/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1706.all.salmon.PstI-MspI/TagCount.csv.blinded
+./collate_tags_reads.py --run 211020_A01439_0028_AHHYWFDRXY --cohort SQ1705.all.salmon.PstI-MspI --machine hiseq /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1705.all.salmon.PstI-MspI/TagCount.csv.blinded
+./collate_tags_reads.py --run 211020_A01439_0028_AHHYWFDRXY --cohort SQ1706.all.chinook_salmon.PstI-MspI --machine hiseq /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1706.all.chinook_salmon.PstI-MspI/TagCount.csv.blinded
+./collate_tags_reads.py --run 211020_A01439_0028_AHHYWFDRXY --cohort SQ1706.all.salmon.PstI-MspI --machine hiseq /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1706.all.salmon.PstI-MspI/TagCount.csv.blinded
 
 
--rw-rw-r-- 1 mccullocha hiseq_users 32029 Oct 22 10:30 /dataset/hiseq/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1706.all.salmon.PstI-MspI/TagCount.csv.blinded
+-rw-rw-r-- 1 mccullocha hiseq_users 32029 Oct 22 10:30 /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211020_A01439_0028_AHHYWFDRXY/SQ1706.all.salmon.PstI-MspI/TagCount.csv.blinded
 
 
 
@@ -121,7 +121,7 @@ def get_options():
    #180914_D00390_0399_ACCVK0ANXX   SQ0788.all.DEER.PstI    good    CCVK0ANXX       1       SQ0788          268924508
 
 + awk -F, '{printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",run,cohort,$1,$2,$3,$4,$5,$6);}' run=211217_A01439_0043_BH2TTCDMXY cohort=SQ1744.all.PstI-MspI.PstI-MspI -
-+ cat /dataset/hiseq/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/TagCount.csv.blinded
++ cat /dataset/2023_illumina_sequencing_a/scratch/postprocessing/gbs/211217_A01439_0043_BH2TTCDMXY/SQ1744.all.PstI-MspI.PstI-MspI/TagCount.csv.blinded
-Original file line number
+Diff line change
@@ Expand Up / @@ -78,7 +78,7 @@ def get_options(): @@
     cat /dataset/hiseq/active/191021_D00390_0510_BCE3UBANXX/SampleSheet.csv | ./add_sample_sheet_header.py --sequencing_platform hiseq -H  /dataset/gseq_processing/active/bin/gbs_prism/etc/sample_sheet_header.csv
-    cat /dataset/hiseq/scratch/220426_A01439_0069_BHNFW2DRXY/HNFW2DRXY.csv | ./add_sample_sheet_header.py
+    cat /dataset/2023_illumina_sequencing_a/scratch/220426_A01439_0069_BHNFW2DRXY/HNFW2DRXY.csv | ./add_sample_sheet_header.py
     """
@@ Expand Down @@