RabadanLab · jen-dfci · Jul 9, 2021
diff --git a/scripts/extract.py b/scripts/extract.py
@@ -52,13 +52,13 @@ def index_bam(bam):
         sys.exit('[extract] Error: unable to index bam file.')
 
 
-def extract_reads(bam, outdir, paired, unmapped, alts, temp, threads):
+def extract_reads(bam, outdir, paired, unmapped, alts, temp, threads, sample=None):
     '''Extracts reads from chromosome 6 and alts/decoys if applicable.'''
 
     log.info(f'[extract] Extracting reads from {bam}')
 
     file_list = []
-    sample = os.path.splitext(os.path.basename(bam))[0]
+    sample = sample or os.path.splitext(os.path.basename(bam))[0]  # bam stem when no id is passed
 
     # Index bam
     index_bam(bam)
@@ -199,7 +199,12 @@ def extract_reads(bam, outdir, paired, unmapped, alts, temp, threads):
                         action = 'count',
                         help='keep intermediate files\n\n',
                         default=False)
 
+    parser.add_argument('--sample',
+                        type = str,
+                        help = 'User defined sample id\n',
+                        default='')
+
     parser.add_argument('-t',
                         '--threads', 
                         type = str,
@@ -216,7 +221,9 @@ def extract_reads(bam, outdir, paired, unmapped, alts, temp, threads):
     temp = create_temp(args.temp)
 
     sample = os.path.basename(args.bam).split('.')[0]
+    if args.sample:  # a user-supplied id overrides the bam-derived name
+        sample = args.sample
 
     datDir = os.path.dirname(os.path.realpath(__file__)) + '/../dat/'
 
     # Set up log file
@@ -260,7 +267,8 @@ def extract_reads(bam, outdir, paired, unmapped, alts, temp, threads):
                   args.unmapped,
                   alts,
                   temp,
-                  args.threads)
+                  args.threads,
+                  sample)
 
     remove_files(temp, args.keep_files)
 

diff --git a/scripts/genotype.py b/scripts/genotype.py
@@ -663,7 +663,16 @@ def arg_check_threshold(parser, arg):
         sys.exit('[genotype] Error: FASTQ or alignment.p file required.')
 
     # Set up temporary and output folders, log file
-    sample = os.path.basename(args.file[0]).split('.')[0]
+    # Keep dotted sample ids intact: cut at the last pipeline suffix field.
+    file_name_fields = os.path.basename(args.file[0]).split('.')
+    sample = ''
+    for i in reversed(range(len(file_name_fields))):
+        if file_name_fields[i] in ('extracted', 'alignment'):
+            sample = '.'.join(file_name_fields[:i])
+            break
+    if not sample:
+        sample = file_name_fields[0]
+
     outdir = check_path(args.outdir)
     temp = create_temp(args.temp)
     if args.log:

diff --git a/scripts/merge.py b/scripts/merge.py
@@ -60,7 +60,16 @@ def process_genotype(json_files, indir, outdir, run, suffix):
 
     genotypes = dict()
     for file in json_files:
-        sample = file.split('.')[0]
+        # Keep dotted sample ids intact: cut at the last 'genotype' field.
+        file_name_fields = file.split('.')
+        sample = ''
+        for i in reversed(range(len(file_name_fields))):
+            if file_name_fields[i] == 'genotype':
+                sample = '.'.join(file_name_fields[:i])
+                break
+        if not sample:
+            sample = file_name_fields[0]
+
         file_path = indir + file
 
         with open(file_path,'r') as file: