use logging 2

PengNi · Jun 14, 2023 · 209057d · 209057d
1 parent 6fc08d9
commit 209057d
Show file tree

Hide file tree

Showing 13 changed files with 76 additions and 90 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,3 +1,4 @@
 include requirements.txt
 include README.rst
+include README.md
 include LICENSE
diff --git a/README.md b/README.md
@@ -336,8 +336,8 @@ usage: ccsmeth call_mods [-h] --input INPUT [--holes_batch HOLES_BATCH]
                          [--holeids_e HOLEIDS_E] [--holeids_ne HOLEIDS_NE]
                          [--motifs MOTIFS] [--mod_loc MOD_LOC]
                          [--methy_label {1,0}]
-                         [--norm {zscore,min-mean,min-max,mad}] [--no_decode]
-                         [--loginfo LOGINFO] [--ref REF] [--mapq MAPQ]
+                         [--norm {zscore,min-mean,min-max,mad,none}]
+                         [--no_decode] [--ref REF] [--mapq MAPQ]
                          [--identity IDENTITY] [--no_supplementary]
                          [--is_mapfea IS_MAPFEA]
                          [--skip_unmapped SKIP_UNMAPPED] [--threads THREADS]
@@ -415,12 +415,11 @@ EXTRACTION:
                         default 0
   --methy_label {1,0}   the label of the interested modified bases, this is
                         for training. 0 or 1, default 1
-  --norm {zscore,min-mean,min-max,mad}
+  --norm {zscore,min-mean,min-max,mad,none}
                         method for normalizing ipd/pw in subread level.
-                        zscore, min-mean, min-max or mad, default zscore
+                        zscore, min-mean, min-max, mad, or none. default
+                        zscore
   --no_decode           not use CodecV1 to decode ipd/pw
-  --loginfo LOGINFO     if printing more info of feature extraction on reads.
-                        yes or no, default no
 
 EXTRACTION ALIGN_MODE:
   --ref REF             path to genome reference to be aligned, in fasta/fa
@@ -635,19 +634,17 @@ usage: ccsmeth extract [-h] --input INPUT [--holeids_e HOLEIDS_E]
                        [--mode {denovo,align}] [--seq_len SEQ_LEN]
                        [--motifs MOTIFS] [--mod_loc MOD_LOC]
                        [--methy_label {1,0}]
-                       [--norm {zscore,min-mean,min-max,mad}] [--no_decode]
-                       [--holes_batch HOLES_BATCH] [--ref REF] [--mapq MAPQ]
-                       [--identity IDENTITY] [--no_supplementary]
-                       [--is_mapfea IS_MAPFEA] [--skip_unmapped SKIP_UNMAPPED]
-                       [--threads THREADS] [--loginfo LOGINFO]
+                       [--norm {zscore,min-mean,min-max,mad,none}]
+                       [--no_decode] [--holes_batch HOLES_BATCH] [--ref REF]
+                       [--mapq MAPQ] [--identity IDENTITY]
+                       [--no_supplementary] [--is_mapfea IS_MAPFEA]
+                       [--skip_unmapped SKIP_UNMAPPED] [--threads THREADS]
 
 extract features from hifi reads.
 
 optional arguments:
   -h, --help            show this help message and exit
   --threads THREADS     number of threads, default 5
-  --loginfo LOGINFO     if printing more info of feature extraction on reads.
-                        yes or no, default no
 
 INPUT:
   --input INPUT, -i INPUT
@@ -681,9 +678,10 @@ EXTRACTION:
                         default 0
   --methy_label {1,0}   the label of the interested modified bases, this is
                         for training. 0 or 1, default 1
-  --norm {zscore,min-mean,min-max,mad}
+  --norm {zscore,min-mean,min-max,mad,none}
                         method for normalizing ipd/pw in subread level.
-                        zscore, min-mean, min-max or mad, default zscore
+                        zscore, min-mean, min-max, mad, or none. default
+                        zscore
   --no_decode           not use CodecV1 to decode ipd/pw
   --holes_batch HOLES_BATCH
                         number of holes/hifi-reads in a batch to get/put in

diff --git a/ccsmeth/align_hifi_reads.py b/ccsmeth/align_hifi_reads.py
@@ -70,7 +70,7 @@ def generate_aligner_with_options(is_minimap2, path_to_minimap2, is_bwa, path_to
 
 
 def align_hifi_reads_to_genome(args):
-    LOGGER.info("[align_hifi_reads]start..")
+    LOGGER.info("[main]align_hifi_reads starts")
     start = time.time()
     inputpath = check_input_file(args.hifireads)
     outputpath = check_output_file(args.output, inputpath, args.minimap2, args.bwa)
@@ -144,11 +144,11 @@ def align_hifi_reads_to_genome(args):
         LOGGER.warning("failed..")
     else:
         LOGGER.info("succeeded..")
-    LOGGER.info("==stdout:\n{}".format(str(stdout, 'utf-8')))
-    LOGGER.info("==stderr:\n{}".format(str(stderr, 'utf-8')))
+    LOGGER.info("stdout:\n{}".format(str(stdout, 'utf-8')))
+    LOGGER.info("stderr:\n{}".format(str(stderr, 'utf-8')))
 
     endtime = time.time()
-    LOGGER.info("[align_hifi_reads]costs {:.1f} seconds".format(endtime - start))
+    LOGGER.info("[main]align_hifi_reads costs {:.1f} seconds".format(endtime - start))
 
 
 def main():

diff --git a/ccsmeth/call_hifi_reads.py b/ccsmeth/call_hifi_reads.py
@@ -49,7 +49,7 @@ def generate_ccscmd_with_options(args):
 
 
 def ccs_call_hifi_reads(args):
-    LOGGER.info("[call_hifi_reads]starts")
+    LOGGER.info("[main]call_hifi_reads starts")
     start = time.time()
     inputpath = check_input_file(args.subreads)
     if not os.path.exists(inputpath):
@@ -81,11 +81,11 @@ def ccs_call_hifi_reads(args):
         LOGGER.warning("failed")
     else:
         LOGGER.info("succeeded")
-    LOGGER.info("==stdout:\n{}".format(str(stdout, 'utf-8')))
-    LOGGER.info("==stderr:\n{}".format(str(stderr, 'utf-8')))
+    LOGGER.info("stdout:\n{}".format(str(stdout, 'utf-8')))
+    LOGGER.info("stderr:\n{}".format(str(stderr, 'utf-8')))
 
     endtime = time.time()
-    LOGGER.info("[call_hifi_reads]costs {:.1f} seconds".format(endtime - start))
+    LOGGER.info("[main]call_hifi_reads costs {:.1f} seconds".format(endtime - start))
 
 
 def main():

diff --git a/ccsmeth/call_modifications.py b/ccsmeth/call_modifications.py
@@ -450,7 +450,7 @@ def _get_gpus():
 
 
 def call_mods(args):
-    LOGGER.info("[main]call_mods starts..")
+    LOGGER.info("[main]call_mods starts")
     start = time.time()
     LOGGER.info("cuda availability: {}".format(use_cuda))
 
@@ -569,7 +569,7 @@ def call_mods(args):
 
         if not args.no_sort:
             post_time_start = time.time()
-            LOGGER.info("[post_process] bam_sort_index starts..")
+            LOGGER.info("[post_process] bam_sort_index starts")
             try:
                 LOGGER.info("sorting modbam file..")
                 modbam_sorted = os.path.splitext(out_modbam)[0] + ".sorted.bam"
@@ -582,13 +582,13 @@ def call_mods(args):
                 pysam.index("-@", str(args.threads), out_modbam)
             except Exception:
                 LOGGER.warning("failed indexing modbam file..")
-            LOGGER.info("[post_process] bam_sort_index costs %.2f seconds.." % (time.time() - post_time_start))
+            LOGGER.info("[post_process] bam_sort_index costs %.2f seconds" % (time.time() - post_time_start))
     else:
         from ._call_modifications_txt import call_mods_txt
         out_per_readsite = args.output + ".per_readsite.tsv"
         call_mods_txt(input_path, holeids_e, holeids_ne, out_per_readsite, model_path, args)
 
-    LOGGER.info("[main]call_mods costs %.2f seconds.." % (time.time() - start))
+    LOGGER.info("[main]call_mods costs %.2f seconds" % (time.time() - start))
 
 
 def main():

diff --git a/ccsmeth/call_mods_freq_bam.py b/ccsmeth/call_mods_freq_bam.py
@@ -678,7 +678,7 @@ def _worker_write_bed_result(output_prefix, bed_q, args):
 
 
 def call_mods_frequency_from_bamfile(args):
-    LOGGER.info("[main]call_freq_bam starts..")
+    LOGGER.info("[main]call_freq_bam starts")
     start = time.time()
 
     if args.call_mode == "aggregate" and not os.path.exists(args.aggre_model):
@@ -733,7 +733,7 @@ def call_mods_frequency_from_bamfile(args):
     bed_q.put("kill")
     p_w.join()
 
-    LOGGER.info("[main]call_freq_bam costs %.1f seconds.." % (time.time() - start))
+    LOGGER.info("[main]call_freq_bam costs %.1f seconds" % (time.time() - start))
 
 
 def main():

diff --git a/ccsmeth/call_mods_freq_txt.py b/ccsmeth/call_mods_freq_txt.py
@@ -285,7 +285,7 @@ def _concat_contig_results(contig_files, result_file, is_gzip=False):
 
 
 def call_mods_frequency_to_file(args):
-    LOGGER.info("[main]call_freq starts..")
+    LOGGER.info("[main]call_freq starts")
     start = time.time()
 
     input_paths = args.input_path
@@ -382,7 +382,7 @@ def call_mods_frequency_to_file(args):
             LOGGER.warning("!!!Please check the result files -- seems not all inputed contigs have result!!!")
         LOGGER.info("combine results of {} contigs..".format(len(resfiles_cs)))
         _concat_contig_results(resfiles_cs, result_file, is_gzip)
-    LOGGER.info("[main]call_freq costs %.1f seconds.." % (time.time() - start))
+    LOGGER.info("[main]call_freq costs %.1f seconds" % (time.time() - start))
 
 
 def main():

diff --git a/ccsmeth/extract_features.py b/ccsmeth/extract_features.py
@@ -521,7 +521,7 @@ def _write_featurestr_to_file(write_fp, featurestr_q, is_gzip):
 
 
 def extract_hifireads_features(args):
-    LOGGER.info("[extract_features_hifi]starts")
+    LOGGER.info("[main]extract_features_hifi starts")
     start = time.time()
 
     inputpath = check_input_file(args.input)
@@ -590,7 +590,7 @@ def extract_hifireads_features(args):
     p_w.join()
 
     endtime = time.time()
-    LOGGER.info("[extract_features_hifi]costs {:.1f} seconds".format(endtime - start))
+    LOGGER.info("[main]extract_features_hifi costs {:.1f} seconds".format(endtime - start))
 
 
 def main():

diff --git a/ccsmeth/train.py b/ccsmeth/train.py
@@ -33,7 +33,7 @@ def train(args):
     if use_cuda:
         torch.cuda.manual_seed(args.tseed)
 
-    LOGGER.info("[main]train starts..")
+    LOGGER.info("[main]train starts")
     if use_cuda:
         LOGGER.info("GPU is available!")
     else:
@@ -312,7 +312,7 @@ def train(args):
     if args.dl_offsets:
         train_dataset.close()
         valid_dataset.close()
-    LOGGER.info("[main]train costs {} seconds, "
+    LOGGER.info("[main]train costs {:.1f} seconds, "
                 "best accuracy: {} (epoch {})".format(endtime - total_start,
                                                       curr_best_accuracy,
                                                       curr_best_accuracy_loc))
@@ -396,16 +396,9 @@ def main():
 
     args = parser.parse_args()
 
-    LOGGER.info("[main] start..")
-    total_start = time.time()
-
     display_args(args)
-
     train(args)
 
-    endtime = time.time()
-    LOGGER.info("[main] costs {} seconds".format(endtime - total_start))
-
 
 if __name__ == '__main__':
     main()