Merge pull request #201 from andthum/feat/scripts/discretization/back-jump-probability

andthum · web-flow · commit 5cf8bbf88730 · 2023-11-19T18:38:28.000+01:00
`back_jump_prob*.py`: Return normalization factors
diff --git a/scripts/discretization/back_jump_prob.py b/scripts/discretization/back_jump_prob.py
@@ -242,11 +242,12 @@
     print("Last frame to read:     {:>8d}".format(END - 1))
     print("Read every n-th frame:  {:>8d}".format(EVERY))
     timer = datetime.now()
-    prob = mdt.dtrj.back_jump_prob(
+    prob, norm = mdt.dtrj.back_jump_prob(
         dtrj,
         continuous=args.CONTINUOUS,
         discard_neg=args.DISCARD_NEG,
         discard_neg_btw=args.DISCARD_NEG_BTW,
+        return_norm=True,
         verbose=True,
     )
     del dtrj
@@ -260,6 +261,10 @@
         "Back-jump probability: Probability to return to the initial state\n"
         + "after a state transition as function of the time that has passed\n"
         + "since the state transition averaged over all states.\n"
+        + "\n"
+        + "The total number of back jumps at a given lag time can be\n"
+        + "obtained by multiplying the back-jump probabilities with the\n"
+        + "normalization factors.\n"
         + "\n\n"
         + "intermittency:   {}\n".format(args.INTERMITTENCY)
         + "continuous:      {}\n".format(args.CONTINUOUS)
@@ -272,13 +277,14 @@
     header += (
         "The columns contain:\n"
         + "  1 Lag time (in trajectory steps)\n"
-        + "  2 back-jump probability\n"
+        + "  2 Back-jump probability\n"
+        + "  3 Normalization factors\n"
         + "\n"
         + "Column number:\n"
         + "{:>14d} {:>16d}".format(1, 2)
     )
     lag_times = np.arange(0, len(prob) * EVERY, EVERY, dtype=np.uint32)
-    data = np.column_stack([lag_times, prob])
+    data = np.column_stack([lag_times, prob, norm])
     mdt.fh.savetxt(args.OUTFILE, data, header=header)
     print("Created {}".format(args.OUTFILE))
     print("Elapsed time:         {}".format(datetime.now() - timer))
diff --git a/scripts/discretization/back_jump_prob_discrete.py b/scripts/discretization/back_jump_prob_discrete.py
@@ -41,7 +41,11 @@
     second discrete trajectory must have the same shape as the first
     discrete trajectory.
 -o
-    Output filename.
+    Output filename of the file containing the back-jump probabilities.
+--norm-out
+    Output filename of the file containing the normalization factors
+    (optional).  Multiplying the back-jump probabilities with the
+    normalization factors yields the total number of back jumps.
 -b
     First frame to read from the discrete trajectory.  Frame numbering
     starts at zero.  Default: ``0``.
@@ -148,7 +152,21 @@
         dest="OUTFILE",
         type=str,
         required=True,
-        help="Output filename.",
+        help=(
+            "Output filename of the file containing the back-jump"
+            " probabilities."
+        ),
+    )
+    parser.add_argument(
+        "--norm-out",
+        dest="OUTFILE_NORM",
+        type=str,
+        required=False,
+        default=None,
+        help=(
+            "Output filename of the file containing the normalization factors"
+            " (optional)."
+        ),
     )
     parser.add_argument(
         "-b",
@@ -300,31 +318,37 @@
         continuous=args.CONTINUOUS,
         discard_neg=args.DISCARD_NEG,
         discard_neg_btw=args.DISCARD_NEG_BTW,
+        return_norm=args.OUTFILE_NORM is not None,
         verbose=True,
     )
+    if args.OUTFILE_NORM is not None:
+        prob, norm = prob
     del dtrj1, dtrj2
     print("Elapsed time:         {}".format(datetime.now() - timer))
     print("Current memory usage: {:.2f} MiB".format(mdt.rti.mem_usage(proc)))
 
     print("\n")
     print("Creating output...")
     timer = datetime.now()
-    header = (
+    header_prob = (
         "Back-jump probability: Probability to return to the initial state\n"
         + "after a state transition as function of the time that has passed\n"
         + "since the state transition resolved with respect to the states in\n"
         + "a second discrete trajectory.\n"
-        + "\n\n"
+    )
+    header_base = (
+        "\n\n"
         + "intermittency_1: {}\n".format(args.INTERMITTENCY1)
         + "intermittency_2: {}\n".format(args.INTERMITTENCY2)
         + "continuous:      {}\n".format(args.CONTINUOUS)
         + "discard_neg:     {}\n".format(args.DISCARD_NEG)
         + "discard_neg_btw: {}\n".format(args.DISCARD_NEG_BTW)
         + "\n\n"
     )
-    header += dtrj1_trans_info_str
-    header += "\n\n"
-    header += (
+    header_base += dtrj1_trans_info_str
+    header_base += "\n\n"
+    header_prob = header_prob + header_base
+    header_prob += (
         "The first column contains the lag times (in trajectory steps).\n"
         "The first row contains the states of the second discrete trajectory\n"
         "that were used to discretize the back-jump probability.\n"
@@ -333,9 +357,36 @@
     )
     lag_times = np.arange(0, prob.shape[1] * EVERY, EVERY, dtype=np.uint32)
     mdt.fh.savetxt_matrix(
-        args.OUTFILE, prob.T, var1=lag_times, var2=dtrj2_states, header=header
+        args.OUTFILE,
+        prob.T,
+        var1=lag_times,
+        var2=dtrj2_states,
+        header=header_prob,
     )
     print("Created {}".format(args.OUTFILE))
+    if args.OUTFILE_NORM is not None:
+        header_norm = (
+            "Normalization factors used to normalize the back-jump\n"
+            "probabilities.\n"
+            "Multiplying the back-jump probabilities with the normalization\n"
+            "factors yields the total number of back jumps.\n"
+        )
+        header_norm = header_norm + header_base
+        header_norm += (
+            "The first column contains the lag times (in trajectory steps).\n"
+            "The first row contains the states of the second discrete"
+            " trajectory\n"
+            "that were used to discretize the back-jump probability.\n"
+            "The remaining matrix elements are the normalization factors.\n"
+        )
+        mdt.fh.savetxt_matrix(
+            args.OUTFILE_NORM,
+            norm.T,
+            var1=lag_times,
+            var2=dtrj2_states,
+            header=header_norm,
+        )
+        print("Created {}".format(args.OUTFILE_NORM))
     print("Elapsed time:         {}".format(datetime.now() - timer))
     print("Current memory usage: {:.2f} MiB".format(mdt.rti.mem_usage(proc)))