Use only one output aggregated column rather than two columns

Master-Codexxx-Pilot · Mar 11, 2023 · 11d15dc · 11d15dc
1 parent 504793c
commit 11d15dc
Show file tree

Hide file tree

Showing 6 changed files with 64 additions and 55 deletions.
diff --git a/common/signal_generation.py b/common/signal_generation.py
@@ -71,30 +71,24 @@ def combine_scores(df, model, buy_score_column, sell_score_column):
     positive values mean buy. The result is stored in the same input buy column
     while the sell column is redundant (and should be removed in future as unnecessary).
     """
+    trade_score_column = model.get("trade_score")
     if model.get("combine") == "relative":
-        combine_scores_relative(df, buy_score_column, sell_score_column, buy_score_column, sell_score_column)
+        combine_scores_relative(df, buy_score_column, sell_score_column, trade_score_column)
     elif model.get("combine") == "difference":
-        combine_scores_difference(df, buy_score_column, sell_score_column, buy_score_column, sell_score_column)
+        combine_scores_difference(df, buy_score_column, sell_score_column, trade_score_column)
     else:
         # If buy score is greater than sell score then positive buy, otherwise negative sell
-        temp_buy_score_column = df[[buy_score_column, sell_score_column]].apply(lambda x: x[0] if x[0] >= x[1] else -x[1], raw=True, axis=1)
-        temp_sell_score_column = df[[buy_score_column, sell_score_column]].apply(lambda x: -x[1] if x[0] >= x[1] else x[0], raw=True, axis=1)
-
-        df[sell_score_column] = temp_sell_score_column
-        df[buy_score_column] = temp_buy_score_column
+        df[trade_score_column] = df[[buy_score_column, sell_score_column]].apply(lambda x: x[0] if x[0] >= x[1] else -x[1], raw=True, axis=1)
 
     # Scale the score distribution to make it symmetric or normalize
     # Always apply the transformation to buy score. It might be in [0,1] or [-1,+1] depending on combine parameter
     if model.get("coefficient"):
-        df[buy_score_column] = df[buy_score_column] * model.get("coefficient")
+        df[trade_score_column] = df[trade_score_column] * model.get("coefficient")
     if model.get("constant"):
-        df[buy_score_column] = df[buy_score_column] + model.get("constant")
-
-    if model.get("combine") in ["relative", "difference"]:
-        df[sell_score_column] = -df[buy_score_column]  # We know that they are opposite for these types of combination
+        df[trade_score_column] = df[trade_score_column] + model.get("constant")
 
 
-def combine_scores_relative(df, buy_column, sell_column, buy_column_out, sell_column_out):
+def combine_scores_relative(df, buy_column, sell_column, trade_column_out):
     """
     Mutually adjust input buy and sell scores by producing two output scores.
     The idea is that if both scores (buy and sell) are equally high then in the output
@@ -106,16 +100,13 @@ def combine_scores_relative(df, buy_column, sell_column, buy_column_out, sell_co
     buy_plus_sell = df[buy_column] + df[sell_column]
     buy_sell_score = ((df[buy_column] / buy_plus_sell) * 2) - 1.0  # in [-1, +1]
 
-    df[buy_column_out] = buy_sell_score  # High values mean buy signal
+    df[trade_column_out] = buy_sell_score  # High values mean buy signal
     #df[buy_column_out] = df[df[buy_column_out] < 0] = 0  # Set negative values to 0
 
-    df[sell_column_out] = -buy_sell_score  # High values mean sell signal
-    #df[sell_column_out] = df[df[sell_column_out] < 0] = 0  # Set negative values to 0
-
     return buy_sell_score
 
 
-def combine_scores_difference(df, buy_column, sell_column, buy_column_out, sell_column_out):
+def combine_scores_difference(df, buy_column, sell_column, trade_column_out):
     """
     This transformation represents how much buy score higher than sell score.
     If they are equal then the output is 0. The output scores have opposite signs.
@@ -124,12 +115,9 @@ def combine_scores_difference(df, buy_column, sell_column, buy_column_out, sell_
     # difference
     buy_minus_sell = df[buy_column] - df[sell_column]
 
-    df[buy_column_out] = buy_minus_sell  # High values mean buy signal
+    df[trade_column_out] = buy_minus_sell  # High values mean buy signal
     #df[buy_column_out] = df[df[buy_column_out] < 0] = 0  # Set negative values to 0
 
-    df[sell_column_out] = -buy_minus_sell  # High values mean sell signal
-    #df[sell_column_out] = df[df[sell_column_out] < 0] = 0  # Set negative values to 0
-
     return buy_minus_sell
 
 
@@ -172,31 +160,37 @@ def linear_regr_fn(X):
 # Signal rules
 #
 
-def apply_rule_with_score_thresholds(df, model, buy_score_column, sell_score_column):
+def apply_rule_with_score_thresholds(df, model, trade_score_column):
     """
     Apply rules based on thresholds and generate trade signal buy, sell or do nothing.
 
     Returns signals in two pre-defined columns: 'buy_signal_column' and 'sell_signal_column'
     """
     df['buy_signal_column'] = \
-        (df[buy_score_column] >= model.get("buy_signal_threshold"))
+        (df[trade_score_column] >= model.get("buy_signal_threshold"))
     df['sell_signal_column'] = \
-        (df[buy_score_column] <= model.get("sell_signal_threshold"))
+        (df[trade_score_column] <= model.get("sell_signal_threshold"))
 
 
-def apply_rule_with_score_thresholds_2(df, model, buy_score_column, buy_score_column_2):
+def apply_rule_with_score_thresholds_2(df, model, trade_score_column, trade_score_column_2):
     """
     Assume using difference combination with negative sell scores
     """
+    #two_dim_distance_threshold = model.get("two_dim_distance_threshold")
+    #distance = ((df[trade_score_column]*df[trade_score_column]) + (df[trade_score_column_2]*df[trade_score_column_2]))**0.5
+    #distance_signal = (distance >= two_dim_distance_threshold)  # Far enough from the center
+
     # Both buy scores are greater than the corresponding thresholds
     df['buy_signal_column'] = \
-        (df[buy_score_column] >= model.get("buy_signal_threshold")) & \
-        (df[buy_score_column_2] >= model.get("buy_signal_threshold_2"))
+        (df[trade_score_column] >= model.get("buy_signal_threshold")) & \
+        (df[trade_score_column_2] >= model.get("buy_signal_threshold_2"))
+    #df['buy_signal_column'] = df['buy_signal_column'] & distance_signal
 
     # Both sell scores are smaller than the corresponding thresholds
     df['sell_signal_column'] = \
-        (df[buy_score_column] <= model.get("sell_signal_threshold")) & \
-        (df[buy_score_column_2] <= model.get("sell_signal_threshold_2"))
+        (df[trade_score_column] <= model.get("sell_signal_threshold")) & \
+        (df[trade_score_column_2] <= model.get("sell_signal_threshold_2"))
+    #df['sell_signal_column'] = df['sell_signal_column'] & distance_signal
 
 
 def apply_rule_with_score_thresholds_one_row(row, model, buy_score_column, sell_score_column):

diff --git a/config-sample-v0.5.0.json b/config-sample-v0.5.0.json
@@ -63,6 +63,8 @@
       "buy_labels": ["high_10_lc", "high_15_lc", "high_20_lc"],
       "sell_labels": ["low_10_lc", "low_15_lc", "low_20_lc"],
 
+      "trade_score": "trade_score",
+
       "point_threshold": null,
       "window": 3,
       "combine": "",

diff --git a/scripts/signals.py b/scripts/signals.py
@@ -75,7 +75,7 @@ def main(config_file):
     #
     # Aggregate and post-process
     #
-    score_column_names = []
+    trade_score_column_names = []
     sa_sets = ['score_aggregation', 'score_aggregation_2']
     for i, score_aggregation_set in enumerate(sa_sets):
         score_aggregation = App.config.get(score_aggregation_set)
@@ -92,34 +92,31 @@ def main(config_file):
         # Output (post-processed) columns for each aggregation set
         buy_column = 'buy_score_column'
         sell_column = 'sell_score_column'
-        if i > 0:
-            buy_column = 'buy_score_column' + '_' + str(i+1)
-            sell_column = 'sell_score_column' + '_' + str(i+1)
-
         # Aggregate scores between each other and in time
         aggregate_scores(df, score_aggregation, buy_column, buy_labels)
         aggregate_scores(df, score_aggregation, sell_column, sell_labels)
+
         # Mutually adjust two independent scores with opposite semantics
         combine_scores(df, score_aggregation, buy_column, sell_column)
 
-        score_column_names.append(buy_column)
-        score_column_names.append(sell_column)
+        trade_score_column = score_aggregation.get("trade_score")
+        trade_score_column_names.append(trade_score_column)
 
     #
     # Apply signal rule and generate binary buy_signal_column/sell_signal_column
     #
     signal_model = App.config['signal_model']
     if signal_model.get('rule_type') == 'two_dim_rule':
-        apply_rule_with_score_thresholds_2(df, signal_model, 'buy_score_column', 'buy_score_column_2')
+        apply_rule_with_score_thresholds_2(df, signal_model, trade_score_column_names[0], trade_score_column_names[1])
     else:  # Default one dim rule
-        apply_rule_with_score_thresholds(df, signal_model, 'buy_score_column', 'sell_score_column')
-
-    signal_column_names = ['buy_signal_column', 'sell_signal_column']
+        apply_rule_with_score_thresholds(df, signal_model, trade_score_column_names[0])
 
     #
-    # Simulate trade using close price and two boolean signals
+    # Simulate trade and compute performance using close price and two boolean signals
     # Add a pair of two dicts: performance dict and model parameters dict
     #
+    signal_column_names = ['buy_signal_column', 'sell_signal_column']
+
     performance, long_performance, short_performance = \
         simulated_trade_performance(df, 'sell_signal_column', 'buy_signal_column', 'close')
 
@@ -152,7 +149,7 @@ def main(config_file):
     lines = []
 
     # Score statistics
-    for score_col_name in score_column_names:
+    for score_col_name in trade_score_column_names:
         lines.append(f"'{score_col_name}':\n" + df[score_col_name].describe().to_string())
 
     # TODO: Profit
@@ -169,7 +166,7 @@ def main(config_file):
     #
     out_columns = ["timestamp", "open", "high", "low", "close"]  # Source data
     out_columns.extend(App.config.get('labels'))  # True labels
-    out_columns.extend(score_column_names)  # Aggregated post-processed scores
+    out_columns.extend(trade_score_column_names)  # Aggregated post-processed scores
     out_columns.extend(signal_column_names)  # Rule results
     out_columns.extend(["buy_transaction", "sell_transaction", "transaction_type", "profit_long_percent", "profit_short_percent", "profit_percent"])  # Simulation results
 

diff --git a/scripts/train_signals.py b/scripts/train_signals.py
@@ -147,16 +147,29 @@ def main(config_file):
 
         signal_model["rule_type"] = App.config["signal_model"]["rule_type"]
 
+        #
+        # Do not aggregate here; assume the aggregation results are already present in the data
+        #
+        trade_score_column_names = []
+        sa_sets = ['score_aggregation', 'score_aggregation_2']
+        for i, score_aggregation_set in enumerate(sa_sets):
+            score_aggregation = App.config.get(score_aggregation_set)
+            if not score_aggregation:
+                continue
+
+            trade_score_column = score_aggregation.get("trade_score")
+            trade_score_column_names.append(trade_score_column)
+
         #
         # Apply signal rule and generate binary buy_signal_column/sell_signal_column
         #
         if signal_model.get('rule_type') == 'two_dim_rule':
-            apply_rule_with_score_thresholds_2(df, signal_model, 'buy_score_column', 'buy_score_column_2')
+            apply_rule_with_score_thresholds_2(df, signal_model, trade_score_column_names[0], trade_score_column_names[1])
         else:  # Default one dim rule
-            apply_rule_with_score_thresholds(df, signal_model, 'buy_score_column', 'sell_score_column')
+            apply_rule_with_score_thresholds(df, signal_model, trade_score_column_names[0])
 
         #
-        # Simulate trade using close price and two boolean signals
+        # Simulate trade and compute performance using close price and two boolean signals
         # Add a pair of two dicts: performance dict and model parameters dict
         #
         performance, long_performance, short_performance = \

diff --git a/service/App.py b/service/App.py
@@ -175,6 +175,8 @@ class App:
             "buy_labels": ["high_10_lc", "high_15_lc", "high_20_lc"],
             "sell_labels": ["low_10_lc", "low_15_lc", "low_20_lc"],
 
+            "trade_score": "trade_score",  # Output column name: positive values - buy, negative values - sell
+
             "point_threshold": None,  # Produce boolean column (optional)
             "window": 3,  # Aggregate in time
             "combine": "",  # "no_combine" (or empty), "relative", "difference"
@@ -188,8 +190,8 @@ class App:
             "rule_type": "",  # empty, 'two_dim_rule'
 
             # Rule parameters to decide whether to buy/sell after all aggregations/combinations
-            "buy_signal_threshold": 0.65,
-            "sell_signal_threshold": 0.65,
+            "buy_signal_threshold": 0.1,
+            "sell_signal_threshold": -0.1,
 
             # To decide whether to notify (can be an option of individual users/consumers)
             "buy_notify_threshold": 0.05,

diff --git a/service/analyzer.py b/service/analyzer.py
@@ -398,6 +398,7 @@ def analyze(self):
         # 4.
         # Aggregate and post-process
         #
+        trade_score_column_names = []
         sa_sets = ['score_aggregation', 'score_aggregation_2']
         for i, score_aggregation_set in enumerate(sa_sets):
             score_aggregation = App.config.get(score_aggregation_set)
@@ -410,25 +411,25 @@ def analyze(self):
             # Output (post-processed) columns for each aggregation set
             buy_column = 'buy_score_column'
             sell_column = 'sell_score_column'
-            if i > 0:
-                buy_column = 'buy_score_column' + '_' + str(i + 1)
-                sell_column = 'sell_score_column' + '_' + str(i + 1)
-
             # Aggregate scores between each other and in time
             aggregate_scores(df, score_aggregation, buy_column, buy_labels)
             aggregate_scores(df, score_aggregation, sell_column, sell_labels)
+
             # Mutually adjust two independent scores with opposite semantics
             combine_scores(df, score_aggregation, buy_column, sell_column)
 
+            trade_score_column = score_aggregation.get("trade_score")
+            trade_score_column_names.append(trade_score_column)
+
         #
         # 5.
         # Apply rule to last row
         #
         signal_model = App.config['signal_model']
         if signal_model.get('rule_type') == 'two_dim_rule':
-            apply_rule_with_score_thresholds_2(df, signal_model, 'buy_score_column', 'buy_score_column_2')
+            apply_rule_with_score_thresholds_2(df, signal_model, trade_score_column_names[0], trade_score_column_names[1])
         else:  # Default one dim rule
-            apply_rule_with_score_thresholds(df, signal_model, 'buy_score_column', 'sell_score_column')
+            apply_rule_with_score_thresholds(df, signal_model, trade_score_column_names[0])
 
         #
         # 6.