Skip to content

Commit

Permalink
Use only one output aggregated column rather than two columns
Browse files Browse the repository at this point in the history
  • Loading branch information
asavinov committed Mar 11, 2023
1 parent 504793c commit 11d15dc
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 55 deletions.
54 changes: 24 additions & 30 deletions common/signal_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,30 +71,24 @@ def combine_scores(df, model, buy_score_column, sell_score_column):
positive values mean buy. The result is stored in the same input buy column
while the sell column is redundant (and should be removed in future as unnecessary).
"""
trade_score_column = model.get("trade_score")
if model.get("combine") == "relative":
combine_scores_relative(df, buy_score_column, sell_score_column, buy_score_column, sell_score_column)
combine_scores_relative(df, buy_score_column, sell_score_column, trade_score_column)
elif model.get("combine") == "difference":
combine_scores_difference(df, buy_score_column, sell_score_column, buy_score_column, sell_score_column)
combine_scores_difference(df, buy_score_column, sell_score_column, trade_score_column)
else:
# If buy score is greater than sell score then positive buy, otherwise negative sell
temp_buy_score_column = df[[buy_score_column, sell_score_column]].apply(lambda x: x[0] if x[0] >= x[1] else -x[1], raw=True, axis=1)
temp_sell_score_column = df[[buy_score_column, sell_score_column]].apply(lambda x: -x[1] if x[0] >= x[1] else x[0], raw=True, axis=1)

df[sell_score_column] = temp_sell_score_column
df[buy_score_column] = temp_buy_score_column
df[trade_score_column] = df[[buy_score_column, sell_score_column]].apply(lambda x: x[0] if x[0] >= x[1] else -x[1], raw=True, axis=1)

# Scale the score distribution to make it symmetric or normalize
# Always apply the transformation to buy score. It might be in [0,1] or [-1,+1] depending on combine parameter
if model.get("coefficient"):
df[buy_score_column] = df[buy_score_column] * model.get("coefficient")
df[trade_score_column] = df[trade_score_column] * model.get("coefficient")
if model.get("constant"):
df[buy_score_column] = df[buy_score_column] + model.get("constant")

if model.get("combine") in ["relative", "difference"]:
df[sell_score_column] = -df[buy_score_column] # We know that they are opposite for these types of combination
df[trade_score_column] = df[trade_score_column] + model.get("constant")


def combine_scores_relative(df, buy_column, sell_column, buy_column_out, sell_column_out):
def combine_scores_relative(df, buy_column, sell_column, trade_column_out):
"""
Mutually adjust input buy and sell scores by producing two output scores.
The idea is that if both scores (buy and sell) are equally high then in the output
Expand All @@ -106,16 +100,13 @@ def combine_scores_relative(df, buy_column, sell_column, buy_column_out, sell_co
buy_plus_sell = df[buy_column] + df[sell_column]
buy_sell_score = ((df[buy_column] / buy_plus_sell) * 2) - 1.0 # in [-1, +1]

df[buy_column_out] = buy_sell_score # High values mean buy signal
df[trade_column_out] = buy_sell_score # High values mean buy signal
#df[buy_column_out] = df[df[buy_column_out] < 0] = 0 # Set negative values to 0

df[sell_column_out] = -buy_sell_score # High values mean sell signal
#df[sell_column_out] = df[df[sell_column_out] < 0] = 0 # Set negative values to 0

return buy_sell_score


def combine_scores_difference(df, buy_column, sell_column, buy_column_out, sell_column_out):
def combine_scores_difference(df, buy_column, sell_column, trade_column_out):
"""
This transformation represents how much the buy score is higher than the sell score.
If they are equal then the output is 0. The output scores have opposite signs.
Expand All @@ -124,12 +115,9 @@ def combine_scores_difference(df, buy_column, sell_column, buy_column_out, sell_
# difference
buy_minus_sell = df[buy_column] - df[sell_column]

df[buy_column_out] = buy_minus_sell # High values mean buy signal
df[trade_column_out] = buy_minus_sell # High values mean buy signal
#df[buy_column_out] = df[df[buy_column_out] < 0] = 0 # Set negative values to 0

df[sell_column_out] = -buy_minus_sell # High values mean sell signal
#df[sell_column_out] = df[df[sell_column_out] < 0] = 0 # Set negative values to 0

return buy_minus_sell


Expand Down Expand Up @@ -172,31 +160,37 @@ def linear_regr_fn(X):
# Signal rules
#

def apply_rule_with_score_thresholds(df, model, trade_score_column):
    """
    Apply threshold rules to one combined trade score and generate buy/sell signals.

    The signals are written in place into two pre-defined boolean columns of ``df``:
    'buy_signal_column' and 'sell_signal_column'. A row where neither column is True
    means "do nothing". Nothing is returned.

    :param df: data frame holding the trade score column; it is modified in place
    :param model: dict-like rule parameters; the keys "buy_signal_threshold" and
        "sell_signal_threshold" are read with ``get``
    :param trade_score_column: name of the column with the combined trade score
        (higher values mean buy, lower values mean sell)
    """
    trade_score = df[trade_score_column]

    # Buy if the score is at or above the buy threshold
    df['buy_signal_column'] = trade_score >= model.get("buy_signal_threshold")

    # Sell if the same score is at or below the sell threshold
    df['sell_signal_column'] = trade_score <= model.get("sell_signal_threshold")


def apply_rule_with_score_thresholds_2(df, model, trade_score_column, trade_score_column_2):
    """
    Apply a two-dimensional threshold rule to two trade scores and generate buy/sell signals.

    Assume using difference combination with negative sell scores.

    The signals are written in place into two pre-defined boolean columns of ``df``:
    'buy_signal_column' and 'sell_signal_column'. Nothing is returned.

    :param df: data frame holding both trade score columns; it is modified in place
    :param model: dict-like rule parameters; the keys "buy_signal_threshold",
        "buy_signal_threshold_2", "sell_signal_threshold" and "sell_signal_threshold_2"
        are read with ``get``
    :param trade_score_column: name of the first trade score column
    :param trade_score_column_2: name of the second trade score column
    """
    # NOTE: an additional filter on the distance from the center
    # ((score_1**2 + score_2**2)**0.5 >= "two_dim_distance_threshold") was prototyped
    # for both signals but is currently disabled.

    score_1 = df[trade_score_column]
    score_2 = df[trade_score_column_2]

    # Both scores are greater than or equal to the corresponding buy thresholds
    df['buy_signal_column'] = \
        (score_1 >= model.get("buy_signal_threshold")) & \
        (score_2 >= model.get("buy_signal_threshold_2"))

    # Both scores are smaller than or equal to the corresponding sell thresholds
    df['sell_signal_column'] = \
        (score_1 <= model.get("sell_signal_threshold")) & \
        (score_2 <= model.get("sell_signal_threshold_2"))


def apply_rule_with_score_thresholds_one_row(row, model, buy_score_column, sell_score_column):
Expand Down
2 changes: 2 additions & 0 deletions config-sample-v0.5.0.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@
"buy_labels": ["high_10_lc", "high_15_lc", "high_20_lc"],
"sell_labels": ["low_10_lc", "low_15_lc", "low_20_lc"],

"trade_score": "trade_score",

"point_threshold": null,
"window": 3,
"combine": "",
Expand Down
25 changes: 11 additions & 14 deletions scripts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def main(config_file):
#
# Aggregate and post-process
#
score_column_names = []
trade_score_column_names = []
sa_sets = ['score_aggregation', 'score_aggregation_2']
for i, score_aggregation_set in enumerate(sa_sets):
score_aggregation = App.config.get(score_aggregation_set)
Expand All @@ -92,34 +92,31 @@ def main(config_file):
# Output (post-processed) columns for each aggregation set
buy_column = 'buy_score_column'
sell_column = 'sell_score_column'
if i > 0:
buy_column = 'buy_score_column' + '_' + str(i+1)
sell_column = 'sell_score_column' + '_' + str(i+1)

# Aggregate scores between each other and in time
aggregate_scores(df, score_aggregation, buy_column, buy_labels)
aggregate_scores(df, score_aggregation, sell_column, sell_labels)

# Mutually adjust two independent scores with opposite semantics
combine_scores(df, score_aggregation, buy_column, sell_column)

score_column_names.append(buy_column)
score_column_names.append(sell_column)
trade_score_column = score_aggregation.get("trade_score")
trade_score_column_names.append(trade_score_column)

#
# Apply signal rule and generate binary buy_signal_column/sell_signal_column
#
signal_model = App.config['signal_model']
if signal_model.get('rule_type') == 'two_dim_rule':
apply_rule_with_score_thresholds_2(df, signal_model, 'buy_score_column', 'buy_score_column_2')
apply_rule_with_score_thresholds_2(df, signal_model, trade_score_column_names[0], trade_score_column_names[1])
else: # Default one dim rule
apply_rule_with_score_thresholds(df, signal_model, 'buy_score_column', 'sell_score_column')

signal_column_names = ['buy_signal_column', 'sell_signal_column']
apply_rule_with_score_thresholds(df, signal_model, trade_score_column_names[0])

#
# Simulate trade using close price and two boolean signals
# Simulate trade and compute performance using close price and two boolean signals
# Add a pair of two dicts: performance dict and model parameters dict
#
signal_column_names = ['buy_signal_column', 'sell_signal_column']

performance, long_performance, short_performance = \
simulated_trade_performance(df, 'sell_signal_column', 'buy_signal_column', 'close')

Expand Down Expand Up @@ -152,7 +149,7 @@ def main(config_file):
lines = []

# Score statistics
for score_col_name in score_column_names:
for score_col_name in trade_score_column_names:
lines.append(f"'{score_col_name}':\n" + df[score_col_name].describe().to_string())

# TODO: Profit
Expand All @@ -169,7 +166,7 @@ def main(config_file):
#
out_columns = ["timestamp", "open", "high", "low", "close"] # Source data
out_columns.extend(App.config.get('labels')) # True labels
out_columns.extend(score_column_names) # Aggregated post-processed scores
out_columns.extend(trade_score_column_names) # Aggregated post-processed scores
out_columns.extend(signal_column_names) # Rule results
out_columns.extend(["buy_transaction", "sell_transaction", "transaction_type", "profit_long_percent", "profit_short_percent", "profit_percent"]) # Simulation results

Expand Down
19 changes: 16 additions & 3 deletions scripts/train_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,29 @@ def main(config_file):

signal_model["rule_type"] = App.config["signal_model"]["rule_type"]

#
# Do not aggregate but assume that we have already the aggregation results in the data
#
trade_score_column_names = []
sa_sets = ['score_aggregation', 'score_aggregation_2']
for i, score_aggregation_set in enumerate(sa_sets):
score_aggregation = App.config.get(score_aggregation_set)
if not score_aggregation:
continue

trade_score_column = score_aggregation.get("trade_score")
trade_score_column_names.append(trade_score_column)

#
# Apply signal rule and generate binary buy_signal_column/sell_signal_column
#
if signal_model.get('rule_type') == 'two_dim_rule':
apply_rule_with_score_thresholds_2(df, signal_model, 'buy_score_column', 'buy_score_column_2')
apply_rule_with_score_thresholds_2(df, signal_model, trade_score_column_names[0], trade_score_column_names[1])
else: # Default one dim rule
apply_rule_with_score_thresholds(df, signal_model, 'buy_score_column', 'sell_score_column')
apply_rule_with_score_thresholds(df, signal_model, trade_score_column_names[0])

#
# Simulate trade using close price and two boolean signals
# Simulate trade and compute performance using close price and two boolean signals
# Add a pair of two dicts: performance dict and model parameters dict
#
performance, long_performance, short_performance = \
Expand Down
6 changes: 4 additions & 2 deletions service/App.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ class App:
"buy_labels": ["high_10_lc", "high_15_lc", "high_20_lc"],
"sell_labels": ["low_10_lc", "low_15_lc", "low_20_lc"],

"trade_score": "trade_score", # Output column name: positive values - buy, negative values - sell

"point_threshold": None, # Produce boolean column (optional)
"window": 3, # Aggregate in time
"combine": "", # "no_combine" (or empty), "relative", "difference"
Expand All @@ -188,8 +190,8 @@ class App:
"rule_type": "", # empty, 'two_dim_rule'

# Rule parameters to decide whether to buy/sell after all aggregations/combinations
"buy_signal_threshold": 0.65,
"sell_signal_threshold": 0.65,
"buy_signal_threshold": 0.1,
"sell_signal_threshold": -0.1,

# To decide whether to notify (can be an option of individual users/consumers)
"buy_notify_threshold": 0.05,
Expand Down
13 changes: 7 additions & 6 deletions service/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ def analyze(self):
# 4.
# Aggregate and post-process
#
trade_score_column_names = []
sa_sets = ['score_aggregation', 'score_aggregation_2']
for i, score_aggregation_set in enumerate(sa_sets):
score_aggregation = App.config.get(score_aggregation_set)
Expand All @@ -410,25 +411,25 @@ def analyze(self):
# Output (post-processed) columns for each aggregation set
buy_column = 'buy_score_column'
sell_column = 'sell_score_column'
if i > 0:
buy_column = 'buy_score_column' + '_' + str(i + 1)
sell_column = 'sell_score_column' + '_' + str(i + 1)

# Aggregate scores between each other and in time
aggregate_scores(df, score_aggregation, buy_column, buy_labels)
aggregate_scores(df, score_aggregation, sell_column, sell_labels)

# Mutually adjust two independent scores with opposite semantics
combine_scores(df, score_aggregation, buy_column, sell_column)

trade_score_column = score_aggregation.get("trade_score")
trade_score_column_names.append(trade_score_column)

#
# 5.
# Apply rule to last row
#
signal_model = App.config['signal_model']
if signal_model.get('rule_type') == 'two_dim_rule':
apply_rule_with_score_thresholds_2(df, signal_model, 'buy_score_column', 'buy_score_column_2')
apply_rule_with_score_thresholds_2(df, signal_model, trade_score_column_names[0], trade_score_column_names[1])
else: # Default one dim rule
apply_rule_with_score_thresholds(df, signal_model, 'buy_score_column', 'sell_score_column')
apply_rule_with_score_thresholds(df, signal_model, trade_score_column_names[0])

#
# 6.
Expand Down

0 comments on commit 11d15dc

Please sign in to comment.