Submission scores: Mean distance=510.29, Class Imbalance Ratio=0.43 linear model
Yachay-AI · dmitry-brazhenko · Nov 23, 2023 · Nov 23, 2023 · Nov 23, 2023 · Nov 23, 2023
commit 29d39e148c15284de66c1bbedd3df92424b0ec32
diff --git a/baseline.py b/baseline.py
@@ -2,7 +2,7 @@
 import numpy as np
 import sys
 from joblib import load
-
+import scipy.stats
 
 def softmax(x):
     # Compute the exponential values for each element in the input array
@@ -37,13 +37,30 @@ def transform_array(arr, length):
 # Add a new column 'confidence' to the DataFrame using the list of maximum confidence values.
 data_frame['confidence'] = max_confidences
 X = np.stack(data_frame['raw_prediction'])
-print(X.shape)
-data_frame['pred'] = model.predict(X)
+
+mean_confidence = np.mean(X, axis=1)
+std_confidence = np.std(X, axis=1)
+max_confidence = np.max(X, axis=1)
+min_confidence = np.min(X, axis=1)
+sum_confidence = np.sum(X, axis=1)
+median_confidence = np.median(X, axis=1)
+
+
+skew_confidence = np.apply_along_axis(lambda x: scipy.stats.skew(x), axis=1, arr=X)
+kurtosis_confidence = np.apply_along_axis(lambda x: scipy.stats.kurtosis(x), axis=1, arr=X)
+
+# Combine these new features into a single 2D array
+new_features = np.column_stack((mean_confidence, std_confidence, max_confidence, min_confidence, sum_confidence, median_confidence, skew_confidence, kurtosis_confidence))
+
+
+
+
+data_frame['pred'] = model.predict(new_features)
 
 #data_frame['pred'] = [x.argmax() for x in data_frame['raw_prediction']]
 
 # Sort the DataFrame by the model's predicted value ('pred') in ascending order.
-sorted_data_frame = data_frame.sort_values(by='pred', ascending=False)
+sorted_data_frame = data_frame.sort_values(by='pred', ascending=True)
 
 # Determine the number of top records to consider for computing mean distance.
 top_records_count = int(0.1 * len(data_frame))