-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_model.py
More file actions
101 lines (84 loc) · 4.64 KB
/
Copy pathextract_model.py
File metadata and controls
101 lines (84 loc) · 4.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
import pickle

import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
def _save_array(output_dir, filename, array):
    """Save *array* as ``output_dir/filename`` with ``np.save`` and return the path.

    The directory is created on demand so a missing output folder does not
    surface as a confusing error further up the call chain.
    """
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, filename)
    np.save(path, array)
    return path


def _export_scaler(scaler, output_dir):
    """Print a fitted StandardScaler's mean/scale and save both as .npy files."""
    print("Found StandardScaler:")
    print(f"Mean: {scaler.mean_}")
    print(f"Scale: {scaler.scale_}")
    _save_array(output_dir, 'scaler_mean.npy', scaler.mean_)
    _save_array(output_dir, 'scaler_scale.npy', scaler.scale_)
    print("\nSaved scaler parameters")


def _export_random_forest(forest, output_dir):
    """Print a fitted RandomForestClassifier's summary and save it to disk.

    Writes ``feature_importances.npy`` and a pickle of the estimator itself
    (``classifier.pkl``) into *output_dir*.
    """
    print(f"N estimators: {forest.n_estimators}")
    print(f"Feature importances: {forest.feature_importances_}")
    _save_array(output_dir, 'feature_importances.npy', forest.feature_importances_)
    print("\nSaved feature importances")
    # Save the entire estimator so it can be reloaded without the pipeline.
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, 'classifier.pkl'), 'wb') as model_file:
        pickle.dump(forest, model_file)
    print("\nSaved classifier model")


def _export_model(model, output_dir):
    """Dispatch on the loaded object's type and export what can be extracted."""
    if isinstance(model, Pipeline):
        print("\nPipeline details:")
        print(f"Steps: {[name for name, _ in model.steps]}")
        # Extract and save model coefficients/feature importances per step.
        for name, step in model.named_steps.items():
            print(f"\nAnalyzing step: {name}")
            print(f"Type: {type(step)}")
            if isinstance(step, StandardScaler):
                _export_scaler(step, output_dir)
            elif isinstance(step, RandomForestClassifier):
                print("Found RandomForestClassifier:")
                _export_random_forest(step, output_dir)
    elif isinstance(model, RandomForestClassifier):
        print("\nFound RandomForestClassifier:")
        _export_random_forest(model, output_dir)
    else:
        print("\nNo supported estimator found; nothing was extracted")


def _report_pickled_object(obj, output_dir):
    """Print details of one unpickled object; save object-dtype arrays."""
    print("\nFound pickled object:")
    print(f"Type: {type(obj)}")
    if not isinstance(obj, np.ndarray):
        print(f"Object contents: {obj}")
        return
    print("Array details:")
    print(f"Shape: {obj.shape}")
    print(f"Dtype: {obj.dtype}")
    print(f"Values: {obj}")
    # An object-dtype array is treated as the list of feature names.
    if obj.dtype == np.dtype('O'):
        path = _save_array(output_dir, 'feature_names.npy', obj)
        print(f"\nSaved feature names to {path}")


def _scan_pickle_stream(model_path, output_dir):
    """Read every consecutively pickled object in *model_path* and report it.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(model_path, 'rb') as f:
        size = len(f.read())
        print(f"\nRead {size} bytes from {model_path}")
        f.seek(0)
        while True:
            try:
                obj = pickle.load(f)
            except EOFError:
                break  # Reached end of file
            except Exception as e:
                # The stream position is unreliable after a failed load.
                print(f"Error reading next object: {e}")
                break
            try:
                _report_pickled_object(obj, output_dir)
            except OSError as e:
                # A failed save should not stop inspection of later objects.
                print(f"Error saving extracted object: {e}")


def extract_model(model_path: str = 'scripts/final_model.pkl',
                  output_dir: str = 'model') -> None:
    """Extract and analyze the model from the pickle file.

    The file is first loaded with joblib. A ``Pipeline`` has its
    ``StandardScaler`` and ``RandomForestClassifier`` steps exported; a bare
    ``RandomForestClassifier`` is exported directly. If joblib cannot load the
    file, it is scanned as a sequence of plain pickles instead and any
    object-dtype NumPy array found is saved as ``feature_names.npy``.

    SECURITY: ``joblib.load`` and ``pickle.load`` execute arbitrary code
    embedded in the file. Only run this on model files from a trusted source.

    Args:
        model_path: Path of the serialized model to inspect.
        output_dir: Directory that receives the extracted artifacts. It is
            created if missing. Defaults to ``'model'``.

    Returns:
        Always ``None``; results are printed and written to *output_dir*.
        Failures are reported on stdout rather than raised.
    """
    print("\nTrying to load model with joblib...")
    try:
        model = joblib.load(model_path)
    except Exception as e:
        # Broad on purpose: any load failure means "try the pickle fallback".
        print(f"\nError loading with joblib: {e}")
        print("\nTrying standard pickle loading...")
        try:
            _scan_pickle_stream(model_path, output_dir)
        except OSError as e:
            print(f"Error reading model file: {e}")
        return None

    print("\nSuccessfully loaded model with joblib!")
    print(f"Model type: {type(model)}")
    try:
        _export_model(model, output_dir)
    except Exception as e:
        # Kept separate from the load handler so an export problem is not
        # misreported as a joblib load failure.
        print(f"\nError extracting model parameters: {e}")
    return None
if __name__ == "__main__":
    # Script entry point: run the extraction with the default model path.
    extract_model()