classify_text.py
import sys

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


def classify_text(text, threshold=0.45):
    # Load the fine-tuned model from the best checkpoint
    model_path = './best_model'
    tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path,
        num_labels=2,
        problem_type="single_label_classification"
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    # Tokenize the input, matching the preprocessing used for the checkpoint
    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt'
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to class probabilities
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    green_flag_prob = probabilities[0][0].item()
    red_flag_prob = probabilities[0][1].item()
    print(f"Green Flag probability: {green_flag_prob:.4f}")
    print(f"Red Flag probability: {red_flag_prob:.4f}")

    # Classify using the threshold: any red-flag probability above it
    # counts as a red flag, even if it is below 0.5
    predicted_class = 1 if red_flag_prob > threshold else 0
    # Report the probability of the predicted class, so the confidence
    # always corresponds to the printed label (max() could report the
    # other class's probability when the threshold is below 0.5)
    confidence = red_flag_prob if predicted_class == 1 else green_flag_prob
    label_mapping = {0: 'Green Flag', 1: 'Red Flag'}
    label = label_mapping[predicted_class]
    print('harmful' if label == 'Red Flag' else 'non-harmful')
    sys.stderr.write(f"Confidence: {confidence:.2%}\n")


if __name__ == "__main__":
    if len(sys.argv) > 1:
        text = sys.argv[1]
        classify_text(text)
    else:
        print("Please provide text to classify", file=sys.stderr)
        sys.exit(1)