Merge pull request #1181 from linjieccc/fix_taskflow_senta
Add confidence score for sentiment analysis tasks
wawltor authored Oct 20, 2021
2 parents 00ef7de + c43a722 commit 47430dc
Showing 5 changed files with 27 additions and 15 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -77,7 +77,7 @@ ddp("百度是一家高科技公司")
# Sentiment analysis
senta = Taskflow("sentiment_analysis")
senta("怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片")
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}]
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative', 'score': 0.6691398620605469}]
```
For more usage please refer to the [Taskflow docs](./docs/model_zoo/taskflow.md)

2 changes: 1 addition & 1 deletion README_en.md
@@ -77,7 +77,7 @@ ddp("百度是一家高科技公司")
# Sentiment Analysis
senta = Taskflow("sentiment_analysis")
senta("怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片")
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}]
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative', 'score': 0.6691398620605469}]
```

For more usage please refer to [Taskflow Docs](./docs/model_zoo/taskflow.md)
6 changes: 3 additions & 3 deletions docs/model_zoo/taskflow.md
@@ -146,16 +146,16 @@ from paddlenlp import Taskflow

senta = Taskflow("sentiment_analysis")
senta("怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片")
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}]
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative', 'score': 0.6691398620605469}]

senta(["怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片",
"作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间"])
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}, {'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间', 'label': 'positive'}]
>>> [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative', 'score': 0.6691398620605469}, {'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间', 'label': 'positive', 'score': 0.9857505559921265}]

# Use the SKEP pretrained sentiment analysis model for prediction
senta = Taskflow("sentiment_analysis", model="skep_ernie_1.0_large_ch")
senta("作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。")
>>> [{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive'}]
>>> [{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive', 'score': 0.984320878982544}]
```

### Knowledge Mining
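Since the new `score` field is simply the model's confidence in the predicted label, a natural follow-up is to drop low-confidence predictions. The sketch below is illustrative only and not part of this commit; it assumes `paddlenlp` is installed, the Taskflow weights can be downloaded, and the `0.9` threshold is an arbitrary example value.

```python
# Illustrative only -- not part of this commit.
from paddlenlp import Taskflow

senta = Taskflow("sentiment_analysis")
results = senta([
    "怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片",
    "作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间",
])

# Keep only predictions whose confidence exceeds a chosen threshold.
CONFIDENCE_THRESHOLD = 0.9  # hypothetical value, tune per application
confident = [r for r in results if r["score"] >= CONFIDENCE_THRESHOLD]
for r in confident:
    print(r["label"], round(r["score"], 4), r["text"])
```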
4 changes: 2 additions & 2 deletions paddlenlp/taskflow/models/sentiment_analysis_model.py
@@ -129,7 +129,7 @@ def forward(self, text, seq_len):
logits = self.output_layer(fc_out)
probs = F.softmax(logits, axis=1)
idx = paddle.argmax(probs, axis=1).numpy()
return idx
return idx, probs


class SkepSequenceModel(SkepPretrainedModel):
@@ -158,4 +158,4 @@ def forward(self,
logits = self.classifier(pooled_output)
probs = F.softmax(logits, axis=1)
idx = paddle.argmax(probs, axis=1)
return idx
return idx, probs
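For readers unfamiliar with the `idx`/`probs` pattern above, the following sketch (illustrative only, not code from this commit) runs softmax and argmax on a fake batch of logits so it is easy to see that `probs` carries the per-class probabilities from which the confidence score is later taken.

```python
# Illustrative only -- a self-contained sketch of the idx/probs pattern above.
import paddle
import paddle.nn.functional as F

logits = paddle.to_tensor([[1.2, -0.3], [0.1, 2.4]])  # fake logits: 2 examples, 2 classes
probs = F.softmax(logits, axis=1)                      # per-class probabilities per example
idx = paddle.argmax(probs, axis=1)                     # predicted class ids
confidence = paddle.max(probs, axis=1)                 # the score reported per prediction

print(idx.numpy())         # [0 1]
print(confidence.numpy())  # approximately [0.82 0.91]
```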
28 changes: 20 additions & 8 deletions paddlenlp/taskflow/sentiment_analysis.py
@@ -49,21 +49,21 @@
senta = Taskflow("sentiment_analysis")
senta("怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片")
'''
[{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}]
[{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative', 'score': 0.6691398620605469}]
'''
senta(["怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片",
"作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间"])
'''
[{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'},
{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间', 'label': 'positive'}
[{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative', 'score': 0.6691398620605469},
{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间', 'label': 'positive', 'score': 0.9857505559921265}
]
'''
senta = Taskflow("sentiment_analysis", model="skep_ernie_1.0_large_ch")
senta("作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。")
'''
[{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive'}]
[{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive', 'score': 0.984320878982544}]
'''
"""

@@ -115,7 +115,7 @@ def _construct_model(self, model):
padding_idx=pad_token_id,
pooling_type='max')
model_path = download_file(self._task_path, model + ".pdparams",
URLS[model][0], URLS[model][1], model)
URLS[model][0], URLS[model][1])

# Load the model parameter for the predict
state_dict = paddle.load(model_path)
@@ -126,7 +126,7 @@ def _construct_tokenizer(self, model):
"""
Construct the tokenizer for the predictor.
"""
full_name = download_file(self.model, "senta_word_dict.txt",
full_name = download_file(self._task_path, "senta_word_dict.txt",
URLS['bilstm_vocab'][0],
URLS['bilstm_vocab'][1])
vocab = Vocab.load_vocabulary(
@@ -181,28 +181,34 @@ def _run_model(self, inputs):
Run the task model from the outputs of the `_tokenize` function.
"""
results = []
scores = []
with static_mode_guard():
for batch in inputs['data_loader']:
ids, lens = self.batchify_fn(batch)
self.input_handles[0].copy_from_cpu(ids)
self.input_handles[1].copy_from_cpu(lens)
self.predictor.run()
idx = self.output_handle[0].copy_to_cpu().tolist()
probs = self.output_handle[1].copy_to_cpu().tolist()
labels = [self._label_map[i] for i in idx]
score = [max(prob) for prob in probs]
results.extend(labels)
scores.extend(score)

inputs['result'] = results
inputs['score'] = scores
return inputs

def _postprocess(self, inputs):
"""
This function will convert the model output to raw text.
"""
final_results = []
for text, label in zip(inputs['text'], inputs['result']):
for text, label, score in zip(inputs['text'], inputs['result'], inputs['score']):
result = {}
result['text'] = text
result['label'] = label
result['score'] = score
final_results.append(result)
return final_results
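The score bookkeeping above boils down to taking the maximum class probability per example and zipping it back onto the input text. A plain-Python sketch with made-up numbers (not part of this commit; the label map shown is a hypothetical example):

```python
# Illustrative only -- fake predictor output, just to show the bookkeeping.
texts = ["text a", "text b"]
idx = [1, 0]                             # argmax class ids from one batch
probs = [[0.33, 0.67], [0.99, 0.01]]     # softmax rows from the same batch
label_map = {0: "negative", 1: "positive"}

labels = [label_map[i] for i in idx]
scores = [max(prob) for prob in probs]   # confidence = max class probability

final_results = [
    {"text": t, "label": l, "score": s}
    for t, l, s in zip(texts, labels, scores)
]
print(final_results)
# [{'text': 'text a', 'label': 'positive', 'score': 0.67},
#  {'text': 'text b', 'label': 'negative', 'score': 0.99}]
```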

@@ -302,27 +308,33 @@ def _run_model(self, inputs):
Run the task model from the outputs of the `_tokenize` function.
"""
results = []
scores = []
with static_mode_guard():
for batch in inputs['data_loader']:
ids, segment_ids = self._batchify_fn(batch)
self.input_handles[0].copy_from_cpu(ids)
self.input_handles[1].copy_from_cpu(segment_ids)
self.predictor.run()
idx = self.output_handle[0].copy_to_cpu().tolist()
probs = self.output_handle[1].copy_to_cpu().tolist()
labels = [self._label_map[i] for i in idx]
score = [max(prob) for prob in probs]
results.extend(labels)
scores.extend(score)

inputs['result'] = results
inputs['score'] = scores
return inputs

def _postprocess(self, inputs):
"""
The model output is tag ids, this function will convert the model output to raw text.
"""
final_results = []
for text, label in zip(inputs['text'], inputs['result']):
for text, label, score in zip(inputs['text'], inputs['result'], inputs['score']):
result = {}
result['text'] = text
result['label'] = label
result['score'] = score
final_results.append(result)
return final_results
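A quick local sanity check after this change is to assert that every result dict now carries a `score` in [0, 1], for both the default BiLSTM model and the SKEP model. This is only a sketch, not a test shipped in this commit, and it assumes the model weights can be downloaded.

```python
# Illustrative only -- a manual sanity check, not a test included in this commit.
from paddlenlp import Taskflow

def check_scores(model_name=None):
    kwargs = {"model": model_name} if model_name else {}
    senta = Taskflow("sentiment_analysis", **kwargs)
    out = senta("作为老的四星酒店,房间依然很整洁,相当不错。")
    for item in out:
        assert set(item) >= {"text", "label", "score"}
        assert 0.0 <= item["score"] <= 1.0

check_scores()                           # default BiLSTM model
check_scores("skep_ernie_1.0_large_ch")  # SKEP model
```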
