Commit

REALSumm
1710763616 committed Oct 18, 2022
1 parent cff034f commit 3732190
Showing 12 changed files with 68 additions and 2 deletions.
20 changes: 18 additions & 2 deletions README.md
@@ -195,7 +195,7 @@ As you can see, the attached debiasing adapter successfully mitigates bias in th
The following example shows how to evaluate the original metrics' performance on [WMT20](https://aclanthology.org/2020.wmt-1.77/):

```bash
cd Metric-Fairness/mitigating_bias/performance_eval/WMT
pip install -r requirements.txt
python eval_bert_score.py --model_type bert-base-uncased
python eval_bert_score.py --model_type bert-large-uncased
@@ -244,9 +244,25 @@ Likewise, each score of BERTScore (both BERT-base and BERT-large), BARTScore
+-------+-------+-------+-------+-------+-------+-------+-------+-------+-------+---------+
```

##### REALSumm

To save time, we provide the pkl files directly; run
```bash
cd Metric-Fairness/mitigating_bias/performance_eval/REALSumm
pip install -r requirements.txt
python analyse_pkls.py
```

and you will get scores like

```
+------------------------------+----------------------+------------------------------+----------------------+-------------------------------+-----------------------+--------------------------+------------------+
| bart_score_bart_base_adapter | bart_score_bart_base | bert_score_bert_base_adapter | bert_score_bert_base | bert_score_bert_large_adapter | bert_score_bert_large | bleurt_bert_base_adapter | bleurt_bert_base |
+------------------------------+----------------------+------------------------------+----------------------+-------------------------------+-----------------------+--------------------------+------------------+
| 0.307 | 0.325 | 0.473 | 0.465 | 0.468 | 0.464 | 0.4 | 0.299 |
+------------------------------+----------------------+------------------------------+----------------------+-------------------------------+-----------------------+--------------------------+------------------+
```
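Each number in the table is a Spearman rank correlation between a metric's scores and the human litepyramid recall judgements. A minimal sketch of that computation, using invented score lists rather than the REALSumm data:

```python
# Spearman rank correlation between metric scores and human judgements.
# The two lists below are made up for illustration; the real script
# reads them out of the REALSumm pkl files.
from scipy.stats import spearmanr

human = [0.2, 0.5, 0.4, 0.9, 0.7]        # e.g. litepyramid_recall per summary
metric = [0.31, 0.55, 0.48, 0.60, 0.88]  # e.g. bert_score_f per summary

correlation, p_value = spearmanr(metric, human)
print(round(correlation, 3))  # → 0.9
```

Spearman compares only the rankings of the two lists, so it is insensitive to the (arbitrary) scale of each metric, which is why it is a common choice for metric-vs-human evaluation.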

If you use our data or code, please cite:

35 changes: 35 additions & 0 deletions mitigating_bias/performance_eval/REALSumm/analyse_pkls.py
@@ -0,0 +1,35 @@
import pandas as pd
from prettytable import PrettyTable
from scipy.stats import spearmanr

# Maps each metric variant to the key under which its score is stored
# in the corresponding pkl file.
KEY_VALUE = {
    'bart_score_bart_base_adapter': 'bart_score_avg_f',
    'bart_score_bart_base': 'bart_score_avg_f',
    'bert_score_bert_base_adapter': 'bert_score_f',
    'bert_score_bert_base': 'bert_score_f',
    'bert_score_bert_large_adapter': 'bert_score_f',
    'bert_score_bert_large': 'bert_score_f',
    'bleurt_bert_base_adapter': 'bleurt_score',
    'bleurt_bert_base': 'bleurt_score',
}


def analyse_pkls(key, value):
    # Collect paired human judgements and metric scores for every
    # system summary of every document.
    data = pd.read_pickle('pkls/' + key + '.pkl')
    human = []
    metric = []
    for doc_id in data:
        for system in data[doc_id]['sys_summs']:
            scores = data[doc_id]['sys_summs'][system]['scores']
            human.append(scores['litepyramid_recall'])
            metric.append(scores[value])
    # Spearman correlation between the metric and human litepyramid recall.
    correlation, p_value = spearmanr(metric, human)
    return correlation


pt = PrettyTable()
for key in KEY_VALUE:
    pt.add_column(key, [analyse_pkls(key, KEY_VALUE[key])])
print(pt)
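The script assumes each pkl file holds a dict keyed by document id, where each entry has a `sys_summs` dict of system summaries, each carrying a `scores` dict. A hypothetical round-trip showing the layout its loop expects (field names are taken from the code above; document ids, system names, and values are invented):

```python
import pickle
from scipy.stats import spearmanr

# Invented two-document, two-system example in the nested layout that
# analyse_pkls() iterates over. Only the 'scores' sub-dicts matter to it.
data = {
    0: {'sys_summs': {
        'sysA': {'scores': {'litepyramid_recall': 0.40, 'bert_score_f': 0.50}},
        'sysB': {'scores': {'litepyramid_recall': 0.70, 'bert_score_f': 0.65}},
    }},
    1: {'sys_summs': {
        'sysA': {'scores': {'litepyramid_recall': 0.30, 'bert_score_f': 0.35}},
        'sysB': {'scores': {'litepyramid_recall': 0.90, 'bert_score_f': 0.80}},
    }},
}

with open('demo.pkl', 'wb') as f:
    pickle.dump(data, f)

# Same extraction loop as analyse_pkls(), applied to the demo file.
with open('demo.pkl', 'rb') as f:
    loaded = pickle.load(f)

human, metric = [], []
for doc in loaded.values():
    for summ in doc['sys_summs'].values():
        human.append(summ['scores']['litepyramid_recall'])
        metric.append(summ['scores']['bert_score_f'])

correlation, _ = spearmanr(metric, human)
print(correlation)  # → 1.0 (ranks agree perfectly in this toy data)
```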
Binary file not shown. (×8)
3 changes: 3 additions & 0 deletions mitigating_bias/performance_eval/REALSumm/requirements.txt
@@ -0,0 +1,3 @@
pandas==1.3.4
prettytable==3.4.1
scipy==1.7.1
12 changes: 12 additions & 0 deletions mitigating_bias/performance_eval/WMT/requirements.txt
@@ -0,0 +1,12 @@
absl==0.0
adapter_transformers==3.1.0
matplotlib==3.4.3
numpy==1.20.3
pandas==1.3.4
prettytable==3.4.1
pyemd==0.5.1
scipy==1.7.1
score==0.0.1a0
setuptools==58.0.4
tqdm==4.62.3
transformers==4.23.1
