Commit 0d479b0: 1.16.24
yangheng95 committed Oct 28, 2022
1 parent f812e09 commit 0d479b0
Showing 23 changed files with 99 additions and 73 deletions.
README.MD (2 changes: 1 addition & 1 deletion)

@@ -124,7 +124,7 @@ from pyabsa.functional import ATEPCConfigManager

atepc_config = ATEPCConfigManager.get_atepc_config_english()

-atepc_config.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'
+atepc_config.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'
atepc_config.model = ATEPCModelList.FAST_LCF_ATEPC
dataset_path = ABSADatasetList.Restaurant14
# or your local dataset: dataset_path = 'your local dataset path'
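
The change above swaps the full Hub URL for the bare repo ID, which is the form that transformers' from_pretrained expects. A minimal sketch of how that ID is resolved, assuming PyABSA passes config.pretrained_bert straight through to transformers (the internal call site is an assumption; AutoModel and AutoTokenizer are standard transformers API):

```python
# Standard transformers API: an 'org/name' repo ID is downloaded from the
# Hugging Face Hub and cached locally on first use.
from transformers import AutoModel, AutoTokenizer

model_id = 'yangheng/deberta-v3-base-absa-v1.1'  # repo ID, not a URL

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)
```
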
basic_test/run_apc_pretrain_test.py (5 changes: 3 additions & 2 deletions)

@@ -43,7 +43,8 @@

# # for dataset in ABSADatasetList():
for dataset in ABSADatasetList()[:1]:
-    for model in APCModelList()[:1]:
+    # for model in APCModelList()[:1]:
+    for model in APCModelList():
cuda.empty_cache()
config = APCConfigManager.get_apc_config_english()
config.lcf = 'cdm'
@@ -53,7 +54,7 @@
config.max_seq_len = 10
config.evaluate_begin = 0
config.log_step = -1
-        config.cross_validate_fold = 3
+        config.cross_validate_fold = 5
sent_classifier = Trainer(config=config,
dataset=dataset,
checkpoint_save_mode=1,
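
Two things change in this test: the loop now exercises every model in APCModelList rather than only the first, and cross-validation goes from 3 folds to 5, so each configuration is trained and validated on five disjoint splits. An illustrative sketch of 5-fold splitting, assuming cross_validate_fold behaves like standard k-fold CV (PyABSA's internal splitting code is not part of this diff):

```python
# Illustrative only: standard 5-fold cross-validation with scikit-learn.
from sklearn.model_selection import KFold

samples = list(range(100))  # stand-in for the training examples
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(kfold.split(samples)):
    # each fold trains on ~80% of the data and validates on the held-out ~20%
    print(f"fold {fold}: train={len(train_idx)}, val={len(val_idx)}")
```
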
demos/aspect_polarity_classification/APC_USAGES.ipynb (30 changes: 15 additions & 15 deletions)

@@ -4121,7 +4121,7 @@
"apc_config_english.model = APCModelList.FAST_LCF_BERT\n",
"apc_config_english.num_epoch = 1\n",
"apc_config_english.evaluate_begin = 0\n",
"apc_config_english.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'\n",
"apc_config_english.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'\n",
"apc_config_english.similarity_threshold = 1\n",
"apc_config_english.max_seq_len = 80\n",
"apc_config_english.dropout = 0.5\n",
@@ -4276,7 +4276,7 @@
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1 were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.dense.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.weight']\n",
"Some weights of the model checkpoint at yangheng/deberta-v3-base-absa-v1.1 were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.dense.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.weight']\n",
"- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
@@ -4918,14 +4918,14 @@
"output_type": "stream",
"name": "stdout",
"text": [
"2022-09-11 13:27:35,034 INFO: pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
"2022-09-11 13:27:35,034 INFO: pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"INFO:fast_lcf_bert:pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
"INFO:fast_lcf_bert:pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
]
},
{
@@ -5260,7 +5260,7 @@
"optimizer:adamw\t-->\tCalling Count:1\n",
"patience:99999\t-->\tCalling Count:5\n",
"polarities_dim:3\t-->\tCalling Count:5\n",
"pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n",
"pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n",
"save_mode:0\t-->\tCalling Count:0\n",
"seed:2672\t-->\tCalling Count:7\n",
"sigma:0.3\t-->\tCalling Count:0\n",
@@ -5409,7 +5409,7 @@
"/usr/local/lib/python3.7/dist-packages/transformers/convert_slow_tokenizer.py:435: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n",
" \"The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option\"\n",
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"Some weights of the model checkpoint at https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1 were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.dense.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.weight']\n",
"Some weights of the model checkpoint at yangheng/deberta-v3-base-absa-v1.1 were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.dense.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.weight']\n",
"- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
@@ -6153,14 +6153,14 @@
"output_type": "stream",
"name": "stdout",
"text": [
"2022-09-11 13:50:41,977 INFO: pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
"2022-09-11 13:50:41,977 INFO: pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"INFO:fast_lsa_t:pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
"INFO:fast_lsa_t:pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n"
]
},
{
@@ -6516,7 +6516,7 @@
"optimizer:adamw\t-->\tCalling Count:1\n",
"patience:99999\t-->\tCalling Count:6\n",
"polarities_dim:3\t-->\tCalling Count:11\n",
"pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n",
"pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:4\n",
"save_mode:1\t-->\tCalling Count:8\n",
"seed:52\t-->\tCalling Count:7\n",
"sigma:0.3\t-->\tCalling Count:0\n",
@@ -6558,7 +6558,7 @@
"/usr/local/lib/python3.7/dist-packages/transformers/convert_slow_tokenizer.py:435: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n",
" \"The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option\"\n",
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"Some weights of the model checkpoint at https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1 were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.dense.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.weight']\n",
"Some weights of the model checkpoint at yangheng/deberta-v3-base-absa-v1.1 were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.dense.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.weight']\n",
"- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
@@ -6628,7 +6628,7 @@
"optimizer:adamw\t-->\tCalling Count:1\n",
"patience:99999\t-->\tCalling Count:6\n",
"polarities_dim:3\t-->\tCalling Count:11\n",
"pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:9\n",
"pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:9\n",
"save_mode:1\t-->\tCalling Count:7\n",
"seed:52\t-->\tCalling Count:7\n",
"sigma:0.3\t-->\tCalling Count:0\n",
@@ -7465,14 +7465,14 @@
"output_type": "stream",
"name": "stdout",
"text": [
"2022-09-11 14:21:23,234 INFO: pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n"
"2022-09-11 14:21:23,234 INFO: pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"INFO:tnet_lf:pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n"
"INFO:tnet_lf:pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n"
]
},
{
@@ -7818,7 +7818,7 @@
"optimizer:adamw\t-->\tCalling Count:1\n",
"patience:20\t-->\tCalling Count:9\n",
"polarities_dim:3\t-->\tCalling Count:24\n",
"pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n",
"pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n",
"save_mode:1\t-->\tCalling Count:10\n",
"seed:52\t-->\tCalling Count:7\n",
"sigma:0.3\t-->\tCalling Count:0\n",
@@ -7901,7 +7901,7 @@
"optimizer:adamw\t-->\tCalling Count:1\n",
"patience:20\t-->\tCalling Count:9\n",
"polarities_dim:3\t-->\tCalling Count:15\n",
"pretrained_bert:https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n",
"pretrained_bert:yangheng/deberta-v3-base-absa-v1.1\t-->\tCalling Count:0\n",
"save_mode:1\t-->\tCalling Count:9\n",
"seed:52\t-->\tCalling Count:7\n",
"sigma:0.3\t-->\tCalling Count:0\n",
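
A note on the transformers warning that recurs throughout these notebook logs: the checkpoint was saved with language-modeling prediction heads ('lm_predictions.*', 'mask_predictions.*'), and loading it into a bare DebertaV2Model simply discards those tensors, which is expected and harmless here. A minimal reproduction of that situation (standard transformers API, matching the classes named in the logs):

```python
# Loading an ABSA checkpoint into a bare encoder drops its LM-head weights,
# which triggers the "Some weights ... were not used" warning shown above.
from transformers import DebertaV2Model

model = DebertaV2Model.from_pretrained('yangheng/deberta-v3-base-absa-v1.1')
```
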
demos/aspect_polarity_classification/run_fast_lsa_deberta.py (6 changes: 3 additions & 3 deletions)

@@ -36,7 +36,7 @@
config1.cache_dataset = False
config1.patience = 20
config1.optimizer = 'adamw'
-config1.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'
+config1.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'
config1.num_epoch = 50
config1.log_step = 5
config1.SRD = 3
@@ -98,7 +98,7 @@
config2.cache_dataset = False
config2.patience = 20
config2.optimizer = 'adamw'
-config2.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'
+config2.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'
config2.num_epoch = 50
config2.log_step = 5
config2.SRD = 3
@@ -159,7 +159,7 @@
config3.cache_dataset = False
config3.patience = 20
config3.optimizer = 'adamw'
-config3.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'
+config3.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'
config3.num_epoch = 50
config3.log_step = 5
config3.SRD = 3
demos/aspect_polarity_classification/train_apc_english.py (2 changes: 1 addition & 1 deletion)

@@ -21,7 +21,7 @@
apc_config_english.model = APCModelList.FAST_LSA_T_V2
apc_config_english.num_epoch = 30
apc_config_english.evaluate_begin = 2
-apc_config_english.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'
+apc_config_english.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'
apc_config_english.similarity_threshold = 1
apc_config_english.max_seq_len = 80
apc_config_english.dropout = 0.5
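
The demo scripts in this commit all follow the same pattern: fetch a default config, override fields such as model and pretrained_bert, then hand the config to the Trainer. A condensed sketch of that pattern, assembled from the fragments visible in this diff (the Trainer arguments are taken from basic_test/run_apc_pretrain_test.py above; auto_device is an assumption based on PyABSA's usual examples):

```python
# Condensed usage sketch of the PyABSA 1.x functional API shown in this commit.
from pyabsa.functional import APCConfigManager, APCModelList, ABSADatasetList, Trainer

config = APCConfigManager.get_apc_config_english()
config.model = APCModelList.FAST_LSA_T_V2
config.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'  # Hub repo ID

sent_classifier = Trainer(config=config,
                          dataset=ABSADatasetList.Restaurant14,
                          checkpoint_save_mode=1,  # save the fine-tuned state
                          auto_device=True)        # assumption: auto-select GPU/CPU
```
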
demos/aspect_polarity_classification/train_apc_ensemble.py (2 changes: 1 addition & 1 deletion)

@@ -26,7 +26,7 @@

apc_config_english.dropout = 0.5
apc_config_english.log_step = 50
-apc_config_english.pretrained_bert = 'https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1'
+apc_config_english.pretrained_bert = 'yangheng/deberta-v3-base-absa-v1.1'
apc_config_english.num_epoch = 15
apc_config_english.batch_size = 16
apc_config_english.evaluate_begin = 2
Expand Down