无法使用最新的版本 #351

ningmiaokai · 2023-09-22T12:30:28Z

Please provide the REQUIRED information. Otherwise, it is almost impossible to locate the problem. DO NOT CHANGE THE FORM.

PyABSA Version (Required)

Python Version:
3.8.10
PyABSA Version:
Requirement already satisfied: pyabsa in ./miniconda3/lib/python3.8/site-packages (2.3.3)
Torch Version:
Torch version: 2.0.0+cu118+cuda11.8
Transformers Version:
Transformers version: 4.29.0
Other:

See the console output for PyABSA, Torch, Transformers Version

### ABSADataset Version (Required if you use integrated datasets)

See the console output for ABSADataset Version

2023-09-22 18:02:43,259 INFO: Local dataset version: 2023.03.14
### Code To Reproduce (Required)

Paste buggy code here, **text-only**, no screenshots here
def prepare_dependency_graph(dataset_list, graph_path, max_seq_len):
    """Build, or reuse, a pickled cache of per-sentence dependency graphs.

    The cache file name is derived from the first entry of ``dataset_list``
    ('train', 'test' or 'val' is looked up in its lower-cased form, in that
    order) and from ``max_seq_len``. If a file with that name already exists
    under ``graph_path`` it is returned untouched.

    Otherwise every file in ``dataset_list`` is read as a sequence of
    three-line records: line 0 is the sentence with a ``$T$`` placeholder,
    line 1 is the aspect term that replaces it, and line 2 is not used here.
    The result of ``dependency_adj_matrix`` (defined elsewhere in the
    project) for each reconstructed sentence is stored under the lower-cased
    sentence and the whole mapping is pickled.

    Args:
        dataset_list: Non-empty list of dataset file paths.
        graph_path: Directory in which the cache file is stored.
        max_seq_len: Sequence length, used only to build the file name.

    Returns:
        The path of the (existing or newly written) cache file.

    Raises:
        IndexError: If ``dataset_list`` is empty.
        OSError: If the cache file cannot be created or written.
    """
    first_name = dataset_list[0].lower()
    for split in ('train', 'test', 'val'):
        if split in first_name:
            prefix = split
            break
    else:
        prefix = 'unrecognized'
    append_name = '{}_set_{}x{}.graph'.format(prefix, max_seq_len, max_seq_len)

    graph_path = os.path.join(graph_path, append_name)

    # A previously generated cache is reused as-is.
    if os.path.isfile(graph_path):
        return graph_path

    if os.path.isdir(graph_path):
        # A directory carrying the cache name: store the file inside it.
        graph_path = os.path.join(graph_path, append_name)
    else:
        # Create the target directory up front so a bad path fails before
        # the (potentially long) parsing step rather than after it.
        parent = os.path.dirname(graph_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    idx2graph = {}
    for filename in dataset_list:
        # Best effort per file: a failure is reported and the remaining
        # files are still processed.
        try:
            print('parsing dependency matrix:', filename)
            with open(filename, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
                lines = fin.readlines()
            # Stop at len(lines) - 1 so that lines[i + 1] always exists; a
            # trailing blank or incomplete record is skipped instead of
            # aborting the whole file with an IndexError.
            for i in tqdm.tqdm(range(0, len(lines) - 1, 3), postfix='Construct graph for {}'.format(filename)):
                text_left, _, text_right = [s.strip() for s in lines[i].partition("$T$")]
                aspect = lines[i + 1].strip()
                text = text_left + ' ' + aspect + ' ' + text_right
                idx2graph[text.lower()] = dependency_adj_matrix(text)
        except Exception as e:
            print(e)
            print('unprocessed:', filename)

    # Open the output only after parsing, so an interrupted run does not
    # leave an empty cache file that a later call would mistake for valid.
    with open(graph_path, 'wb') as fout:
        pickle.dump(idx2graph, fout)
    return graph_path

### Full Console Output (Required)

KeyError Traceback (most recent call last)
Cell In[36], line 3
1 config.num_epoch = 1
2 config.model = APCModelList.DLCF_GCN
----> 3 trainer = APCTrainer(
4 config=config,
5 dataset=dataset,
6 from_checkpoint="english",
7 # if you want to resume training from our pretrained checkpoints, you can pass the checkpoint name here
8 auto_device=DeviceTypeOption.AUTO,
9 path_to_save=None, # set a path to save checkpoints, if it is None, save checkpoints at 'checkpoints' folder
10 checkpoint_save_mode=ModelSaveOption.SAVE_MODEL_STATE_DICT,
11 load_aug=False,
12 # there are some augmentation dataset for integrated datasets, you use them by setting load_aug=True to improve performance
13 )

Cell In[35], line 46, in APCTrainer.init(self, config, dataset, from_checkpoint, checkpoint_save_mode, auto_device, path_to_save, load_aug)
41 self.config.task_code = TaskCodeOption.Aspect_Polarity_Classification
42 self.config.task_name = TaskNameOption().get(
43 TaskCodeOption.Aspect_Polarity_Classification
44 )
---> 46 self._run()

Cell In[14], line 205, in Trainer._run(self)
203 self.config.seed = s
204 if self.config.checkpoint_save_mode:
--> 205 model_path.append(self.training_instructor(self.config).run())
206 else:
207 # always return the last trained model if you don't save trained model
208 model = self.inference_model_class(
209 checkpoint=self.training_instructor(self.config).run()
210 )

Cell In[34], line 15, in APCTrainingInstructor.init(self, config)
12 def init(self, config):
13 super().init(config)
---> 15 self._load_dataset_and_prepare_dataloader()
17 self._init_misc()

Cell In[34], line 3, in APCTrainingInstructor._load_dataset_and_prepare_dataloader(self)
2 def _load_dataset_and_prepare_dataloader(self):
----> 3 self.model = APCEnsembler(self.config)
4 self.tokenizer = self.model.tokenizer
6 self.train_set = self.model.train_set

Cell In[32], line 118, in APCEnsembler.init(self, config, load_dataset, **kwargs)
110 exit(-1)
112 if (
113 load_dataset
114 and not os.path.exists(cache_path)
115 or self.config.overwrite_cache
116 ):
117 self.train_set = (
--> 118 ABSADataset(self.config, self.tokenizer, dataset_type="train")
119 if not self.train_set
120 else self.train_set
121 )
122 self.test_set = (
123 ABSADataset(self.config, self.tokenizer, dataset_type="test")
124 if not self.test_set
125 else self.test_set
126 )
127 self.valid_set = (
128 ABSADataset(self.config, self.tokenizer, dataset_type="valid")
129 if not self.valid_set
130 else self.valid_set
131 )

Cell In[27], line 164, in ABSADataset.init(self, config, tokenizer, dataset_type)
163 def init(self, config, tokenizer, dataset_type="train"):
--> 164 super().init(config=config, tokenizer=tokenizer, dataset_type=dataset_type)

Cell In[24], line 53, in PyABSADataset.init(self, config, tokenizer, dataset_type, **kwargs)
46 self.data = self.covert_to_tensor(self.data)
48 elif (
49 self.config.get("dataset_file")
50 and dataset_type in self.config.dataset_file
51 and self.config.dataset_file[dataset_type]
52 ):
---> 53 self.load_data_from_file(
54 self.config.dataset_file, dataset_type=dataset_type, **kwargs
55 )
56 self.data = self.covert_to_tensor(self.data)
57 self.data = self.data[
58 : self.config.get("data_num", None)
59 if self.config.get("data_num", None)
60 else None
61 ]

Cell In[27], line 143, in ABSADataset.load_data_from_file(self, file_path, **kwargs)
140 check_and_fix_labels(label_set, "polarity", all_data, self.config)
141 self.config.output_dim = len(label_set)
--> 143 all_data = build_sentiment_window(
144 all_data,
145 self.tokenizer,
146 self.config.similarity_threshold,
147 input_demands=self.config.inputs_cols,
148 )
149 for data in all_data:
150 cluster_ids = []

Cell In[18], line 45, in build_sentiment_window(examples, tokenizer, similarity_threshold, input_demands)
42 def build_sentiment_window(
43 examples, tokenizer, similarity_threshold, input_demands=None
44 ):
---> 45 copy_side_aspect("left", examples[0], examples[0], examples, input_demands)
46 for idx in range(1, len(examples)):
47 if is_similar(
48 examples[idx - 1]["text_indices"],
49 examples[idx]["text_indices"],
50 tokenizer=tokenizer,
51 similarity_threshold=similarity_threshold,
52 ):

Cell In[18], line 93, in copy_side_aspect(direct, target, source, examples, input_demands)
91 elif data_item.startswith("right_") or data_item.startswith("left_"):
92 continue
---> 93 target[direct + "_" + data_item] = source[data_item]
94 target[direct + "_dist"] = int(
95 abs(
96 np.average(list(source["aspect_position"]))
97 - np.average(list(target["aspect_position"]))
98 )
99 )

KeyError: 'dependency_graph'

### Describe the bug

训练时，凡是用到图卷积的模型都无法生成图结构，因此无法继续训练。

### Expected behavior

我希望能够训练运用到图卷积的一些模型，但是DLCF_DCA_BERT模型和PyABSA都无法生成相关图结构，PyABSA-2应该是可以生成的，但是我没有成功。在运用新代码时运行 `from pyabsa import AspectPolarityClassification as APC` 就无法往下进行，所以选择按需导入代码块，但是一直未能完整地跑通一个模型（估计是无法导入PyABSAVersion才出的错，手动导入无效。由于无法使用from pyabsa import --version--方式导入，并赋值PyABSAVersion=‘--version--’，所以只能手动赋值了，但代码运行第三行没有按您的一样显示版本，最后到这一步也就停下来了）。如果您能帮我在前两种模型下能生成图结构，或者在最新模型快速导入失败时训练模型的方法，我将非常感激您！非常感谢您！