### PyABSA Version (Required)

Python Version: 3.8.10
PyABSA Version: 2.3.3 (Requirement already satisfied: pyabsa in ./miniconda3/lib/python3.8/site-packages (2.3.3))
Torch Version: 2.0.0+cu118 (CUDA 11.8)
Transformers Version: 4.29.0
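These can be confirmed from a Python prompt; a minimal check, assuming each package exposes `__version__` (which recent PyABSA, Torch, and Transformers releases do):

```python
# Print the versions of the packages involved in this report.
import pyabsa
import torch
import transformers

print("PyABSA:", pyabsa.__version__)
print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)
print("CUDA available:", torch.cuda.is_available())
```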
### ABSADataset Version (Required if you use integrated datasets)
See the console output for ABSADataset Version
2023-09-22 18:02:43,259 INFO: Local dataset version: 2023.03.14
### Code To Reproduce (Required)
```python
# From PyABSA's dependency-graph preparation code. `dependency_adj_matrix`
# is defined elsewhere in the same module.
import os
import pickle

import tqdm


def prepare_dependency_graph(dataset_list, graph_path, max_seq_len):
    # Name the cached graph file after the dataset split and sequence length.
    if 'train' in dataset_list[0].lower():
        append_name = 'train_set_{}x{}.graph'.format(max_seq_len, max_seq_len)
    elif 'test' in dataset_list[0].lower():
        append_name = 'test_set_{}x{}.graph'.format(max_seq_len, max_seq_len)
    elif 'val' in dataset_list[0].lower():
        append_name = 'val_set_{}x{}.graph'.format(max_seq_len, max_seq_len)
    else:
        append_name = 'unrecognized_set_{}x{}.graph'.format(max_seq_len, max_seq_len)

    graph_path = os.path.join(graph_path, append_name)

    # Reuse a previously cached graph file if one exists.
    if os.path.isfile(graph_path):
        return graph_path

    idx2graph = {}
    if os.path.isdir(graph_path):
        fout = open(os.path.join(graph_path, append_name), 'wb')
        graph_path = os.path.join(graph_path, append_name)
    elif os.path.isfile(graph_path):
        return graph_path
    else:
        fout = open(graph_path, 'wb')

    # Each example spans three lines (sentence with a $T$ placeholder, aspect,
    # polarity); build a dependency adjacency matrix per sentence and cache
    # all of them in a single pickle file.
    for filename in dataset_list:
        try:
            print('parsing dependency matrix:', filename)
            fin = open(filename, 'r', encoding='utf-8', newline='\n', errors='ignore')
            lines = fin.readlines()
            fin.close()
            for i in tqdm.tqdm(range(0, len(lines), 3), postfix='Construct graph for {}'.format(filename)):
                text_left, _, text_right = [s.strip() for s in lines[i].partition("$T$")]
                aspect = lines[i + 1].strip()
                adj_matrix = dependency_adj_matrix(text_left + ' ' + aspect + ' ' + text_right)
                text = text_left + ' ' + aspect + ' ' + text_right
                idx2graph[text.lower()] = adj_matrix
        except Exception as e:
            print(e)
            print('unprocessed:', filename)
    pickle.dump(idx2graph, fout)
    fout.close()
    return graph_path
```
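For context, `dependency_adj_matrix` is not shown above. Below is a minimal, ASGCN-style sketch of what such a helper typically does, assuming spaCy with the `en_core_web_sm` model; the actual PyABSA implementation may differ in detail:

```python
# Sketch only (not the exact PyABSA code): build a symmetric adjacency matrix
# with self-loops from the dependency parse of a sentence.
import numpy as np
import spacy

nlp = spacy.load("en_core_web_sm")  # assumes this spaCy model is installed


def dependency_adj_matrix(text):
    document = nlp(text)
    seq_len = len(text.split())
    matrix = np.zeros((seq_len, seq_len), dtype="float32")
    for token in document:
        if token.i < seq_len:
            matrix[token.i][token.i] = 1  # self-loop
            for child in token.children:
                if child.i < seq_len:
                    matrix[token.i][child.i] = 1
                    matrix[child.i][token.i] = 1  # undirected edge
    return matrix
```

spaCy models of this kind are typically installed with `python -m spacy download en_core_web_sm`.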
### Full Console Output (Required)
```
KeyError Traceback (most recent call last)
Cell In[36], line 3
1 config.num_epoch = 1
2 config.model = APCModelList.DLCF_GCN
----> 3 trainer = APCTrainer(
4 config=config,
5 dataset=dataset,
6 from_checkpoint="english",
7 # if you want to resume training from our pretrained checkpoints, you can pass the checkpoint name here
8 auto_device=DeviceTypeOption.AUTO,
9 path_to_save=None, # set a path to save checkpoints, if it is None, save checkpoints at 'checkpoints' folder
10 checkpoint_save_mode=ModelSaveOption.SAVE_MODEL_STATE_DICT,
11 load_aug=False,
12 # there are some augmentation dataset for integrated datasets, you use them by setting load_aug=True to improve performance
13 )
Cell In[35], line 46, in APCTrainer.__init__(self, config, dataset, from_checkpoint, checkpoint_save_mode, auto_device, path_to_save, load_aug)
41 self.config.task_code = TaskCodeOption.Aspect_Polarity_Classification
42 self.config.task_name = TaskNameOption().get(
43 TaskCodeOption.Aspect_Polarity_Classification
44 )
---> 46 self._run()
Cell In[14], line 205, in Trainer._run(self)
203 self.config.seed = s
204 if self.config.checkpoint_save_mode:
--> 205 model_path.append(self.training_instructor(self.config).run())
206 else:
207 # always return the last trained model if you don't save trained model
208 model = self.inference_model_class(
209 checkpoint=self.training_instructor(self.config).run()
210 )
Cell In[34], line 15, in APCTrainingInstructor.__init__(self, config)
12 def __init__(self, config):
13 super().__init__(config)
---> 15 self._load_dataset_and_prepare_dataloader()
17 self._init_misc()
Cell In[34], line 3, in APCTrainingInstructor._load_dataset_and_prepare_dataloader(self)
2 def _load_dataset_and_prepare_dataloader(self):
----> 3 self.model = APCEnsembler(self.config)
4 self.tokenizer = self.model.tokenizer
6 self.train_set = self.model.train_set
Cell In[32], line 118, in APCEnsembler.__init__(self, config, load_dataset, **kwargs)
110 exit(-1)
112 if (
113 load_dataset
114 and not os.path.exists(cache_path)
115 or self.config.overwrite_cache
116 ):
117 self.train_set = (
--> 118 ABSADataset(self.config, self.tokenizer, dataset_type="train")
119 if not self.train_set
120 else self.train_set
121 )
122 self.test_set = (
123 ABSADataset(self.config, self.tokenizer, dataset_type="test")
124 if not self.test_set
125 else self.test_set
126 )
127 self.valid_set = (
128 ABSADataset(self.config, self.tokenizer, dataset_type="valid")
129 if not self.valid_set
130 else self.valid_set
131 )
Cell In[27], line 164, in ABSADataset.__init__(self, config, tokenizer, dataset_type)
163 def __init__(self, config, tokenizer, dataset_type="train"):
--> 164 super().__init__(config=config, tokenizer=tokenizer, dataset_type=dataset_type)
Cell In[24], line 53, in PyABSADataset.__init__(self, config, tokenizer, dataset_type, **kwargs)
46 self.data = self.covert_to_tensor(self.data)
48 elif (
49 self.config.get("dataset_file")
50 and dataset_type in self.config.dataset_file
51 and self.config.dataset_file[dataset_type]
52 ):
---> 53 self.load_data_from_file(
54 self.config.dataset_file, dataset_type=dataset_type, **kwargs
55 )
56 self.data = self.covert_to_tensor(self.data)
57 self.data = self.data[
58 : self.config.get("data_num", None)
59 if self.config.get("data_num", None)
60 else None
61 ]
Cell In[27], line 143, in ABSADataset.load_data_from_file(self, file_path, **kwargs)
140 check_and_fix_labels(label_set, "polarity", all_data, self.config)
141 self.config.output_dim = len(label_set)
--> 143 all_data = build_sentiment_window(
144 all_data,
145 self.tokenizer,
146 self.config.similarity_threshold,
147 input_demands=self.config.inputs_cols,
148 )
149 for data in all_data:
150 cluster_ids = []
Cell In[18], line 45, in build_sentiment_window(examples, tokenizer, similarity_threshold, input_demands)
42 def build_sentiment_window(
43 examples, tokenizer, similarity_threshold, input_demands=None
44 ):
---> 45 copy_side_aspect("left", examples[0], examples[0], examples, input_demands)
46 for idx in range(1, len(examples)):
47 if is_similar(
48 examples[idx - 1]["text_indices"],
49 examples[idx]["text_indices"],
50 tokenizer=tokenizer,
51 similarity_threshold=similarity_threshold,
52 ):
Cell In[18], line 93, in copy_side_aspect(direct, target, source, examples, input_demands)
91 elif data_item.startswith("right_") or data_item.startswith("left_"):
92 continue
---> 93 target[direct + "_" + data_item] = source[data_item]
94 target[direct + "_dist"] = int(
95 abs(
96 np.average(list(source["aspect_position"]))
97 - np.average(list(target["aspect_position"]))
98 )
99 )
KeyError: 'dependency_graph'
```
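The last frame copies every requested input column from one example to its neighbour, so any column listed in the input demands but absent from the example dicts fails there. A stripped-down illustration, with invented dict contents, of how a missing `dependency_graph` column produces exactly this KeyError:

```python
# Hypothetical, simplified view of the failing assignment in copy_side_aspect.
# A GCN-style model requests "dependency_graph" as an input column, but the
# example dicts built earlier never received that key, so indexing fails.
input_demands = ["text_indices", "dependency_graph"]  # illustrative only
source = {"text_indices": [1, 2, 3]}                   # no "dependency_graph" key
target = {}

for data_item in input_demands:
    target["left_" + data_item] = source[data_item]    # KeyError: 'dependency_graph'
```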
### Describe the bug

None of the models that rely on graph convolution can generate the graph structure during training, so training cannot proceed.

I would like to train some of the models that use graph convolution, but neither the DLCF_DCA_BERT model nor PyABSA can generate the required graph structures. PyABSA-2 should be able to generate them, but I have not managed to do so. With the new code, running `from pyabsa import AspectPolarityClassification as APC` gets stuck, so I fell back to importing the code blocks on demand, but I have never managed to run a model end to end. (I suspect the error comes from failing to import PyABSAVersion; importing it manually did not help. Since I cannot use `from pyabsa import __version__` and assign `PyABSAVersion = '__version__'`, I had to set it by hand, but the third line of the code did not print the version as in your example, and it stops at this step.) I would be very grateful if you could help me generate the graph structures with the first two models, or suggest a way to train a model when the quick import of the latest version fails. Thank you very much!
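For reference, here is a minimal, self-contained sketch of the training run I am attempting; the `APCConfigManager.get_apc_config_english()` call and the `Laptop14` dataset name follow the PyABSA 2.x examples and are assumptions that may need adjusting for my setup:

```python
# Minimal sketch of the intended DLCF_GCN training run. The config manager and
# dataset name are taken from PyABSA 2.x examples and are assumptions here.
from pyabsa import AspectPolarityClassification as APC
from pyabsa import DeviceTypeOption, ModelSaveOption

config = APC.APCConfigManager.get_apc_config_english()
config.model = APC.APCModelList.DLCF_GCN  # a graph-convolution-based APC model
config.num_epoch = 1

trainer = APC.APCTrainer(
    config=config,
    dataset=APC.APCDatasetList.Laptop14,  # integrated dataset (assumption)
    from_checkpoint="english",
    auto_device=DeviceTypeOption.AUTO,
    path_to_save=None,
    checkpoint_save_mode=ModelSaveOption.SAVE_MODEL_STATE_DICT,
    load_aug=False,
)
```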