Commit e33206e: margin based training / evaluation

Parent: 75101fd
15 files changed, +258 / -209 lines

scripts/train.py (+26 / -27)
```diff
@@ -1,15 +1,15 @@
 import os
 import pickle
 
-from src.Name.neural.train import TrainCfg, Trainer, acc, Logger, ModelCfg, macro_binary_stats
-from src.Name.neural.batching import filter_data, Sampler, Collator
-from src.Name.neural.utils.schedules import make_schedule
+from src.Name.nn.training import TrainCfg, Trainer, Logger, ModelCfg
+from src.Name.nn.batching import filter_data, Sampler, Collator
+from src.Name.nn.utils.schedules import make_schedule
 
 from torch import device
 from torch.optim import AdamW
 from torch.optim.lr_scheduler import LambdaLR
 
-from random import seed
+from random import seed, shuffle
 
 import sys
 
@@ -31,11 +31,11 @@ def train(config: TrainCfg, data_path: str, cast_to: str):
     train_files = [file for file in files if file.file.name in config['train_files']]
     dev_files = [file for file in files if file.file.name in config['dev_files']]
     print(f'Training on {len(train_files)} files with {sum(len(file.hole_asts) for file in train_files)} holes.')
-    print(f'Evaluating on {len(dev_files)} files with {sum(len(file.hole_asts) for file in train_files)} holes.')
+    print(f'Evaluating on {len(dev_files)} files with {sum(len(file.hole_asts) for file in dev_files)} holes.')
 
     train_sampler = Sampler(train_files)
     epoch_size = train_sampler.itersize(config['batch_size_s'] * config['backprop_every'], config['batch_size_h'])
-    collator = Collator(pad_value=-1, device=cast_to, allow_self_loops=False)
+    collator = Collator(pad_value=-1, device=cast_to, allow_self_loops=config['allow_self_loops'])
 
     model = Trainer(config['model_config']).to(device(cast_to))
     optimizer = AdamW(params=model.parameters(), lr=1, weight_decay=1e-02)
@@ -46,7 +46,7 @@ def train(config: TrainCfg, data_path: str, cast_to: str):
                              total_steps=config['num_epochs'] * epoch_size)
     scheduler = LambdaLR(optimizer=optimizer, lr_lambda=schedule, last_epoch=-1)
 
-    best_loss = 1e10
+    best_ap = -1e08
 
     for epoch in range(config['num_epochs']):
         print(f'Epoch {epoch}')
@@ -58,31 +58,29 @@ def train(config: TrainCfg, data_path: str, cast_to: str):
             optimizer=optimizer,
             scheduler=scheduler,
             backprop_every=config['backprop_every'])
-        print(f'Train loss: {sum(train_epoch["loss"])/len(train_epoch["predictions"])}')
-        print(f'Train stats: {macro_binary_stats(train_epoch["predictions"], train_epoch["truths"])}')
+        print(f'Train loss: {sum(train_epoch.loss)/len(train_epoch.loss)}')
+        print(f'Train mAP: {sum(train_epoch.ap)/len(train_epoch.ap)}')
+        print(f'Train R-Precision: {sum(train_epoch.rp) / len(train_epoch.rp)}')
         dev_epoch = model.eval_epoch(map(lambda x: collator([x]), dev_files))
-        print(f'Dev loss: {sum(dev_epoch["loss"])/len(dev_epoch["predictions"])}')
-        print(f'Dev stats: {macro_binary_stats(dev_epoch["predictions"], dev_epoch["truths"])}')
-        print()
-
-        # if sum(dev_epoch['loss']) < best_loss:
-        #     print('Saving...')
-        #     model.save(f'./model.pt')
-        #     best_loss = sum(dev_epoch['loss'])
-        # print('=' * 64 + '\n')
+        print(f'Dev loss: {sum(dev_epoch.loss)/len(dev_epoch.loss)}')
+        print(f'Dev mAP: {sum(dev_epoch.ap) / len(dev_epoch.ap)}')
+        print(f'Dev R-Precision: {sum(dev_epoch.rp) / len(dev_epoch.rp)}')
+        if sum(dev_epoch.ap) > best_ap:
+            print('Saving...')
+            model.save(f'./model.pt')
+            best_ap = sum(dev_epoch.ap)
+        print('=' * 64 + '\n')
 
 
 if __name__ == '__main__':
     seed(42)
-    # todo.
+
     files = [os.path.splitext(file)[0] for file in os.listdir('../data/stdlib/')]
-    # stdlib = [line for line in open('./data/stdlib.contents').read().split('\n')]
-    # unimath = [line for line in open('./data/um.contents').read().split('\n')]
-    # typetopo = [line for line in open('./data/tt.contents').read().split('\n')]
-    # shuffle(stdlib)
+    shuffle(files)
+    train_files, dev_files = files[:(int(0.75 * len(files)))], files[int(0.75 * len(files)):]
 
     model_config: ModelCfg = {
-        'depth': 8,
+        'depth': 6,
         'num_heads': 8,
         'dim': 128,
         'atn_dim': None,
@@ -94,16 +92,17 @@ def train(config: TrainCfg, data_path: str, cast_to: str):
         'num_epochs': 99,
         'warmup_epochs': 3,
         'warmdown_epochs': 90,
-        'batch_size_s': 1,
+        'batch_size_s': 2,
         'batch_size_h': 8,
         'max_lr': 5e-4,
         'min_lr': 1e-7,
         'backprop_every': 1,
-        'train_files': [f for f in files if f != 'Simple'],
-        'dev_files': [],
+        'train_files': train_files,
+        'dev_files': dev_files,
         'test_files': [],
         'max_scope_size': 300,
         'max_ast_len': 100,
+        'allow_self_loops': False
     }
 
     train(train_cfg, '../data/tokenized.p', 'cuda')
```
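The checkpoint criterion flips from lowest dev loss to highest summed dev AP; since the dev set is fixed within a run, comparing sums of per-hole average precisions is equivalent to comparing their means. For orientation, a minimal sketch of how the two ranking metrics logged above are conventionally computed for a single hole (function names here are illustrative; the commit's own implementations presumably live alongside the new `Trainer` in `src/Name/nn/training.py`, which this diff does not show):

```python
def average_precision(scores: list[float], relevant: list[bool]) -> float:
    # Rank scope entries by predicted score, best first, then average the
    # precision@k values at the ranks where a true premise appears.
    ranked = sorted(zip(scores, relevant), key=lambda p: p[0], reverse=True)
    hits, total = 0, 0.0
    for rank, (_, is_premise) in enumerate(ranked, start=1):
        if is_premise:
            hits += 1
            total += hits / rank
    return total / max(hits, 1)


def r_precision(scores: list[float], relevant: list[bool]) -> float:
    # Precision within the top-R ranks, where R is the number of true premises.
    r = sum(relevant)
    ranked = sorted(zip(scores, relevant), key=lambda p: p[0], reverse=True)
    return sum(is_premise for _, is_premise in ranked[:r]) / max(r, 1)
```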

src/Name/data/agda/syntax.py (+2 / -4)
```diff
@@ -22,10 +22,8 @@ class File(_AgdaExpr[Name]):
     scope: list[ScopeEntry[Name]]
 
     def __post_init__(self):
-        if not self.valid_reference_structure():
-            raise AssertionError('Invalid reference structure')
-        if not self.unique_entry_names():
-            raise AssertionError('Duplicate entry names.')
+        assert self.valid_reference_structure(), 'Invalid reference structure.'
+        assert self.unique_entry_names(), 'Duplicate entry names.'
 
     def valid_reference_structure(self) -> bool:
         names = [entry.name for entry in self.scope]
```
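One behavioral note on this refactor, general Python semantics rather than anything repo-specific: `assert` statements are compiled away when the interpreter runs with optimizations enabled, so the shorter form silently skips validation under `python -O`, whereas the explicit `raise AssertionError` it replaces always fired:

```python
import subprocess, sys

code = "assert False, 'unreachable under -O'"
print(subprocess.run([sys.executable, "-c", code]).returncode)        # 1: AssertionError raised
print(subprocess.run([sys.executable, "-O", "-c", code]).returncode)  # 0: assert stripped out
```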

src/Name/inference.py (+4 / -4)
```diff
@@ -4,9 +4,9 @@
 
 from .data.agda.reader import File
 from .data.tokenization import tokenize_file
-from .neural.model import Model, ModelCfg
-from .neural.batching import Collator
-from .neural.train import Logger
+from .nn.model import Model, ModelCfg
+from .nn.batching import Collator
+from .nn.train import Logger
 
 from torch_geometric.utils import to_dense_batch
 
@@ -25,7 +25,7 @@ def select_premises(self, file: File[str], threshold: float = 0.5) -> list[set[str]]:
         with torch.no_grad():
             batch = self.collator([tokenized])
             scope_reprs, hole_reprs = self.encode(batch)
-            lemma_predictions = self.predict_lemmas(scope_reprs, hole_reprs, batch.edge_index)
+            lemma_predictions = self.match(scope_reprs, hole_reprs, batch.edge_index)
             sparse = to_dense_batch(lemma_predictions, batch.edge_index[1], fill_value=-1e8)
             # pdb.set_trace()
             # # todo
```
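For context, `to_dense_batch` (real torch_geometric API) scatters the flat per-edge scores into one row per hole, padding with `fill_value=-1e8` so padded slots can never win a threshold comparison. A small self-contained illustration; the tensors and the final thresholding step are invented for the example, not taken from the repo:

```python
import torch
from torch_geometric.utils import to_dense_batch

scores = torch.tensor([0.9, -2.0, 0.1, 1.2, -0.3])  # 3 edges for hole 0, 2 for hole 1
holes = torch.tensor([0, 0, 0, 1, 1])               # edge_index[1]: target hole of each edge

dense, mask = to_dense_batch(scores, holes, fill_value=-1e8)
# dense: [2, 3] score matrix, short rows padded with -1e8
# mask:  [2, 3] booleans, True at real (non-padded) entries

selected = dense.sigmoid() > 0.5  # padding sigmoids to ~0, so it is never selected
```

Note that this relies on `edge_index[1]` being non-decreasing, which the `Collator` guarantees by appending edges hole by hole (see the batching diff below).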

src/Name/neural/train.py (-151)

This file was deleted.

src/Name/neural/batching.py → src/Name/nn/batching.py (+7 / -5)
```diff
@@ -95,10 +95,10 @@ def __call__(self, files: list[TokenizedFile]) -> Batch:
         src_index, tgt_index, premise_selection = [], [], []
         for batch_id, file in enumerate(files):
             src_offset = sum(scope_lens[:batch_id])
-            hole_offset = sum(num_holes[:batch_id])
+            tgt_offset = sum(num_holes[:batch_id])
             for hole_idx, defined_at in enumerate(file.hole_to_scope):
                 src_index += list(range(src_offset, src_offset + defined_at))
-                tgt_index += [hole_offset + hole_idx] * defined_at
+                tgt_index += [tgt_offset + hole_idx] * defined_at
                 premise_selection += [entry in file.premises[hole_idx] for entry in range(defined_at)]
         edge_index = torch.stack((self.tensor(src_index), self.tensor(tgt_index)))
         premises = self.tensor(premise_selection)
@@ -144,10 +144,12 @@ def filter_data(files: list[TokenizedFile],
                 max_ast_len: int) -> Iterator[TokenizedFile]:
 
     for file in files:
-        if (len(file.hole_asts)
-                and len(file.scope_asts) <= max_scope_size
+        if (
+            len(file.hole_asts)
+            and 1 <= len(file.scope_asts) <= max_scope_size
             and max(len(ast) for ast in file.hole_asts) <= max_ast_len
-                and max(len(ast) for ast in file.scope_asts) <= max_ast_len):
+            and max(len(ast) for ast in file.scope_asts) <= max_ast_len
+        ):
             yield file
```
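The rename from `hole_offset` to `tgt_offset` makes the bipartite bookkeeping read uniformly: `src_index` holds global scope-entry positions, `tgt_index` global hole positions. A worked example with invented sizes, traced directly from the loop above:

```python
# Batch of two files:
#   file 0: scope_lens[0] = 3, num_holes[0] = 2; hole 0 sees 2 entries, hole 1 sees 3
#   file 1: scope_lens[1] = 2, num_holes[1] = 1; hole 0 sees 2 entries
#
# file 0 (src_offset=0, tgt_offset=0):
#   hole 0: src_index += [0, 1]     tgt_index += [0, 0]
#   hole 1: src_index += [0, 1, 2]  tgt_index += [1, 1, 1]
# file 1 (src_offset=3, tgt_offset=2):
#   hole 0: src_index += [3, 4]     tgt_index += [2, 2]
#
# edge_index = [[0, 1, 0, 1, 2, 3, 4],
#               [0, 0, 1, 1, 1, 2, 2]]
```

Because holes are appended in order, the second row is non-decreasing, which is exactly what the `to_dense_batch` call in `src/Name/inference.py` requires.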
2 files renamed without changes.

src/Name/neural/model.py → src/Name/nn/model.py (+1 / -1)
```diff
@@ -35,7 +35,7 @@ def encode(self, batch: Batch) -> tuple[Tensor, Tensor]:
             scope_positions=batch.scope_positions,
             hole_positions=batch.hole_positions)
 
-    def predict_lemmas(self, scope_reprs: Tensor, hole_reprs: Tensor, edge_index: Tensor) -> Tensor:
+    def match(self, scope_reprs: Tensor, hole_reprs: Tensor, edge_index: Tensor) -> Tensor:
         source_index, target_index = edge_index
         sources = scope_reprs[source_index]
         targets = hole_reprs[target_index]
```
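The diff cuts off after the gather, so everything past these three lines is unknown; still, the rename to `match` together with the commit message suggests a pairwise scoring head trained with a margin objective. A speculative sketch of that shape, where the inner-product score and the hinge loss are assumptions, not the repo's actual code:

```python
import torch
from torch import Tensor

def match(scope_reprs: Tensor, hole_reprs: Tensor, edge_index: Tensor) -> Tensor:
    source_index, target_index = edge_index
    sources = scope_reprs[source_index]     # [num_edges, dim]
    targets = hole_reprs[target_index]      # [num_edges, dim]
    return (sources * targets).sum(dim=-1)  # one raw compatibility score per edge

def margin_loss(scores: Tensor, is_premise: Tensor, margin: float = 1.0) -> Tensor:
    # Hinge-style ranking objective: every true premise should outscore every
    # non-premise by at least `margin`. All pairs within the batch, for brevity.
    pos, neg = scores[is_premise], scores[~is_premise]
    return torch.relu(margin - pos[:, None] + neg[None, :]).mean()
```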
