add instructions for MiniF2F and ProofNet

lean-dojo · Oct 8, 2023 · 86fa0ac · 86fa0ac
1 parent 10e245f
commit 86fa0ac
Show file tree

Hide file tree

Showing 6 changed files with 42 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -307,7 +307,7 @@ python generator/main.py fit --config generator/confs/cli_lean4_random.yaml --mo
 python generator/main.py fit --config generator/confs/cli_lean4_novel_premises.yaml --model.ret_ckpt_path PATH_TO_RETRIEVER_CHECKPOINT --data.preds_path PATH_TO_PREDICTIONS_PICKLE
 ```
 
-### Evaluation on Theorem Proving
+### Theorem Proving Evaluation on LeanDojo Benchmark (Lean 3 and Lean 4)
 
 After the tactic generator is trained, we combine it with best-first search to prove theorems by interacting with Lean.
 
@@ -331,6 +331,7 @@ python prover/evaluate.py --data-path data/leandojo_benchmark/random/  --ckpt_pa
 python prover/evaluate.py --data-path data/leandojo_benchmark/novel_premises/  --ckpt_path PATH_TO_REPROVER_CHECKPOINT --indexed-corpus-path PATH_TO_INDEXED_CORPUS --split test --num-cpus 8 --with-gpus
 ```
 
+See [here](docs/eval_MiniF2F_ProofNet.md) if you want to evaluate on other Lean repos such as [miniF2F](https://github.com/facebookresearch/miniF2F) and [ProofNet](https://github.com/zhangir-azerbayev/ProofNet).
 
 ## Questions and Bugs
 

diff --git a/common.py b/common.py
@@ -156,10 +156,12 @@ def from_data(cls, file_data: Dict[str, Any]) -> "File":
         path = file_data["path"]
         premises = []
         for p in file_data["premises"]:
-            if "user__.n" in p["full_name"] or p["code"] == "":
+            full_name = p["full_name"]
+            if full_name is None:
+                continue
+            if "user__.n" in full_name or p["code"] == "":
                 # Ignore ill-formed premises (often due to errors in ASTs).
                 continue
-            full_name = p["full_name"]
             if full_name.startswith("[") and full_name.endswith("]"):
                 # Ignore mutual definitions.
                 continue
@@ -346,6 +348,8 @@ def get_all_pos_premises(annot_tac, corpus: Corpus) -> List[Premise]:
         p = corpus.locate_premise(def_path, Pos(*prov["def_pos"]))
         if p is not None:
             all_pos_premises.add(p)
+        else:
+            logger.warning(f"Cannot locate premise: {prov}")
 
     return list(all_pos_premises)
 

diff --git a/docs/eval_MiniF2F_ProofNet.md b/docs/eval_MiniF2F_ProofNet.md
@@ -0,0 +1,22 @@
+Evaluation on MiniF2F and ProofNet
+----------------------------------
+
+Here we evaluate models trained on LeanDojo Benchmark on MiniF2F or ProofNet. We use MiniF2F as an example, but the same procedure applies to ProofNet.
+
+First, use LeanDojo to extract data from MiniF2F. See the end of [this Jupyter notebook](https://github.com/lean-dojo/LeanDojo/blob/main/scripts/generate-benchmark-lean3.ipynb). Save the dataset to `data/leandojo_minif2f`.
+
+
+For models without retrieval, run:
+```bash
+python prover/evaluate.py --data-path data/leandojo_minif2f/default/  --ckpt_path PATH_TO_MODEL_CHECKPOINT --split test --num-cpus 8 --with-gpus
+```
+
+For models with retrieval, first use the retriever to index the corpus (pre-computing the embeddings of all premises):
+```bash
+python retrieval/index.py --ckpt_path PATH_TO_RETRIEVER_CHECKPOINT --corpus-path data/leandojo_minif2f/corpus.jsonl --output-path PATH_TO_INDEXED_CORPUS
+```
+
+Then, run:
+```bash
+python prover/evaluate.py --data-path data/leandojo_minif2f/default/  --ckpt_path PATH_TO_REPROVER_CHECKPOINT --indexed-corpus-path PATH_TO_INDEXED_CORPUS --split test --num-cpus 8 --with-gpus
+```
diff --git a/retrieval/confs/cli_lean3_random.yaml b/retrieval/confs/cli_lean3_random.yaml
@@ -25,13 +25,13 @@ trainer:
     - class_path: pytorch_lightning.callbacks.EarlyStopping
       init_args:
         monitor: Recall@10_val
-        patience: 3
+        patience: 5
         mode: max
         verbose: true
 
 model:
   model_name: google/byt5-small
-  lr: 1e-4
+  lr: 2e-4
   warmup_steps: 2000
   num_retrieved: 100
 
@@ -40,7 +40,7 @@ data:
   corpus_path: data/leandojo_benchmark/corpus.jsonl
   num_negatives: 3
   num_in_file_negatives: 1
-  batch_size: 8
+  batch_size: 12
   eval_batch_size: 128
   max_seq_len: 1024
   num_workers: 4
diff --git a/retrieval/confs/cli_lean4_novel_premises.yaml b/retrieval/confs/cli_lean4_novel_premises.yaml
@@ -25,7 +25,7 @@ trainer:
     - class_path: pytorch_lightning.callbacks.EarlyStopping
       init_args:
         monitor: Recall@10_val
-        patience: 3
+        patience: 5
         mode: max
         verbose: true
 

diff --git a/retrieval/datamodule.py b/retrieval/datamodule.py
@@ -102,19 +102,14 @@ def __getitem__(self, idx: int) -> Example:
         premises_in_file = []
         premises_outside_file = []
 
-        try:
-            for p in self.corpus.get_premises(ex["context"].path):
-                if p == ex["pos_premise"]:
-                    continue
-                if p.end < ex["context"].theorem_pos:
-                    if ex["pos_premise"].path == ex["context"].path:
-                        premises_in_file.append(p)
-                    else:
-                        premises_outside_file.append(p)
-        except Exception:
-            import pdb
-
-            pdb.set_trace()
+        for p in self.corpus.get_premises(ex["context"].path):
+            if p == ex["pos_premise"]:
+                continue
+            if p.end < ex["context"].theorem_pos:
+                if ex["pos_premise"].path == ex["context"].path:
+                    premises_in_file.append(p)
+                else:
+                    premises_outside_file.append(p)
 
         for p in self.corpus.transitive_dep_graph.successors(ex["context"].path):
             if p == ex["pos_premise"].path: