From 86fa0acf57074c320b44ee5c6284f2050b0aaece Mon Sep 17 00:00:00 2001
From: Kaiyu Yang <kaiyuy@caltech.edu>
Date: Sat, 7 Oct 2023 18:42:53 -0700
Subject: [PATCH] add instructions for MiniF2F and ProofNet

---
 README.md                                     |  3 ++-
 common.py                                     |  8 +++++--
 docs/eval_MiniF2F_ProofNet.md                 | 22 +++++++++++++++++++
 retrieval/confs/cli_lean3_random.yaml         |  6 ++---
 retrieval/confs/cli_lean4_novel_premises.yaml |  2 +-
 retrieval/datamodule.py                       | 21 +++++++-----------
 6 files changed, 42 insertions(+), 20 deletions(-)
 create mode 100644 docs/eval_MiniF2F_ProofNet.md

diff --git a/README.md b/README.md
index 7702faf..2e2bcdf 100644
--- a/README.md
+++ b/README.md
@@ -307,7 +307,7 @@ python generator/main.py fit --config generator/confs/cli_lean4_random.yaml --mo
 python generator/main.py fit --config generator/confs/cli_lean4_novel_premises.yaml --model.ret_ckpt_path PATH_TO_RETRIEVER_CHECKPOINT --data.preds_path PATH_TO_PREDICTIONS_PICKLE
 ```
 
-### Evaluation on Theorem Proving
+### Theorem Proving Evaluation on LeanDojo Benchmark (Lean 3 and Lean 4)
 
 After the tactic generator is trained, we combine it with best-first search to prove theorems by interacting with Lean.
 
@@ -331,6 +331,7 @@ python prover/evaluate.py --data-path data/leandojo_benchmark/random/  --ckpt_pa
 python prover/evaluate.py --data-path data/leandojo_benchmark/novel_premises/  --ckpt_path PATH_TO_REPROVER_CHECKPOINT --indexed-corpus-path PATH_TO_INDEXED_CORPUS --split test --num-cpus 8 --with-gpus
 ```
 
+See [here](docs/eval_MiniF2F_ProofNet.md) if you want to evaluate on other Lean repos such as [miniF2F](https://github.com/facebookresearch/miniF2F) and [ProofNet](https://github.com/zhangir-azerbayev/ProofNet).
 
 ## Questions and Bugs
 
diff --git a/common.py b/common.py
index 45178d4..39f5369 100644
--- a/common.py
+++ b/common.py
@@ -156,10 +156,12 @@ def from_data(cls, file_data: Dict[str, Any]) -> "File":
         path = file_data["path"]
         premises = []
         for p in file_data["premises"]:
-            if "user__.n" in p["full_name"] or p["code"] == "":
+            full_name = p["full_name"]
+            if full_name is None:
+                continue
+            if "user__.n" in full_name or p["code"] == "":
                 # Ignore ill-formed premises (often due to errors in ASTs).
                 continue
-            full_name = p["full_name"]
             if full_name.startswith("[") and full_name.endswith("]"):
                 # Ignore mutual definitions.
                 continue
@@ -346,6 +348,8 @@ def get_all_pos_premises(annot_tac, corpus: Corpus) -> List[Premise]:
         p = corpus.locate_premise(def_path, Pos(*prov["def_pos"]))
         if p is not None:
             all_pos_premises.add(p)
+        else:
+            logger.warning(f"Cannot locate premise: {prov}")
 
     return list(all_pos_premises)
 
diff --git a/docs/eval_MiniF2F_ProofNet.md b/docs/eval_MiniF2F_ProofNet.md
new file mode 100644
index 0000000..4812cf3
--- /dev/null
+++ b/docs/eval_MiniF2F_ProofNet.md
@@ -0,0 +1,22 @@
+Evaluation on MiniF2F and ProofNet
+----------------------------------
+
+Here we evaluate models trained on LeanDojo Benchmark on MiniF2F or ProofNet. We use MiniF2F as an example, but the same procedure applies to ProofNet.
+
+First, use LeanDojo to extract data from MiniF2F. See the end of [this Jupyter notebook](https://github.com/lean-dojo/LeanDojo/blob/main/scripts/generate-benchmark-lean3.ipynb). Save the dataset to `data/leandojo_minif2f`.
+
+
+For models without retrieval, run:
+```bash
+python prover/evaluate.py --data-path data/leandojo_minif2f/default/  --ckpt_path PATH_TO_MODEL_CHECKPOINT --split test --num-cpus 8 --with-gpus
+```
+
+For models with retrieval, first use the retriever to index the corpus (pre-computing the embeddings of all premises):
+```bash
+python retrieval/index.py --ckpt_path PATH_TO_RETRIEVER_CHECKPOINT --corpus-path data/leandojo_minif2f/corpus.jsonl --output-path PATH_TO_INDEXED_CORPUS
+```
+
+Then, run:
+```bash
+python prover/evaluate.py --data-path data/leandojo_minif2f/default/  --ckpt_path PATH_TO_REPROVER_CHECKPOINT --indexed-corpus-path PATH_TO_INDEXED_CORPUS --split test --num-cpus 8 --with-gpus
+```
diff --git a/retrieval/confs/cli_lean3_random.yaml b/retrieval/confs/cli_lean3_random.yaml
index 09380c9..7038eb7 100644
--- a/retrieval/confs/cli_lean3_random.yaml
+++ b/retrieval/confs/cli_lean3_random.yaml
@@ -25,13 +25,13 @@ trainer:
     - class_path: pytorch_lightning.callbacks.EarlyStopping
       init_args:
         monitor: Recall@10_val
-        patience: 3
+        patience: 5
         mode: max
         verbose: true
 
 model:
   model_name: google/byt5-small
-  lr: 1e-4
+  lr: 2e-4
   warmup_steps: 2000
   num_retrieved: 100
 
@@ -40,7 +40,7 @@ data:
   corpus_path: data/leandojo_benchmark/corpus.jsonl
   num_negatives: 3
   num_in_file_negatives: 1
-  batch_size: 8
+  batch_size: 12
   eval_batch_size: 128
   max_seq_len: 1024
   num_workers: 4
diff --git a/retrieval/confs/cli_lean4_novel_premises.yaml b/retrieval/confs/cli_lean4_novel_premises.yaml
index 9f71732..6971efb 100644
--- a/retrieval/confs/cli_lean4_novel_premises.yaml
+++ b/retrieval/confs/cli_lean4_novel_premises.yaml
@@ -25,7 +25,7 @@ trainer:
     - class_path: pytorch_lightning.callbacks.EarlyStopping
       init_args:
         monitor: Recall@10_val
-        patience: 3
+        patience: 5
         mode: max
         verbose: true
 
diff --git a/retrieval/datamodule.py b/retrieval/datamodule.py
index 5ba0e56..5c916c1 100644
--- a/retrieval/datamodule.py
+++ b/retrieval/datamodule.py
@@ -102,19 +102,14 @@ def __getitem__(self, idx: int) -> Example:
         premises_in_file = []
         premises_outside_file = []
 
-        try:
-            for p in self.corpus.get_premises(ex["context"].path):
-                if p == ex["pos_premise"]:
-                    continue
-                if p.end < ex["context"].theorem_pos:
-                    if ex["pos_premise"].path == ex["context"].path:
-                        premises_in_file.append(p)
-                    else:
-                        premises_outside_file.append(p)
-        except Exception:
-            import pdb
-
-            pdb.set_trace()
+        for p in self.corpus.get_premises(ex["context"].path):
+            if p == ex["pos_premise"]:
+                continue
+            if p.end < ex["context"].theorem_pos:
+                if ex["pos_premise"].path == ex["context"].path:
+                    premises_in_file.append(p)
+                else:
+                    premises_outside_file.append(p)
 
         for p in self.corpus.transitive_dep_graph.successors(ex["context"].path):
             if p == ex["pos_premise"].path: