Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f7bdfa6
Engine: allow skipping trees without results
shilangyu Nov 9, 2025
11dfd25
Engine: extended semantics of the PikeTree
shilangyu Nov 13, 2025
2e90702
Engine: extended semantics of PikeVM
shilangyu Nov 14, 2025
0d89cb6
Engine: functional semantics of the extended PikeVM
shilangyu Nov 14, 2025
12f1b10
Engine: adjust PikeTree and PikeVM dependants
shilangyu Nov 14, 2025
f1ea675
Engine: state the final pike_vm_correct_lazyprefix theorem
shilangyu Nov 16, 2025
e246f9a
Engine: abstract lit when its irrelevant
shilangyu Nov 20, 2025
4d6c987
Engine: define initial lazyprefix state of PikeTree without compute_tr
shilangyu Nov 21, 2025
c62da7b
Engine: remove lit from places that don't need it
shilangyu Nov 21, 2025
69814b3
Engine: rename things
shilangyu Nov 21, 2025
77e6cfa
Engine: prove equivalence between generate/filter/nextchar steps
shilangyu Nov 22, 2025
065827a
Engine: add acc to PikeTree, prove nextt_nextprefix invariant initial…
shilangyu Nov 22, 2025
fbccc31
Engine: complete the lazy star pikevm proof!
shilangyu Nov 23, 2025
6a610ad
Remove trailing spaces
shilangyu Nov 24, 2025
2ef7a6c
Engine: add admitted complexity proof
shilangyu Nov 23, 2025
0fecc6b
Engine: lazy_tree notation for an unfolded prefix tree
Aurele-Barriere Dec 2, 2025
443a375
Engine: allow the PikeTree to erase nextt
Aurele-Barriere Dec 2, 2025
87e8389
Engine: change fuel formula for the PikeVM with acceleration. There i…
Aurele-Barriere Dec 3, 2025
576509b
Engine: prove complexity of the new PikeVM with acceleration
Aurele-Barriere Dec 4, 2025
1a42172
chore: fix rebase issues
shilangyu Dec 5, 2025
a093510
Engine: remove non-prefix acc functional
shilangyu Dec 5, 2025
3a57998
Engine: remove old fixme and unused import
shilangyu Dec 5, 2025
72ae4a7
Engine: remove input from may_erase
shilangyu Dec 6, 2025
3b3e79f
Engine: fix pts_nextchar_generate rule with regards to may_erase
shilangyu Dec 11, 2025
b0272ee
Engine: complete invariant initialization proof with may_erase
shilangyu Dec 12, 2025
e619f66
Engine: renames and TODOs
shilangyu Dec 17, 2025
4822eb3
Engine: refactor unanchored proofs
shilangyu Dec 17, 2025
fbb3a8f
Engine: add some missing documentation about acceleration
shilangyu Dec 17, 2025
760478e
Engine: move option_flat_map and improve docs
shilangyu Dec 17, 2025
1915c21
Engine: add comment about pvs_end
shilangyu Dec 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions Engine/BooleanSemantics.v
Original file line number Diff line number Diff line change
Expand Up @@ -379,27 +379,37 @@ Qed.
- inversion H0; subst; rewrite ANCHOR0 in ANCHOR; inversion ANCHOR. auto.
Qed.

(** * Unused direction *)
(* the other direction of implication is obtained using only determinism and productivity *)
(* but in our engine proofs we don't actually need this direction *)


Theorem bool_to_istree:
forall r inp t,
pike_regex r ->
bool_tree [Areg r] inp CanExit t ->
is_tree rer [Areg r] inp GroupMap.empty forward t.
forall acts b inp t,
bool_encoding b inp acts ->
pike_actions acts ->
bool_tree acts inp b t ->
is_tree rer acts inp GroupMap.empty forward t.
Proof.
intros r inp t H H0.
intros acts b inp t ENCODE H H0.
(* productivity *)
assert (exists t', is_tree rer [Areg r] inp GroupMap.empty forward t') as [t' ISTREE].
{ destruct (compute_tree rer [Areg r] inp GroupMap.empty forward (S (actions_fuel [Areg r] inp forward))) eqn:PROD.
assert (exists t', is_tree rer acts inp GroupMap.empty forward t') as [t' ISTREE].
{ destruct (compute_tree rer acts inp GroupMap.empty forward (S (actions_fuel acts inp forward))) eqn:PROD.
2: { generalize functional_terminates. intros H1. apply H1 in PROD; auto; lia. }
exists t0. eapply compute_is_tree; eauto. }
apply boolean_correct in ISTREE as BOOLTREE; auto.
eapply encode_equal in ISTREE as BOOLTREE; eauto.
(* determinism *)
assert (t = t') by (eapply bool_tree_determ; eauto). subst. auto.
Qed.
Qed.

Theorem bool_to_istree_regex:
forall r inp t,
pike_regex r ->
bool_tree [Areg r] inp CanExit t ->
is_tree rer [Areg r] inp GroupMap.empty forward t.
Proof.
intros r inp t H H0.
assert (bool_encoding CanExit inp [Areg r]) by (constructor; constructor).
eapply bool_to_istree; eauto; pike_subset.
Qed.


Theorem booltree_istree_equiv:
forall r inp t,
Expand All @@ -408,7 +418,7 @@ Qed.
is_tree rer [Areg r] inp GroupMap.empty forward t.
Proof.
intros r inp t SUBSET. split.
- apply bool_to_istree; auto.
- apply bool_to_istree_regex; auto.
- apply boolean_correct; auto.
Qed.

Expand Down
143 changes: 119 additions & 24 deletions Engine/Complexity.v
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ Import ListNotations.

From Linden Require Import Regex Chars Groups.
From Linden Require Import Tree Semantics BooleanSemantics.
From Linden Require Import NFA PikeTree PikeVM PikeSubset.
From Linden Require Import PikeTree PikeVM.
From Linden Require Import NFA PikeVM PikeSubset Prefix.
From Linden Require Import Correctness PikeEquiv SeenSets.
From Linden Require Import Parameters.
From Warblre Require Import Base RegExpRecord.
Expand Down Expand Up @@ -52,7 +51,7 @@ Lemma wf_in:
forall pc b,
inseenpc seen pc b = true <-> In (pc, b) dist.
Proof.
intros seen size dist H. induction H; intros.
intros seen size dist H. induction H; intros.
- rewrite initial_nothing_pc. split; intros; inversion H.
- rewrite inpc_add. split; intros; destruct H0; simpl; auto;
right; apply IHwf; auto.
Expand Down Expand Up @@ -105,7 +104,7 @@ Proof.
assert (pc = size \/ pc < size) by lia. destruct H0.
- subst. simpl. destruct b; auto.
- apply IHsize in H0. simpl. auto.
Qed.
Qed.

Theorem wf_size:
forall seen size dist,
Expand Down Expand Up @@ -160,33 +159,95 @@ Definition size (c:code) : nat := length c.
(* The number of free slots decreases at most steps *)
(* In some cases (a fork), a new thread is created but the number of free slots decreases: this is why free slots are multiplied by 2 *)
(* As we change characters, the seen set might get 2*codesize new free slots (multiplied by 2 for the measure) *)
(* But the input decreases, which makes the measure also decrease, because input size is multiplied by (1 + 4*codesize) *)
(* But the input decreases, which makes the measure also decrease, because input size is multiplied by (2 + 4*codesize) *)
(* It's (2 + 4*codesize) because we might generate a new thread at each input (for unanchored search) and because of the step it takes to advance *)
Definition measure (codesize:nat) (dist:list (nat*LoopBool)) (active blocked:list thread) (inp:input) :=
(2 * free codesize dist) + length active + length blocked + (inpsize inp * (1 + 4 * codesize)).
(2 * free codesize dist) + length active + length blocked + (inpsize inp * (2 + 4 * codesize)).

(* The invariant that is preserved through pikeVM execution, with a measure that strictly decreases *)
Inductive vm_inv (c:code): pike_vm_state -> nat -> Prop :=
| inv_final:
forall b, vm_inv c (PVS_final b) 0
| inv_pvs:
forall inp active best blocked seen dist
forall inp active best blocked nextprefix seen dist
(* the threads in active and blocked have their pc inside the code range *)
(ACTIVEWF: forall t, In t active -> fst (fst t) < size c)
(BLOCKEDWF: forall t, In t blocked -> fst (fst t) < size c)
(* the seen set is well-formed, and has `count` distinct elements *)
(SEENWF: wf seen (size c) dist),
vm_inv c (PVS inp active best blocked seen) (measure (size c) dist active blocked inp).
(SEENWF: wf seen (size c) dist)
(* The next place where the prefix can match is in range of the input *)
(RANGEPREF: forall n lit, nextprefix = Some (n, lit) -> inpsize inp > S n),
vm_inv c (PVS inp active best blocked nextprefix seen) (measure (size c) dist active blocked inp).

Lemma nonfinal_pos:
forall c inp active best blocked seen m,
vm_inv c (PVS inp active best blocked seen) m -> 0 < m.
forall c inp active best blocked nextprefix seen m,
vm_inv c (PVS inp active best blocked nextprefix seen) m -> 0 < m.
Proof.
intros c inp active best blocked seen m H. inversion H. subst. unfold measure.
intros c inp active best blocked nextprefix seen m H. inversion H. subst. unfold measure.
specialize (inpsize_strict inp) as SIZE. lia.
Qed.

(** * Next prefix search is in range *)

Theorem search_in_range:
forall inp lit n lit' (strs:StrSearch),
next_prefix_counter inp lit = Some (n, lit') ->
inpsize inp > S n.
Proof.
intros [next pref] lit n lit' strs H. unfold next_prefix_counter in H.
destruct next; simpl in H. inversion H.
destruct str_search eqn:SEARCH; inversion H; subst.
apply str_search_bound in SEARCH. simpl. lia.
Qed.

Lemma advance_inpsize:
forall inp1 inp2 n,
advance_input inp1 forward = Some inp2 ->
inpsize inp1 > S n ->
inpsize inp2 > n.
Proof.
intros [next1 pref1] inp2 n H H0. simpl in H.
destruct next1; inversion H.
simpl in H0. simpl. lia.
Qed.

Lemma advance_S_n:
forall n next pref c,
advance_input_n (Input (c::next) pref) (S n) forward =
advance_input_n (Input next (c::pref)) n forward.
Proof.
intros n next pref c. simpl. f_equal. rewrite <- app_assoc. auto.
Qed.

Lemma advance_n_inpsize:
forall inp n,
inpsize inp > S n ->
inpsize (advance_input_n inp (S n) forward) < inpsize inp.
Proof.
intros [next pref] n H.
generalize dependent next. generalize dependent pref.
induction n; intros.
- unfold advance_input_n. destruct next; simpl in H; simpl; lia.
- destruct next as [|c next]; simpl in H. lia.
assert (S (length next) > S n) by lia.
specialize (IHn (c::pref) next H0). rewrite advance_S_n.
simpl in IHn. simpl. lia.
Qed.


(** * Well-formedness of the code *)

(* first, we show the code is non empty, ensuring that threads generated by prefix acceleration are in range *)
Definition nonempty (c:code) : Prop := size c > 0.

(* Even if the regex is epsilon, adding the accept instruction makes the code non-empty *)
Lemma compilation_nonempty:
forall r, nonempty (compilation r).
Proof.
intros. unfold compilation. destruct compile. unfold nonempty, size.
rewrite app_length. simpl. lia.
Qed.

(* Some bytecode is well-formed if every target label belongs in some range *)
Definition code_wf (c:code) (size:nat) :=
forall pc i next,
Expand Down Expand Up @@ -394,25 +455,55 @@ Qed.

(* at each step, the measure strictly decreases *)
(* the well-formedness of the seen set is preserved *)
Theorem pikevm_decreases:
Theorem pikevm_decreases {strs:StrSearch}:
forall code pvs1 pvs2 m1,
code_wf code (size code) ->
nonempty code ->
pike_vm_step rer code pvs1 pvs2 ->
vm_inv code pvs1 m1 ->
exists m2, vm_inv code pvs2 m2 /\ m2 < m1.
Proof.
intros code pvs1 pvs2 m1 CODEWF STEP INV. inversion STEP; subst; simpl measure; inversion INV; subst;
intros code pvs1 pvs2 m1 CODEWF NONEMPTY STEP INV. inversion STEP; subst; simpl measure; inversion INV; subst;
try destruct t as [[pc gm] b].
(* when reaching a final state, we end up with a measure of 0, while the previous measure was strictly positive *)
- exists 0. split.
+ constructor.
+ apply nonfinal_pos in INV. auto.
(* acc: we might add (2*codesize) free slots, but we lose at least one input length *)
- specialize (RANGEPREF n lit eq_refl).
exists (measure (size code) [] [pike_vm_initial_thread] [] (advance_input_n inp (S n) forward)). split; [constructor|]; auto.
+ (* the new generated thread is in range because the code is nonempty *)
intros t H. inversion H as [IN1|IN2]; auto.
subst. unfold pike_vm_initial_thread. simpl. auto.
+ constructor.
+ intros n0 lit0 H. eapply search_in_range; eauto.
+ unfold measure. simpl. rewrite free_initial. specialize (advance_n_inpsize inp n RANGEPREF)as ADV.
apply increase_mult with (x:= 4 * size code) in ADV as NEXT. simpl in NEXT. lia.
(* end *)
- exists 0. split.
+ constructor.
+ apply nonfinal_pos in INV. auto.
(* nextchar: we might add (2*codesize) free slots, but we lose an input length *)
- exists (measure (size code) [] (thr::blocked) [] inp2). split; [constructor|]; auto.
+ constructor.
+ intros n lit H; inversion H.
+ unfold measure. simpl. rewrite free_initial. apply advance_input_decreases in ADVANCE.
apply increase_mult with (x:= 4 * size code) in ADVANCE as NEXT. simpl in NEXT. lia.
(* nextchar_generate: we might add (2*codesize) free slots, but we lose an input length *)
- exists (measure (size code) [] ((thr::blocked)++[pike_vm_initial_thread]) [] inp2). split; [constructor|]; auto.
+ (* the new generated thread is in range because the code is nonempty *)
intros t H. apply in_app_or in H as [IN1|IN2]; auto.
inversion IN2; inversion H. unfold pike_vm_initial_thread. simpl. auto.
+ constructor.
+ intros n lit0 H. eapply search_in_range; eauto.
+ unfold measure. simpl. rewrite free_initial. apply advance_input_decreases in ADVANCE.
apply increase_mult with (x:= 4 * size code) in ADVANCE as NEXT. simpl in NEXT.
rewrite app_length. simpl. lia.
(* nextchar_filter: we might add (2*codesize) free slots, but we lose an input length *)
- exists (measure (size code) [] (thr::blocked) [] inp2). split; [constructor|]; auto.
+ constructor.
+ intros n0 lit0 H. inversion H; subst. specialize (RANGEPREF (S n0) lit0 eq_refl).
eapply advance_inpsize; eauto.
+ unfold measure. simpl. rewrite free_initial. apply advance_input_decreases in ADVANCE.
apply increase_mult with (x:= 4 * size code) in ADVANCE as NEXT. simpl in NEXT. lia.
(* skip: we lose a thread *)
Expand All @@ -436,6 +527,7 @@ Proof.
exists (measure (size code) ((pc,b)::dist) [] blocked inp). split; [constructor|]; auto.
+ intros t0 H. inversion H.
+ unfold add_thread. apply wf_new; auto.
+ intros n lit H; inversion H.
+ specialize (free_add seen (size code) dist (pc,gm,b) SEENWF UNSEEN RANGE) as FREE.
apply wf_size in FREE. unfold measure, free. simpl. lia.
(* blocked: we switch an active thread to blocked, but lose a free slot *)
Expand Down Expand Up @@ -476,7 +568,7 @@ Definition codesize (r:regex) := S (compsize r).
Lemma compile_size:
forall r start endl code,
pike_regex r ->
compile r start = (code, endl) ->
compile r start = (code, endl) ->
length (code) = compsize r.
Proof.
intros r start endl code SUBSET COMP.
Expand Down Expand Up @@ -513,12 +605,12 @@ Proof.
unfold codesize, size, compilation. intros r H. destruct (compile r 0) eqn:COMP.
apply compile_size in COMP; auto. rewrite <- COMP. rewrite app_length. simpl. lia.
Qed.


(** * Initial PikeVM Measure *)

Definition complexity (r:regex) (inp:input) : nat :=
1 + (4 * codesize r) + (inpsize inp * (1 + 4 * codesize r)).
1 + (4 * codesize r) + (inpsize inp * (2 + 4 * codesize r)).

Theorem initial_measure:
forall inp r,
Expand All @@ -534,6 +626,7 @@ Proof.
simpl. lia.
+ intros t H. inversion H.
+ constructor.
+ intros n lit H; inversion H.
- unfold complexity, measure. rewrite <- compilation_size; auto. simpl.
rewrite free_initial. simpl. lia.
Qed.
Expand Down Expand Up @@ -585,30 +678,31 @@ Lemma strong_ind (P : nat -> Prop) :
forall n, P n.
Proof.
intros H n; enough (H0: forall p, p <= n -> P p).
- apply H0, le_n.
- apply H0, le_n.
- induction n; intros; apply H; intros; try lia.
apply IHn; lia.
Qed.

Lemma pike_vm_bound:
Lemma pike_vm_bound {strs:StrSearch}:
forall pvs code n,
code_wf code (size code) ->
nonempty code ->
vm_inv code pvs n ->
exists result, steps (pike_vm_step rer code) pvs n (PVS_final result).
Proof.
intros pvs code n WF INV. generalize dependent pvs. induction n using (strong_ind); intros.
intros pvs code n WF NONEMPTY INV. generalize dependent pvs. induction n using (strong_ind); intros.
destruct pvs.
2: { exists best. constructor. }
specialize (pikevm_progress rer code inp active best blocked seen) as [next STEP].
specialize (pikevm_decreases code (PVS inp active best blocked seen) next n WF STEP INV) as [newm [INV2 DECR]].
specialize (pikevm_progress rer code inp active best blocked nextprefix seen) as [next STEP].
specialize (pikevm_decreases code (PVS inp active best blocked nextprefix seen) next n WF NONEMPTY STEP INV) as [newm [INV2 DECR]].
specialize (H newm DECR next INV2) as [result STEPS].
exists result. apply more_steps with (n:=S newm); try lia.
econstructor; eauto.
Qed.

(** * Complexity Theorem *)

Theorem pikevm_complexity:
Theorem pikevm_complexity {strs:StrSearch}:
forall (r:regex) (inp:input),
(* for any supported regex r and input inp *)
pike_regex r ->
Expand All @@ -619,14 +713,15 @@ Proof.
intros r inp SUBSET.
apply pike_vm_bound.
- apply compiled_wf.
- apply compilation_nonempty.
- apply initial_measure. auto.
Qed.


(** * Termination of the PikeVM algorithm *)

(* As a corollary, we can deduce that the PikeVM always terminate *)
Theorem pike_vm_terminates:
Theorem pike_vm_terminates {strs:StrSearch}:
forall r inp,
pike_regex r ->
exists result, trc_pike_vm rer (compilation r) (pike_vm_initial_state inp) (PVS_final result).
Expand Down
Loading