Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions Engine/Meta.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
(** * The Meta engine *)

(* Individual regex engines have their own unique strengths and weaknesses. *)
(* Some may support only a subset of regex features, some may be more *)
(* efficient depending on some characteristics of the regex or the input. *)
(* Sometimes we may even want to skip regex engines entirely and just do a *)
(* substring search. The Meta engine exploits these features by *)
(* encapsulating heuristics that look for matches using strategies it deems *)
(* to be the most efficient. *)

Require Import List.
Import ListNotations.

From Linden Require Import Regex Chars Semantics Tree.
From Linden Require Import FunctionalUtils FunctionalSemantics.
From Linden Require Import Parameters LWParameters.
From Linden Require Import StrictSuffix Prefix.
From Linden Require Import PikeSubset SeenSets.
From Linden Require Import Correctness FunctionalPikeVM.
From Warblre Require Import Base RegExpRecord.


Section Meta.
Context {params: LindenParameters}.
Context (rer: RegExpRecord).

(* We define what it means to be a regex engine and show that our engines follow these definitions. *)
Section Engines.
Context {VMS: VMSeen}.

(* interface of an anchored, executable engine *)
Class AnchoredEngine := {
exec: regex -> input -> option leaf;

(* asserts the supported subset of regexes *)
supported_regex: regex -> Prop;

(* the execution follows the backtracking tree semantics *)
exec_correct: forall r inp tree ol,
supported_regex r ->
is_tree rer [Areg r] inp Groups.GroupMap.empty forward tree ->
(first_leaf tree inp = ol <-> exec r inp = ol)
}.

(* we show that the PikeVM fits the scheme of an anchored engine *)
#[refine]
Instance PikeVMAnchoredEngine: AnchoredEngine := {
exec r inp := match pike_vm_match rer r inp with
| OutOfFuel => None
| Finished res => res
end;
supported_regex := pike_regex;
}.
(* exec_correct *)
intros r inp tree ol Hsubset Htree.
pose proof (pike_vm_match_terminates rer r inp Hsubset) as [res Hmatch].
rewrite Hmatch.
split.
- intros Hleaf.
subst. eauto using pike_vm_match_correct, pike_vm_correct.
- intros <-.
symmetry. eauto using pike_vm_match_correct, pike_vm_correct.
Qed.
End Engines.

Existing Instance literal_EqDec.
(* for each input position we run the anchored engine and return the earliest match *)
Fixpoint search_from {engine:AnchoredEngine} (r: regex) (next: string) (prev: string): option leaf :=
match (exec r (Input next prev)) with
| Some leaf => Some leaf
| None => match next with
| [] => None
| c::t => search_from r t (c::prev)
end
end.

Definition pref_str (i: input) : string :=
match i with
| Input _ pref => pref
end.

(* the string-quadratic algorithm described in RegExpBuiltinExec *)
Definition BuiltinExec {engine:AnchoredEngine} (r:regex) (inp:input) : option leaf :=
search_from r (next_str inp) (pref_str inp).

(* prefixed version *)
Definition BuiltinExecPrefixed {strs:StrSearch} {engine:AnchoredEngine} (r:regex) (inp:input) : option leaf :=
let lit := extract_literal rer r in
if lit == Impossible then None else
let p := prefix lit in
(* we skip the initial input that does not match the prefix *)
match (input_search p inp) with
| None => None (* if prefix is not present anywhere, then we cannot match *)
| Some i => search_from r (next_str i) (pref_str i)
end.

Lemma search_from_before_jump_eq {strs:StrSearch} {engine:AnchoredEngine}:
forall i r inp inp',
supported_regex r ->
input_search (prefix (extract_literal rer r)) inp = Some inp' ->
input_prefix i inp' forward ->
input_prefix inp i forward ->
search_from r (next_str i) (pref_str i) = search_from r (next_str inp') (pref_str inp').
Proof.
intros i r inp inp' Hsubset Hsearch Hprefix Hlow.
remember forward as dir.
induction Hprefix; subst.
- reflexivity.
- pose proof H as Hadvance.
specialize (IHHprefix Hsearch).
unfold advance_input in H. destruct inp1 as [next1 pref1] eqn:Hinp1, next1 eqn:Hnext1; [easy|].
inversion H. rewrite <-H1 in IHHprefix.
assert (Hnone: exec r (Input (t :: s) pref1) = None). {
assert (Hbetween: input_between inp inp3 inp1). {
rewrite input_prefix_strict_suffix in Hprefix, Hlow.
split; destruct Hprefix, Hlow; subst; eauto using ss_next', ss_advance.
}
subst.
pose proof (is_tree_productivity rer [Areg r] (Input (t :: s) pref1) Groups.GroupMap.empty forward) as [tree Htree].
rewrite <-exec_correct; eauto.
eauto using input_search_no_earlier, extract_literal_prefix_contra.
}
simpl.
rewrite Hnone.
eauto using ip_prev'.
Qed.

Lemma input_search_exec_none {strs:StrSearch} {engine:AnchoredEngine}:
forall i r inp,
supported_regex r ->
input_search (prefix (extract_literal rer r)) inp = None ->
input_prefix inp i forward ->
exec r i = None.
Proof.
intros i r inp Hsubset Hsearch Hlow.
rewrite input_prefix_strict_suffix in Hlow.
pose proof (is_tree_productivity rer [Areg r] i Groups.GroupMap.empty forward) as [tree Htree].
rewrite <-exec_correct; eauto.
eauto using input_search_not_found, extract_literal_prefix_contra.
Qed.

Lemma search_from_none_prefix {strs:StrSearch} {engine:AnchoredEngine}:
forall i r inp,
supported_regex r ->
input_search (prefix (extract_literal rer r)) inp = None ->
input_prefix inp i forward ->
search_from r (next_str i) (pref_str i) = None.
Proof.
intros [next pref] r inp Hsubset Hsearch Hprefix.
generalize dependent pref.
induction next; intros pref Hprefix.
- simpl; erewrite input_search_exec_none; eauto using ip_eq.
- simpl in *.
erewrite IHnext. erewrite input_search_exec_none.
all: eauto using ip_prev'.
Qed.

Lemma input_search_exec_impossible {strs:StrSearch} {engine:AnchoredEngine}:
forall inp r,
supported_regex r ->
extract_literal rer r = Impossible ->
exec r inp = None.
Proof.
intros inp r Hsubset Hextract.
pose proof (is_tree_productivity rer [Areg r] inp Groups.GroupMap.empty forward) as [tree Htree].
rewrite <-exec_correct; eauto.
eauto using extract_literal_impossible.
Qed.

Lemma search_from_impossible_prefix {strs:StrSearch} {engine:AnchoredEngine}:
forall inp r,
supported_regex r ->
extract_literal rer r = Impossible ->
search_from r (next_str inp) (pref_str inp) = None.
Proof.
intros [next pref] r Hsubset Hextract.
generalize dependent pref.
induction next; intros pref; simpl; erewrite input_search_exec_impossible; eauto.
Qed.

Theorem builtin_exec_equiv {strs:StrSearch} {engine:AnchoredEngine}:
forall r inp,
supported_regex r ->
BuiltinExec r inp = BuiltinExecPrefixed r inp.
Proof.
intros r inp Hsubset.
unfold BuiltinExec, BuiltinExecPrefixed.
wt_eq.
+ eapply search_from_impossible_prefix; eauto.
+ destruct input_search eqn:Hsearch.
- assert (input_prefix inp i forward). {
apply input_search_strict_suffix in Hsearch.
now rewrite <-input_prefix_strict_suffix in Hsearch.
}
eapply search_from_before_jump_eq; eauto using ip_eq.
- eapply search_from_none_prefix; eauto using ip_eq.
Qed.


End Meta.
8 changes: 4 additions & 4 deletions Engine/Prefix.v
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ From Linden Require Import Parameters LWParameters.
From Linden Require Import StrictSuffix.
From Warblre Require Import Base RegExpRecord.

Section Prefix.
Context {params: LindenParameters}.

(* Tactic for rewriting decidable equalities into propositional ones *)
Ltac wt_eq := repeat match goal with
| [ H: (?x ==? ?y)%wt = true |- _ ] => rewrite EqDec.inversion_true in H; subst
Expand All @@ -25,6 +22,9 @@ Ltac wt_eq := repeat match goal with
| [ H: context[(?x ==? ?y)%wt] |- _ ] => destruct (x ==? y)%wt eqn:?Heq
end.

Section Prefix.
Context {params: LindenParameters}.

Inductive starts_with: string -> string -> Prop :=
| sw_nil: forall s, starts_with [] s
| sw_cons: forall h t1 t2, starts_with t1 t2 -> starts_with (h :: t1) (h :: t2).
Expand Down Expand Up @@ -195,7 +195,7 @@ Proof.
Qed.

(* low inclusive, high exclusive *)
Notation input_between ilow ihigh i := ((i = ilow \/ strict_suffix i ilow forward) /\ strict_suffix ihigh i forward).
Definition input_between ilow ihigh i := ((i = ilow \/ strict_suffix i ilow forward) /\ strict_suffix ihigh i forward).

(* if strict_suffix i2 i1 forward, then next_str i2 is skipn k (next_str i1) for some k > 0 *)
Lemma strict_suffix_forward_skipn:
Expand Down
2 changes: 1 addition & 1 deletion Engine/SeenSets.v
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ From Warblre Require Import Base.

(** * Seen Sets of Trees *)
(* The PikeTree algorithm needs to maintain a set of seen trees *)
(* Our definitions are parameterized by an inmplementation of such sets. *)
(* Our definitions are parameterized by an implementation of such sets. *)

(* what we need from these sets is just these few definitions and properties *)
Class TSeen (params:LindenParameters) :=
Expand Down
2 changes: 1 addition & 1 deletion Engine/TreeRep.v
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Import ListNotations.

From Linden Require Import Regex Chars Groups.
From Linden Require Import Tree Semantics BooleanSemantics.
From Linden Require Import NFA PikeTree PikeVM.
From Linden Require Import NFA.
From Linden Require Import PikeSubset.
From Linden Require Import Parameters.
From Warblre Require Import Base RegExpRecord.
Expand Down
2 changes: 1 addition & 1 deletion Semantics/Examples.v
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Require Import List.
Import ListNotations.

From Linden Require Import Regex Chars Groups.
From Linden Require Import Tree Semantics PikeVM.
From Linden Require Import Tree Semantics.
From Warblre Require Import Base RegExpRecord.
From Linden Require Import FunctionalUtils FunctionalSemantics.
From Linden Require Import Inst.
Expand Down
4 changes: 4 additions & 0 deletions Semantics/FunctionalUtils.v
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ Section Utilities.
eauto using compute_tr_is_tree.
Qed.

Theorem is_tree_productivity: forall acts inp gm dir,
exists tree, is_tree rer acts inp gm dir tree.
Proof. eexists. apply compute_tr_is_tree. Qed.

Lemma is_tree_eq_compute_tr {actions i gm dir} :
forall {tr}, is_tree rer actions i gm dir tr -> tr = compute_tr actions i gm dir.
Proof.
Expand Down
72 changes: 43 additions & 29 deletions Semantics/StrictSuffix.v
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Section StrictSuffix.
advance_input inp2 dir = Some inp1 ->
strict_suffix inp2 inp3 dir ->
strict_suffix inp1 inp3 dir.

(** * Functional version of strict_suffix *)

(* Another, functional, version of strict suffix *)
Expand Down Expand Up @@ -100,7 +100,7 @@ Section StrictSuffix.
* simpl. f_equal. f_equal. rewrite Hpref12. simpl.
rewrite rev_app_distr, <- app_assoc, <- app_assoc, <- app_assoc. reflexivity.
* simpl in *. rewrite app_length in Hlendiff. simpl in *. apply IH with (diff := x :: diff'').
-- simpl. lia.
-- simpl. lia.
-- lia.
-- rewrite <- app_comm_cons. rewrite <- app_assoc in Hnext12. auto.
-- f_equal.
Expand Down Expand Up @@ -145,7 +145,7 @@ Section StrictSuffix.
* simpl. f_equal. f_equal. rewrite Hnext12. simpl.
rewrite <- app_assoc. reflexivity.
* simpl in *. rewrite app_length in Hlendiff. simpl in *. apply IH with (diff := diff'' ++ [a]).
-- rewrite app_length. simpl. lia.
-- rewrite app_length. simpl. lia.
-- lia.
-- rewrite <- app_assoc. auto.
-- rewrite <- app_assoc in Hpref12. auto.
Expand Down Expand Up @@ -256,28 +256,6 @@ Section StrictSuffix.
apply is_strict_suffix_correct in His_ss. contradiction.
Qed.

Theorem strict_suffix_current:
forall inp1 inp2 dir,
strict_suffix inp1 inp2 dir ->
length (current_str inp1 dir) < length (current_str inp2 dir).
Proof.
intros [next1 pref1] [next2 pref2] dir H.
rewrite <- is_strict_suffix_correct in H.
unfold is_strict_suffix in H. destruct dir.
- generalize dependent pref2. induction next2; intros.
{ inversion H. }
simpl. simpl in IHnext2. simpl in H.
destruct ((Input next2 (a :: pref2) ==? Input next1 pref1)%wt) eqn:INPEQ.
+ rewrite EqDec.inversion_true in INPEQ. inversion INPEQ. subst. lia.
+ apply IHnext2 in H. lia.
- generalize dependent next2. induction pref2; intros.
{ inversion H. }
simpl. simpl in IHpref2. simpl in H.
destruct ((Input (a::next2) pref2 ==? Input next1 pref1)%wt) eqn:INPEQ.
+ rewrite EqDec.inversion_true in INPEQ. inversion INPEQ. subst. lia.
+ apply IHpref2 in H. lia.
Qed.

Theorem read_suffix:
forall inp dir nextinp,
advance_input inp dir = Some nextinp ->
Expand All @@ -296,7 +274,7 @@ Section StrictSuffix.
- destruct pref2; inversion H. simpl. auto.
Qed.

Lemma ss_length_lt:
Lemma strict_suffix_current:
forall inp1 inp2 dir,
strict_suffix inp1 inp2 dir -> length (current_str inp1 dir) < length (current_str inp2 dir).
Proof.
Expand Down Expand Up @@ -402,13 +380,49 @@ Section StrictSuffix.
* rewrite <- app_assoc in Hnext_sufnext. auto.
* reflexivity.
Qed.

Lemma ss_neq:
forall inp1 inp2 dir,
strict_suffix inp1 inp2 dir -> inp1 <> inp2.
Proof.
intros inp1 inp2 dir Hss Habs. subst inp2.
pose proof ss_length_lt inp1 inp1 dir Hss. lia.
pose proof strict_suffix_current inp1 inp1 dir Hss. lia.
Qed.

(** * Prefixes *)

(* In general you should use strict_suffix in definitions. *)
(* The usage of input_prefix should be reserved for scenarios *)
(* where you need the induction to have reflexive base case, *)
(* and the inductive step to build from the biggest to smallest *)
(* prefixes. *)

(* relation that one input is a non-strict prefix of another *)
Inductive input_prefix : input -> input -> Direction -> Prop :=
| ip_eq : forall inp dir, input_prefix inp inp dir
| ip_prev : forall inp1 inp2 inp3 dir,
advance_input inp1 dir = Some inp2 ->
input_prefix inp2 inp3 dir ->
input_prefix inp1 inp3 dir.

Lemma ip_prev':
forall inp1 inp2 inp3 dir,
input_prefix inp1 inp2 dir ->
advance_input inp2 dir = Some inp3 ->
input_prefix inp1 inp3 dir.
Proof. induction 1; eauto using ip_prev, ip_eq. Qed.

(* equivalence between input_prefix and strict_suffix *)
Lemma input_prefix_strict_suffix:
forall i1 i2 dir,
input_prefix i1 i2 dir <->
i2 = i1 \/ strict_suffix i2 i1 dir.
Proof.
split; intros H.
- induction H; [auto|].
destruct IHinput_prefix; subst; eauto using ss_advance, ss_next'.
- destruct H; [subst; auto using ip_eq|].
induction H; subst; eauto using ip_eq, ip_prev, ip_prev'.
Qed.

End StrictSuffix.
End StrictSuffix.