Skip to content

Commit

Permalink
clean up all the coq code
Browse files Browse the repository at this point in the history
  • Loading branch information
lczielinski committed Dec 15, 2024
1 parent 05acb49 commit 349dbf2
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 119 deletions.
48 changes: 25 additions & 23 deletions coq/Antimirov.v
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ Require Export Lia.

(** If r is a regex and a is a char, then a partial derivative
is a regex r' such that if r' accepts word s, then r accepts a ⋅ s.
We can construct sets of partial derivatives (Antimirov derivatives) *)
The Antimirov derivative returns a set of all partial derivatives *)

(* Note that gsets are finite *)
(** Note that gsets are finite *)
Fixpoint a_der (r : re) (c : char) : gset re :=
match r with
| Void => ∅
Expand All @@ -32,7 +32,21 @@ Fixpoint a_der_str (r : re) (s : string) : gset re :=
set union instead *)
Definition a_der_set (rs : gset re) (c : char) : gset re :=
set_bind (fun r => a_der r c) rs.


(** True if there is a regex in the set which matches the empty string *)
Definition nullable (rs : gset re) : bool :=
let elem_of_bool (x : bool) (s : gset bool) := bool_decide (x ∈ s) in
elem_of_bool true (set_map (fun r => isEmpty r) rs).

(** True if r matches s, using a_der_set *)
Definition a_matches (r : re) (s : string) : bool :=
nullable (fold_left a_der_set s {[ r ]}).

(** Alternate definition using a_der_str *)
Definition a_matches' (r : re) (s : string) : bool :=
nullable (a_der_str r s).

(******************************************************************************)
(** Lemmas about a_der_set *)
Lemma a_der_set_singleton (r : re) (c : char) :
a_der_set {[ r ]} c = a_der r c.
Expand Down Expand Up @@ -69,20 +83,6 @@ Proof.
intros. contradiction. reflexivity.
Qed.

(** Matching principles for a_der *)
(** True if there is a regex in the set which matches the empty string *)
Definition nullable (rs : gset re) : bool :=
let elem_of_bool (x : bool) (s : gset bool) := bool_decide (x ∈ s) in
elem_of_bool true (set_map (fun r => isEmpty r) rs).

(** True if r matches s, using a_der_set *)
Definition a_matches (r : re) (s : string) : bool :=
nullable (fold_left a_der_set s {[ r ]}).

(** Alternate definition using a_der_str *)
Definition a_matches' (r : re) (s : string) : bool :=
nullable (a_der_str r s).

(** Lemmas about fold_left a_der_set *)
Lemma fold_left_empty (s : string) : fold_left a_der_set s ∅ = ∅.
Proof. induction s; autorewrite with core; eauto. Qed.
Expand Down Expand Up @@ -171,8 +171,7 @@ Proof. set_solver. Qed.

(** Lemmas about a_matches *)

(* If [s ~= Concat r1 r2], then [s] can be decomposed as [s1 ++ s2] such that
[s1 ~= r1] and [s2 ~= r2] *)
(** If s matches r1 ⋅ r2, then s = s1 ++ s2 where s1 matches r1 and s2 matches r2 *)
Lemma a_matches_Concat_destruct : forall (s : string) (r1 r2 : re),
a_matches (Concat r1 r2) s ->
exists s1 s2, (s = s1 ++ s2) /\ a_matches r1 s1 /\ a_matches r2 s2.
Expand Down Expand Up @@ -216,10 +215,9 @@ Proof.
apply H3. set_solver.
Qed.

(* If [s1 ~= r1] and [s2 ~= r2], then [s1 ++ s2 ~= Concat r1 r2] *)
(** If s1 matches r1 and s2 matches r2, then s1 ++ s2 matches r1 ⋅ r2 *)
Lemma a_matches_Concat : forall (s1 s2 : string) (r1 r2 : re),
a_matches r1 s1 ->
a_matches r2 s2 ->
a_matches r1 s1 -> a_matches r2 s2 ->
a_matches (Concat r1 r2) (s1 ++ s2).
Proof.
induction s1.
Expand Down Expand Up @@ -262,7 +260,7 @@ Proof.
rewrite fold_left_empty in H0. inversion H0.
Qed.

(* If [s ~= r^*], then ∃ n s.t. [s ~= r^n] *)
(* If s matches r^*, then ∃ n s.t. s matches r^n *)
Lemma a_matches_Star_destruct : forall (s : string) (r : re),
a_matches (Star r) s ->
exists n, a_matches (Concat_n n r) s.
Expand Down Expand Up @@ -293,6 +291,7 @@ Proof.
+ apply H7.
Qed.

(** If s1 matches r and s2 matches r*, then s1 ++ s2 matches r* *)
Lemma a_matches_Star : forall (s1 s2 : string) (r : re),
a_matches r s1 ->
a_matches (Star r) s2 ->
Expand All @@ -311,6 +310,7 @@ Proof.
+ apply H2.
Qed.

(******************************************************************************)
(** What it means for a string to match a set of regexes.
- [matches_set_here]: if [s] matches [r], then [s] matches any regex set
containing [r]
Expand Down Expand Up @@ -390,6 +390,7 @@ Proof. intros; eapply matches_set_here; eauto; set_solver. Qed.
Lemma matches_set_epsilon : matches_set [] {[Epsilon]}.
Proof. eapply matches_set_here. apply matches_epsilon. set_solver. Qed.

(** If s matches a partial derivative of r wrt a, then a :: s matches r *)
Lemma a_der_matches_1 a r s : matches_set' s (a_der r a) -> matches r (a :: s).
Proof.
revert s.
Expand All @@ -403,6 +404,7 @@ Proof.
- simpl. reflexivity.
Qed.

(** If a :: s matches r, then s matches a partial derivative of r wrt a *)
Lemma a_der_matches_2 a r s : matches r (a :: s) -> matches_set' s (a_der r a).
Proof.
revert s. induction r; X.
Expand Down
14 changes: 11 additions & 3 deletions coq/Brzozowski.v
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,21 @@ Lemma b_der_matches_2 (c : char) (r : re) (s : string) :
Proof. revert s. induction r; X. apply isEmpty_matches_2 in H2. X. Qed.
Hint Resolve b_der_matches_1 b_der_matches_2 : core.

(** Brzozowski derivative wrt a string *)
Definition b_der_str (r : re) (s : string) := fold_left b_der s r.

(** True if r matches s, using b_der *)
Definition b_matches (r : re) (s : string) : bool :=
isEmpty (fold_left b_der s r).

(* Brzozowski-based matcher agrees with the inductive proposition [matches] *)
(** Brzozowski-based matcher agrees with the inductive proposition [matches] *)
Lemma b_matches_matches (r : re) (s : string) :
b_matches r s = true <-> matches r s.
Proof. unfold b_matches. split; revert r; induction s; X. Qed.

(******************************************************************************)
(** Lemmas about applying the Brzozowski derivative to a string *)

Lemma b_der_Void : forall (s : string), fold_left b_der s Void = Void.
Proof. induction s. reflexivity. simpl. apply IHs. Qed.

Expand All @@ -46,6 +50,7 @@ Proof.
- simpl in *. intros. apply IHs.
Qed.

(** If s matches r1 ⋅ r2, then s = s1 ++ s2 where s1 matches r1 and s2 matches r2 *)
Lemma b_matches_Concat : forall (s : string) (r1 r2 : re),
b_matches (Concat r1 r2) s ->
exists s1 s2, (s = s1 ++ s2) /\ b_matches r1 s1 /\ b_matches r2 s2.
Expand All @@ -68,6 +73,7 @@ Proof.
exists (a :: s1). exists s2. repeat split; X.
Qed.

(** If s matches r* and s ≠ [], then s = s1 ++ s2 where s1 matches r and s2 matches r* *)
Lemma b_matches_Star : forall (s : string) (r : re),
s ≠ [] -> b_matches (Star r) s ->
exists s1 s2, (s = s1 ++ s2) /\ b_matches r s1 /\ b_matches (Star r) s2.
Expand All @@ -79,6 +85,7 @@ Proof.
exists s1. exists s2. X.
Qed.

(** If s1 matches r1 and s2 matches r2, then s1 ++ s2 matches r1 ⋅ r2 *)
Lemma isEmpty_Concat : forall (s1 s2 : string) (r1 r2 : re),
isEmpty (fold_left b_der s1 r1) ->
isEmpty (fold_left b_der s2 r2) ->
Expand All @@ -92,6 +99,7 @@ Proof.
apply orb_prop_intro. left. apply IHs1. apply H. apply H0.
Qed.

(** If s1 matches r and s2 matches r*, then s1 ++ s2 matches r* *)
Lemma isEmpty_Star : forall (s1 s2 : string) (r : re),
isEmpty (fold_left b_der s1 r) ->
isEmpty (fold_left b_der s2 (Star r)) ->
Expand All @@ -101,15 +109,15 @@ Proof.
X. apply isEmpty_Concat. apply H. apply H0.
Qed.

(* If [s ~= ε], then [s = ε] *)
(** If s matches ε, then s = ε *)
Lemma b_matches_Epsilon : forall (s : string),
b_matches Epsilon s -> s = [].
Proof.
destruct s. X. unfold b_matches. simpl. intros.
rewrite b_der_Void in H. contradiction H.
Qed.

(* If [s ~= r*], then [s ~= r^n] for some [n] *)
(** If s matches r*, then s matches r^n for some n *)
Lemma b_matches_Star_Concat : forall (r : re) (s : string),
b_matches (Star r) s ->
exists n, b_matches (Concat_n n r) s.
Expand Down
54 changes: 25 additions & 29 deletions coq/EdelmannGset.v
Original file line number Diff line number Diff line change
Expand Up @@ -4,55 +4,53 @@ Require Import Regex RegexOpt.
Import ListNotations.
From stdpp Require Import gmap sets fin_sets.

(* Variant of Edelmann.v, adapted to work with [gset]s instead of [ListSet] *)
(** Variant of Edelmann's code, adapted to work with gsets instead of ListSet *)

(***** CHARACTERS AND WORDS *****)

Global Declare Scope char_class_scope.
Open Scope char_class_scope.

(* Input characters. *)
(** Input characters *)
Definition char := ascii.
Definition char_dec := ascii_dec.

(* Input words. *)
(** Input words *)
Definition word := list char.


(***** CONTEXTS AND ZIPPERS *****)

(* Contexts are sequences of regular expressions. *)
(** Contexts are sequences of regular expressions *)
Definition context := list re.

(* Zippers are disjunctions of contexts. *)
(** Zippers are disjunctions of contexts *)
Definition zipper := gset context.

(* Union of two zippers. *)
(** Union of two zippers *)
Definition zipper_union (z1 : zipper) (z2 : zipper) : zipper := z1 ∪ z2.

(* Addition of a context in a zipper. *)
(** Addition of a context in a zipper *)
Definition zipper_add (ctx : context) (z : zipper) : zipper :=
z ∪ {[ ctx ]}.

(* Convert a regular expression into a zipper. *)
Definition focus (e: re) : zipper := singleton [e].
(** Convert a regular expression into a zipper *)
Definition focus (e : re) : zipper := singleton [e].

(* Typeclass instance needed to make the definition of [unfocus] below
(** Typeclass instance needed to make the definition of [unfocus] below
typecheck *)
Instance ContextElements : Elements re context := {
elements := fun ctx => ctx
}.

(* Conversion from zipper back to re.
* Unused, but provides some intuition on zippers.
*)
(** Conversion from zipper back to re
Unused, but provides some intuition on zippers *)
Definition unfocus (z : zipper) : re :=
let ds := set_map (fun ctx => set_fold RegexOpt.concat Epsilon ctx) z in
set_fold Regex.Union Void (ds : gset re).

(***** DERIVATION *****)

(* Downwards phase of Brzozowski's derivation on zippers. *)
(** Downwards phase of Brzozowski's derivation on zippers *)
Fixpoint derive_down (c : char) (e : re) (ctx : context) : zipper :=
match e with
| Atom cl => if Ascii.eqb cl c then {[ ctx ]} else
Expand All @@ -66,7 +64,7 @@ Fixpoint derive_down (c : char) (e : re) (ctx : context) : zipper :=
| _ => ∅
end.

(* Upwards phase of Brzozowski's derivation on zippers. *)
(** Upwards phase of Brzozowski's derivation on zippers *)
Fixpoint derive_up (c : char) (ctx : context) : zipper :=
match ctx with
| [] => ∅
Expand All @@ -77,9 +75,8 @@ Fixpoint derive_up (c : char) (ctx : context) : zipper :=
derive_down c e ctx'
end.


(* Some typeclass instances needed to make the definition of [derive]
below typecheck *)
(** Some typeclass instances needed to make the definition of [derive]
below typecheck *)
Instance ZipperElements : Elements zipper zipper := {
elements := fun z => [z]
}.
Expand All @@ -88,33 +85,32 @@ Instance ZipperSingleton : Singleton zipper zipper := {
singleton := fun z => z
}.

(* Brzozowski derivatives for zippers *)
(** Brzozowski derivatives for zippers *)
Definition derive (c : char) (z : zipper) : zipper :=
set_fold zipper_union ∅
(set_map (derive_up c) z : zipper).


(* Generalisation of derivatives to words. *)
(** Generalization of derivatives to words *)
Fixpoint derive_word (w : word) (z : zipper) : zipper :=
match w with
| [] => z
| c :: w' => derive_word w' (derive c z)
end.

(* Note: the following 3 functions return [Prop] instead of [bool],
as is the case for their implementations in [Edelmann.v].
This is because there does not exist a version of [existsb] for [gset]s
([existsb] comes from [ListSet] only). *)
(** Note: the following 3 functions return [Prop] instead of [bool],
as is the case for their implementations in [Edelmann.v].
This is because there does not exist a version of [existsb] for [gset]s
([existsb] comes from [ListSet] only) *)

(* Checks if the zipper z accept the empty word *)
(** Checks if the zipper z accept the empty word *)
Definition zipper_nullable (z : zipper) : Prop :=
set_Exists (fun ctx => forallb isEmpty ctx) z.

(* Checks if zipper [z] accepts the word [w] *)
(** Checks if zipper z accepts the word w *)
Definition zipper_accepts (z : zipper) (w : word) : Prop :=
zipper_nullable (derive_word w z).

(* Checks if the word [w] matches the regular expression [e] *)
(** Checks if the word w matches the regular expression e *)
Definition accepts (e : re) (w : list char) : Prop :=
zipper_accepts (focus e) w.

27 changes: 13 additions & 14 deletions coq/Equivalent.v
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ From stdpp Require Import gmap sets fin_sets.
Require Import Antimirov.
Require Import Brzozowski.

(** Proofs that Antimirov and Brzozowski derivatives are equivalent *)
(** Proof that Antimirov and Brzozowski derivatives are equivalent *)

(* if [isEmpty r = true], then the singleton set containing [r] is [nullable] *)
(* If isEmpty r, then the singleton set containing r is nullable *)
Lemma nullable_singleton : forall (r : re),
isEmpty r <-> nullable {[ r ]}.
Proof.
Expand All @@ -24,29 +24,28 @@ Proof.
- destruct H2. exists (Star r). split; eauto.
Qed.

(* If the Antimirov matcher says that [s ~= r^n] for some n,
then folding the Brzozowski derivative of [r^*] wrt each character in [s]
yields a regex that accepts the empty string *)
(** If the Antimirov matcher says that s matches r^n for some n,
then folding the Brzozowski derivative of r^* wrt each character in s
yields a regex that accepts the empty string *)
Lemma star_help : forall (n : nat) (r : re) (s : string),
(∀ x0 : string, a_matches r x0 ↔ b_matches r x0) ->
a_matches (Concat_n n r) s isEmpty (fold_left b_der s (Star r)).
(∀ x : string, a_matches r x <-> b_matches r x) ->
a_matches (Concat_n n r) s -> isEmpty (fold_left b_der s (Star r)).
Proof. induction n.
- X. apply nil_matches_Epsilon in H0. X.
- X. apply a_matches_Concat_destruct in H0.
destruct H0 as [s1 [s2 [H1 [H2 H3]]]].
apply IHn in H3.
rewrite H1.
apply IHn in H3. rewrite H1.
apply isEmpty_Star.
apply H in H2. unfold b_matches in H2.
apply H2. apply H3. apply H.
Qed.

(* If the Brzozowski matcher says that [s ~= r^n] for some n,
then folding the Brzozowski derivative of [r^*] wrt each character in [s]
yields a regex that accepts the empty string *)
(** If the Brzozowski matcher says that s matches r^n for some n,
then folding the Brzozowski derivative of r^* wrt each character in s
yields a regex that accepts the empty string *)
Lemma star_help' : forall (n : nat) (r : re) (s : string),
(∀ x0 : string, a_matches r x0 ↔ b_matches r x0) ->
b_matches (Concat_n n r) s a_matches (Star r) s.
(∀ x : string, a_matches r x <-> b_matches r x) ->
b_matches (Concat_n n r) s -> a_matches (Star r) s.
Proof. induction n.
- X. apply b_matches_Epsilon in H0. X.
unfold a_matches, nullable. X. destruct H0.
Expand Down
Loading

0 comments on commit 349dbf2

Please sign in to comment.