@@ -42,10 +42,39 @@ internal class ParserStructure<in Output>(
4242}
4343
4444/* *
45- * Concatenates a list of parser structures into a single structure, processing them in reverse order.
46- * Simplifies the result by merging number spans and handling unconditional modifications.
45+ * Concatenates a list of (potentially non-*valid*) parser structures into a single *valid* structure.
46+ *
47+ * A *valid* parser is one where:
48+ *
49+ * 1. Consecutive number parsers on any parsing path are represented as a single
50+ * [NumberSpanParserOperation].
51+ * 2. A span of [UnconditionalModification] can not precede a [NumberSpanParserOperation],
52+ * unless the span itself is preceded by a non-numeric non-zero-width parser.
53+ * 3. Every parser in every [ParserStructure.followedBy] either has non-empty [ParserStructure.operations]
54+ * or is exactly `ParserStructure(emptyList(), emptyList())`.
55+ *
56+ * Together, the first two rules ensure that whenever numeric values are parsed consecutively,
57+ * even with zero-width parser operations between them (at the moment, these are only
58+ * [UnconditionalModification]), they will be treated as a single number that's then
59+ * split into components.
60+ *
61+ * Rule 3 means there's no excessive structure to the parser and is also useful in the [concat] implementation.
4762 */
4863internal fun <T > List<ParserStructure<T>>.concat (): ParserStructure <T > {
64+ /* *
65+ * Returns a *valid* parser obtained by prepending [baseOperations] followed by [numberSpan]
66+ * to [simplifiedParserStructure],
67+ * while ensuring that [unconditionalModifications] are present in the result.
68+ *
69+ * Guarantees:
70+ * - If `simplifiedParserStructure.followedBy` is empty, the resulting `followedBy` will also be empty.
71+ * - If `simplifiedParserStructure.operations` is non-empty, the resulting `operations` will also be non-empty.
72+ *
73+ * Requirements:
74+ * - [simplifiedParserStructure] must either have non-empty [ParserStructure.operations] or be the empty parser.
75+ * - [simplifiedParserStructure] is a *valid* parser.
76+ * - [baseOperations] can not end with either an [UnconditionalModification] or a [NumberSpanParserOperation].
77+ */
4978 fun mergeOperations (
5079 baseOperations : List <ParserOperation <T >>,
5180 numberSpan : List <NumberConsumer <T >>? ,
@@ -56,6 +85,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
5685 val firstOperation = operationsToMerge.firstOrNull()
5786 val mergedOperations = buildList {
5887 addAll(baseOperations)
88+ // Currently, `this` is either empty or ends with a non-numeric non-zero-width parser.
5989 when {
6090 numberSpan == null -> {
6191 addAll(operationsToMerge)
@@ -71,18 +101,50 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
71101 addAll(operationsToMerge)
72102 }
73103 }
104+ // Currently, `this` ends with the operations from `operationsToMerge`.
105+ // If `operationsToMerge` was not empty (so its `lastOrNull()` is non-null), then
106+ // - If it's a `NumberSpanParserOperation`,
107+ // this means its `followedBy` do not start with a `NumberSpanParserOperation`,
108+ // since `simplifiedParserStructure` is *valid*.
109+ // This means it's valid to append `unconditionalModifications`.
110+ // - If it's an `UnconditionalModification`,
111+ // this means either that its `followedBy` do not start with a `NumberSpanParserOperation`,
112+ // or that some non-zero-width non-numeric parsers precede it in `operationsToMerge`.
113+ // Adding new `unconditionalModifications` to the existing span does not break correctness.
114+ // - If it's some other parser,
115+ // then `unconditionalModifications` is preceded by a non-zero-width non-numeric parser,
116+ // which is valid.
117+ //
118+ // If `operationsToMerge` was empty, then `simplifiedParserStructure` is fully empty,
119+ // so `unconditionalModifications` precedes nothing at all.
74120 addAll(unconditionalModifications)
75121 }
122+ // The first two rules of validity hold by the considerations in the `mergedOperations` block.
123+ // The third rule holds because `simplifiedParserStructure.followedBy` must be valid.
76124 return ParserStructure (mergedOperations, simplifiedParserStructure.followedBy)
77125 }
78126
79- // Simplifies this parser and appends [other] to all execution paths.
80- // Merges number spans, collects unconditional modifications, and flattens alternatives.
127+ /* *
128+ * Returns a *valid* parser obtained by prepending *any* parser `this` to a *valid* parser [other].
129+ */
81130 fun ParserStructure<T>.simplifyAndAppend (other : ParserStructure <T >): ParserStructure <T > {
82131 val newOperations = mutableListOf<ParserOperation <T >>()
83132 var currentNumberSpan: MutableList <NumberConsumer <T >>? = null
84133 val unconditionalModifications = mutableListOf<UnconditionalModification <T >>()
85134
135+ // Loop invariant:
136+ //
137+ // |- zero-width parsers interspersing the number span
138+ // |
139+ // unconditionalModifications
140+ // \-------------------------/
141+ // operation, ..., operation, number, number, UnconditionalModification, number, operation, operation
142+ // \_______________________/ \______________ . . . . . . . . . . . . . ______/ \_______/
143+ // newOperations currentNumberSpan op
144+ // | | |- next operation
145+ // |- operations where spans of |- the continued span of
146+ // number parsers are merged into number parsers
147+ // `NumberSpanParserOperation`
86148 for (op in operations) {
87149 when (op) {
88150 is NumberSpanParserOperation -> {
@@ -105,6 +167,10 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
105167 }
106168 }
107169
170+ // *Valid* parsers resulting from appending [other] to every parser in `this.followedBy`.
171+ //
172+ // Every parser in this list is guaranteed to be a valid `followedBy` element, that is,
173+ // either have non-empty `ParserStructure.operations` or be exactly `ParserStructure(emptyList(), emptyList())`.
108174 val mergedTails = followedBy.flatMap {
109175 val simplified = it.simplifyAndAppend(other)
110176 // Parser `ParserStructure(emptyList(), p)` is equivalent to `p`,
@@ -116,7 +182,12 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
116182 else
117183 listOf (simplified)
118184 }.ifEmpty {
185+ // We only enter this branch if [followedBy] is empty.
186+ // In that case, [mergedTails] is exactly `listOf(other)`.
187+ // We optimize this common case here as a fast-path and to reduce indirection in the resulting parser.
119188 if (other.operations.isNotEmpty()) {
189+ // Directly append `other` to the simplified `this`.
190+ // The call is valid: `other.operations` is non-empty
120191 return mergeOperations(newOperations, currentNumberSpan, unconditionalModifications, other)
121192 }
122193 // [other] has no operations, just alternatives; use them as our tails
@@ -131,12 +202,37 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
131202 newOperations.add(NumberSpanParserOperation (currentNumberSpan))
132203 }
133204 newOperations.addAll(unconditionalModifications)
205+ // Either the merged tails do not start with a `NumberSpanParserOperation`,
206+ // or the last non-zero-width parser in `newOperations` exists and is not a number parser.
207+ //
208+ // In the first case, the resulting parser is *valid*:
209+ // `unconditionalModifications` does not precede a number parser, and in `newOperations`,
210+ // consecutive number parsers are merged into one.
211+ //
212+ // In the second case, the resulting parser is also *valid*:
213+ // `unconditionalModifications` may precede a number parser, but it also has
214+ // a non-zero-width non-number parser before it.
134215 ParserStructure (newOperations, mergedTails)
135216 } else {
136- // Distribute number span across alternatives that start with number spans
217+ // Some `mergedTails` begin with a number parser, and also, either
218+ // the current number span isn't empty, or there are no non-zero-width non-number parsers preceding it.
137219 val newTails = mergedTails.map { structure ->
220+ // This is a valid `followedBy` element:
221+ // - If [structure] is the empty parser,
222+ // the resulting parser will have an empty `followedBy` list.
223+ // Such `followedBy` elements are always valid.
224+ // - If [structure] is a non-empty parser,
225+ // it must have a non-empty `followedBy` list
226+ // *and* non-empty `operations`.
227+ // The resulting parser will also have non-empty `operations`,
228+ // which makes it a valid `followedBy` element.
138229 mergeOperations(emptyList(), currentNumberSpan, unconditionalModifications, structure)
139230 }
231+ // [newTails] only contains *valid* parsers that are also valid `followedBy` elements.
232+ // They also start with the current number span.
233+ //
234+ // The resulting parser is *valid* because, in addition, it is always valid for [currentNumberSpan],
235+ // with which every element of [newTails] starts, to follow [newOperations].
140236 ParserStructure (newOperations, newTails)
141237 }
142238 }
@@ -156,6 +252,15 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
156252 }
157253 }
158254
255+ // Loop invariant:
256+ //
257+ // this = Parser, ..., Parser, operations, operations, operations, Parser, Parser, ...
258+ // \____/ \________________________________/ \_________________/
259+ // parser accumulatedOperations.reversed() result
260+ // | | |- simplified parser
261+ // | |- span of parsers without branching
262+ // |
263+ // |- next parser to be processed
159264 for (parser in this .asReversed()) {
160265 if (parser.followedBy.isEmpty()) {
161266 accumulatedOperations.add(parser.operations)
0 commit comments