Skip to content

Commit 6495986

Browse files
authored
Document the parser concatenation algorithm (#590)
1 parent 0c14518 commit 6495986

File tree

1 file changed

+110
-5
lines changed
  • core/common/src/internal/format/parser

1 file changed

+110
-5
lines changed

core/common/src/internal/format/parser/Parser.kt

Lines changed: 110 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,39 @@ internal class ParserStructure<in Output>(
4242
}
4343

4444
/**
45-
* Concatenates a list of parser structures into a single structure, processing them in reverse order.
46-
* Simplifies the result by merging number spans and handling unconditional modifications.
45+
* Concatenates a list of (potentially non-*valid*) parser structures into a single *valid* structure.
46+
*
47+
* A *valid* parser is one where:
48+
*
49+
* 1. Consecutive number parsers on any parsing path are represented as a single
50+
* [NumberSpanParserOperation].
51+
* 2. A span of [UnconditionalModification] cannot precede a [NumberSpanParserOperation],
52+
* unless the span itself is preceded by a non-numeric non-zero-width parser.
53+
* 3. Every parser in every [ParserStructure.followedBy] either has non-empty [ParserStructure.operations]
54+
* or is exactly `ParserStructure(emptyList(), emptyList())`.
55+
*
56+
* Together, the first two rules ensure that whenever numeric values are parsed consecutively,
57+
* even with zero-width parser operations between them (at the moment, these are only
58+
* [UnconditionalModification]), they will be treated as a single number that's then
59+
* split into components.
60+
*
61+
* Rule 3 means there's no excessive structure to the parser and is also useful in the [concat] implementation.
4762
*/
4863
internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
64+
/**
65+
* Returns a *valid* parser obtained by prepending [baseOperations] followed by [numberSpan]
66+
* to [simplifiedParserStructure],
67+
* while ensuring that [unconditionalModifications] are present in the result.
68+
*
69+
* Guarantees:
70+
* - If `simplifiedParserStructure.followedBy` is empty, the resulting `followedBy` will also be empty.
71+
* - If `simplifiedParserStructure.operations` is non-empty, the resulting `operations` will also be non-empty.
72+
*
73+
* Requirements:
74+
* - [simplifiedParserStructure] must either have non-empty [ParserStructure.operations] or be the empty parser.
75+
* - [simplifiedParserStructure] is a *valid* parser.
76+
* - [baseOperations] cannot end with either an [UnconditionalModification] or a [NumberSpanParserOperation].
77+
*/
4978
fun mergeOperations(
5079
baseOperations: List<ParserOperation<T>>,
5180
numberSpan: List<NumberConsumer<T>>?,
@@ -56,6 +85,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
5685
val firstOperation = operationsToMerge.firstOrNull()
5786
val mergedOperations = buildList {
5887
addAll(baseOperations)
88+
// Currently, `this` is either empty or ends with a non-numeric non-zero-width parser.
5989
when {
6090
numberSpan == null -> {
6191
addAll(operationsToMerge)
@@ -71,18 +101,50 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
71101
addAll(operationsToMerge)
72102
}
73103
}
104+
// Currently, `this` ends with the operations from `operationsToMerge`.
105+
// If `operationsToMerge` was not empty, and its `lastOrNull()` is non-null, then
106+
// - If it's a `NumberSpanParserOperation`,
107+
// this means its `followedBy` do not start with a `NumberSpanParserOperation`,
108+
// since `simplifiedParserStructure` is *valid*.
109+
// This means it's valid to append `unconditionalModifications`.
110+
// - If it's an `UnconditionalModification`,
111+
// this means either that its `followedBy` do not start with a `NumberSpanParserOperation`,
112+
// or that some non-zero-width non-numeric parsers precede it in `operationsToMerge`.
113+
// Adding new `unconditionalModifications` to the existing span does not break correctness.
114+
// - If it's some other parser,
115+
// then `unconditionalModifications` is preceded by a non-zero-width non-numeric parser,
116+
// which is valid.
117+
//
118+
// If `operationsToMerge` was empty, then `simplifiedParserStructure` is fully empty,
119+
// so `unconditionalModifications` precedes nothing at all.
74120
addAll(unconditionalModifications)
75121
}
122+
// The first two rules of validity hold by the considerations in the `mergedOperations` block.
123+
// The third rule holds because `simplifiedParserStructure.followedBy` must be valid.
76124
return ParserStructure(mergedOperations, simplifiedParserStructure.followedBy)
77125
}
78126

79-
// Simplifies this parser and appends [other] to all execution paths.
80-
// Merges number spans, collects unconditional modifications, and flattens alternatives.
127+
/**
128+
* Returns a *valid* parser obtained by prepending *any* parser `this` to a *valid* parser [other].
129+
*/
81130
fun ParserStructure<T>.simplifyAndAppend(other: ParserStructure<T>): ParserStructure<T> {
82131
val newOperations = mutableListOf<ParserOperation<T>>()
83132
var currentNumberSpan: MutableList<NumberConsumer<T>>? = null
84133
val unconditionalModifications = mutableListOf<UnconditionalModification<T>>()
85134

135+
// Loop invariant:
136+
//
137+
// |- zero-width parsers interspersing the number span
138+
// |
139+
// unconditionalModifications
140+
// \-------------------------/
141+
// operation, ..., operation, number, number, UnconditionalModification, number, operation, operation
142+
// \_______________________/ \______________ . . . . . . . . . . . . . ______/ \_______/
143+
// newOperations currentNumberSpan op
144+
// | | |- next operation
145+
// |- operations where spans of |- the continued span of
146+
// number parsers are merged into number parsers
147+
// `NumberSpanParserOperation`
86148
for (op in operations) {
87149
when (op) {
88150
is NumberSpanParserOperation -> {
@@ -105,6 +167,10 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
105167
}
106168
}
107169

170+
// *Valid* parsers resulting from appending [other] to every parser in `this.followedBy`.
171+
//
172+
// Every parser in this list is guaranteed to be a valid `followedBy` element, that is,
173+
// either have non-empty `ParserStructure.operations` or be exactly `ParserStructure(emptyList(), emptyList())`.
108174
val mergedTails = followedBy.flatMap {
109175
val simplified = it.simplifyAndAppend(other)
110176
// Parser `ParserStructure(emptyList(), p)` is equivalent to `p`,
@@ -116,7 +182,12 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
116182
else
117183
listOf(simplified)
118184
}.ifEmpty {
185+
// We only enter this branch if [followedBy] is empty.
186+
// In that case, [mergedTails] is exactly `listOf(other)`.
187+
// We optimize this common case here as a fast-path and to reduce indirection in the resulting parser.
119188
if (other.operations.isNotEmpty()) {
189+
// Directly append `other` to the simplified `this`.
190+
// The call is valid: `other.operations` is non-empty.
120191
return mergeOperations(newOperations, currentNumberSpan, unconditionalModifications, other)
121192
}
122193
// [other] has no operations, just alternatives; use them as our tails
@@ -131,12 +202,37 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
131202
newOperations.add(NumberSpanParserOperation(currentNumberSpan))
132203
}
133204
newOperations.addAll(unconditionalModifications)
205+
// Either the merged tails do not start with a `NumberSpanParserOperation`,
206+
// or the last non-zero-width parser in `newOperations` exists and is not a number parser.
207+
//
208+
// In the first case, the resulting parser is *valid*:
209+
// `unconditionalModifications` does not precede a number parser, and in `newOperations`,
210+
// consecutive number parsers are merged into one.
211+
//
212+
// In the second case, the resulting parser is also *valid*:
213+
// `unconditionalModifications` may precede a number parser, but it also has
214+
// a non-zero-width non-number parser before it.
134215
ParserStructure(newOperations, mergedTails)
135216
} else {
136-
// Distribute number span across alternatives that start with number spans
217+
// Some `mergedTails` begin with a number parser, and also, either
218+
// the current number span isn't empty, or there are no non-zero-width non-number parsers preceding it.
137219
val newTails = mergedTails.map { structure ->
220+
// This is a valid `followedBy` element:
221+
// - If [structure] is the empty parser,
222+
// the resulting parser will have an empty `followedBy` list.
223+
// Such `followedBy` elements are always valid.
224+
// - If [structure] is a non-empty parser,
225+
// it must have a non-empty `followedBy` list
226+
// *and* non-empty `operations`.
227+
// The resulting parser will also have non-empty `operations`,
228+
// which makes it a valid `followedBy` element.
138229
mergeOperations(emptyList(), currentNumberSpan, unconditionalModifications, structure)
139230
}
231+
// [newTails] only contains *valid* parsers that are also valid `followedBy` elements.
232+
// They also start with the current number span.
233+
//
234+
// The resulting parser is *valid*, because furthermore, it is always valid for [currentNumberSpan],
235+
// with which every [newTails] starts, to follow [newOperations].
140236
ParserStructure(newOperations, newTails)
141237
}
142238
}
@@ -156,6 +252,15 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
156252
}
157253
}
158254

255+
// Loop invariant:
256+
//
257+
// this = Parser, ..., Parser, operations, operations, operations, Parser, Parser, ...
258+
// \____/ \________________________________/ \_________________/
259+
// parser accumulatedOperations.reversed() result
260+
// | | |- simplified parser
261+
// | |- span of parsers without branching
262+
// |
263+
// |- next parser to be processed
159264
for (parser in this.asReversed()) {
160265
if (parser.followedBy.isEmpty()) {
161266
accumulatedOperations.add(parser.operations)

0 commit comments

Comments
 (0)