@@ -42,21 +42,30 @@ internal class ParserStructure<in Output>(
4242}
4343
4444/* *
45- * Concatenates a list of parser structures into a single *valid* structure.
45+ * Concatenates a list of (potentially non-*valid*) parser structures into a single *valid* structure.
4646 *
47- * A *valid* parser is one where, if numeric values are parsed consecutively without a separator
48- * (or with zero-width [UnconditionalModification] separators) between them,
49- * they are represented as a single [NumberSpanParserOperation].
47+ * A *valid* parser is one where:
48+ *
49+ * - Consecutive number parsers on any parsing path are represented as a single
50+ * [NumberSpanParserOperation].
51+ * - A span of [UnconditionalModification] can not precede a [NumberSpanParserOperation],
52+ * unless the span itself is preceded by a non-numeric non-zero-width parser.
53+ *
54+ * Together, these two rules ensure that whenever numeric values are parsed consecutively,
55+ * even with zero-width parser operations between them (at the moment, these are only
56+ * [UnconditionalModification]), they will be treated as a single number that's then
57+ * split into components.
5058 */
5159internal fun <T > List<ParserStructure<T>>.concat (): ParserStructure <T > {
5260 /* *
5361 * Returns a *valid* parser obtained by prepending [baseOperations] followed by [numberSpan]
5462 * to [simplifiedParserStructure],
55- * while ensuring that [unconditionalModifications] are preserved in the result.
63+ * while ensuring that [unconditionalModifications] are present in the result.
5664 *
5765 * Requirements:
5866 * - [simplifiedParserStructure] must have non-empty [ParserStructure.operations].
5967 * - [simplifiedParserStructure] is a *valid* parser.
68+ * - [baseOperations] can not end with either an [UnconditionalModification] or a [NumberSpanParserOperation].
6069 */
6170 fun mergeOperations (
6271 baseOperations : List <ParserOperation <T >>,
@@ -68,6 +77,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
6877 val firstOperation = operationsToMerge.firstOrNull()
6978 val mergedOperations = buildList {
7079 addAll(baseOperations)
80+ // Currently, `this` is either empty or ends with a non-numeric non-zero-width parser.
7181 when {
7282 numberSpan == null -> {
7383 addAll(operationsToMerge)
@@ -83,6 +93,19 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
8393 addAll(operationsToMerge)
8494 }
8595 }
96+ // Currently, `this` ends with the operations from `operationsToMerge`.
97+ // `operationsToMerge` was not empty, by the input requirements, so its `lastOrNull()` is non-null.
98+ // - If it's a `NumberSpanParserOperation`,
99+ // this means its `followedBy` do not start with a `NumberSpanParserOperation`,
100+ // since `simplifiedParserStructure` is *valid*.
101+ // This means it's valid to append `unconditionalModifications`.
102+ // - If it's an `UnconditionalModification`,
103+ // this means either that its `followedBy` do not start with a `NumberSpanParserOperation`,
104+ // or that some non-zero-width non-numeric parsers precede it in `operationsToMerge`.
105+ // Adding new `unconditionalModifications` to the existing span does not break correctness.
106+ // - If it's some other parser,
107+ // then `unconditionalModifications` is preceded by a non-zero-width non-numeric parser,
108+ // which is valid.
86109 addAll(unconditionalModifications)
87110 }
88111 return ParserStructure (mergedOperations, simplifiedParserStructure.followedBy)
@@ -163,9 +186,20 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
163186 newOperations.add(NumberSpanParserOperation (currentNumberSpan))
164187 }
165188 newOperations.addAll(unconditionalModifications)
189+ // Either the merged tails do not start with a `NumberSpanParserOperation`,
190+ // or the last non-zero-width parser in `newOperations` exists and is not a number parser.
191+ //
192+ // In the first case, the resulting parser is *valid*:
193+ // `unconditionalModifications` does not precede a number parser, and in `newOperations`,
194+ // consecutive number parsers are merged into one.
195+ //
196+ // In the second case, the resulting parser is also *valid*:
197+ // `unconditionalModifications` may precede a number parser, but it also has
198+ // a non-zero-width non-number parser before it.
166199 ParserStructure (newOperations, mergedTails)
167200 } else {
168- // Distribute number span across alternatives that start with number spans
201+ // Some `mergedTails` begin with a number parser, and also, either
202+ // the current number span isn't empty, or there are no non-zero-width non-number parsers preceding it.
169203 val newTails = mergedTails.map { structure ->
170204 mergeOperations(emptyList(), currentNumberSpan, unconditionalModifications, structure)
171205 }
0 commit comments