@@ -281,45 +281,120 @@ final _nonSpaceMatch = RegExp(r'[^ \t]');
/// Skips empty lines and returns the offset of the last possible line ending,
/// but only if the [offset] is a valid offset within the [yaml] string that
/// points to the first line ending.
///
/// Returns `yaml.length` if [yaml] is empty or [offset] is `-1`. If the
/// character at [offset] (clamped to the last index of [yaml]) is not a line
/// break, the clamped offset is returned unchanged and nothing is scanned.
///
/// The [blockIndent] is used to truncate any comments more indented than the
/// parent collection that may affect other block entries within the collection
/// that may have block scalars.
int indexOfLastLineEnding(
  String yaml, {
  required int offset,
  required int blockIndent,
}) {
  if (yaml.isEmpty || offset == -1) return yaml.length;

  final lastOffset = yaml.length - 1;
  var currentOffset = min(offset, lastOffset);

  // Unsafe. Cannot start our scanner state machine in an unguarded state.
  if (yaml[currentOffset] != '\r' && yaml[currentOffset] != '\n') {
    return currentOffset;
  }

  // Built once; both patterns are reused on every pass through the loop.
  final nonIndentChar = RegExp('[^ ]');
  final lineBreak = RegExp(r'[\r\n]');

  var lineEndingIndex = currentOffset;

  // Skip empty lines and any comments indented more than the block entry. Such
  // comments are hazardous to block scalars.
  scanner:
  while (currentOffset <= lastOffset) {
    switch (yaml[currentOffset]) {
      case '\r':
        {
          // Skip carriage return if possible. No use to us if we have a line
          // feed after.
          if (currentOffset < lastOffset && yaml[currentOffset + 1] == '\n') {
            ++currentOffset;
          }

          continue indentChecker;
        }

      indentChecker:
      case '\n':
        {
          lineEndingIndex = currentOffset;
          ++currentOffset;

          // Only stop once we are past the end of the string. When exactly
          // one character remains it must still be inspected, otherwise a
          // trailing non-empty line would be reported as a line ending.
          if (currentOffset > lastOffset) {
            lineEndingIndex = lastOffset;
            break scanner;
          }

          final offsetAfterIndent = yaml.indexOf(nonIndentChar, currentOffset);

          // No more characters!
          if (offsetAfterIndent == -1) {
            lineEndingIndex = lastOffset;
            break scanner;
          }

          // Only spaces count towards the indent; a tab ends it.
          final indent = offsetAfterIndent - currentOffset;
          currentOffset = offsetAfterIndent;
          final charAfterIndent = yaml[currentOffset];

          if (charAfterIndent case '\r' || '\n') {
            // Line had nothing but spaces. Treat it as empty and keep going.
            continue scanner;
          } else if (indent > blockIndent) {
            // If more indented than the entry, always attempt to truncate the
            // comment or skip it as an empty line.
            if (charAfterIndent == '\t') {
              continue skipIfEmpty;
            } else if (charAfterIndent == '#') {
              continue truncateComment;
            }
          }

          // Real content (or a comment/tab at or below the block indent). The
          // last recorded line ending is the answer.
          break scanner;
        }

      // Guarded by indentChecker. Force tabs to be associated with empty lines
      // if seen past the indent.
      skipIfEmpty:
      case '\t':
        {
          // First character that is neither a space nor a tab.
          final nonSpace = yaml.indexOf(_nonSpaceMatch, currentOffset);

          if (nonSpace == -1) {
            // Only whitespace until the end of the string.
            lineEndingIndex = lastOffset;
          } else if (yaml[nonSpace] case '\r' || '\n') {
            currentOffset = nonSpace;
            continue scanner;
          }

          break scanner;
        }

      // Guarded by indentChecker. This ensures we only skip comments indented
      // more than the entry itself.
      truncateComment:
      case '#':
        {
          // A comment ends at the first line break, including a lone carriage
          // return, so that it never swallows the line that follows it.
          final lineBreakOffset = yaml.indexOf(lineBreak, currentOffset);

          if (lineBreakOffset == -1) {
            lineEndingIndex = lastOffset;
            break scanner;
          }

          // Re-enter through the scanner so that "\r\n" is collapsed by the
          // carriage return case before the indent of the next line is checked.
          currentOffset = lineBreakOffset;
          continue scanner;
        }

      default:
        break scanner;
    }
  }

  return lineEndingIndex;
}
324399
325400/// Backtracks from the [start] offset and looks for the nearest character
0 commit comments