@@ -22,13 +22,15 @@ struct Cli {
2222 show_diff : bool ,
2323}
2424
25- static REGEX_IGNORE : LazyLock < Regex > =
26- LazyLock :: new ( || Regex :: new ( r"^\s*(\d\.|\-|\*)\s+" ) . unwrap ( ) ) ;
27- static REGEX_IGNORE_END : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"(\.|\?|;|!)$" ) . unwrap ( ) ) ;
25+ static REGEX_IGNORE_END : LazyLock < Regex > =
26+ LazyLock :: new ( || Regex :: new ( r"(\.|\?|;|!|,|\-)$" ) . unwrap ( ) ) ;
2827static REGEX_IGNORE_LINK_TARGETS : LazyLock < Regex > =
2928 LazyLock :: new ( || Regex :: new ( r"^\[.+\]: " ) . unwrap ( ) ) ;
3029static REGEX_SPLIT : LazyLock < Regex > =
31- LazyLock :: new ( || Regex :: new ( r"([^\.]\.|[^r]\?|;|!)\s+" ) . unwrap ( ) ) ;
30+ LazyLock :: new ( || Regex :: new ( r"([^\.\d\-\*]\.|[^r]\?|;|!)\s" ) . unwrap ( ) ) ;
31+ // Markdown list entry prefix: optional indentation, then a numbered marker (one or more digits and a dot, e.g. `1.` or `10.`) or a bullet (`-` or `*`), then the whitespace that follows it
32+ static REGEX_LIST_ENTRY : LazyLock < Regex > =
33+ LazyLock :: new ( || Regex :: new ( r"^\s*(\d+\.|\-|\*)\s+" ) . unwrap ( ) ) ;
3234
3335fn main ( ) -> Result < ( ) > {
3436 let cli = Cli :: parse ( ) ;
@@ -99,7 +101,6 @@ fn ignore(line: &str, in_code_block: bool) -> bool {
99101 || line. trim_start ( ) . starts_with ( '>' )
100102 || line. starts_with ( '#' )
101103 || line. trim ( ) . is_empty ( )
102- || REGEX_IGNORE . is_match ( line)
103104 || REGEX_IGNORE_LINK_TARGETS . is_match ( line)
104105}
105106
@@ -120,11 +121,19 @@ fn comply(content: &str) -> String {
120121 continue ;
121122 }
122123 if REGEX_SPLIT . is_match ( & line) {
123- let indent = line. find ( |ch : char | !ch. is_whitespace ( ) ) . unwrap ( ) ;
124- let new_lines: Vec < _ > = line
125- . split_inclusive ( & * REGEX_SPLIT )
126- . map ( |portion| format ! ( "{:indent$}{}" , "" , portion. trim( ) ) )
124+ let indent = if let Some ( regex_match) = REGEX_LIST_ENTRY . find ( & line) {
125+ regex_match. len ( )
126+ } else {
127+ line. find ( |ch : char | !ch. is_whitespace ( ) ) . unwrap ( )
128+ } ;
129+ let mut newly_split_lines = line. split_inclusive ( & * REGEX_SPLIT ) ;
130+ let first = newly_split_lines. next ( ) . unwrap ( ) . trim_end ( ) . to_owned ( ) ;
131+ let mut remaining: Vec < _ > = newly_split_lines
132+ . map ( |portion| format ! ( "{:indent$}{}" , "" , portion. trim_end( ) ) )
127133 . collect ( ) ;
134+ let mut new_lines = Vec :: new ( ) ;
135+ new_lines. push ( first) ;
136+ new_lines. append ( & mut remaining) ;
128137 new_content. splice ( new_n..=new_n, new_lines. clone ( ) ) ;
129138 new_n += new_lines. len ( ) - 1 ;
130139 }
@@ -168,7 +177,10 @@ fn lengthen_lines(content: &str, limit: usize) -> String {
168177 let Some ( next_line) = content. get ( n + 1 ) else {
169178 continue ;
170179 } ;
171- if ignore ( next_line, in_code_block) || REGEX_IGNORE_END . is_match ( line) {
180+ if ignore ( next_line, in_code_block)
181+ || REGEX_LIST_ENTRY . is_match ( next_line)
182+ || REGEX_IGNORE_END . is_match ( line)
183+ {
172184 continue ;
173185 }
174186 if line. len ( ) + next_line. len ( ) < limit {
@@ -182,42 +194,47 @@ fn lengthen_lines(content: &str, limit: usize) -> String {
182194
183195#[ test]
184196fn test_sembr ( ) {
185- let original = "\
197+ let original = "
186198# some. heading
187- must! be; split? and. normalizes space
188- 1. ignore numbered
199+ must! be; split?
200+ 1. ignore a dot after number. but no further
189201ignore | tables
190202ignore e.g. and
191203ignore i.e. and
192204ignore E.g. too
193- - ignore. list
194- * ignore. list
205+ - list. entry
206+ * list. entry
195207```
196208some code. block
197209```
198210sentence with *italics* should not be ignored. truly.
199211git log main.. compiler
212+ foo. bar. baz
200213" ;
201- let expected = "\
214+ let expected = "
202215# some. heading
203216must!
204217be;
205218split?
206- and.
207- normalizes space
208- 1. ignore numbered
219+ 1. ignore a dot after number.
220+ but no further
209221ignore | tables
210222ignore e.g. and
211223ignore i.e. and
212224ignore E.g. too
213- - ignore. list
214- * ignore. list
225+ - list.
226+ entry
227+ * list.
228+ entry
215229```
216230some code. block
217231```
218232sentence with *italics* should not be ignored.
219233truly.
220234git log main.. compiler
235+ foo.
236+ bar.
237+ baz
221238" ;
222239 assert_eq ! ( expected, comply( original) ) ;
223240}
@@ -230,12 +247,28 @@ short sentences
230247<div class='warning'>
231248a bit of text inside
232249</div>
250+ preserve next line
251+ 1. one
252+
253+ preserve next line
254+ - two
255+
256+ preserve next line
257+ * three
233258" ;
234259 let expected = "\
235260 do not split short sentences
236261<div class='warning'>
237262a bit of text inside
238263</div>
264+ preserve next line
265+ 1. one
266+
267+ preserve next line
268+ - two
269+
270+ preserve next line
271+ * three
239272" ;
240273 assert_eq ! ( expected, lengthen_lines( original, 50 ) ) ;
241274}
@@ -263,13 +296,13 @@ fn test_prettify_ignore_link_targets() {
263296
264297#[ test]
265298fn test_sembr_then_prettify ( ) {
266- let original = "\
299+ let original = "
267300hi there. do
268301not split
269302short sentences.
270303hi again.
271304" ;
272- let expected = "\
305+ let expected = "
273306hi there.
274307do
275308not split
@@ -278,15 +311,15 @@ hi again.
278311" ;
279312 let processed = comply ( original) ;
280313 assert_eq ! ( expected, processed) ;
281- let expected = "\
314+ let expected = "
282315hi there.
283316do not split
284317short sentences.
285318hi again.
286319" ;
287320 let processed = lengthen_lines ( & processed, 50 ) ;
288321 assert_eq ! ( expected, processed) ;
289- let expected = "\
322+ let expected = "
290323hi there.
291324do not split short sentences.
292325hi again.
@@ -297,12 +330,12 @@ hi again.
297330
298331#[ test]
299332fn test_sembr_question_mark ( ) {
300- let original = "\
333+ let original = "
301334o? whatever
302335r? @reviewer
303336 r? @reviewer
304337" ;
305- let expected = "\
338+ let expected = "
306339o?
307340whatever
308341r? @reviewer
0 commit comments