@@ -156,6 +156,7 @@ void parser_set_default_options(parser_t *self) {
156156 self -> thousands = '\0' ;
157157
158158 self -> skipset = NULL ;
159+ self -> skip_first_N_rows = -1 ;
159160 self -> skip_footer = 0 ;
160161}
161162
@@ -444,21 +445,17 @@ static int end_line(parser_t *self) {
444445 }
445446 }
446447
447- if (self -> skipset != NULL ) {
448- k = kh_get_int64 ((kh_int64_t * ) self -> skipset , self -> file_lines );
449-
450- if (k != ((kh_int64_t * )self -> skipset )-> n_buckets ) {
451- TRACE (("Skipping row %d\n" , self -> file_lines ));
452- // increment file line count
453- self -> file_lines ++ ;
454-
455- // skip the tokens from this bad line
456- self -> line_start [self -> lines ] += fields ;
448+ if (self -> state == SKIP_LINE ) {
449+ TRACE (("Skipping row %d\n" , self -> file_lines ));
450+ // increment file line count
451+ self -> file_lines ++ ;
452+
453+ // skip the tokens from this bad line
454+ self -> line_start [self -> lines ] += fields ;
457455
458- // reset field count
459- self -> line_fields [self -> lines ] = 0 ;
460- return 0 ;
461- }
456+ // reset field count
457+ self -> line_fields [self -> lines ] = 0 ;
458+ return 0 ;
462459 }
463460
464461 /* printf("Line: %d, Fields: %d, Ex-fields: %d\n", self->lines, fields, ex_fields); */
@@ -556,6 +553,15 @@ int parser_add_skiprow(parser_t *self, int64_t row) {
556553 return 0 ;
557554}
558555
556+ int parser_set_skipfirstnrows (parser_t * self , int64_t nrows ) {
557+ // self->file_lines is zero based so subtract 1 from nrows
558+ if (nrows > 0 ) {
559+ self -> skip_first_N_rows = nrows - 1 ;
560+ }
561+
562+ return 0 ;
563+ }
564+
559565static int parser_buffer_bytes (parser_t * self , size_t nbytes ) {
560566 int status ;
561567 size_t bytes_read ;
@@ -656,6 +662,15 @@ typedef int (*parser_op)(parser_t *self, size_t line_limit);
656662 TRACE(("datapos: %d, datalen: %d\n", self->datapos, self->datalen));
657663
658664
665+ int skip_this_line (parser_t * self , int64_t rownum ) {
666+ if (self -> skipset != NULL ) {
667+ return ( kh_get_int64 ((kh_int64_t * ) self -> skipset , self -> file_lines ) !=
668+ ((kh_int64_t * )self -> skipset )-> n_buckets );
669+ }
670+ else {
671+ return ( rownum <= self -> skip_first_N_rows );
672+ }
673+ }
659674
660675int tokenize_delimited (parser_t * self , size_t line_limit )
661676{
@@ -688,10 +703,25 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
688703
689704 switch (self -> state ) {
690705
706+ case SKIP_LINE :
707+ // TRACE(("tokenize_delimited SKIP_LINE %c, state %d\n", c, self->state));
708+ if (c == '\n' ) {
709+ END_LINE ();
710+ }
711+ break ;
712+
691713 case START_RECORD :
692714 // start of record
693-
694- if (c == '\n' ) {
715+ if (skip_this_line (self , self -> file_lines )) {
716+ if (c == '\n' ) {
717+ END_LINE ()
718+ }
719+ else {
720+ self -> state = SKIP_LINE ;
721+ }
722+ break ;
723+ }
724+ else if (c == '\n' ) {
695725 // \n\r possible?
696726 if (self -> skip_empty_lines )
697727 {
@@ -1006,9 +1036,26 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit)
10061036 self -> state ));
10071037
10081038 switch (self -> state ) {
1039+
1040+ case SKIP_LINE :
1041+ // TRACE(("tokenize_delim_customterm SKIP_LINE %c, state %d\n", c, self->state));
1042+ if (c == self -> lineterminator ) {
1043+ END_LINE ();
1044+ }
1045+ break ;
1046+
10091047 case START_RECORD :
10101048 // start of record
1011- if (c == self -> lineterminator ) {
1049+ if (skip_this_line (self , self -> file_lines )) {
1050+ if (c == self -> lineterminator ) {
1051+ END_LINE ()
1052+ }
1053+ else {
1054+ self -> state = SKIP_LINE ;
1055+ }
1056+ break ;
1057+ }
1058+ else if (c == self -> lineterminator ) {
10121059 // \n\r possible?
10131060 if (self -> skip_empty_lines )
10141061 {
@@ -1252,6 +1299,14 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
12521299 self -> state ));
12531300
12541301 switch (self -> state ) {
1302+
1303+ case SKIP_LINE :
1304+ // TRACE(("tokenize_whitespace SKIP_LINE %c, state %d\n", c, self->state));
1305+ if (c == '\n' ) {
1306+ END_LINE ();
1307+ }
1308+ break ;
1309+
12551310 case WHITESPACE_LINE :
12561311 if (c == '\n' ) {
12571312 self -> file_lines ++ ;
@@ -1283,9 +1338,17 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
12831338
12841339 case START_RECORD :
12851340 // start of record
1286- if (c == '\n' ) {
1287- // \n\r possible?
1341+ if (skip_this_line (self , self -> file_lines )) {
1342+ if (c == '\n' ) {
1343+ END_LINE ()
1344+ }
1345+ else {
1346+ self -> state = SKIP_LINE ;
1347+ }
1348+ break ;
1349+ } else if (c == '\n' ) {
12881350 if (self -> skip_empty_lines )
1351+ // \n\r possible?
12891352 {
12901353 self -> file_lines ++ ;
12911354 }
0 commit comments