8
8
#include "lexer.h"
9
9
#include "sblist.h"
10
10
#include "hsearch.h"
11
+ #include "tokens.h"
11
12
12
13
extern int yyerror (const char * );
13
14
@@ -29,102 +30,6 @@ static char* replace(const char*s, const char* needle, const char* repl) {
29
30
return repl_buf ;
30
31
}
31
32
32
- struct list_item {
33
- enum lex_context type ;
34
- size_t so , eo ;
35
- };
36
-
37
- static sblist * lex_to_list () {
38
- int c ;
39
- size_t pos ;
40
- struct list_item li ;
41
- sblist * ret = sblist_new (sizeof li , 32 );
42
- while ((c = yylex ()) != EOF ) {
43
- enum lex_context ctx = lex_getcontext ();
44
- pos = lex_getpos ()- 1 ;
45
-
46
- switch (ctx ) {
47
- case CTX_DUP :
48
- do { c = yylex (); } while (lex_getcontext () == CTX_DUP );
49
- assert (c == '}' );
50
- li .type = CTX_DUP ;
51
- li .so = pos ;
52
- li .eo = lex_getpos ();
53
- sblist_add (ret , & li );
54
- break ;
55
- case CTX_BRACKET :
56
- do { c = yylex (); } while (lex_getcontext () == CTX_BRACKET );
57
- assert (c == ']' );
58
- li .type = CTX_BRACKET ;
59
- li .so = pos ;
60
- li .eo = lex_getpos ();
61
- sblist_add (ret , & li );
62
- break ;
63
- default :
64
- li .type = ctx ;
65
- if (c == QUOTED_CHAR ) {
66
- li .so = pos - 1 ;
67
- li .eo = pos + 1 ;
68
- } else {
69
- li .so = pos ;
70
- li .eo = pos + 1 ;
71
- }
72
- sblist_add (ret , & li );
73
- break ;
74
- }
75
- }
76
- return ret ;
77
- }
78
-
79
- static void list_transform_dupchars (sblist * tokens , const char * org_regex ) {
80
- size_t i ;
81
- for (i = 0 ; i < sblist_getsize (tokens ); i ++ ) {
82
- struct list_item * li = sblist_get (tokens , i );
83
- if (li -> type == CTX_NONE ) switch (org_regex [li -> so ]) {
84
- case '?' : case '*' : case '+' :
85
- li -> type = CTX_DUP ;
86
- break ;
87
- }
88
- }
89
- }
90
-
91
- static sblist * list_join_literals (sblist * tokens , const char * org_regex ) {
92
- sblist * new = sblist_new (sizeof (struct list_item ), sblist_getsize (tokens ));
93
- size_t i ,j ;
94
- for (i = 0 ; i < sblist_getsize (tokens ); i ++ ) {
95
- size_t pcnt = 0 ;
96
- for (j = i ; j < sblist_getsize (tokens ); ++ j ) {
97
- struct list_item * li = sblist_get (tokens , j );
98
- if (li -> type != CTX_NONE ) break ;
99
- switch (org_regex [li -> so ]) {
100
- case '"' :
101
- case '^' :
102
- case '.' :
103
- case '[' :
104
- case '$' :
105
- case '(' :
106
- case ')' :
107
- case '|' :
108
- case '{' :
109
- goto break_loop ;
110
- default :
111
- pcnt += li -> eo - li -> so ;
112
- }
113
- continue ;
114
- break_loop :; break ;
115
- }
116
- struct list_item ins = * ((struct list_item * )sblist_get (tokens , i ));
117
- if (j > i ) {
118
- ins .type = 0xff ;
119
- ins .eo = ins .so + pcnt ;
120
- i = j - 1 ;
121
- }
122
- sblist_add (new , & ins );
123
- }
124
- sblist_free (tokens );
125
- return new ;
126
- }
127
-
128
33
static void print_token (struct list_item * li , const char * org_regex ) {
129
34
if (li -> type == 0xff ) {
130
35
fprintf (yyout , " \"%.*s\" " , (int ) (li -> eo - li -> so ), org_regex + li -> so );
@@ -174,13 +79,11 @@ static inline void* sblist_pop(sblist *l) {
174
79
return 0 ;
175
80
}
176
81
177
- static void dump_ragel_parser (const char * machinename , const char * org_regex , int * maxgroups ) {
82
+ static void dump_ragel_parser (const char * machinename , const char * org_regex , const char * org_regex_end , int * maxgroups ) {
178
83
FILE * f = fopen ("ragel.tmpl" , "r" );
179
84
char buf [4096 ];
180
85
int groups , cgroup = 0 ;
181
- sblist * tokens = lex_to_list ();
182
- list_transform_dupchars (tokens , org_regex );
183
- tokens = list_join_literals (tokens , org_regex );
86
+ sblist * tokens = lex_and_transform (org_regex , org_regex_end );
184
87
groups = count_groups (tokens , org_regex );
185
88
if (groups > * maxgroups ) * maxgroups = groups ;
186
89
sblist * group_order = sblist_new (sizeof (int ), groups ) ;
@@ -292,8 +195,7 @@ int main(int argc, char**argv) {
292
195
if (yyparse () == 0 ) {
293
196
htab_insert (remap , strdup (p ), HTV_P (strdup (buf )));
294
197
/* syntax check OK */
295
- lex_init (p , pe , LEXFLAG_SILENT );
296
- dump_ragel_parser (buf , p , & maxgroups );
198
+ dump_ragel_parser (buf , p , pe , & maxgroups );
297
199
} else {
298
200
++ err ;
299
201
size_t errpos = lex_errpos ();
0 commit comments