1
1
#include <stdio.h>
2
2
#include <string.h>
3
+ #include <assert.h>
3
4
4
5
#include "y.tab.h"
5
6
#include "yydefs.h"
6
7
#include "lexer.h"
8
+ #include "sblist.h"
7
9
8
10
extern int yyerror (const char * );
9
11
@@ -12,20 +14,232 @@ FILE *yyin;
12
14
extern int yyparse ();
13
15
extern int yydebug ;
14
16
17
/*
 * Return a copy of `s` in which every occurrence of `needle` has been
 * replaced by `repl`.
 *
 * The result lives in a function-local static buffer of 4096+128 bytes:
 * it is overwritten by the next call and must not be freed. Output that
 * does not fit is truncated (the result is always NUL-terminated).
 *
 * The input is scanned once, left to right; text inserted from `repl` is
 * never rescanned, so a `repl` that itself contains `needle` cannot cause
 * an endless loop. If `needle` is empty or does not occur, the result is
 * simply a (possibly truncated) copy of `s`.
 */
static char *replace(const char *s, const char *needle, const char *repl) {
	static char repl_buf[4096 + 128];
	/* Build into a scratch buffer first so that the arguments may safely
	 * point into the static buffer returned by an earlier call. */
	char work[sizeof repl_buf];
	const size_t cap = sizeof work - 1;
	const size_t needle_len = strlen(needle);
	const size_t repl_len = strlen(repl);
	size_t out = 0;

	while (out < cap) {
		const char *hit = needle_len ? strstr(s, needle) : NULL;
		size_t chunk = hit ? (size_t)(hit - s) : strlen(s);
		size_t room = cap - out;

		/* literal text in front of the next match (or the whole tail) */
		if (chunk > room) chunk = room;
		memcpy(work + out, s, chunk);
		out += chunk;
		if (!hit || out == cap) break;

		/* the replacement itself, clipped to the remaining space */
		room = cap - out;
		chunk = repl_len < room ? repl_len : room;
		memcpy(work + out, repl, chunk);
		out += chunk;

		s = hit + needle_len;
	}

	memcpy(repl_buf, work, out);
	repl_buf[out] = '\0';
	return repl_buf;
}
29
+
30
+ struct list_item {
31
+ enum lex_context type ;
32
+ size_t so , eo ;
33
+ };
34
+
35
+ static sblist * lex_to_list () {
36
+ int c ;
37
+ size_t pos ;
38
+ struct list_item li ;
39
+ sblist * ret = sblist_new (sizeof li , 32 );
40
+ while ((c = yylex ()) != EOF ) {
41
+ enum lex_context ctx = lex_getcontext ();
42
+ pos = lex_getpos ()- 1 ;
43
+
44
+ switch (ctx ) {
45
+ case CTX_DUP :
46
+ do { c = yylex (); } while (lex_getcontext () == CTX_DUP );
47
+ assert (c == '}' );
48
+ li .type = CTX_DUP ;
49
+ li .so = pos ;
50
+ li .eo = lex_getpos ();
51
+ sblist_add (ret , & li );
52
+ break ;
53
+ case CTX_BRACKET :
54
+ do { c = yylex (); } while (lex_getcontext () == CTX_BRACKET );
55
+ assert (c == ']' );
56
+ li .type = CTX_BRACKET ;
57
+ li .so = pos ;
58
+ li .eo = lex_getpos ();
59
+ sblist_add (ret , & li );
60
+ break ;
61
+ default :
62
+ li .type = ctx ;
63
+ if (c == QUOTED_CHAR ) {
64
+ li .so = pos - 1 ;
65
+ li .eo = pos + 1 ;
66
+ } else {
67
+ li .so = pos ;
68
+ li .eo = pos + 1 ;
69
+ }
70
+ sblist_add (ret , & li );
71
+ break ;
72
+ }
73
+ }
74
+ return ret ;
75
+ }
76
+
77
+ static void list_transform_dupchars (sblist * tokens , const char * org_regex ) {
78
+ size_t i ;
79
+ for (i = 0 ; i < sblist_getsize (tokens ); i ++ ) {
80
+ struct list_item * li = sblist_get (tokens , i );
81
+ if (li -> type == CTX_NONE ) switch (org_regex [li -> so ]) {
82
+ case '?' : case '*' : case '+' :
83
+ li -> type = CTX_DUP ;
84
+ break ;
85
+ }
86
+ }
87
+ }
88
+
89
+ static sblist * list_join_literals (sblist * tokens , const char * org_regex ) {
90
+ sblist * new = sblist_new (sizeof (struct list_item ), sblist_getsize (tokens ));
91
+ size_t i ,j ;
92
+ for (i = 0 ; i < sblist_getsize (tokens ); i ++ ) {
93
+ size_t pcnt = 0 ;
94
+ for (j = i ; j < sblist_getsize (tokens ); ++ j ) {
95
+ struct list_item * li = sblist_get (tokens , j );
96
+ if (li -> type != CTX_NONE ) break ;
97
+ switch (org_regex [li -> so ]) {
98
+ case '^' :
99
+ case '.' :
100
+ case '[' :
101
+ case '$' :
102
+ case '(' :
103
+ case ')' :
104
+ case '|' :
105
+ case '{' :
106
+ goto break_loop ;
107
+ default :
108
+ pcnt += li -> eo - li -> so ;
109
+ }
110
+ continue ;
111
+ break_loop :; break ;
112
+ }
113
+ struct list_item ins = * ((struct list_item * )sblist_get (tokens , i ));
114
+ if (j > i ) {
115
+ ins .type = 0xff ;
116
+ ins .eo = ins .so + pcnt ;
117
+ i = j - 1 ;
118
+ }
119
+ sblist_add (new , & ins );
120
+ }
121
+ sblist_free (tokens );
122
+ return new ;
123
+ }
124
+
125
+ static void print_token (struct list_item * li , const char * org_regex ) {
126
+ if (li -> type == 0xff ) {
127
+ printf (" \"%.*s\" " , (int ) (li -> eo - li -> so ), org_regex + li -> so );
128
+ return ;
129
+ } else if (li -> type == CTX_BRACKET ) {
130
+ /* ragel doesn't like leading/trailing dash in bracket expression */
131
+ if (org_regex [li -> so + 1 ] == '-' ) {
132
+ printf ("('-'|[%.*s)" , (int ) (li -> eo - li -> so - 2 ), org_regex + li -> so + 2 );
133
+ return ;
134
+ } else if (org_regex [li -> eo - 2 ] == '-' ) {
135
+ printf ("('-'|%.*s])" , (int ) (li -> eo - li -> so - 2 ), org_regex + li -> so );
136
+ return ;
137
+ }
138
+ }
139
+ printf ("%.*s" , (int ) (li -> eo - li -> so ), org_regex + li -> so );
140
+ }
141
+
142
+ static int count_groups (sblist * tokens , const char * org_regex ) {
143
+ size_t i ;
144
+ int count = 0 ;
145
+ for (i = 0 ; i < sblist_getsize (tokens ); ++ i ) {
146
+ struct list_item * li = sblist_get (tokens , i );
147
+ if (li -> type == CTX_NONE && org_regex [li -> so ] == '(' ) ++ count ;
148
+ }
149
+ return count ;
150
+ }
151
+
152
/*
 * Print the template line `buf` once per group, with every "%GROUPNR%"
 * placeholder replaced by the group index (0 .. groups-1).
 */
static void expand_groups(char *buf, int groups) {
	char number[16];
	int group = 0;

	while (group < groups) {
		snprintf(number, sizeof number, "%d", group);
		fputs(replace(buf, "%GROUPNR%", number), stdout);
		++group;
	}
}
160
+
161
+ static inline void * sblist_pop (sblist * l ) {
162
+ size_t len = sblist_getsize (l );
163
+ if (len > 0 ) {
164
+ void * x = sblist_get (l , len - 1 );
165
+ sblist_delete (l , len - 1 );
166
+ return x ;
167
+ }
168
+ return 0 ;
169
+ }
170
+
171
+ static void dump_ragel_parser (const char * machinename , const char * org_regex , int * maxgroups ) {
172
+ FILE * f = fopen ("ragel.tmpl" , "r" );
173
+ char buf [4096 ];
174
+ int groups , cgroup = 0 ;
175
+ sblist * tokens = lex_to_list ();
176
+ list_transform_dupchars (tokens , org_regex );
177
+ tokens = list_join_literals (tokens , org_regex );
178
+ groups = count_groups (tokens , org_regex );
179
+ if (groups > * maxgroups ) * maxgroups = groups ;
180
+ sblist * group_order = sblist_new (sizeof (int ), groups ) ;
181
+
182
+ while (fgets (buf , sizeof buf , f )) {
183
+ char * p ;
184
+ if ((p = strstr (buf , "%MACHINENAME%" ))) {
185
+ printf ("%s" , replace (buf , "%MACHINENAME%" , machinename ));
186
+ } else if ((p = strstr (buf , "%GROUPNR%" ))) {
187
+ expand_groups (buf , groups );
188
+ } else if ((p = strstr (buf , "%MACHINEDEF%" ))) {
189
+ printf ("%.*s" , (int )(p - buf ), buf );
190
+ size_t i ;
191
+ /* insert group match actions */
192
+ for (i = 0 ; i < sblist_getsize (tokens ); i ++ ) {
193
+ struct list_item * li = sblist_get (tokens , i );
194
+ if (li -> type == CTX_NONE && org_regex [li -> so ] == '(' ) {
195
+ sblist_add (group_order , & cgroup );
196
+ ++ cgroup ;
197
+ print_token (li , org_regex );
198
+ } else if (li -> type == CTX_NONE && org_regex [li -> so ] == ')' ) {
199
+ struct list_item * next ;
200
+ int groupno = * ((int * )sblist_pop (group_order ));
201
+ if (i + 1 < sblist_getsize (tokens ) && (next = sblist_get (tokens , i + 1 )) && next -> type == CTX_DUP ) {
202
+ print_token (li , org_regex );
203
+ print_token (next , org_regex );
204
+ printf (" >A%d %%E%d " , groupno , groupno );
205
+ ++ i ;
206
+ } else {
207
+ print_token (li , org_regex );
208
+ printf (" >A%d %%E%d " , groupno , groupno );
209
+ }
210
+ } else {
211
+ print_token (li , org_regex );
212
+ }
213
+ }
214
+ printf ("%s" , p + sizeof ("%MACHINEDEF%" )- 1 );
215
+ } else {
216
+ printf ("%s" , buf );
217
+ }
218
+ }
219
+ fclose (f );
220
+ sblist_free (group_order );
221
+ sblist_free (tokens );
222
+ }
223
+
15
224
int main () {
16
225
#ifdef YYDEBUG
17
226
yydebug = 1 ;
18
227
#endif
19
228
char buf [4096 ];
20
229
size_t lineno = 0 ;
21
230
yyin = stdin ;
231
+ int maxgroups = 0 ;
22
232
while (fgets (buf , sizeof buf , yyin )) {
23
233
++ lineno ;
24
234
const char * p = buf , * pe = strrchr (buf , '\n' );
25
235
if (!pe ) pe = buf + strlen (p );
26
236
lex_init (p , pe , LEXFLAG_SILENT );
27
237
if (yyparse () == 0 ) {
28
238
/* syntax check OK */
239
+ char nbuf [128 ];
240
+ snprintf (nbuf , sizeof nbuf , "machine_%04zu" , lineno );
241
+ lex_init (p , pe , LEXFLAG_SILENT );
242
+ dump_ragel_parser (nbuf , p , & maxgroups );
29
243
} else {
30
244
size_t errpos = lex_errpos ();
31
245
fprintf (stderr , "parse error @%zu:%zu\n" , lineno , errpos );
0 commit comments