Skip to content

Commit 73d0b43

Browse files
committed
Clean up location and span access in Lexer.
1 parent b02f71c commit 73d0b43

File tree

3 files changed

+89
-723
lines changed

3 files changed

+89
-723
lines changed

src/compiler/Input.ml

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,21 @@ module type S = sig
44
type loc
55
type span
66

7+
val span : loc -> loc -> span
8+
9+
module Span: sig
10+
type t
11+
12+
val start : t -> loc
13+
val ending : t -> loc
14+
val merge : t -> t -> t
15+
end with type t = span
16+
717
val length : t -> int
818
val is_empty : t -> bool
919
val loc : t -> loc
1020
val offset : t -> int
11-
val starts_with : t -> bool
21+
val starts_with : t -> substr:string -> bool
1222
val get : t -> index:int -> char
1323
val advance_by : t -> amount:int -> t
1424
val current_char : t -> char
@@ -40,7 +50,6 @@ end = struct
4050
type span = Text.SourceSpan.t
4151

4252
module Loc = Text.SourceLoc
43-
module Span = Text.SourceSpan
4453

4554
let from_string ~filename ~string = {
4655
source = filename;
@@ -50,6 +59,8 @@ end = struct
5059
column = 0
5160
}
5261

62+
let span l r = Text.SourceSpan.from l r
63+
5364
let length input =
5465
(String.length input.data) - input.pos
5566

@@ -82,7 +93,16 @@ end = struct
8293
let advance input substr =
8394
advance_by input (String.length substr)
8495

85-
let starts_with input substr =
96+
let rec starts_with_at string substr str_idx sub_idx =
97+
let sub_len = String.length substr in
98+
if sub_idx >= sub_len then
99+
true
100+
else if (String.get string str_idx) != (String.get substr sub_idx) then
101+
false
102+
else
103+
starts_with_at string substr (str_idx + 1) (sub_idx + 1)
104+
105+
let starts_with input ~substr =
86106
let slen = (String.length substr) in
87107
let ilen = length input in
88108
if ilen < slen then
@@ -95,10 +115,17 @@ end = struct
95115
let in_bounds input ~index = index >= 0 && index < (length input)
96116

97117
let substr input ~span =
98-
let start = Loc.offset (Span.start span) in
99-
let finish = Loc.offset (Span.finish span) in
118+
let start = Loc.offset (Text.SourceSpan.start span) in
119+
let finish = Loc.offset (Text.SourceSpan.finish span) in
100120
String.sub input.data start (finish - start)
101121

102122
let to_string input =
103123
String.sub input.data input.pos (length input)
124+
125+
module Span = struct
126+
type t = span
127+
let start = Text.SourceSpan.start
128+
let ending = Text.SourceSpan.finish
129+
let merge = Text.SourceSpan.merge
130+
end
104131
end

src/compiler/Lexer.ml

Lines changed: 55 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
module type S = sig
33
type input
44
type loc
5+
type span
56
type token = loc Token.t
67

78
val lex : input -> token Stream.t
@@ -10,6 +11,14 @@ end
1011
module Make(Input: Input.S) = struct
1112
open Token
1213

14+
module Span = Text.SourceSpan
15+
module Loc = Text.SourceLoc
16+
17+
type input = Input.t
18+
type loc = Input.loc
19+
type span = Input.span
20+
type token = span Token.t
21+
1322
let is_name_start chr =
1423
(chr >= 'a' && chr <= 'z')
1524
|| (chr >= 'A' && chr <= 'Z')
@@ -24,28 +33,27 @@ module Make(Input: Input.S) = struct
2433
let handle_simple input length token_type = begin
2534
let start = Input.loc !input in
2635
input := Input.advance_by !input length;
27-
Some (Span.from start (Input.loc !input), token_type)
36+
Some (Input.span start (Input.loc !input), token_type)
2837
end
2938

30-
let in_bounds text idx = idx < (String.length text)
39+
let in_bounds text idx = idx < (Input.length text)
3140

3241
let rec read_name text curr =
3342
if in_bounds text curr then
34-
match String.get text curr with
43+
match Input.get text curr with
3544
| c when is_name_body c -> read_name text (curr + 1)
3645
| _ -> curr
3746
else
3847
curr
3948

4049
let lex_name input =
41-
let text = (Input.full_text !input) in
42-
let curr = (Input.offset !input) in
4350
let start = Input.loc !input in
44-
let end_idx = read_name text curr in
51+
let start_idx = Input.offset !input in
52+
let end_idx = read_name !input 0 in
4553
let old_input = !input in
46-
let len = (end_idx - (Loc.offset start)) in
54+
let len = (end_idx - start_idx) in
4755
input := Input.advance_by !input len;
48-
let span = Span.from start (Input.loc !input) in
56+
let span = Input.span start (Input.loc !input) in
4957
let matches str = len = (String.length str) && Input.starts_with old_input str in
5058
let tok = match len with
5159
| _ when matches "type" -> Type
@@ -63,9 +71,9 @@ module Make(Input: Input.S) = struct
6371

6472
let rec read_multiline_comment text curr nesting =
6573
if in_bounds text curr then
66-
match String.get text curr with
74+
match Input.get text curr with
6775
| '*' -> if in_bounds text (curr + 1) then
68-
match String.get text (curr + 1) with
76+
match Input.get text (curr + 1) with
6977
| '/' -> if nesting = 1 then
7078
(curr + 2, true)
7179
else
@@ -74,7 +82,7 @@ module Make(Input: Input.S) = struct
7482
else
7583
(curr + 1, false)
7684
| '/' -> if in_bounds text (curr + 1) then
77-
match String.get text (curr + 1) with
85+
match Input.get text (curr + 1) with
7886
| '*' -> read_multiline_comment text (curr + 2) (nesting + 1)
7987
| _ -> read_multiline_comment text (curr + 1) nesting
8088
else
@@ -84,45 +92,43 @@ module Make(Input: Input.S) = struct
8492
(curr, false)
8593

8694
let lex_multiline_comment input =
87-
let text = Input.full_text !input in
8895
let start = Input.loc !input in
89-
let start_idx = Loc.offset start in
90-
let end_idx, successful = read_multiline_comment text start_idx 0 in
96+
let start_idx = Input.offset !input in
97+
let end_idx, successful = read_multiline_comment !input 0 0 in
9198
let len = end_idx - start_idx in
9299
input := Input.advance_by !input len;
93100
let finish = Input.loc !input in
94-
let span = Span.from start finish in
101+
let span = Input.span start finish in
95102
if successful then
96103
Some (span, Comment Multiline)
97104
else
98105
Some (span, Invalid "Invalid multiline comment")
99106

100107
let rec read_line_comment text curr =
101108
if in_bounds text curr then
102-
match String.get text curr with
109+
match Input.get text curr with
103110
| '\n' -> curr
104111
| _ -> read_line_comment text (curr + 1)
105112
else
106113
curr
107114

108115
let lex_line_comment input =
109-
let text = Input.full_text !input in
110116
let start = Input.loc !input in
111-
let start_idx = Loc.offset start in
112-
let end_idx = read_line_comment text (start_idx + 2) in
117+
let start_idx = Input.offset !input in
118+
let end_idx = read_line_comment !input (start_idx + 2) in
113119
let len = end_idx - start_idx in
114120
input := Input.advance_by !input len;
115121
let finish = Input.loc !input in
116-
let span = Span.from start finish in
122+
let span = Input.span start finish in
117123
Some (span, Comment Line)
118124

119125
let rec read_whitespace text curr =
120126
if in_bounds text curr then
121-
match String.get text curr with
127+
match Input.get text curr with
122128
| '\n' -> curr + 1
123129
| '\r' -> begin
124130
if in_bounds text (curr + 1) then
125-
match String.get text (curr + 1) with
131+
match Input.get text (curr + 1) with
126132
| '\n' -> curr + 2
127133
| _ -> read_whitespace text (curr + 1)
128134
else
@@ -134,19 +140,18 @@ module Make(Input: Input.S) = struct
134140
curr
135141

136142
let lex_whitespace input =
137-
let text = Input.full_text !input in
138143
let start = Input.loc !input in
139-
let start_idx = Loc.offset start in
140-
let end_idx = read_whitespace text (start_idx + 1) in
144+
let start_idx = Input.offset !input in
145+
let end_idx = read_whitespace !input (start_idx + 1) in
141146
let len = end_idx - start_idx in
142147
input := Input.advance_by !input len;
143148
let finish = Input.loc !input in
144-
let span = Span.from start finish in
149+
let span = Input.span start finish in
145150
Some (span, Whitespace)
146151

147152
let rec read_raw_prefix text curr =
148153
if in_bounds text curr then
149-
match String.get text curr with
154+
match Input.get text curr with
150155
| '#' -> read_raw_prefix text (curr + 1)
151156
| '"' -> (curr + 1, true)
152157
| _ -> (curr, false)
@@ -158,15 +163,15 @@ module Make(Input: Input.S) = struct
158163
true
159164
else
160165
if in_bounds text curr then
161-
match String.get text curr with
166+
match Input.get text curr with
162167
| '#' -> matches_suffix text (curr + 1) (len - 1)
163168
| _ -> false
164169
else
165170
false
166171

167172
let rec read_raw_body text curr suffix =
168173
if in_bounds text curr then
169-
match String.get text curr with
174+
match Input.get text curr with
170175
| '"' -> if matches_suffix text (curr + 1) suffix then
171176
(curr, true)
172177
else
@@ -176,39 +181,38 @@ module Make(Input: Input.S) = struct
176181
(curr, false)
177182

178183
let lex_raw_string input =
179-
let text = Input.full_text !input in
180184
let start_idx = Input.offset !input in
181-
let (pos, successful_prefix) = read_raw_prefix text (start_idx + 1) in
185+
let (pos, successful_prefix) = read_raw_prefix !input (start_idx + 1) in
182186
let start = Input.loc !input in
183187
if successful_prefix then
184188
begin
185189
let suffix = (pos - start_idx - 2) in
186-
let (end_pos, successful) = read_raw_body text pos suffix in
190+
let (end_pos, successful) = read_raw_body !input pos suffix in
187191
if successful then
188192
begin
189193
let len = (end_pos + suffix + 1) - start_idx in
190194
input := Input.advance_by !input len;
191-
let span = Span.from start (Input.loc !input) in
195+
let span = Input.span start (Input.loc !input) in
192196
Some (span, Raw_string suffix)
193197
end
194198
else
195199
begin
196200
let len = end_pos - start_idx in
197201
input := Input.advance_by !input len;
198-
let span = Span.from start (Input.loc !input) in
202+
let span = Input.span start (Input.loc !input) in
199203
Some (span, Invalid "Invalid raw string")
200204
end
201205
end
202206
else
203207
begin
204208
input := Input.advance_by !input (pos - start_idx);
205-
let span = Span.from start (Input.loc !input) in
209+
let span = Input.span start (Input.loc !input) in
206210
Some (span, Invalid "Expected \"")
207211
end
208212

209213
let rec read_string text curr =
210214
if in_bounds text curr then
211-
match String.get text curr with
215+
match Input.get text curr with
212216
| '\\' ->
213217
begin
214218
if in_bounds text curr then
@@ -222,22 +226,21 @@ module Make(Input: Input.S) = struct
222226
(curr, false)
223227

224228
let lex_string input =
225-
let text = Input.full_text !input in
226229
let start_idx = Input.offset !input in
227-
let finish, successful = read_string text (start_idx + 1) in
230+
let finish, successful = read_string !input (start_idx + 1) in
228231
let start = Input.loc !input in
229232
if successful then
230233
begin
231234
let len = (finish + 1) - start_idx in
232235
input := Input.advance_by !input len;
233-
let span = Span.from start (Input.loc !input) in
236+
let span = Input.span start (Input.loc !input) in
234237
Some (span, String)
235238
end
236239
else
237240
begin
238241
let len = finish - start_idx in
239242
input := Input.advance_by !input len;
240-
let span = Span.from start (Input.loc !input) in
243+
let span = Input.span start (Input.loc !input) in
241244
Some (span, Invalid "Expected \"")
242245
end
243246

@@ -250,7 +253,7 @@ module Make(Input: Input.S) = struct
250253

251254
let rec read_digits is_digit text curr =
252255
if in_bounds text curr then
253-
match String.get text curr with
256+
match Input.get text curr with
254257
| c when is_digit c -> read_digits is_digit text (curr + 1)
255258
| _ -> curr
256259
else
@@ -260,9 +263,9 @@ module Make(Input: Input.S) = struct
260263
if in_bounds text curr then
261264
let end_digits = read_digits is_decimal_digit text curr in
262265
if in_bounds text end_digits then
263-
match String.get text end_digits with
266+
match Input.get text end_digits with
264267
| '.' -> if in_bounds text (end_digits + 1) then
265-
match String.get text (end_digits + 1) with
268+
match Input.get text (end_digits + 1) with
266269
| c when is_decimal_digit c ->
267270
read_digits is_decimal_digit text (end_digits + 1)
268271
| _ -> end_digits
@@ -275,13 +278,13 @@ module Make(Input: Input.S) = struct
275278
curr
276279

277280
let lex_number input prefix_len read_digits =
278-
let text = Input.full_text !input in
279-
let start_idx = Input.offset !input in
280-
let start = Input.loc !input in
281+
let text = !input in
282+
let start_idx = Input.offset text in
283+
let start = Input.loc text in
281284
let end_idx = read_digits text (start_idx + prefix_len) in
282285
let len = end_idx - start_idx in
283-
input := Input.advance_by !input len;
284-
let span = Span.from start (Input.loc !input) in
286+
input := Input.advance_by text len;
287+
let span = Input.span start (Input.loc text) in
285288
if len = prefix_len then
286289
Some (span, Invalid "Invalid number")
287290
else
@@ -298,22 +301,22 @@ module Make(Input: Input.S) = struct
298301

299302
let rec read_invalid text curr =
300303
if in_bounds text curr then
301-
if is_valid_token_start (String.get text curr) then
304+
if is_valid_token_start (Input.get text curr) then
302305
curr
303306
else
304307
read_invalid text (curr + 1)
305308
else
306309
curr
307310

308311
let lex_invalid input =
309-
let text = Input.full_text !input in
312+
let text = !input in
310313
let start = Input.loc !input in
311-
let start_idx = Loc.offset start in
314+
let start_idx = Input.offset !input in
312315
let end_idx = read_invalid text (start_idx + 1) in
313316
let len = end_idx - start_idx in
314317
input := Input.advance_by !input len;
315318
let finish = Input.loc !input in
316-
let span = Span.from start finish in
319+
let span = Input.span start finish in
317320
Some (span, Invalid "Unknown token")
318321

319322
let lex_token input =

0 commit comments

Comments
 (0)