Skip to content

Commit 06d0b58

Browse files
committed
ondemand: able to iterate large json files
- Fixed a typo bug in ValueIterator.get_type() that was causing other mysterious problems.
- Created recursive_iterate_json() in debug_main.zig (not in the repo), which now gets through twitter.json and a large package-lock.json file.
1 parent fa92c3b commit 06d0b58

File tree

3 files changed

+48
-21
lines changed

3 files changed

+48
-21
lines changed

src/Logger.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ fn printable_char(c: u8) u8 {
5151

5252
pub fn line_fmt(log: *Logger, iter: anytype, title_prefix: []const u8, title: []const u8, comptime detail_fmt: []const u8, detail_args: anytype) void {
5353
var buf: [0x100]u8 = undefined;
54-
log.line(iter, title_prefix, title, std.fmt.bufPrint(&buf, detail_fmt, detail_args) catch unreachable);
54+
log.line(iter, title_prefix, title, std.fmt.bufPrint(&buf, detail_fmt, detail_args) catch &buf);
5555
}
5656

5757
// TODO: remove catch unreachables

src/ondemand.zig

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ pub const Value = struct {
125125
else => @compileError("unsupported type: " ++ @typeName(T) ++ ". expecting pointer type."),
126126
}
127127
}
128+
129+
pub fn get_type(val: *Value) !ValueType {
130+
return val.iter.get_type();
131+
}
128132
};
129133

130134
pub const Field = struct {
@@ -166,6 +170,7 @@ pub const Object = struct {
166170
pub fn iterator(o: Object) ObjectIterator {
167171
return ObjectIterator.init(o.iter);
168172
}
173+
// TODO: move these to ValueIterator
169174
fn start_root(iter: *ValueIterator) !Object {
170175
_ = try iter.start_root_object();
171176
return Object{ .iter = iter.* };
@@ -177,6 +182,9 @@ pub const Object = struct {
177182
fn resume_(iter: *ValueIterator) Object {
178183
return Object{ .iter = iter.* };
179184
}
185+
pub fn resume_value(o: Object) Value {
186+
return Value{ .iter = o.iter };
187+
}
180188

181189
pub fn find_field(o: *Object, key: []const u8) !Value {
182190
return if (try o.iter.find_field_raw(key))
@@ -253,9 +261,6 @@ const ArrayIterator = struct {
253261
}
254262
return null;
255263
}
256-
pub fn get_int(ai: *ArrayIterator, comptime T: type) !T {
257-
return ai.iter.get_int(T);
258-
}
259264
};
260265
const Array = struct {
261266
iter: ValueIterator,
@@ -355,24 +360,25 @@ const TokenIterator = struct {
355360
}
356361
};
357362
pub const Iterator = struct {
363+
// TODO make this a pointer or make TokenIterator.read_buf a pointer
358364
token: TokenIterator,
359365
parser: *Parser,
360366
err: Error!void = {},
361367
depth: u32,
362368
log: Logger = .{ .depth = 0 },
363369

364370
pub fn init(parser: *Parser, src: std.io.StreamSource) Iterator {
365-
return .{
371+
return Iterator{
366372
.token = .{ .src = src, .index = parser.structural_indices().ptr, .buf_start_pos = 0 },
367373
.parser = parser,
368374
.depth = 1,
369375
};
370376
}
371377

372-
pub fn advance(iter: *Iterator, len: u16) ![*]const u8 {
378+
pub fn advance(iter: *Iterator, peek_len: u16) ![*]const u8 {
373379
defer iter.token.index += 1;
374380
// print("advance '{s}'\n", .{(try iter.token.peek(iter.token.index, len))[0..len]});
375-
return iter.token.peek(iter.token.index, len);
381+
return iter.token.peek(iter.token.index, peek_len);
376382
}
377383
pub fn peek(iter: *Iterator, index: [*]const u32, len: u16) ![*]const u8 {
378384
return iter.token.peek(index, len);
@@ -529,9 +535,13 @@ pub const Iterator = struct {
529535
iter.log.start(iter); // We start again
530536
iter.depth = 1;
531537
}
538+
539+
fn at_eof(iter: Iterator) bool {
540+
return iter.token.index == iter.last_document_position();
541+
}
532542
};
533543

534-
const ValueIterator = struct {
544+
pub const ValueIterator = struct {
535545
iter: Iterator, // this needs to be a value, not a pointer
536546
depth: u32,
537547
start_position: [*]const u32,
@@ -1124,14 +1134,13 @@ const ValueIterator = struct {
11241134

11251135
pub fn get_type(vi: *ValueIterator) !ValueType {
11261136
const start = try vi.peek_start(1);
1127-
// println("get_type() start '{c}'", .{start[0]});
11281137
return switch (start[0]) {
11291138
'{' => .object,
11301139
'[' => .array,
11311140
'"' => .string,
11321141
'n' => .nul,
11331142
't', 'f' => .bool,
1134-
'-', '0', '9' => .number,
1143+
'-', '0'...'9' => .number,
11351144
else => error.TAPE_ERROR,
11361145
};
11371146
}
@@ -1186,12 +1195,12 @@ pub const Document = struct {
11861195
return doc.resume_value_iterator().at_key(key);
11871196
}
11881197
pub fn get_object(doc: *Document) !Object {
1189-
var value = doc.get_root_value_iterator();
1190-
return try Object.start_root(&value);
1198+
var val = doc.get_root_value_iterator();
1199+
return try Object.start_root(&val);
11911200
}
11921201
pub fn get_array(doc: *Document) !Array {
1193-
var value = doc.get_root_value_iterator();
1194-
return Array.start_root(&value);
1202+
var val = doc.get_root_value_iterator();
1203+
return Array.start_root(&val);
11951204
}
11961205
fn resume_value(doc: *Document) Value {
11971206
return Value{ .iter = doc.resume_value_iterator() };
@@ -1237,7 +1246,10 @@ pub const Document = struct {
12371246
};
12381247
}
12391248
pub fn get(doc: *Document, out: anytype) !void {
1240-
return (Value{ .iter = doc.get_root_value_iterator() }).get(out);
1249+
return doc.value().get(out);
1250+
}
1251+
pub fn value(doc: *Document) Value {
1252+
return Value{ .iter = doc.get_root_value_iterator() };
12411253
}
12421254
};
12431255

@@ -1297,9 +1309,8 @@ pub const Parser = struct {
12971309

12981310
pub fn iterate(p: *Parser) !Document {
12991311
try p.stage1();
1300-
return Document{
1301-
.iter = Iterator.init(p, p.src.*),
1302-
};
1312+
p.iter = Iterator.init(p, p.src.*);
1313+
return Document{ .iter = p.iter };
13031314
}
13041315

13051316
inline fn structural_indices(parser: Parser) []u32 {

src/tests.zig

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -531,12 +531,28 @@ test "get_string_alloc" {
531531
}.func);
532532
const s = "asdf";
533533
const reps = mem.page_size / s.len + 100;
534-
const overlong_str = "\"" ++ s ** reps ++ "\"";
535-
try test_ondemand_doc(overlong_str
536-
, struct {
534+
const overlong_str = "\"" ++ s ** reps ++ "\"";
535+
try test_ondemand_doc(overlong_str, struct {
537536
fn func(doc: *ondemand.Document) E!void {
538537
const str = doc.get_string_alloc(allr);
539538
try testing.expectError(error.CAPACITY, str);
540539
}
541540
}.func);
542541
}
542+
543+
test "ondemand array iteration nested" {
544+
try test_ondemand_doc(
545+
\\{"a": [{}, {}] }
546+
, struct {
547+
fn func(doc: *ondemand.Document) E!void {
548+
var buf: [0x10]u8 = undefined;
549+
const obj1 = try doc.get_object();
550+
var field1 = (try obj1.iterator().next(&buf)) orelse return testing.expect(false);
551+
var arr1 = try field1.value.get_array();
552+
var it = arr1.iterator();
553+
var i: u8 = 0;
554+
while (try it.next()) |_| : (i += 1) {}
555+
try testing.expectEqual(@as(u8, 2), i);
556+
}
557+
}.func);
558+
}

0 commit comments

Comments
 (0)