Skip to content

Commit 171b338

Browse files
committed
Rewrote parsing conditions in terms of machine opcodes instead of predicates.
This makes it possible to implement conditional 'on' and 'off' block combinators, which mutate the condition state for a given sub-expression and automatically restore the previous condition state afterwards, whether the sub-expression matches or matching fails. This also improves parsing performance with conditionals.
1 parent 0cc93dc commit 171b338

File tree

3 files changed

+71
-100
lines changed

3 files changed

+71
-100
lines changed

lug/lug.hpp

Lines changed: 56 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ enum class opcode : unsigned char
4747
choice, commit, commit_back, commit_partial,
4848
jump, call, ret, fail,
4949
accept, accept_final, action, predicate,
50-
begin, end
50+
capture_start, capture_end, test_condition, push_condition,
51+
pop_condition
5152
};
5253

5354
enum class immediate : unsigned short {};
@@ -114,7 +115,7 @@ struct program
114115
case opcode::match_set: val = detail::push_back_unique(runesets, src.runesets[instr.pf.val]); break;
115116
case opcode::action: val = actions.size(); actions.push_back(src.actions[instr.pf.val]); break;
116117
case opcode::predicate: val = predicates.size(); predicates.push_back(src.predicates[instr.pf.val]); break;
117-
case opcode::end: val = captures.size(); captures.push_back(src.captures[instr.pf.val]); break;
118+
case opcode::capture_end: val = captures.size(); captures.push_back(src.captures[instr.pf.val]); break;
118119
default: val = (std::numeric_limits<std::size_t>::max)(); break;
119120
}
120121
if (val != (std::numeric_limits<std::size_t>::max)()) {
@@ -189,7 +190,7 @@ class environment
189190

190191
std::vector<lug::parser*> parser_stack_;
191192
std::vector<std::vector<std::any>> accept_stack_;
192-
std::unordered_set<std::string> conditions_;
193+
std::unordered_set<std::string_view> conditions_;
193194
unsigned int tab_width_ = 8;
194195
unsigned int tab_alignment_ = 8;
195196

@@ -241,11 +242,9 @@ class environment
241242
void tab_width(unsigned int w) { tab_width_ = w; }
242243
[[nodiscard]] unsigned int tab_alignment() const { return tab_alignment_; }
243244
void tab_alignment(unsigned int a) { tab_alignment_ = a; }
244-
[[nodiscard]] bool has_condition(std::string_view c) const noexcept { return (conditions_.count(std::string{c}) > 0); }
245-
void set_condition(std::string_view c) { conditions_.insert(std::string{c}); }
246-
void unset_condition(std::string_view c) { conditions_.erase(std::string{c}); }
245+
[[nodiscard]] bool has_condition(std::string_view name) const noexcept { return (conditions_.count(name) > 0); }
246+
bool set_condition(std::string_view name, bool value) { if (value) { return !conditions_.emplace(name).second; } else { return (conditions_.erase(name) > 0); } }
247247
void clear_conditions() { conditions_.clear(); }
248-
template <bool B> void modify_condition(std::string_view c) { if constexpr (B) set_condition(c); else unset_condition(c); }
249248
[[nodiscard]] std::string_view match() const;
250249
[[nodiscard]] syntax_position const& position_at(std::size_t index);
251250
[[nodiscard]] unsigned int variable_instance() const { return (static_cast<unsigned int>(parse_depth()) << 16) | call_depth(); }
@@ -610,37 +609,34 @@ inline constexpr directive_modifier<directives::postskip, directives::none, dire
610609
inline constexpr directive_modifier<directives::preskip, directives::postskip, directives::eps> skip_before{};
611610
template <unicode::ctype Property> struct ctype_combinator { void operator()(encoder& d) const { d.match_any(Property); } };
612611

613-
template <typename Op> struct test_condition_combinator
612+
template <bool Value>
613+
struct condition_test_combinator
614614
{
615-
template <class C>
616-
[[nodiscard]] constexpr auto operator()(C&& condition) const
615+
[[nodiscard]] constexpr auto operator()(std::string_view name) const noexcept
617616
{
618-
if constexpr (std::is_invocable_r_v<bool, C, environment&>)
619-
return [c = std::decay_t<C>{std::forward<C>(condition)}](environment& envr) -> bool { return Op{}(c(envr)); };
620-
else if constexpr (std::is_invocable_r_v<bool, C>)
621-
return [c = std::decay_t<C>{std::forward<C>(condition)}](environment&) -> bool { return Op{}(c()); };
622-
else if constexpr (std::is_same_v<std::string_view, std::decay_t<C>> || std::is_same_v<char const*, std::decay_t<C>>)
623-
return [c = std::string_view{std::forward<C>(condition)}](environment& envr) -> bool { return Op{}(envr.has_condition(c)); };
624-
else if constexpr (std::is_constructible_v<std::string, C&&>)
625-
return [c = std::string{std::forward<C>(condition)}](environment& envr) -> bool { return Op{}(envr.has_condition(c)); };
626-
else if constexpr (std::is_constructible_v<bool const&, C&&>)
627-
return [&condition](environment&) -> bool { return Op{}(condition); };
617+
return [name](encoder& d) {
618+
d.encode(opcode::test_condition, name, immediate{Value ? 1 : 0});
619+
};
628620
}
629621
};
630622

631623
template <bool Value>
632-
struct modify_condition_combinator
624+
struct condition_block_combinator
633625
{
634-
template <class C>
635-
[[nodiscard]] constexpr auto operator()(C&& condition) const
626+
struct condition_block_expression
636627
{
637-
if constexpr (std::is_same_v<std::string_view, std::decay_t<C>> || std::is_same_v<char const*, std::decay_t<C>>)
638-
return [c = std::string_view{std::forward<C>(condition)}](environment& envr) -> bool { envr.modify_condition<Value>(c); return true; };
639-
else if constexpr (std::is_constructible_v<std::string, C&&>)
640-
return [c = std::string{std::forward<C>(condition)}](environment& envr) -> bool { envr.modify_condition<Value>(c); return true; };
641-
else if constexpr (std::is_constructible_v<bool&, C&&>)
642-
return [&condition](environment&) -> bool { condition = Value; return true; };
643-
}
628+
std::string_view name;
629+
630+
template <class E, class = std::enable_if_t<is_expression_v<E>>>
631+
[[nodiscard]] constexpr auto operator[](E const& e) const
632+
{
633+
return [e = make_expression(e), n = name](encoder& d) {
634+
d.encode(opcode::push_condition, n, immediate{Value ? 1 : 0}).evaluate(e).encode(opcode::pop_condition);
635+
};
636+
}
637+
};
638+
639+
[[nodiscard]] constexpr auto operator()(std::string_view name) const noexcept { return condition_block_expression{name}; }
644640
};
645641

646642
namespace language {
@@ -665,8 +661,8 @@ inline constexpr ctype_combinator<ctype::alpha> alpha{}; inline constexpr ctype_
665661
inline constexpr ctype_combinator<ctype::upper> upper{}; inline constexpr ctype_combinator<ctype::digit> digit{}; inline constexpr ctype_combinator<ctype::xdigit> xdigit{};
666662
inline constexpr ctype_combinator<ctype::space> space{}; inline constexpr ctype_combinator<ctype::blank> blank{}; inline constexpr ctype_combinator<ctype::punct> punct{};
667663
inline constexpr ctype_combinator<ctype::graph> graph{}; inline constexpr ctype_combinator<ctype::print> print{};
668-
inline constexpr test_condition_combinator<detail::identity> when{}; inline constexpr test_condition_combinator<std::logical_not<>> unless{};
669-
inline constexpr modify_condition_combinator<true> set{}; inline constexpr modify_condition_combinator<false> unset{};
664+
inline constexpr condition_test_combinator<true> when{}; inline constexpr condition_test_combinator<false> unless{};
665+
inline constexpr condition_block_combinator<true> on{}; inline constexpr condition_block_combinator<false> off{};
670666

671667
inline constexpr struct
672668
{
@@ -715,8 +711,8 @@ chr{};
715711

716712
inline constexpr struct
717713
{
718-
[[nodiscard]] constexpr auto operator()(std::string_view s) const { return string_expression{s}; }
719-
[[nodiscard]] constexpr auto operator()(char const* s, std::size_t n) const { return string_expression{std::string_view{s, n}}; }
714+
[[nodiscard]] constexpr auto operator()(std::string_view s) const noexcept { return string_expression{s}; }
715+
[[nodiscard]] constexpr auto operator()(char const* s, std::size_t n) const noexcept { return string_expression{std::string_view{s, n}}; }
720716
}
721717
str{};
722718

@@ -783,7 +779,7 @@ template <class E, class A, class = std::enable_if_t<is_expression_v<E>>>
783779
{
784780
if constexpr (std::is_invocable_v<A, environment&, syntax>) {
785781
return [e = make_expression(e), a = std::move(a)](encoder& d) {
786-
d.skip().encode(opcode::begin).evaluate(e).encode(opcode::end, syntactic_capture{a});
782+
d.skip().encode(opcode::capture_start).evaluate(e).encode(opcode::capture_end, syntactic_capture{a});
787783
};
788784
} else if constexpr (std::is_invocable_v<A, detail::dynamic_cast_if_base_of<environment&>, syntax>) {
789785
return e < [a = std::move(a)](environment& envr, csyntax& x) {
@@ -923,7 +919,7 @@ struct parser_registers
923919

924920
class parser
925921
{
926-
enum class stack_frame_type : unsigned char { backtrack, call, capture, lrcall };
922+
enum class stack_frame_type : unsigned char { backtrack, call, capture, condition, lrcall };
927923
enum class subject_location : std::size_t {};
928924
struct lrmemo { std::size_t srr, sra, prec; std::ptrdiff_t pcr, pca; std::size_t rcr; std::vector<semantic_response> responses; };
929925
static inline constexpr std::size_t lrfailcode = (std::numeric_limits<std::size_t>::max)();
@@ -943,6 +939,7 @@ class parser
943939
std::vector<std::tuple<std::size_t, std::size_t, std::ptrdiff_t>> backtrack_stack_; // sr, rc, pc
944940
std::vector<std::ptrdiff_t> call_stack_; // pc
945941
std::vector<subject_location> capture_stack_; // sr
942+
std::vector<std::pair<std::string_view, bool>> condition_stack_; // name, value
946943
std::vector<lrmemo> lrmemo_stack_;
947944
std::vector<semantic_response> responses_;
948945
unsigned short prune_depth_{max_call_depth}, call_depth_{0};
@@ -1297,7 +1294,7 @@ class parser
12971294
fc = imm;
12981295
failure:
12991296
for (mr = (std::max)(mr, sr), ++fc; fc > 0; --fc) {
1300-
if (done = cut_frame_ >= stack_frames_.size(); done) {
1297+
if (done = (cut_frame_ >= stack_frames_.size()); done) {
13011298
registers_ = {sr, mr, rc, pc, 0};
13021299
break;
13031300
}
@@ -1312,6 +1309,11 @@ class parser
13121309
case stack_frame_type::capture: {
13131310
pop_stack_frame(capture_stack_, sr, mr, rc, pc), ++fc;
13141311
} break;
1312+
case stack_frame_type::condition: {
1313+
auto const& [cond_name, cond_value] = condition_stack_.back();
1314+
environment_.set_condition(cond_name, cond_value);
1315+
pop_stack_frame(condition_stack_), ++fc;
1316+
} break;
13151317
case stack_frame_type::lrcall: {
13161318
if (auto const& memo = lrmemo_stack_.back(); memo.sra != lrfailcode)
13171319
sr = memo.sra, pc = memo.pcr, rc = restore_responses_after(memo.rcr, memo.responses);
@@ -1345,11 +1347,11 @@ class parser
13451347
if (!accepted)
13461348
goto failure;
13471349
} break;
1348-
case opcode::begin: {
1350+
case opcode::capture_start: {
13491351
stack_frames_.push_back(stack_frame_type::capture);
13501352
capture_stack_.push_back(static_cast<subject_location>(sr));
13511353
} break;
1352-
case opcode::end: {
1354+
case opcode::capture_end: {
13531355
if (stack_frames_.empty() || stack_frames_.back() != stack_frame_type::capture)
13541356
goto failure;
13551357
auto const sr0 = static_cast<std::size_t>(capture_stack_.back()), sr1 = sr;
@@ -1358,6 +1360,21 @@ class parser
13581360
goto failure;
13591361
rc = push_response(call_stack_.size() + lrmemo_stack_.size(), imm, {sr0, sr1 - sr0});
13601362
} break;
1363+
case opcode::test_condition: {
1364+
if (environment_.has_condition(str) != (imm != 0))
1365+
goto failure;
1366+
} break;
1367+
case opcode::push_condition: {
1368+
stack_frames_.push_back(stack_frame_type::condition);
1369+
condition_stack_.emplace_back(str, environment_.set_condition(str, imm != 0));
1370+
} break;
1371+
case opcode::pop_condition: {
1372+
if (stack_frames_.empty() || stack_frames_.back() != stack_frame_type::condition)
1373+
goto failure;
1374+
auto const& [cond_name, cond_value] = condition_stack_.back();
1375+
environment_.set_condition(cond_name, cond_value);
1376+
pop_stack_frame(condition_stack_);
1377+
} break;
13611378
default: registers_ = {sr, (std::max)(mr, sr), rc, pc, 0}; throw bad_opcode{};
13621379
}
13631380
}

samples/basic.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,8 @@ class basic_interpreter
140140
| NL
141141
| (*(!NL > any) > NL) <[this]{ print_error("ILLEGAL FORMULA"); };
142142

143-
rule Init = when(fn_eval_) > FnEval
144-
| unless(fn_eval_) > Line;
143+
rule Init = when("fnev") > FnEval
144+
| unless("fnev") > Line;
145145

146146
grammar_ = start(Init);
147147
}
@@ -387,16 +387,15 @@ class basic_interpreter
387387
return 0.0;
388388
}
389389

390-
bool const saved_fn_eval = fn_eval_;
391-
fn_eval_ = true;
390+
bool const saved_fn_eval = environment_.set_condition("fnev", true);
392391

393392
double& param_var = vars_[param];
394393
double const saved_var = param_var;
395394
param_var = arg;
396395

397396
bool const success = lug::parse(body, grammar_, environment_);
398397

399-
fn_eval_ = saved_fn_eval;
398+
environment_.set_condition("fnev", saved_fn_eval);
400399
param_var = saved_var;
401400

402401
if (!success) {

tests/conditions.cpp

Lines changed: 11 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -7,86 +7,41 @@
77
#undef NDEBUG
88
#include <cassert>
99

10-
void test_condition_by_bool_reference()
10+
void test_condition()
1111
{
1212
using namespace lug::language;
13-
bool accept_ab = false;
14-
rule S = when(accept_ab) > "ab"_sx | unless(accept_ab) > "a"_sx;
15-
grammar G = start(S > eoi);
16-
assert(lug::parse("a", G));
17-
assert(!lug::parse("ab", G));
18-
accept_ab = true;
19-
assert(!lug::parse("a", G));
20-
assert(lug::parse("ab", G));
21-
accept_ab = false;
22-
assert(lug::parse("a", G));
23-
assert(!lug::parse("ab", G));
24-
}
25-
26-
void test_condition_by_bool_reference_dynamic()
27-
{
28-
using namespace lug::language;
29-
bool accept_ab = false;
30-
rule S = when(accept_ab) > "ab"_sx > unset(accept_ab)
31-
| unless(accept_ab) > "a"_sx > set(accept_ab);
32-
grammar G = start(+S > eoi);
33-
assert(lug::parse("a ab a ab", G));
34-
accept_ab = false;
35-
assert(!lug::parse("a a ab a ab", G));
36-
accept_ab = false;
37-
assert(!lug::parse("a ab ab a ab", G));
38-
accept_ab = false;
39-
assert(!lug::parse("ab a ab a", G));
40-
accept_ab = false;
41-
assert(!lug::parse("ab ab a ab a", G));
42-
}
43-
44-
void test_condition_by_name()
45-
{
46-
using namespace lug::language;
47-
rule S = when("accept_ab") > "ab"_sx | unless("accept_ab") > "a"_sx;
13+
rule S = when("accept_ab") > "ab"_sx
14+
| unless("accept_ab") > "a"_sx;
4815
environment E;
4916
grammar G = start(S > eoi);
5017
assert(lug::parse("a", G, E));
5118
assert(!lug::parse("ab", G, E));
52-
E.set_condition("accept_ab");
19+
E.set_condition("accept_ab", true);
5320
assert(!lug::parse("a", G, E));
5421
assert(lug::parse("ab", G, E));
55-
E.unset_condition("accept_ab");
22+
E.set_condition("accept_ab", false);
5623
assert(lug::parse("a", G, E));
5724
assert(!lug::parse("ab", G, E));
5825
}
5926

60-
void test_condition_by_name_dynamic()
27+
void test_condition_block()
6128
{
6229
using namespace lug::language;
63-
rule S = when("accept_ab") > "ab"_sx > unset("accept_ab")
64-
| unless("accept_ab") > "a"_sx > set("accept_ab");
65-
grammar G = start(+S > eoi);
30+
rule S = when("accept_ab") > "ab"_sx > ~off("accept_ab")[ S ]
31+
| unless("accept_ab") > "a"_sx > ~on("accept_ab")[ S ];
32+
grammar G = start(S > eoi);
6633
assert(lug::parse("a ab a ab", G));
6734
assert(!lug::parse("a a ab a ab", G));
6835
assert(!lug::parse("a ab ab a ab", G));
6936
assert(!lug::parse("ab a ab a", G));
7037
assert(!lug::parse("ab ab a ab a", G));
7138
}
7239

73-
void test_condition_by_predicate()
74-
{
75-
using namespace lug::language;
76-
rule S = "a"_sx > when([]{ return false; }) | unless([]{ return false; }) > "ab";
77-
grammar G = start(S > eoi);
78-
assert(lug::parse("ab", G));
79-
assert(!lug::parse("a", G));
80-
}
81-
8240
int main()
8341
{
8442
try {
85-
test_condition_by_bool_reference();
86-
test_condition_by_bool_reference_dynamic();
87-
test_condition_by_name();
88-
test_condition_by_name_dynamic();
89-
test_condition_by_predicate();
43+
test_condition();
44+
test_condition_block();
9045
} catch (std::exception& e) {
9146
std::cerr << "Error: " << e.what() << "\n";
9247
return -1;

0 commit comments

Comments
 (0)