Skip to content

Commit 681ba43

Browse files
committed
Assorted enhancements and fixes:
- Adjusted whitespace handling in repetitions to pull the initial skip_space out before the choice instruction, eliminating an extra skip_space per iteration.
- Refactored repetition expression checks to ensure `NCount` and `NMin` are greater than 1 for better validation.
- Simplified the `dpsh` method in `lug.hpp` by removing the unnecessary ancestor mode variable, enhancing readability.
- Recursively swapped the evaluation order of `operator|` and `operator>` for expressions to flatten the expression tree and optimize instruction encoding.
- Added the `is_template_instantiation_of` utility to check if a type is an instantiation of a specific template, improving type trait capabilities.
- Updated the JSON parser and matcher to use character literals instead of string literals for improved performance and clarity.
1 parent aa6ecc1 commit 681ba43

File tree

4 files changed

+53
-33
lines changed

4 files changed

+53
-33
lines changed

include/lug/detail.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ template <class R, class Fn, class... Args> struct is_invocable_r_exact_impl<std
120120
template <class R, class Fn, class... Args> struct is_invocable_r_exact : is_invocable_r_exact_impl<void, R, Fn, Args...> {};
121121
template <class R, class Fn, class... Args> inline constexpr bool is_invocable_r_exact_v = is_invocable_r_exact<R, Fn, Args...>::value;
122122

123+
template <class T, template <class...> class X> struct is_template_instantiation_of : std::false_type {};
124+
template <template <class...> class X, class... Args> struct is_template_instantiation_of<X<Args...>, X> : std::true_type {};
125+
template <class T, template <class...> class X> inline constexpr bool is_template_instantiation_of_v = is_template_instantiation_of<T, X>::value;
126+
123127
template <class T> struct remove_cvref_from_tuple;
124128
template <class T> struct remove_cvref_from_tuple<T const> : remove_cvref_from_tuple<T> {};
125129
template <class T> struct remove_cvref_from_tuple<T volatile> : remove_cvref_from_tuple<T> {};

include/lug/lug.hpp

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -785,8 +785,7 @@ class encoder
785785

786786
void dpsh(directives enable, directives disable)
787787
{
788-
directives const ancestor_mode = mode_.back();
789-
mode_.push_back((ancestor_mode & ~disable) | enable);
788+
mode_.push_back((mode_.back() & ~disable) | enable);
790789
}
791790

792791
void dpop(directives relay)
@@ -1409,17 +1408,16 @@ template <std::size_t NMin, std::size_t NMax, class E>
14091408
if (d.should_skip())
14101409
return false;
14111410
d.commit_eps();
1412-
if constexpr (std::is_same_v<std::decay_t<E>, match_any_expression>) {
1411+
if constexpr (std::is_same_v<std::decay_t<E>, match_any_expression>)
14131412
d.encode_min_max(opcode::repeat_any, NMin, NMax);
1414-
} else if constexpr (std::is_same_v<std::decay_t<E>, ctype_expression<unicode::ctype::blank>>) {
1413+
else if constexpr (std::is_same_v<std::decay_t<E>, ctype_expression<unicode::ctype::blank>>)
14151414
d.encode_min_max(opcode::repeat_blank, NMin, NMax);
1416-
} else if constexpr (std::is_same_v<std::decay_t<E>, ctype_expression<unicode::ctype::space>>) {
1415+
else if constexpr (std::is_same_v<std::decay_t<E>, ctype_expression<unicode::ctype::space>>)
14171416
d.encode_min_max(opcode::repeat_space, NMin, NMax);
1418-
} else if constexpr (std::is_same_v<std::decay_t<E>, char_expression>) {
1417+
else if constexpr (std::is_same_v<std::decay_t<E>, char_expression>)
14191418
d.encode_char_or_set(opcode::repeat_octet, opcode::repeat_set, e.c, NMin, NMax);
1420-
} else if constexpr (std::is_same_v<std::decay_t<E>, char32_range_expression>) {
1419+
else if constexpr (std::is_same_v<std::decay_t<E>, char32_range_expression>)
14211420
d.encode_min_max(opcode::repeat_set, NMin, NMax, e.make_rune_set(d.mode()));
1422-
}
14231421
return true;
14241422
} else if constexpr (std::is_same_v<std::decay_t<E>, string_expression>) {
14251423
if (d.should_skip() || (e.text.size() != 1))
@@ -1444,9 +1442,10 @@ struct repetition_expression : unary_encoder_expression_interface<repetition_exp
14441442
if constexpr (is_repetition_expression_optimizable_v<std::decay_t<E1>>)
14451443
if (repetition_encode_optimized<NMin, NMax>(this->e1, d))
14461444
return m;
1445+
d.skip(directives::none, directives::lexeme | directives::noskip);
14471446
auto const start = d.encode(opcode::jump);
14481447
auto const loop_body = d.here();
1449-
d.dpsh(directives::postskip, directives::none);
1448+
d.dpsh(directives::postskip, directives::preskip);
14501449
auto m2 = this->e1.evaluate(d, m);
14511450
d.dpop(NMin > 0 ? directives::eps : directives::none);
14521451
d.encode(opcode::ret);
@@ -1467,7 +1466,7 @@ struct repetition_expression : unary_encoder_expression_interface<repetition_exp
14671466
template <class E1, std::size_t NCount>
14681467
struct repetition_expression<E1, NCount, NCount> : unary_encoder_expression_interface<repetition_expression<E1, NCount, NCount>, E1>
14691468
{
1470-
static_assert((NCount > 0) && (NCount <= max_repetitions));
1469+
static_assert((NCount > 1) && (NCount <= max_repetitions));
14711470
using base_type = unary_encoder_expression_interface<repetition_expression<E1, NCount, NCount>, E1>;
14721471
constexpr explicit repetition_expression(E1 const& e) : base_type{e} {}
14731472

@@ -1477,9 +1476,10 @@ struct repetition_expression<E1, NCount, NCount> : unary_encoder_expression_inte
14771476
if constexpr (is_repetition_expression_optimizable_v<std::decay_t<E1>>)
14781477
if (repetition_encode_optimized<NCount, NCount>(this->e1, d))
14791478
return m;
1479+
d.skip(directives::none, directives::lexeme | directives::noskip);
14801480
auto const start = d.encode(opcode::jump);
14811481
auto const loop_body = d.here();
1482-
d.dpsh(directives::postskip, directives::none);
1482+
d.dpsh(directives::postskip, directives::preskip);
14831483
auto m2 = this->e1.evaluate(d, m);
14841484
d.dpop(directives::eps);
14851485
d.encode(opcode::ret);
@@ -1493,7 +1493,7 @@ struct repetition_expression<E1, NCount, NCount> : unary_encoder_expression_inte
14931493
template <class E1, std::size_t NMin>
14941494
struct repetition_expression<E1, NMin, forever> : unary_encoder_expression_interface<repetition_expression<E1, NMin, forever>, E1>
14951495
{
1496-
static_assert((NMin > 0) && (NMin <= max_repetitions));
1496+
static_assert((NMin > 1) && (NMin <= max_repetitions));
14971497
using base_type = unary_encoder_expression_interface<repetition_expression<E1, NMin, forever>, E1>;
14981498
constexpr explicit repetition_expression(E1 const& e) : base_type{e} {}
14991499

@@ -1503,9 +1503,10 @@ struct repetition_expression<E1, NMin, forever> : unary_encoder_expression_inter
15031503
if constexpr (is_repetition_expression_optimizable_v<std::decay_t<E1>>)
15041504
if (repetition_encode_optimized<NMin, forever>(this->e1, d))
15051505
return m;
1506+
d.skip(directives::none, directives::lexeme | directives::noskip);
15061507
auto const start = d.encode(opcode::jump);
15071508
auto const loop_body = d.here();
1508-
d.dpsh(directives::postskip, directives::none);
1509+
d.dpsh(directives::postskip, directives::preskip);
15091510
auto m2 = this->e1.evaluate(d, m);
15101511
d.dpop(NMin > 0 ? directives::eps : directives::none);
15111512
d.encode(opcode::ret);
@@ -1535,9 +1536,10 @@ struct repetition_expression<E1, 0, NMax> : unary_encoder_expression_interface<r
15351536
if constexpr (is_repetition_expression_optimizable_v<std::decay_t<E1>>)
15361537
if (repetition_encode_optimized<0, NMax>(this->e1, d))
15371538
return m;
1539+
d.skip(directives::none, directives::lexeme | directives::noskip);
15381540
auto const start = d.encode(opcode::jump);
15391541
auto const loop_body = d.here();
1540-
d.dpsh(directives::postskip, directives::none);
1542+
d.dpsh(directives::postskip, directives::preskip);
15411543
auto m2 = this->e1.evaluate(d, m);
15421544
d.dpop(directives::none);
15431545
d.encode(opcode::ret);
@@ -1596,9 +1598,10 @@ struct repetition_expression<E1, 0, forever> : unary_encoder_expression_interfac
15961598
if constexpr (is_repetition_expression_optimizable_v<std::decay_t<E1>>)
15971599
if (repetition_encode_optimized<0, forever>(this->e1, d))
15981600
return m;
1601+
d.skip(directives::none, directives::lexeme | directives::noskip);
15991602
auto const choice = d.encode(opcode::choice);
16001603
auto const expression = d.here();
1601-
d.dpsh(directives::postskip, directives::none);
1604+
d.dpsh(directives::postskip, directives::preskip);
16021605
auto m2 = this->e1.evaluate(d, m);
16031606
d.dpop(directives::none);
16041607
auto const commit = d.encode(opcode::commit_partial);
@@ -1629,15 +1632,13 @@ struct repetition_expression<E1, 1, 2> : unary_encoder_expression_interface<repe
16291632
if (repetition_encode_optimized<1, 2>(this->e1, d))
16301633
return m;
16311634
(void)this->e1.evaluate(d, m);
1632-
d.dpsh(directives::preskip, directives::postskip);
16331635
auto const choice = d.encode(opcode::choice);
1634-
d.dpsh(directives::none, directives::none);
1636+
d.dpsh(directives::preskip, directives::postskip);
16351637
auto m2 = this->e1.evaluate(d, m);
16361638
d.dpop(directives::eps);
16371639
auto const commit = d.encode(opcode::commit);
16381640
d.jump_to_here(choice);
16391641
d.jump_to_here(commit);
1640-
d.dpop(directives::eps);
16411642
return m2;
16421643
}
16431644
};
@@ -1655,16 +1656,15 @@ struct repetition_expression<E1, 1, forever> : unary_encoder_expression_interfac
16551656
if (repetition_encode_optimized<1, forever>(this->e1, d))
16561657
return m;
16571658
(void)this->e1.evaluate(d, m);
1658-
d.dpsh(directives::preskip, directives::postskip);
1659+
d.skip(directives::none, directives::lexeme | directives::noskip);
16591660
auto const choice = d.encode(opcode::choice);
16601661
auto const expression = d.here();
1661-
d.dpsh(directives::postskip, directives::none);
1662+
d.dpsh(directives::postskip, directives::preskip);
16621663
auto m2 = this->e1.evaluate(d, m);
16631664
d.dpop(directives::none);
16641665
auto const commit = d.encode(opcode::commit_partial);
16651666
d.jump_to_here(choice);
16661667
d.jump_to_target(commit, expression);
1667-
d.dpop(directives::eps);
16681668
return m2;
16691669
}
16701670
};
@@ -2076,11 +2076,27 @@ inline namespace operators {
20762076
[[nodiscard]] inline auto operator ""_srx(char const* s, std::size_t n) { return cased[basic_regular_expression{std::string_view{s, n}}]; }
20772077
[[nodiscard]] constexpr auto operator ""_fail(char const* s, std::size_t n) { return failure{std::string_view{s, n}}; }
20782078

2079+
template <class E1, class E2, class = std::enable_if_t<is_expression_v<E1> && is_expression_v<E2>>>
2080+
[[nodiscard]] constexpr auto operator|(E1 const& e1, E2 const& e2)
2081+
{
2082+
if constexpr (detail::is_template_instantiation_of_v<E1, choice_expression>)
2083+
return choice_expression{e1.e1, e1.e2 | e2};
2084+
else
2085+
return choice_expression{make_expression(e1), make_expression(e2)};
2086+
}
2087+
2088+
template <class E1, class E2, class = std::enable_if_t<is_expression_v<E1> && is_expression_v<E2>>>
2089+
[[nodiscard]] constexpr auto operator>(E1 const& e1, E2 const& e2)
2090+
{
2091+
if constexpr (detail::is_template_instantiation_of_v<E1, sequence_expression>)
2092+
return sequence_expression{e1.e1, e1.e2 > e2};
2093+
else
2094+
return sequence_expression{make_expression(e1), make_expression(e2)};
2095+
}
2096+
20792097
template <class E, class = std::enable_if_t<is_expression_v<E>>> [[nodiscard]] constexpr auto operator!(E const& e) { return negative_lookahead_expression{make_expression(e)}; }
20802098
template <class E, class = std::enable_if_t<is_expression_v<E>>> [[nodiscard]] constexpr auto operator&(E const& e) { return positive_lookahead_expression{make_expression(e)}; } // NOLINT(google-runtime-operator)
20812099
template <class E, class = std::enable_if_t<is_expression_v<E>>> [[nodiscard]] constexpr auto operator*(E const& e) { return repetition_expression<std::decay_t<decltype(make_expression(e))>, 0, forever>{make_expression(e)}; }
2082-
template <class E1, class E2, class = std::enable_if_t<is_expression_v<E1> && is_expression_v<E2>>> [[nodiscard]] constexpr auto operator|(E1 const& e1, E2 const& e2) { return choice_expression{make_expression(e1), make_expression(e2)}; }
2083-
template <class E1, class E2, class = std::enable_if_t<is_expression_v<E1> && is_expression_v<E2>>> [[nodiscard]] constexpr auto operator>(E1 const& e1, E2 const& e2) { return sequence_expression{make_expression(e1), make_expression(e2)}; }
20842100
template <class E1, class E2, class = std::enable_if_t<is_expression_v<E1> && is_expression_v<E2>>> [[nodiscard]] constexpr auto operator>>(E1 const& e1, E2 const& e2) { return e1 > *(e2 > e1); }
20852101
template <class T, class E, class = std::enable_if_t<is_expression_v<E>>> [[nodiscard]] constexpr auto operator%(T& target, E const& e) { return assign_to_expression{make_expression(e), std::addressof(target)}; }
20862102
template <class E, class = std::enable_if_t<is_expression_v<E>>> [[nodiscard]] constexpr auto operator^(E const& e, error_response r) { return e > recover_response_expression{r}; }
@@ -3384,8 +3400,8 @@ class basic_parser : public parser_base
33843400
if (!fail(fail_count))
33853401
return false;
33863402
accept_or_drain_if_deferred();
3387-
fail_count = 0;
33883403
}
3404+
fail_count = 0;
33893405
}
33903406
}
33913407
if (!success_)

samples/json/json_matcher.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ class json_matcher
1717
using namespace lug::language;
1818
rule JSON;
1919
auto ExponentPart = lexeme[ "[Ee]"_rx > ~"[+-]"_rx > +"[0-9]"_rx ];
20-
auto FractionalPart = lexeme[ "."_sx > +"[0-9]"_rx ];
21-
auto IntegralPart = lexeme[ "0"_sx | "[1-9]"_rx > *"[0-9]"_rx ];
22-
auto Number = lexeme[ ~"-"_sx > IntegralPart > ~FractionalPart > ~ExponentPart ];
20+
auto FractionalPart = lexeme[ '.'_cx > +"[0-9]"_rx ];
21+
auto IntegralPart = lexeme[ '0'_cx | "[1-9]"_rx > *"[0-9]"_rx ];
22+
auto Number = lexeme[ ~'-'_cx > IntegralPart > ~FractionalPart > ~ExponentPart ];
2323
auto Boolean = lexeme[ "true"_sx | "false" ];
2424
auto Null = lexeme[ "null" ];
2525
auto UnicodeEscape = lexeme[ 'u' > "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]"_rx ];

samples/json/json_parser.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,18 @@ class json_parser
6767
// JSON grammar rules
6868
rule JSON;
6969
auto ExponentPart = lexeme[ "[Ee]"_rx > ~"[+-]"_rx > +"[0-9]"_rx ];
70-
auto FractionalPart = lexeme[ "."_sx > +"[0-9]"_rx ];
71-
auto IntegralPart = lexeme[ "0"_sx | "[1-9]"_rx > *"[0-9]"_rx ];
72-
auto Number = lexeme[ ~"-"_sx > IntegralPart > ~FractionalPart > ~ExponentPart ] < MakeNumber;
73-
auto True = lexeme[ "true"_sx ] < MakeTrue;
74-
auto False = lexeme[ "false"_sx ] < MakeFalse;
70+
auto FractionalPart = lexeme[ '.'_cx > +"[0-9]"_rx ];
71+
auto IntegralPart = lexeme[ '0'_cx | "[1-9]"_rx > *"[0-9]"_rx ];
72+
auto Number = lexeme[ ~'-'_cx > IntegralPart > ~FractionalPart > ~ExponentPart ] < MakeNumber;
73+
auto True = lexeme[ "true" ] < MakeTrue;
74+
auto False = lexeme[ "false" ] < MakeFalse;
7575
auto Null = lexeme[ "null" ] < MakeNull;
7676
auto UnicodeEscape = lexeme[ 'u' > "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]"_rx ];
7777
auto Escape = lexeme[ '\\' > ("[/\"\\bfnrt]"_rx | UnicodeEscape) ];
7878
rule KeyOrString = lexeme[ '"' > *("[^\"\\\u0000-\u001F]"_rx | Escape) > '"' ] < MakeKeyOrString;
7979
auto String = synthesize<json_node, std::string>[ KeyOrString ];
8080
auto Array = '[' > synthesize_collect<json_node, json_array>[ JSON >> ',' ] > ']';
81-
auto Object = '{' > synthesize_collect<json_node, json_object, std::string, json_node>[ ( KeyOrString > ':' > JSON ) >> ',' ] > '}';
81+
auto Object = '{' > synthesize_collect<json_node, json_object, std::string, json_node>[ (KeyOrString > ':' > JSON) >> ',' ] > '}';
8282
JSON = Object | Array | String | Number | True | False | Null;
8383
grammar_ = start(JSON > eoi);
8484
}

0 commit comments

Comments
 (0)