Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make scanner accept based floating point literals #9106

Merged
merged 3 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 103 additions & 14 deletions lib/stdlib/src/erl_scan.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1824,7 +1824,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
try list_to_integer(remove_digit_separators(Ncs, Us)) of
B when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
Bcs = Ncs++[$#],
scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
scan_based_num(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
B when is_integer(B) ->
Len = length(Ncs),
scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0)
Expand Down Expand Up @@ -1861,29 +1861,35 @@ remove_digit_separators(Number, with_underscore) ->
orelse (C >= $A andalso B > 10 andalso C < $A + B - 10)
orelse (C >= $a andalso B > 10 andalso C < $a + B - 10)))).

scan_based_int(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
scan_based_num(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
scan_based_num(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).

scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
scan_based_num([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
?BASED_DIGIT(C, B) ->
scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
scan_based_num(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
scan_based_num([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
scan_based_num(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
with_underscore);
scan_based_int([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
scan_based_int([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
scan_based_num([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}};
scan_based_num([$.,C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) ->
scan_based_fraction(Cs, St, Line, Col, Toks, B, [C,$.|Ncs], BCs, Us);
scan_based_num([$.,C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
scan_based_num([$.]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_num/6}};
scan_based_num([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
scan_based_int([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
scan_based_num([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}};
scan_based_num(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
%% No actual digits following the base.
Len = length(Bcs),
Ncol = incr_column(Col, Len),
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs);
scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
scan_based_num(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
Ncs = lists:reverse(Ncs0),
try list_to_integer(remove_digit_separators(Ncs, Us), B) of
N ->
Expand All @@ -1896,6 +1902,89 @@ scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
end.

scan_based_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,Us}) ->
scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us).

scan_based_fraction([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) ->
scan_based_fraction(Cs, St, Line, Col, Toks, B, [C|Ncs], BCs, Us);
scan_based_fraction([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, BCs, _Us) when
?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
scan_based_fraction(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], BCs, with_underscore);
scan_based_fraction([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
scan_based_fraction([$#,E|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when E =:= $e; E =:= $E ->
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, [E,$#], Us);
scan_based_fraction([$#]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
scan_based_fraction([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, BCs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(BCs)), Cs0);
scan_based_fraction([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, [], Us).

scan_based_exponent_sign(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) ->
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).

scan_based_exponent_sign([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when
C =:= $+; C =:= $- ->
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us);
scan_based_exponent_sign([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs, ECs,Us},fun scan_based_exponent_sign/6}};
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).

scan_based_exponent(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) ->
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).

scan_based_exponent([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when ?DIGIT(C) ->
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us);
scan_based_exponent([$_,Next|Cs], St, Line, Col, Toks, B, Ncs, BCs, [Prev|_]=ECs, _) when
?DIGIT(Next) andalso ?DIGIT(Prev) ->
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [Next,$_|ECs], with_underscore);
scan_based_exponent([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs,Us},fun scan_based_exponent/6}};
scan_based_exponent([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs, Us},fun scan_based_exponent/6}};
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).

% Note: the base and exponent parts are always in decimal
based_float_end(Cs, St, Line, Col, Toks, 10, Ncs0, BCs, ECs0, Us) ->
Ncs = lists:reverse(Ncs0),
ECs = lists:reverse(ECs0),
Tcs = BCs ++ Ncs ++ ECs,
Fcs = case ECs of
[] -> Ncs ++ ECs;
[$#|ECs1] -> Ncs ++ ECs1
end,
try list_to_float(remove_digit_separators(Fcs, Us)) of
F ->
tok3(Cs, St, Line, Col, Toks, float, Tcs, F)
catch
_:_ ->
Ncol = incr_column(Col, length(Ncs) + length(BCs)),
scan_error({illegal,float}, Line, Col, Line, Ncol, Cs)
end;
based_float_end(Cs, St, Line, Col, Toks, B, Ncs0, BCs, ECs0, Us) when B =/= 10 ->
%% there is no general list_to_float(String,Base) available yet
Ncs = lists:reverse(Ncs0),
ECs = lists:reverse(ECs0),
Tcs = BCs ++ Ncs ++ ECs,
Exp = case ECs of
[] -> 0;
[$#,_|ECs1] -> list_to_integer(remove_digit_separators(ECs1, Us))
end,
Ncs1 = trim_float_zeros(lists:reverse(trim_float_zeros(remove_digit_separators(Ncs0, Us)))),
N = list_to_integer(lists:delete($.,Ncs1), B),
D = length(Ncs1) - string:chr(Ncs1, $.),
F = N * math:pow(B, Exp-D),
tok3(Cs, St, Line, Col, Toks, float, Tcs, F).

trim_float_zeros([$0, $. | _]=Cs) -> Cs; % don't remove 0 next to `.`
trim_float_zeros([$0 | Cs]) -> trim_float_zeros(Cs);
trim_float_zeros(Cs) -> Cs.

scan_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs,Us}) ->
scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us).

Expand Down
96 changes: 90 additions & 6 deletions lib/stdlib/test/erl_scan_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ otp_7810(Config) when is_list(Config) ->
ok = integers(),
ok = base_integers(),
ok = floats(),
ok = base_floats(),
ok = dots(),
ok = chars(),
ok = variables(),
Expand Down Expand Up @@ -318,7 +319,7 @@ integers() ->
"__123"],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
case erl_scan_string(S) of
{ok, [{integer, _, _}|_], _} ->
error({unexpected_integer, S});
{ok, _, _} ->
Expand All @@ -335,7 +336,7 @@ integers() ->
"12@"],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
case erl_scan_string(S) of
{error,{1,erl_scan,{illegal,integer}},_} ->
ok;
{error,Err,_} ->
Expand Down Expand Up @@ -397,7 +398,7 @@ base_integers() ->
"10#12A4"],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
case erl_scan_string(S) of
{error,{1,erl_scan,{illegal,integer}},_} ->
ok;
{error,Err,_} ->
Expand Down Expand Up @@ -454,7 +455,89 @@ floats() ->
],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
case erl_scan_string(S) of
{error,{1,erl_scan,{illegal,float}},_} ->
ok;
{error,Err,_} ->
error({unexpected_error, S, Err});
Succ ->
error({unexpected_success, S, Succ})
end
end, FloatErrors),
ok.

base_floats() ->
[begin
Ts = [{float,{1,1},F}],
test_string(FS, Ts)
end || {FS, F} <- [{"10#1.0",1.0},
{"10#012345.625", 012345.625},
{"10#3.31200",3.31200},
{"10#1.0#e0",1.0e0},
{"10#1.0#E17",1.0E17},
{"10#34.21#E-18", 34.21E-18},
{"10#17.0#E+14", 17.0E+14},
{"10#12345.625#e3", 12345.625e3},
{"10#12345.625#E-3", 12345.625E-3},

{"2#1.0", 1.0},
{"2#101.0", 5.0},
{"2#101.1", 5.5},
{"2#101.101", 5.625},
{"2#101.1#e0", 5.5},
{"2#1.0#e+3", 8.0},
{"2#1.0#e-3", 0.125},
{"2#000100.001000", 4.125},
{"2#0.10000000000000000000000000000000000000000000000000001", 0.5000000000000001}, % 53 bits
{"2#0.100000000000000000000000000000000000000000000000000001", 0.5}, % not 54 bits
{"2#0.11001001000011111101101010100010001000010110100011000#e+2", math:pi()}, % pi to 53 bits

{"3#102.12", 3#10212/3#100},

{"16#100.0", 256.0},
{"16#ff.d", 16#ffd/16},
{"16#1.0", 1.0},
{"16#abc.def", 16#abcdef/16#1000},
{"16#00100.001000", 256.0 + 1/16#1000},
{"16#0.80000000000008", 0.5000000000000001}, % 53-bit fraction
{"16#0.80000000000004", 0.5}, % not 54 bits
{"16#fe.8#e0", 16#fe8/16},
{"16#f.e#e+3", float(16#fe*16#100)},
{"16#c.0#e-1", 16#c/16},
{"16#0.0e0", 16#e/16#100}, % e is a hex digit, not exponent
{"16#0.0E0", 16#e/16#100}, % same for E
{"16#0.3243f6a8885a30#e+1", math:pi()} % pi to 53 bits
]],

[begin
{error,{1,erl_scan,{illegal,float}},1} = erl_scan_string(S),
{error,{{1,1},erl_scan,{illegal,float}},{1,_}} =
erl_scan_string(S, {1,1}, [])
end || S <- ["1.14Ea"]],

UnderscoreSamples =
[{"1_6#000_100.0_0", 256.0},
{"16#0.3243_f6a8_885a_30#e+1", math:pi()},
{"16#3243_f6a8.885a_30#e-7", math:pi()},
{"16#3243_f6a8_885a.30#e-1_1", math:pi()},
{"2#1.010101010101010101010#e+2_1", 2796202.0}],
lists:foreach(
fun({S, I}) ->
test_string(S, [{float, {1, 1}, I}])
end, UnderscoreSamples),
FloatErrors =
[
"10#12345.a25",
"10#12345.6a5",
"16#a0.gf23",
"16#a0.2fg3",
"2#10.201",
"2#10.120",
"3#102.3"
],
lists:foreach(
fun(S) ->
case erl_scan_string(S) of
{error,{1,erl_scan,{illegal,float}},_} ->
ok;
{error,Err,_} ->
Expand Down Expand Up @@ -1592,8 +1675,9 @@ erl_scan_string(String, StartLocation, Options) ->
case erl_scan:string(String, StartLocation, Options) of
{ok, Tokens, EndLocation} ->
{ok, unopaque_tokens(Tokens), EndLocation};
Else ->
Else
{error,{_,Mod,Reason},_}=Error ->
Mod:format_error(Reason),
Error
end.

erl_scan_tokens(C, S, L) ->
Expand Down
16 changes: 14 additions & 2 deletions system/doc/reference_manual/data_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,25 @@ conventional notation, there are two Erlang-specific notations:

- `$`_`char`_
ASCII value or unicode code-point of the character _`char`_.
- _`base`_`#`_`value`_
- _`base`_`#`_`digits`_
Integer with the base _`base`_, which must be an integer in the range 2
through 36.
through 36. _`digits`_ are `0`-`9` plus letters `A`-`Z` (upper or lower case).
This notation can also be found in the Ada programming
language. Erlang does _not_ support prefixes such as `0x` for hexadecimal
or `077` for octal.
- _`base`_`#`_`digits`_`.`_`digits`_`#e`_`exponent`_
Based floating point number, for example `16#ff.fe#e+6`. Using a base
like 16 or 2 allows for an exact text representation of a floating
point number. Like the base, the exponent is always a decimal number.


Leading zeroes are ignored. Single underscore characters (`_`) can be
inserted between digits as a visual separator.

Also note that floating point numbers must start with a digit, and must
contain a `.`. In other words, literals such as `.01` and `1e6` are not
allowed, and must be written `0.01` and `1.0e6` respectively.

_Examples:_

```text
Expand Down
Loading