From e4c552504800529be0d15c271f7a5a2bfa2bb06a Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Sat, 21 Dec 2024 13:35:02 +0100 Subject: [PATCH 1/3] Document details of existing integer and float notation --- system/doc/reference_manual/data_types.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/system/doc/reference_manual/data_types.md b/system/doc/reference_manual/data_types.md index a7168150a1b3..3731802838aa 100644 --- a/system/doc/reference_manual/data_types.md +++ b/system/doc/reference_manual/data_types.md @@ -42,11 +42,17 @@ conventional notation, there are two Erlang-specific notations: ASCII value or unicode code-point of the character _`char`_. - _`base`_`#`_`value`_ Integer with the base _`base`_, which must be an integer in the range 2 - through 36. + through 36. This notation can also be found in the Ada programming + language. Erlang does _not_ support prefixes such as `0x` for hexadecimal + or `077` for octal. Leading zeroes are ignored. Single underscore characters (`_`) can be inserted between digits as a visual separator. +Also note that floating point numbers must start with a digit, and must +contain a `.`. In other words, literals such as `.01` and `1e6` are not +allowed, and must be written `0.01` and `1.0e6` respectively. + _Examples:_ ```text From 0afd8d0f605bb69b7a4e243786ba94b435b0b0dd Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Sun, 10 Nov 2024 16:14:47 +0100 Subject: [PATCH 2/3] Make scanner accept based floating point literals Follows Ada style, e.g. 16#ff.fe#e-2, 2#0.101#e5, etc. --- lib/stdlib/src/erl_scan.erl | 117 +++++++++++++++++++++++++---- lib/stdlib/test/erl_scan_SUITE.erl | 96 +++++++++++++++++++++-- 2 files changed, 193 insertions(+), 20 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index abb739614c79..e95d4cdb22de 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -1824,7 +1824,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) -> try list_to_integer(remove_digit_separators(Ncs, Us)) of B when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 -> Bcs = Ncs++[$#], - scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore); + scan_based_num(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore); B when is_integer(B) -> Len = length(Ncs), scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0) @@ -1861,29 +1861,35 @@ remove_digit_separators(Number, with_underscore) -> orelse (C >= $A andalso B > 10 andalso C < $A + B - 10) orelse (C >= $a andalso B > 10 andalso C < $a + B - 10)))). -scan_based_int(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us}) +scan_based_num(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us}) when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 -> - scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us). + scan_based_num(Cs, St, Line, Col, Toks, B, NCs, BCs, Us). -scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when +scan_based_num([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when ?BASED_DIGIT(C, B) -> - scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us); -scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us) + scan_based_num(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us); +scan_based_num([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us) when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> - scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs, + scan_based_num(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs, with_underscore); -scan_based_int([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) -> - {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; -scan_based_int([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) -> +scan_based_num([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}}; +scan_based_num([$.,C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) -> + scan_based_fraction(Cs, St, Line, Col, Toks, B, [C,$.|Ncs], BCs, Us); +scan_based_num([$.,C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0); +scan_based_num([$.]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_num/6}}; +scan_based_num([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) -> scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0); -scan_based_int([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) -> - {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}}; -scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) -> +scan_based_num([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}}; +scan_based_num(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) -> %% No actual digits following the base. Len = length(Bcs), Ncol = incr_column(Col, Len), scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs); -scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) -> +scan_based_num(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) -> Ncs = lists:reverse(Ncs0), try list_to_integer(remove_digit_separators(Ncs, Us), B) of N -> @@ -1896,6 +1902,89 @@ scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) -> scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs) end. +scan_based_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,Us}) -> + scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us). + +scan_based_fraction([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) -> + scan_based_fraction(Cs, St, Line, Col, Toks, B, [C|Ncs], BCs, Us); +scan_based_fraction([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, BCs, _Us) when + ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) -> + scan_based_fraction(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], BCs, with_underscore); +scan_based_fraction([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}}; +scan_based_fraction([$#,E|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when E =:= $e; E =:= $E -> + scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, [E,$#], Us); +scan_based_fraction([$#]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}}; +scan_based_fraction([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, BCs, _Us) when ?NAMECHAR(C) -> + scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(BCs)), Cs0); +scan_based_fraction([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}}; +scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) -> + based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, [], Us). + +scan_based_exponent_sign(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) -> + scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us). + +scan_based_exponent_sign([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when + C =:= $+; C =:= $- -> + scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us); +scan_based_exponent_sign([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs, ECs,Us},fun scan_based_exponent_sign/6}}; +scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) -> + scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us). + +scan_based_exponent(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) -> + scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us). + +scan_based_exponent([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when ?DIGIT(C) -> + scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us); +scan_based_exponent([$_,Next|Cs], St, Line, Col, Toks, B, Ncs, BCs, [Prev|_]=ECs, _) when + ?DIGIT(Next) andalso ?DIGIT(Prev) -> + scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [Next,$_|ECs], with_underscore); +scan_based_exponent([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs,Us},fun scan_based_exponent/6}}; +scan_based_exponent([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) -> + {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs, Us},fun scan_based_exponent/6}}; +scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) -> + based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us). + +% Note: the base and exponent parts are always in decimal +based_float_end(Cs, St, Line, Col, Toks, 10, Ncs0, BCs, ECs0, Us) -> + Ncs = lists:reverse(Ncs0), + ECs = lists:reverse(ECs0), + Tcs = BCs ++ Ncs ++ ECs, + Fcs = case ECs of + [] -> Ncs ++ ECs; + [$#|ECs1] -> Ncs ++ ECs1 + end, + try list_to_float(remove_digit_separators(Fcs, Us)) of + F -> + tok3(Cs, St, Line, Col, Toks, float, Tcs, F) + catch + _:_ -> + Ncol = incr_column(Col, length(Ncs) + length(BCs)), + scan_error({illegal,float}, Line, Col, Line, Ncol, Cs) + end; +based_float_end(Cs, St, Line, Col, Toks, B, Ncs0, BCs, ECs0, Us) when B =/= 10 -> + %% there is no general list_to_float(String,Base) available yet + Ncs = lists:reverse(Ncs0), + ECs = lists:reverse(ECs0), + Tcs = BCs ++ Ncs ++ ECs, + Exp = case ECs of + [] -> 0; + [$#,_|ECs1] -> list_to_integer(remove_digit_separators(ECs1, Us)) + end, + Ncs1 = trim_float_zeros(lists:reverse(trim_float_zeros(remove_digit_separators(Ncs0, Us)))), + N = list_to_integer(lists:delete($.,Ncs1), B), + D = length(Ncs1) - string:chr(Ncs1, $.), + F = N * math:pow(B, Exp-D), + tok3(Cs, St, Line, Col, Toks, float, Tcs, F). + +trim_float_zeros([$0, $. | _]=Cs) -> Cs; % don't remove 0 next to `.` +trim_float_zeros([$0 | Cs]) -> trim_float_zeros(Cs); +trim_float_zeros(Cs) -> Cs. + scan_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs,Us}) -> scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us). diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 64577554deaa..8fc51199bfcf 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -165,6 +165,7 @@ otp_7810(Config) when is_list(Config) -> ok = integers(), ok = base_integers(), ok = floats(), + ok = base_floats(), ok = dots(), ok = chars(), ok = variables(), @@ -318,7 +319,7 @@ integers() -> "__123"], lists:foreach( fun(S) -> - case erl_scan:string(S) of + case erl_scan_string(S) of {ok, [{integer, _, _}|_], _} -> error({unexpected_integer, S}); {ok, _, _} -> @@ -335,7 +336,7 @@ integers() -> "12@"], lists:foreach( fun(S) -> - case erl_scan:string(S) of + case erl_scan_string(S) of {error,{1,erl_scan,{illegal,integer}},_} -> ok; {error,Err,_} -> @@ -397,7 +398,7 @@ base_integers() -> "10#12A4"], lists:foreach( fun(S) -> - case erl_scan:string(S) of + case erl_scan_string(S) of {error,{1,erl_scan,{illegal,integer}},_} -> ok; {error,Err,_} -> @@ -454,7 +455,89 @@ floats() -> ], lists:foreach( fun(S) -> - case erl_scan:string(S) of + case erl_scan_string(S) of + {error,{1,erl_scan,{illegal,float}},_} -> + ok; + {error,Err,_} -> + error({unexpected_error, S, Err}); + Succ -> + error({unexpected_success, S, Succ}) + end + end, FloatErrors), + ok. + +base_floats() -> + [begin + Ts = [{float,{1,1},F}], + test_string(FS, Ts) + end || {FS, F} <- [{"10#1.0",1.0}, + {"10#012345.625", 012345.625}, + {"10#3.31200",3.31200}, + {"10#1.0#e0",1.0e0}, + {"10#1.0#E17",1.0E17}, + {"10#34.21#E-18", 34.21E-18}, + {"10#17.0#E+14", 17.0E+14}, + {"10#12345.625#e3", 12345.625e3}, + {"10#12345.625#E-3", 12345.625E-3}, + + {"2#1.0", 1.0}, + {"2#101.0", 5.0}, + {"2#101.1", 5.5}, + {"2#101.101", 5.625}, + {"2#101.1#e0", 5.5}, + {"2#1.0#e+3", 8.0}, + {"2#1.0#e-3", 0.125}, + {"2#000100.001000", 4.125}, + {"2#0.10000000000000000000000000000000000000000000000000001", 0.5000000000000001}, % 53 bits + {"2#0.100000000000000000000000000000000000000000000000000001", 0.5}, % not 54 bits + {"2#0.11001001000011111101101010100010001000010110100011000#e+2", math:pi()}, % pi to 53 bits + + {"3#102.12", 3#10212/3#100}, + + {"16#100.0", 256.0}, + {"16#ff.d", 16#ffd/16}, + {"16#1.0", 1.0}, + {"16#abc.def", 16#abcdef/16#1000}, + {"16#00100.001000", 256.0 + 1/16#1000}, + {"16#0.80000000000008", 0.5000000000000001}, % 53-bit fraction + {"16#0.80000000000004", 0.5}, % not 54 bits + {"16#fe.8#e0", 16#fe8/16}, + {"16#f.e#e+3", float(16#fe*16#100)}, + {"16#c.0#e-1", 16#c/16}, + {"16#0.0e0", 16#e/16#100}, % e is a hex digit, not exponent + {"16#0.0E0", 16#e/16#100}, % same for E + {"16#0.3243f6a8885a30#e+1", math:pi()} % pi to 53 bits + ]], + + [begin + {error,{1,erl_scan,{illegal,float}},1} = erl_scan_string(S), + {error,{{1,1},erl_scan,{illegal,float}},{1,_}} = + erl_scan_string(S, {1,1}, []) + end || S <- ["1.14Ea"]], + + UnderscoreSamples = + [{"1_6#000_100.0_0", 256.0}, + {"16#0.3243_f6a8_885a_30#e+1", math:pi()}, + {"16#3243_f6a8.885a_30#e-7", math:pi()}, + {"16#3243_f6a8_885a.30#e-1_1", math:pi()}, + {"2#1.010101010101010101010#e+2_1", 2796202.0}], + lists:foreach( + fun({S, I}) -> + test_string(S, [{float, {1, 1}, I}]) + end, UnderscoreSamples), + FloatErrors = + [ + "10#12345.a25", + "10#12345.6a5", + "16#a0.gf23", + "16#a0.2fg3", + "2#10.201", + "2#10.120", + "3#102.3" + ], + lists:foreach( + fun(S) -> + case erl_scan_string(S) of {error,{1,erl_scan,{illegal,float}},_} -> ok; {error,Err,_} -> @@ -1592,8 +1675,9 @@ erl_scan_string(String, StartLocation, Options) -> case erl_scan:string(String, StartLocation, Options) of {ok, Tokens, EndLocation} -> {ok, unopaque_tokens(Tokens), EndLocation}; - Else -> - Else + {error,{_,Mod,Reason},_}=Error -> + Mod:format_error(Reason), + Error end. erl_scan_tokens(C, S, L) -> From c95cfa1296e84ad1e683d99a03366301385640c5 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Fri, 24 Jan 2025 11:16:55 +0100 Subject: [PATCH 3/3] Document based floats --- system/doc/reference_manual/data_types.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/system/doc/reference_manual/data_types.md b/system/doc/reference_manual/data_types.md index 3731802838aa..8b2ee4f34889 100644 --- a/system/doc/reference_manual/data_types.md +++ b/system/doc/reference_manual/data_types.md @@ -40,11 +40,17 @@ conventional notation, there are two Erlang-specific notations: - `$`_`char`_ ASCII value or unicode code-point of the character _`char`_. -- _`base`_`#`_`value`_ +- _`base`_`#`_`digits`_ Integer with the base _`base`_, which must be an integer in the range 2 - through 36. This notation can also be found in the Ada programming + through 36. _`digits`_ are `0`-`9` plus letters `A`-`Z` (upper or lower case). + This notation can also be found in the Ada programming language. Erlang does _not_ support prefixes such as `0x` for hexadecimal or `077` for octal. +- _`base`_`#`_`digits`_`.`_`digits`_`#e`_`exponent`_ + Based floating point number, for example `16#ff.fe#e+6`. Using a base + like 16 or 2 allows for an exact text representation of a floating + point number. Like the base, the exponent is always a decimal number. + Leading zeroes are ignored. Single underscore characters (`_`) can be inserted between digits as a visual separator.