Make scanner accept based floating point literals

Follows Ada style, e.g. 16#ff.fe#e-2, 2#0.101#e5, etc.
erlang · Jan 8, 2025 · 0afd8d0 · 0afd8d0
1 parent e4c5525
commit 0afd8d0
Show file tree

Hide file tree

Showing 2 changed files with 193 additions and 20 deletions.
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
@@ -1824,7 +1824,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
     try list_to_integer(remove_digit_separators(Ncs, Us)) of
         B when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
             Bcs = Ncs++[$#],
-            scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
+            scan_based_num(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
         B when is_integer(B) ->
             Len = length(Ncs),
             scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0)
@@ -1861,29 +1861,35 @@ remove_digit_separators(Number, with_underscore) ->
             orelse (C >= $A andalso B > 10 andalso C < $A + B - 10)
             orelse (C >= $a andalso B > 10 andalso C < $a + B - 10)))).
 
-scan_based_int(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
+scan_based_num(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
   when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
-    scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
+    scan_based_num(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
 
-scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
+scan_based_num([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
       ?BASED_DIGIT(C, B) ->
-    scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
-scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
+    scan_based_num(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
+scan_based_num([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
       when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
-    scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
+    scan_based_num(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
                    with_underscore);
-scan_based_int([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
-    {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
-scan_based_int([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
+scan_based_num([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}};
+scan_based_num([$.,C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) ->
+    scan_based_fraction(Cs, St, Line, Col, Toks, B, [C,$.|Ncs], BCs, Us);
+scan_based_num([$.,C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
+    scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
+scan_based_num([$.]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_num/6}};
+scan_based_num([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
     scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
-scan_based_int([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
-    {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
-scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
+scan_based_num([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}};
+scan_based_num(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
     %% No actual digits following the base.
     Len = length(Bcs),
     Ncol = incr_column(Col, Len),
     scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs);
-scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
+scan_based_num(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
     Ncs = lists:reverse(Ncs0),
     try list_to_integer(remove_digit_separators(Ncs, Us), B) of
         N ->
@@ -1896,6 +1902,89 @@ scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
             scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
     end.
 
+scan_based_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,Us}) ->
+    scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us).
+
+scan_based_fraction([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) ->
+    scan_based_fraction(Cs, St, Line, Col, Toks, B, [C|Ncs], BCs, Us);
+scan_based_fraction([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, BCs, _Us) when
+      ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
+    scan_based_fraction(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], BCs, with_underscore);
+scan_based_fraction([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
+scan_based_fraction([$#,E|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when E =:= $e; E =:= $E ->
+    scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, [E,$#], Us);
+scan_based_fraction([$#]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
+scan_based_fraction([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, BCs, _Us) when ?NAMECHAR(C) ->
+    scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(BCs)), Cs0);
+scan_based_fraction([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
+scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
+    based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, [], Us).
+
+scan_based_exponent_sign(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) ->
+    scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
+
+scan_based_exponent_sign([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when
+      C =:= $+; C =:= $- ->
+    scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us);
+scan_based_exponent_sign([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs, ECs,Us},fun scan_based_exponent_sign/6}};
+scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
+    scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
+
+scan_based_exponent(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) ->
+    scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
+
+scan_based_exponent([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when ?DIGIT(C) ->
+    scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us);
+scan_based_exponent([$_,Next|Cs], St, Line, Col, Toks, B, Ncs, BCs, [Prev|_]=ECs, _) when
+      ?DIGIT(Next) andalso ?DIGIT(Prev) ->
+    scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [Next,$_|ECs], with_underscore);
+scan_based_exponent([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs,Us},fun scan_based_exponent/6}};
+scan_based_exponent([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
+    {more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs, Us},fun scan_based_exponent/6}};
+scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
+    based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
+
+% Note: the base and exponent parts are always in decimal
+based_float_end(Cs, St, Line, Col, Toks, 10, Ncs0, BCs, ECs0, Us) ->
+    Ncs = lists:reverse(Ncs0),
+    ECs = lists:reverse(ECs0),
+    Tcs = BCs ++ Ncs ++ ECs,
+    Fcs = case ECs of
+              [] -> Ncs ++ ECs;
+              [$#|ECs1] -> Ncs ++ ECs1
+          end,
+    try list_to_float(remove_digit_separators(Fcs, Us)) of
+        F ->
+            tok3(Cs, St, Line, Col, Toks, float, Tcs, F)
+    catch
+        _:_ ->
+            Ncol = incr_column(Col, length(Ncs) + length(BCs)),
+            scan_error({illegal,float}, Line, Col, Line, Ncol, Cs)
+    end;
+based_float_end(Cs, St, Line, Col, Toks, B, Ncs0, BCs, ECs0, Us) when B =/= 10 ->
+    %% there is no general list_to_float(String,Base) available yet
+    Ncs = lists:reverse(Ncs0),
+    ECs = lists:reverse(ECs0),
+    Tcs = BCs ++ Ncs ++ ECs,
+    Exp = case ECs of
+              [] -> 0;
+              [$#,_|ECs1] -> list_to_integer(remove_digit_separators(ECs1, Us))
+          end,
+    Ncs1 = trim_float_zeros(lists:reverse(trim_float_zeros(remove_digit_separators(Ncs0, Us)))),
+    N = list_to_integer(lists:delete($.,Ncs1), B),
+    D = length(Ncs1) - string:chr(Ncs1, $.),
+    F = N * math:pow(B, Exp-D),
+    tok3(Cs, St, Line, Col, Toks, float, Tcs, F).
+
+trim_float_zeros([$0, $. | _]=Cs) -> Cs;  % don't remove 0 next to `.`
+trim_float_zeros([$0 | Cs]) -> trim_float_zeros(Cs);
+trim_float_zeros(Cs) -> Cs.
+
 scan_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs,Us}) ->
     scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us).
 

diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -165,6 +165,7 @@ otp_7810(Config) when is_list(Config) ->
     ok = integers(),
     ok = base_integers(),
     ok = floats(),
+    ok = base_floats(),
     ok = dots(),
     ok = chars(),
     ok = variables(),
@@ -318,7 +319,7 @@ integers() ->
          "__123"],
     lists:foreach(
       fun(S) ->
-              case erl_scan:string(S) of
+              case erl_scan_string(S) of
                   {ok, [{integer, _, _}|_], _} ->
                       error({unexpected_integer, S});
                   {ok, _, _} ->
@@ -335,7 +336,7 @@ integers() ->
          "12@"],
     lists:foreach(
       fun(S) ->
-              case erl_scan:string(S) of
+              case erl_scan_string(S) of
                   {error,{1,erl_scan,{illegal,integer}},_} ->
                       ok;
                   {error,Err,_} ->
@@ -397,7 +398,7 @@ base_integers() ->
          "10#12A4"],
     lists:foreach(
       fun(S) ->
-              case erl_scan:string(S) of
+              case erl_scan_string(S) of
                   {error,{1,erl_scan,{illegal,integer}},_} ->
                       ok;
                   {error,Err,_} ->
@@ -454,7 +455,89 @@ floats() ->
         ],
     lists:foreach(
       fun(S) ->
-              case erl_scan:string(S) of
+              case erl_scan_string(S) of
+                  {error,{1,erl_scan,{illegal,float}},_} ->
+                      ok;
+                  {error,Err,_} ->
+                      error({unexpected_error, S, Err});
+                  Succ ->
+                      error({unexpected_success, S, Succ})
+              end
+      end, FloatErrors),
+    ok.
+
+base_floats() ->
+    [begin
+         Ts = [{float,{1,1},F}],
+         test_string(FS, Ts)
+     end || {FS, F} <- [{"10#1.0",1.0},
+                        {"10#012345.625", 012345.625},
+                        {"10#3.31200",3.31200},
+                        {"10#1.0#e0",1.0e0},
+                        {"10#1.0#E17",1.0E17},
+                        {"10#34.21#E-18", 34.21E-18},
+                        {"10#17.0#E+14", 17.0E+14},
+                        {"10#12345.625#e3", 12345.625e3},
+                        {"10#12345.625#E-3", 12345.625E-3},
+
+                        {"2#1.0", 1.0},
+                        {"2#101.0", 5.0},
+                        {"2#101.1", 5.5},
+                        {"2#101.101", 5.625},
+                        {"2#101.1#e0", 5.5},
+                        {"2#1.0#e+3", 8.0},
+                        {"2#1.0#e-3", 0.125},
+                        {"2#000100.001000", 4.125},
+                        {"2#0.10000000000000000000000000000000000000000000000000001", 0.5000000000000001}, % 53 bits
+                        {"2#0.100000000000000000000000000000000000000000000000000001", 0.5}, % not 54 bits
+                        {"2#0.11001001000011111101101010100010001000010110100011000#e+2", math:pi()}, % pi to 53 bits
+
+                        {"3#102.12", 3#10212/3#100},
+
+                        {"16#100.0", 256.0},
+                        {"16#ff.d", 16#ffd/16},
+                        {"16#1.0", 1.0},
+                        {"16#abc.def", 16#abcdef/16#1000},
+                        {"16#00100.001000", 256.0 + 1/16#1000},
+                        {"16#0.80000000000008", 0.5000000000000001}, % 53-bit fraction
+                        {"16#0.80000000000004", 0.5}, % not 54 bits
+                        {"16#fe.8#e0", 16#fe8/16},
+                        {"16#f.e#e+3", float(16#fe*16#100)},
+                        {"16#c.0#e-1", 16#c/16},
+                        {"16#0.0e0", 16#e/16#100}, % e is a hex digit, not exponent
+                        {"16#0.0E0", 16#e/16#100}, % same for E
+                        {"16#0.3243f6a8885a30#e+1", math:pi()} % pi to 53 bits
+                       ]],
+
+    [begin
+         {error,{1,erl_scan,{illegal,float}},1} = erl_scan_string(S),
+         {error,{{1,1},erl_scan,{illegal,float}},{1,_}} =
+             erl_scan_string(S, {1,1}, [])
+     end || S <- ["1.14Ea"]],
+
+    UnderscoreSamples =
+        [{"1_6#000_100.0_0", 256.0},
+         {"16#0.3243_f6a8_885a_30#e+1", math:pi()},
+         {"16#3243_f6a8.885a_30#e-7", math:pi()},
+         {"16#3243_f6a8_885a.30#e-1_1", math:pi()},
+         {"2#1.010101010101010101010#e+2_1", 2796202.0}],
+    lists:foreach(
+         fun({S, I}) ->
+                 test_string(S, [{float, {1, 1}, I}])
+         end, UnderscoreSamples),
+    FloatErrors =
+        [
+         "10#12345.a25",
+         "10#12345.6a5",
+         "16#a0.gf23",
+         "16#a0.2fg3",
+         "2#10.201",
+         "2#10.120",
+         "3#102.3"
+        ],
+    lists:foreach(
+      fun(S) ->
+              case erl_scan_string(S) of
                   {error,{1,erl_scan,{illegal,float}},_} ->
                       ok;
                   {error,Err,_} ->
@@ -1592,8 +1675,9 @@ erl_scan_string(String, StartLocation, Options) ->
     case erl_scan:string(String, StartLocation, Options) of
         {ok, Tokens, EndLocation} ->
             {ok, unopaque_tokens(Tokens), EndLocation};
-        Else ->
-            Else
+        {error,{_,Mod,Reason},_}=Error ->
+            Mod:format_error(Reason),
+            Error
     end.
 
 erl_scan_tokens(C, S, L) ->