Skip to content

Commit 5760563

Browse files
committed
Make scanner accept floating point literals in hex and binary
1 parent d9a0d06 commit 5760563

File tree

2 files changed

+208
-14
lines changed

2 files changed

+208
-14
lines changed

lib/stdlib/src/erl_scan.erl

Lines changed: 117 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ format_error({illegal,Type}) ->
165165
lists:flatten(io_lib:fwrite("illegal ~w", [Type]));
166166
format_error({base,Base}) ->
167167
lists:flatten(io_lib:fwrite("illegal base '~w'", [Base]));
168+
format_error({float_base,Base}) ->
169+
lists:flatten(io_lib:fwrite("illegal base '~w' - only 10, 16 or 2 are allowed for floating point literals", [Base]));
170+
format_error({exponent,Base}) ->
171+
C = if Base =:= 10 -> $e; true -> $p end,
172+
lists:flatten(io_lib:fwrite("only '~c' allowed as exponent character in base ~w", [C, Base]));
168173
format_error(indentation) ->
169174
"bad indentation in triple-quoted string";
170175
format_error(white_space) ->
@@ -1820,7 +1825,7 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
18201825
try list_to_integer(remove_digit_separators(Ncs, Us)) of
18211826
B when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
18221827
Bcs = Ncs++[$#],
1823-
scan_based_int(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
1828+
scan_based_num(Cs, St, Line, Col, Toks, B, [], Bcs, no_underscore);
18241829
B when is_integer(B) ->
18251830
Len = length(Ncs),
18261831
scan_error({base,B}, Line, Col, Line, incr_column(Col, Len), Cs0)
@@ -1857,29 +1862,40 @@ remove_digit_separators(Number, with_underscore) ->
18571862
orelse (C >= $A andalso B > 10 andalso C < $A + B - 10)
18581863
orelse (C >= $a andalso B > 10 andalso C < $a + B - 10)))).
18591864

1860-
scan_based_int(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
1865+
scan_based_num(Cs, #erl_scan{}=St, Line, Col, Toks, {B,NCs,BCs,Us})
18611866
when is_integer(B), 2 =< B, B =< 1+$Z-$A+10 ->
1862-
scan_based_int(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
1867+
scan_based_num(Cs, St, Line, Col, Toks, B, NCs, BCs, Us).
18631868

1864-
scan_based_int([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
1869+
scan_based_num([C|Cs], St, Line, Col, Toks, B, Ncs, Bcs, Us) when
18651870
?BASED_DIGIT(C, B) ->
1866-
scan_based_int(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
1867-
scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
1871+
scan_based_num(Cs, St, Line, Col, Toks, B, [C|Ncs], Bcs, Us);
1872+
scan_based_num([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
18681873
when ?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
1869-
scan_based_int(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
1874+
scan_based_num(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], Bcs,
18701875
with_underscore);
1871-
scan_based_int([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
1872-
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
1873-
scan_based_int([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
1876+
scan_based_num([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
1877+
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}};
1878+
scan_based_num([$.,C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) ->
1879+
if B =:= 10; B =:= 2; B =:= 16 ->
1880+
scan_based_fraction(Cs, St, Line, Col, Toks, B, [C,$.|Ncs], BCs, Us);
1881+
true ->
1882+
Ncol = incr_column(Col, length(Ncs) + length(BCs)),
1883+
scan_error({float_base, B}, Line, Col, Line, Ncol, Cs)
1884+
end;
1885+
scan_based_num([$.,C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
1886+
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
1887+
scan_based_num([$.]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
1888+
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_num/6}};
1889+
scan_based_num([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
18741890
scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
1875-
scan_based_int([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
1876-
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
1877-
scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
1891+
scan_based_num([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
1892+
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_num/6}};
1893+
scan_based_num(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
18781894
%% No actual digits following the base.
18791895
Len = length(Bcs),
18801896
Ncol = incr_column(Col, Len),
18811897
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs);
1882-
scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
1898+
scan_based_num(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
18831899
Ncs = lists:reverse(Ncs0),
18841900
try list_to_integer(remove_digit_separators(Ncs, Us), B) of
18851901
N ->
@@ -1892,6 +1908,93 @@ scan_based_int(Cs, St, Line, Col, Toks, B, Ncs0, [_|_]=Bcs, Us) ->
18921908
scan_error({illegal,integer}, Line, Col, Line, Ncol, Cs)
18931909
end.
18941910

1911+
scan_based_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,Us}) ->
1912+
scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us).
1913+
1914+
scan_based_fraction([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when ?BASED_DIGIT(C, B) ->
1915+
scan_based_fraction(Cs, St, Line, Col, Toks, B, [C|Ncs], BCs, Us);
1916+
scan_based_fraction([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, BCs, _Us) when
1917+
?BASED_DIGIT(Next, B) andalso ?BASED_DIGIT(Prev, B) ->
1918+
scan_based_fraction(Cs, St, Line, Col, Toks, B, [Next,$_|Ncs], BCs, with_underscore);
1919+
scan_based_fraction([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
1920+
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
1921+
scan_based_fraction([E|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when E =:= $e; E =:= $E ->
1922+
if B =:= 10 ->
1923+
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, [E], Us);
1924+
true ->
1925+
Ncol = incr_column(Col, length(Ncs)+length(BCs)),
1926+
scan_error({exponent,B}, Line, Col, Line, Ncol, Cs)
1927+
end;
1928+
scan_based_fraction([E|Cs], St, Line, Col, Toks, B, Ncs, BCs, Us) when E =:= $p; E =:= $P ->
1929+
if B =/= 10 ->
1930+
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, [E], Us);
1931+
true ->
1932+
Ncol = incr_column(Col, length(Ncs)+length(BCs)),
1933+
scan_error({exponent,B}, Line, Col, Line, Ncol, Cs)
1934+
end;
1935+
scan_based_fraction([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, BCs, _Us) when ?NAMECHAR(C) ->
1936+
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs) + length(BCs)), Cs0);
1937+
scan_based_fraction([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
1938+
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,Us},fun scan_based_fraction/6}};
1939+
scan_based_fraction(Cs, St, Line, Col, Toks, B, Ncs, BCs, Us) ->
1940+
based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, [], Us).
1941+
1942+
scan_based_exponent_sign(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) ->
1943+
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
1944+
1945+
scan_based_exponent_sign([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when
1946+
C =:= $+; C =:= $- ->
1947+
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us);
1948+
scan_based_exponent_sign([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
1949+
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs, ECs,Us},fun scan_based_exponent_sign/6}};
1950+
scan_based_exponent_sign(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
1951+
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
1952+
1953+
scan_based_exponent(Cs, #erl_scan{}=St, Line, Col, Toks, {B,Ncs,BCs,ECs,Us}) ->
1954+
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
1955+
1956+
scan_based_exponent([C|Cs], St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) when ?DIGIT(C) ->
1957+
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [C|ECs], Us);
1958+
scan_based_exponent([$_,Next|Cs], St, Line, Col, Toks, B, Ncs, BCs, [Prev|_]=ECs, _) when
1959+
?DIGIT(Next) andalso ?DIGIT(Prev) ->
1960+
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, [Next,$_|ECs], with_underscore);
1961+
scan_based_exponent([$_]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
1962+
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs,Us},fun scan_based_exponent/6}};
1963+
scan_based_exponent([]=Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
1964+
{more,{Cs,St,Col,Toks,Line,{B,Ncs,BCs,ECs, Us},fun scan_based_exponent/6}};
1965+
scan_based_exponent(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us) ->
1966+
based_float_end(Cs, St, Line, Col, Toks, B, Ncs, BCs, ECs, Us).
1967+
1968+
% Note: the base and exponent parts are always in decimal
1969+
based_float_end(Cs, St, Line, Col, Toks, 10, Ncs0, BCs, ECs, Us) ->
1970+
Ncs = lists:reverse(Ncs0),
1971+
Fcs = Ncs ++ lists:reverse(ECs),
1972+
try list_to_float(remove_digit_separators(Fcs, Us)) of
1973+
F ->
1974+
Tcs = BCs ++ Fcs,
1975+
tok3(Cs, St, Line, Col, Toks, float, Tcs, F)
1976+
catch
1977+
_:_ ->
1978+
Ncol = incr_column(Col, length(Ncs) + length(BCs)),
1979+
scan_error({illegal,float}, Line, Col, Line, Ncol, Cs)
1980+
end;
1981+
based_float_end(Cs, St, Line, Col, Toks, B, Ncs0, BCs, ECs0, Us) when B =/= 10 ->
1982+
ECs = lists:reverse(ECs0),
1983+
Exp = case ECs of
1984+
[] -> 0;
1985+
_ -> list_to_integer(remove_digit_separators(tl(ECs), Us))
1986+
end,
1987+
Tcs = BCs ++ lists:reverse(Ncs0) ++ ECs,
1988+
Ncs = trim_float_zeros(lists:reverse(trim_float_zeros(remove_digit_separators(Ncs0, Us)))),
1989+
FBits = (length(Ncs) - string:chr(Ncs, $.)) * case B of 2 -> 1; 16 -> 4 end,
1990+
%% note that there will always be at least one digit in the fraction, even if 0
1991+
F = list_to_integer(lists:delete($.,Ncs), B) * math:pow(2, Exp-FBits),
1992+
tok3(Cs, St, Line, Col, Toks, float, Tcs, F).
1993+
1994+
trim_float_zeros([$0, $. | _]=Cs) -> Cs;
1995+
trim_float_zeros([$0 | Cs]) -> trim_float_zeros(Cs);
1996+
trim_float_zeros(Cs) -> Cs.
1997+
18951998
scan_fraction(Cs, #erl_scan{}=St, Line, Col, Toks, {Ncs,Us}) ->
18961999
scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us).
18972000

lib/stdlib/test/erl_scan_SUITE.erl

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ otp_7810(Config) when is_list(Config) ->
165165
ok = integers(),
166166
ok = base_integers(),
167167
ok = floats(),
168+
ok = base_floats(),
168169
ok = dots(),
169170
ok = chars(),
170171
ok = variables(),
@@ -465,6 +466,96 @@ floats() ->
465466
end, FloatErrors),
466467
ok.
467468

469+
base_floats() ->
470+
[begin
471+
Ts = [{float,{1,1},F}],
472+
test_string(FS, Ts)
473+
end || {FS, F} <- [{"10#1.0",1.0},
474+
{"10#012345.625", 012345.625},
475+
{"10#3.31200",3.31200},
476+
{"10#1.0e0",1.0e0},
477+
{"10#1.0E17",1.0E17},
478+
{"10#34.21E-18", 34.21E-18},
479+
{"10#17.0E+14", 17.0E+14},
480+
{"10#12345.625e3", 12345.625e3},
481+
{"10#12345.625E-3", 12345.625E-3},
482+
483+
{"2#1.0", 1.0},
484+
{"2#101.0", 5.0},
485+
{"2#101.1", 5.5},
486+
{"2#101.101", 5.625},
487+
{"2#101.1p0", 5.5},
488+
{"2#1.0p+3", 8.0},
489+
{"2#1.0p-3", 0.125},
490+
{"2#000100.001000", 4.125},
491+
{"2#0.10000000000000000000000000000000000000000000000000001", 0.5000000000000001}, % 53 bits
492+
{"2#0.100000000000000000000000000000000000000000000000000001", 0.5}, % not 54 bits
493+
{"2#0.11001001000011111101101010100010001000010110100011000p+2", math:pi()}, % pi to 53 bits
494+
495+
{"16#100.0", 256.0},
496+
{"16#ff.d", 16#ffd/16},
497+
{"16#1.0", 1.0},
498+
{"16#abc.def", 16#abcdef/16#1000},
499+
{"16#00100.001000", 256.0 + 1/16#1000},
500+
{"16#0.80000000000008", 0.5000000000000001}, % 53-bit fraction
501+
{"16#0.80000000000004", 0.5}, % not 54 bits
502+
{"16#fe.8p0", 254.5},
503+
{"16#f.0p+3", 120.0},
504+
{"16#c.0p-3", 1.5},
505+
{"16#0.0e0", 16#e/16#100}, % e is a hex digit, not exponent
506+
{"16#0.0E0", 16#e/16#100}, % same for E
507+
{"16#0.c90fdaa22168c0p+2", math:pi()} % pi to 53 bits
508+
]],
509+
510+
[begin
511+
{error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(S),
512+
{error,{{1,1},erl_scan,{illegal,float}},{1,_}} =
513+
erl_scan:string(S, {1,1}, [])
514+
end || S <- ["1.14Ea"]],
515+
516+
UnderscoreSamples =
517+
[{"1_6#000_100.0_0", 256.0},
518+
{"16#0.c90f_daa2_2168_c0p+2", math:pi()},
519+
{"16#c90f_daa2.2168_c0p-30", math:pi()},
520+
{"16#c90f_daa2_2168.c0p-4_6", math:pi()},
521+
{"2#1.010101010101010101010p+2_1", 2796202.0}],
522+
lists:foreach(
523+
fun({S, I}) ->
524+
test_string(S, [{float, {1, 1}, I}])
525+
end, UnderscoreSamples),
526+
FloatErrors =
527+
[
528+
"10#12345.a25",
529+
"10#12345.6a5",
530+
"16#a0.gf23",
531+
"16#a0.2fg3",
532+
"2#10.201",
533+
"2#10.120",
534+
"3#102.3"
535+
],
536+
lists:foreach(
537+
fun(S) ->
538+
case erl_scan:string(S) of
539+
{error,{1,erl_scan,{illegal,float}},_} ->
540+
ok;
541+
{error,Err,_} ->
542+
error({unexpected_error, S, Err});
543+
Succ ->
544+
error({unexpected_success, S, Succ})
545+
end
546+
end, FloatErrors),
547+
{error,{{1,1},erl_scan,{float_base,3}},{1,6}} =
548+
erl_scan:string("3#102.12", {1,1}, []),
549+
{error,{{1,1},erl_scan,{exponent,10}},{1,13}} =
550+
erl_scan:string("10#12345.625p3", {1,1}, []),
551+
{error,{{1,1},erl_scan,{exponent,10}},{1,13}} =
552+
erl_scan:string("10#12345.625P3", {1,1}, []),
553+
{error,{{1,1},erl_scan,{exponent,2}},{1,8}} =
554+
erl_scan:string("2#10.01e3", {1,1}, []),
555+
{error,{{1,1},erl_scan,{exponent,2}},{1,8}} =
556+
erl_scan:string("2#10.01E3", {1,1}, []),
557+
ok.
558+
468559
dots() ->
469560
Dot = [{".", {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,2}}},
470561
{". ", {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},

0 commit comments

Comments
 (0)