From 27e7d05f4f35aeb0fc96b15e57dd1e956090fe63 Mon Sep 17 00:00:00 2001
From: frazze-jobb <frazze@erlang.org>
Date: Wed, 16 Oct 2024 17:46:55 +0200
Subject: [PATCH] kernel: bypass unicode translation in group for latin1
 putc_requests

latin1 requests are sent to user_drv, if its in latin1 mode, and raw
output mode, then just output the latin1 binary. Otherwise user_drv
will convert the output to unicode.
---
 lib/kernel/src/group.erl                      | 32 +++-------------
 lib/kernel/src/prim_tty.erl                   |  7 +++-
 lib/kernel/src/user_drv.erl                   | 37 ++++++++++++++++---
 lib/stdlib/test/escript_SUITE.erl             | 31 ++++++++++++++--
 .../bypass_unicode_conversion                 |  7 ++++
 5 files changed, 78 insertions(+), 36 deletions(-)
 create mode 100644 lib/stdlib/test/escript_SUITE_data/bypass_unicode_conversion

diff --git a/lib/kernel/src/group.erl b/lib/kernel/src/group.erl
index 17b4562ff6bd..0312974a868c 100644
--- a/lib/kernel/src/group.erl
+++ b/lib/kernel/src/group.erl
@@ -608,36 +608,16 @@ putc_request({put_chars,unicode,M,F,As}, Drv, From) ->
                     {reply,{error,F}}
             end
     end;
-putc_request({put_chars,latin1,Binary}, Drv, From) when is_binary(Binary) ->
-    send_drv(Drv, {put_chars_sync, unicode,
-                   unicode:characters_to_binary(Binary,latin1),
-                   From}),
+putc_request({put_chars,latin1,Output}, Drv, From) ->
+    send_drv(Drv, {put_chars_sync, latin1, Output, From}),
     noreply;
-putc_request({put_chars,latin1,Chars}, Drv, From) ->
-    case catch unicode:characters_to_binary(Chars,latin1) of
-        Binary when is_binary(Binary) ->
-            send_drv(Drv, {put_chars_sync, unicode, Binary, From}),
-            noreply;
-        _ ->
-            {reply,{error,{put_chars,latin1,Chars}}}
-    end;
 putc_request({put_chars,latin1,M,F,As}, Drv, From) ->
     case catch apply(M, F, As) of
-        Binary when is_binary(Binary) ->
-            send_drv(Drv, {put_chars_sync, unicode,
-                           unicode:characters_to_binary(Binary,latin1),
-                           From}),
-            noreply;
-        Chars ->
-            case catch unicode:characters_to_binary(Chars,latin1) of
-                B when is_binary(B) ->
-                    send_drv(Drv, {put_chars_sync, unicode, B, From}),
-                    noreply;
-                _ ->
-                    {reply,{error,F}}
-            end
+        Ret when is_list(Ret) =:= false, is_binary(Ret) =:= false ->
+            {reply, {error, F}};
+        Chars -> send_drv(Drv, {put_chars_sync, latin1, Chars, From}),
+            noreply
     end;
-
 putc_request({requests,Reqs}, Drv, From) ->
     putc_requests(Reqs, {reply, ok}, Drv, From);
 
diff --git a/lib/kernel/src/prim_tty.erl b/lib/kernel/src/prim_tty.erl
index 45600663f7ef..9b2f00b15577 100644
--- a/lib/kernel/src/prim_tty.erl
+++ b/lib/kernel/src/prim_tty.erl
@@ -111,7 +111,7 @@
          npwcwidth/1, npwcwidth/2,
          ansi_regexp/0, ansi_color/2]).
 -export([reader_stop/1, disable_reader/1, enable_reader/1, read/1, read/2,
-         is_reader/2, is_writer/2]).
+         is_reader/2, is_writer/2, output_mode/1]).
 
 -nifs([isatty/1, tty_create/0, tty_init/2, setlocale/1,
        tty_select/2, tty_window_size/1,
@@ -476,6 +476,11 @@ is_writer(#state{ writer = {WriterPid, _} }, WriterPid) ->
 is_writer(_, _) ->
     false.
 
+-spec output_mode(state()) -> cooked | raw.
+output_mode(State) ->
+    #{output := Output} = State#state.options,
+    Output.
+
 -spec unicode(state()) -> boolean().
 unicode(State) ->
     State#state.unicode.
diff --git a/lib/kernel/src/user_drv.erl b/lib/kernel/src/user_drv.erl
index dabfc91ab78a..e5c443511c91 100644
--- a/lib/kernel/src/user_drv.erl
+++ b/lib/kernel/src/user_drv.erl
@@ -52,6 +52,9 @@
         %% Same as put_chars/3, but sends Reply to From when the characters are
         %% guaranteed to have been written to the terminal
         {put_chars_sync, unicode, binary(), {From :: pid(), Reply :: term()}} |
+        %% Output raw binary, should only be called if output mode is set to raw
+        %% and encoding set to latin1.
+        {put_chars_sync, latin1, binary(), {From :: pid(), Reply :: term()}} |
         %% Put text in expansion area
         {put_expand, unicode, binary(), integer()} |
         {move_expand, -32768..32767} |
@@ -852,7 +855,7 @@ group_opts() ->
     [{expand_below, application:get_env(stdlib, shell_expand_location, below) =:= below}].
 
 -spec io_request(request(), prim_tty:state()) -> {noreply, prim_tty:state()} |
-          {term(), reference(), prim_tty:state()}.
+          {term(), reference(), prim_tty:state()} | {term(), {error, term()}}.
 io_request({requests,Rs}, TTY) ->
     {noreply, io_requests(Rs, TTY)};
 io_request(redraw_prompt, TTY) ->
@@ -867,6 +870,22 @@ io_request(delete_line, TTY) ->
     write(prim_tty:handle_request(TTY, delete_line));
 io_request({put_chars, unicode, Chars}, TTY) ->
     write(prim_tty:handle_request(TTY, {putc, unicode:characters_to_binary(Chars)}));
+io_request({put_chars_sync, latin1, Chars, Reply}, TTY) ->
+    try
+        case {prim_tty:unicode(TTY), prim_tty:output_mode(TTY)} of
+            {false, raw} ->
+                Bin = if is_binary(Chars) -> Chars;
+                    true -> list_to_binary(Chars)
+                end,
+                {Output, NewTTY} = prim_tty:handle_request(TTY, {putc_raw, Bin}),
+                {ok, MonitorRef} = prim_tty:write(NewTTY, Output, self()),
+                {Reply, MonitorRef, NewTTY};
+            _ ->
+                io_request({put_chars_sync, unicode, unicode:characters_to_binary(Chars,latin1), Reply}, TTY)
+        end
+    catch
+        _:_ -> {Reply, {error, {put_chars, latin1, Chars}}}
+    end;
 io_request({put_chars_sync, unicode, Chars, Reply}, TTY) ->
     {Output, NewTTY} = prim_tty:handle_request(TTY, {putc, unicode:characters_to_binary(Chars)}),
     {ok, MonitorRef} = prim_tty:write(NewTTY, Output, self()),
@@ -958,13 +977,16 @@ mktemp() ->
 handle_req(next, TTYState, {false, IOQ} = IOQueue) ->
     case queue:out(IOQ) of
         {empty, _} ->
-	    {TTYState, IOQueue};
+            {TTYState, IOQueue};
         {{value, {Origin, Req}}, ExecQ} ->
             case io_request(Req, TTYState) of
                 {noreply, NewTTYState} ->
-		    handle_req(next, NewTTYState, {false, ExecQ});
+                    handle_req(next, NewTTYState, {false, ExecQ});
                 {Reply, MonitorRef, NewTTYState} ->
-		    {NewTTYState, {{Origin, MonitorRef, Reply}, ExecQ}}
+                    {NewTTYState, {{Origin, MonitorRef, Reply}, ExecQ}};
+                {Reply, {error, Reason}} ->
+                    Origin ! {reply, Reply, {error, Reason}},
+                    handle_req(next, TTYState, {false, ExecQ})
             end
     end;
 handle_req(Msg, TTYState, {false, IOQ} = IOQueue) ->
@@ -972,9 +994,12 @@ handle_req(Msg, TTYState, {false, IOQ} = IOQueue) ->
     {Origin, Req} = Msg,
     case io_request(Req, TTYState) of
         {noreply, NewTTYState} ->
-	    {NewTTYState, IOQueue};
+            {NewTTYState, IOQueue};
         {Reply, MonitorRef, NewTTYState} ->
-	    {NewTTYState, {{Origin, MonitorRef, Reply}, IOQ}}
+            {NewTTYState, {{Origin, MonitorRef, Reply}, IOQ}};
+        {Reply, {error, Reason}} ->
+            Origin ! {reply, Reply, {error, Reason}},
+            {TTYState, IOQueue}
     end;
 handle_req(Msg,TTYState,{Resp, IOQ}) ->
     %% All requests are queued when we have outstanding sync put_chars
diff --git a/lib/stdlib/test/escript_SUITE.erl b/lib/stdlib/test/escript_SUITE.erl
index 8899ab2deeef..4d7e9a28cf7d 100644
--- a/lib/stdlib/test/escript_SUITE.erl
+++ b/lib/stdlib/test/escript_SUITE.erl
@@ -39,7 +39,8 @@
 	 overflow/1,
 	 verify_sections/4,
          unicode/1,
-         bad_io_server/1
+         bad_io_server/1,
+         bypass_unicode_conversion/1
 	]).
 
 -include_lib("common_test/include/ct.hrl").
@@ -54,9 +55,10 @@ all() ->
      emulator_flags_no_shebang, two_lines,
      module_script, beam_script, archive_script, epp,
      create_and_extract, foldl, overflow,
-     archive_script_file_access, unicode, bad_io_server].
+     archive_script_file_access, unicode, bad_io_server,
+     bypass_unicode_conversion].
 
-groups() -> 
+groups() ->
     [].
 
 init_per_suite(Config) ->
@@ -978,6 +980,29 @@ bad_io_server(Config) when is_list(Config) ->
            "called as '\\x{400}' / 0\nExitCode:127">>]),
     ok.
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+bypass_unicode_conversion(Config) when is_list(Config) ->
+    Data = proplists:get_value(data_dir, Config),
+    Dir = filename:absname(Data),		%Get rid of trailing slash.
+    ToNull = case os:type() of
+        {win32,_} -> " 1> nul ";
+        _ -> " 1> /dev/null "
+    end,
+    Cmd = fun(Enc) -> "bypass_unicode_conversion "++atom_to_list(Enc)++ToNull end,
+    {TimeLatin1, _} = timer:tc(
+        fun() -> run(Config, Dir, Cmd(latin1), [<<"ExitCode:0">>]) end),
+    {TimeUnicode, _} = timer:tc(
+        fun() -> run(Config, Dir, Cmd(unicode), [<<"ExitCode:0">>]) end),
+    %% Check that Time(latin1) is about the same as Time(unicode)
+    %% Without the bypass, the time difference would be about 5x.
+    %% Turns out that the timing might be a bit unstable, so we allow a 2x difference.
+    io:format("Time(latin1) = ~p ~~= Time(unicode) = ~p~n", [TimeLatin1, TimeUnicode]),
+    true = TimeLatin1 =< TimeUnicode * 2,
+    ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 run(Config, Dir, Cmd, Expected) ->
     run_with_opts(Config, Dir, "", Cmd, Expected).
 
diff --git a/lib/stdlib/test/escript_SUITE_data/bypass_unicode_conversion b/lib/stdlib/test/escript_SUITE_data/bypass_unicode_conversion
new file mode 100644
index 000000000000..4b08d103c58d
--- /dev/null
+++ b/lib/stdlib/test/escript_SUITE_data/bypass_unicode_conversion
@@ -0,0 +1,7 @@
+#!/usr/bin/env escript
+
+main([Enc]) ->
+  Data = {tuple, {list, lists:seq(1,1000000)}},
+  io:setopts(group_leader(), [{encoding, list_to_atom(Enc)}]),
+  file:write(group_leader(), term_to_binary(Data)),
+  ok.