add xmerl_xml_indent-module

The indent module was added to provide out of the box indented output instead of the standard xmerl_xml-module. The code is based on the Elixir xmerl_xml_indent-package. That package can be found on hex.pm. The documentation was updated to mention the module and its use using the motorcycle-example. Test-cases have been added to the xmerl-SUITE as requested by OTP-maintenance team.
erlang · Dec 12, 2023 · 68c80e1 · 68c80e1
1 parent 4836605
commit 68c80e1
Show file tree

Hide file tree

Showing 4 changed files with 218 additions and 19 deletions.
diff --git a/lib/xmerl/doc/src/xmerl_ug.xmlsrc b/lib/xmerl/doc/src/xmerl_ug.xmlsrc
@@ -359,6 +359,21 @@ Data =
     ...    </pre>
     <p>The result will be: </p>
     <codeinclude file="new_motorcycles2.txt" tag="" type="none"></codeinclude>
+    <p>
+
+    The generated XML above was formatted for readability.
+
+    Another exporter which indents the code with 2 spaces can also be used.
+
+    This exporter should make the xml a bit more human-readable without the need
+    to use external tools such as xmllint.
+
+    In order to use it one only needs to change the export-module:</p>
+    <pre>
+      ...
+        Export=xmerl:export_simple([NewRootEl],xmerl_xml_indent,[{prolog,Prolog}]),
+      ...
+    </pre>
   </section>
 
   <section>

diff --git a/lib/xmerl/src/Makefile b/lib/xmerl/src/Makefile
@@ -77,6 +77,7 @@ MODULES = $(EDOC_MODULES) \
 	xmerl_validate \
 	xmerl_xlate \
 	xmerl_xml \
+	xmerl_xml_indent \
 	xmerl_xpath_lib \
 	xmerl_xpath_parse \
 	xmerl_xpath_pred \

diff --git a/lib/xmerl/src/xmerl_xml_indent.erl b/lib/xmerl/src/xmerl_xml_indent.erl
@@ -0,0 +1,84 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description  : Callback module for exporting complete or simple forms to indented XML.
+%%
+%% This module indents the xml with 2 spaces and a newline \n.
+%% Currently the implementation does not allow it to be configured.
+%% The implementation is based on the same Elixir implementation.
+%% https://hexdocs.pm/xmerl_xml_indent/readme.html
+
+-module(xmerl_xml_indent).
+
+-export(['#xml-inheritance#'/0]).
+
+-export(['#root#'/4,
+	 '#element#'/5,
+	 '#text#'/1]).
+
+-import(xmerl_lib, [markup/3, empty_tag/2, export_text/1]).
+
+-include("xmerl.hrl").
+-include("xmerl_internal.hrl").
+
+
+'#xml-inheritance#'() -> [].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) ->
+    export_text(Text).
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, [#xmlAttribute{name=prolog,value=V}], [], _E) ->
+    [V,Data];
+'#root#'(Data, _Attrs, [], _E) ->
+    ["<?xml version=\"1.0\"?>\n", Data].
+
+
+%% The '#element#' function is the default handler for XML elements.
+
+'#element#'(Tag, [], Attrs, _Parents, _E) ->
+    empty_tag(Tag, Attrs);
+'#element#'(Tag, Data, Attrs, Parents, _E) ->
+    IsCharData = is_char(Data),
+    NewData =
+        case IsCharData of
+            true ->
+                LengthParents = length(Parents),
+                %% Push all the data over Lvl spaces.
+                [
+                    indent(LengthParents + 1) ++ DataEntry
+                 || DataEntry <- Data
+                ] ++ indent(LengthParents);
+            false ->
+                Data
+        end,
+    markup(Tag, Attrs, NewData).
+
+is_char(Data) when is_list(Data) ->
+    is_list(hd(Data)) and not is_integer(lists:nth(1, hd(Data))).
+
+indent(Level) ->
+    lists:flatten(["\n"] ++ lists:duplicate(Level, "  ")).
diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl
@@ -40,14 +40,14 @@
 %%----------------------------------------------------------------------
 %% Test groups
 %%----------------------------------------------------------------------
-all() -> 
+all() ->
     [{group, cpd_tests}, xpath_text1, xpath_main,
      xpath_abbreviated_syntax, xpath_functions, xpath_namespaces,
      {group, misc}, {group, eventp_tests},
      {group, ticket_tests}, {group, app_test},
-     {group, appup_test}].
+     {group, appup_test}, {group, format_test}].
 
-groups() -> 
+groups() ->
     [{cpd_tests, [],
       [cpd_invalid1, cpd_invalid1_index, cpd_invalid2_index,
        cpd_invalid_index3, cpd_invalid_is_layer,
@@ -63,7 +63,8 @@ groups() ->
        ticket_6873, ticket_7496, ticket_8156, ticket_8697,
        ticket_9411, ticket_9457, ticket_9664_schema, ticket_9664_dtd]},
      {app_test, [], [{xmerl_app_test, all}]},
-     {appup_test, [], [{xmerl_appup_test, all}]}].
+     {appup_test, [], [{xmerl_appup_test, all}]},
+     {format_test, [], [formatter_pass,formatter_fail]}].
 
 suite() ->
     [{timetrap,{minutes,10}}].
@@ -257,12 +258,12 @@ xml_ns(_Config) ->
                  attributes = [#xmlAttribute{name = 'xmlns:xml',
                                              expanded_name = {"xmlns","xml"},
                                              nsinfo = {"xmlns","xml"},
-                                             namespace = #xmlNamespace{default = [], 
+                                             namespace = #xmlNamespace{default = [],
                                                                        nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}},
                                #xmlAttribute{name = 'xml:attr1',
                                              expanded_name = {'http://www.w3.org/XML/1998/namespace',attr1},
                                              nsinfo = {"xml","attr1"},
-                                             namespace = #xmlNamespace{default = [], 
+                                             namespace = #xmlNamespace{default = [],
                                                                        nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}}]},
      []
     } = xmerl_scan:string(Doc2, [{namespace_conformant, true}]),
@@ -349,15 +350,15 @@ sax_parse_export_xml_small(Config) ->
     ok.
 
 simple() ->
-    [{document, 
+    [{document,
       [{title, "Doc Title"}, {author, "Ulf Wiger"}],
-      [{section, 
+      [{section,
 	[{heading, "heading1"}],
 	[{'P', ["This is a paragraph of text."]},
-	 {section, 
+	 {section,
 	  [{heading, "heading2"}],
 	  [{'P', ["This is another paragraph."]},
-	   {table, 
+	   {table,
 	    [{border, 1}],
 	    [{heading,
 	      [{col, ["head1"]},
@@ -393,7 +394,7 @@ generate_section_attribute(0) ->
 generate_section_attribute(N) ->
     {{heading, "heading1"},N-1}.
 
-    
+
 generate_subsection_content(0) ->
     done;
 generate_subsection_content(1) ->
@@ -450,7 +451,7 @@ generate_heading_col(N) ->
 ticket_5998(Config) ->
     DataDir = datadir(Config),
     %% First fix is tested by case syntax_bug2.
-    
+
     ok =
         case catch xmerl_scan:file(filename:join([DataDir,misc,"ticket_5998_2.xml"])) of
             {'EXIT',{fatal,Reason1}} ->
@@ -484,18 +485,18 @@ ticket_7211(Config) ->
     {E,[]} = xmerl_scan:file(filename:join([DataDir,misc,"notes2.xml"]),
                              [{fetch_path,[filename:join([DataDir,misc,erlang_docs_dtd])]},
                               {validation,dtd}]),
-    
+
     ok = case E of
              Rec when is_record(Rec,xmlElement) ->
                  ok;
              _ ->
                  E
          end,
-		       
+
     {E2,[]} = xmerl_scan:file(filename:join([DataDir,misc,"XS.xml"]),
                               [{fetch_path,[filename:join([DataDir,misc,erlang_docs_dtd])]},
                                {validation,dtd}]),
-    
+
     ok = case E2 of
              Rec2 when is_record(Rec2,xmlElement) ->
                  ok;
@@ -517,7 +518,7 @@ ticket_7214(Config) ->
     {E,[]} = xmerl_scan:file(filename:join([DataDir,misc,'block_tags.html']),
                              [{validation,dtd},
                               {fetch_path,[filename:join([DataDir,misc,erlang_docs_dtd])]}]),
-    
+
     ok = case E of
              Rec when is_record(Rec,xmlElement) ->
                  ok;
@@ -528,7 +529,7 @@ ticket_7214(Config) ->
 %%
 %% ticket_7430
 %%
-%% Problem with contents of numeric character references followed by 
+%% Problem with contents of numeric character references followed by
 %% UTF-8 characters..
 %%
 ticket_7430(_Config) ->
@@ -631,7 +632,7 @@ allow_entities_test(Config) ->
     DataDir = proplists:get_value(data_dir, Config),
     File = filename:join(DataDir, "lol_1_test.xml"), %% Depth 9
     %% Disallow entities
-    {'EXIT',{fatal, {{error,entities_not_allowed}, _, _, _}}} = 
+    {'EXIT',{fatal, {{error,entities_not_allowed}, _, _, _}}} =
         (catch xmerl_scan:file(File, [{allow_entities, false}])),
     ok.
 
@@ -679,7 +680,7 @@ change_mode3([F|Fs]) ->
             chmod(F)
     end,
     change_mode3(Fs).
-    
+
 chmod(F) ->
     case file:read_file_info(F) of
 	{ok,FileInfo} ->
@@ -696,3 +697,101 @@ datadir(Config) ->
 
 datadir_join(Config,Files) ->
     filename:join([datadir(Config)|Files]).
+
+%%======================================================================
+%% New formatter tests input/output
+%%======================================================================
+
+html() ->
+    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"DTD/xhtml1-strict.dtd\"><html>"
+    "<head><title>Doc Title</title><author>Ulf Wiger</author></head>"
+    "<h1>heading1</h1>"
+    "<p>This is a paragraph of text.</p>"
+    "<h2>heading2</h2>"
+    "<p>This is another paragraph.</p>"
+    "<table>"
+    "<thead><tr><td>head1</td><td>head2</td></tr></thead>"
+    "<tr><td>col11</td><td>col122</td></tr>"
+    "<tr><td>col21</td><td>col122</td></tr>"
+    "</table>"
+    "</html>".
+
+html_indented() ->
+    "<?xml version=\"1.0\"?>"
+    "\n<html>"
+    "\n  <head>"
+    "\n    <title>Doc Title</title>"
+    "\n    <author>Ulf Wiger</author>"
+    "\n  </head>"
+    "\n  <h1>heading1</h1>"
+    "\n  <p>This is a paragraph of text.</p>"
+    "\n  <h2>heading2</h2>"
+    "\n  <p>This is another paragraph.</p>"
+    "\n  <table>"
+    "\n    <thead>"
+    "\n      <tr>"
+    "\n        <td>head1</td>"
+    "\n        <td>head2</td>"
+    "\n      </tr>"
+    "\n    </thead>"
+    "\n    <tr>"
+    "\n      <td>col11</td>"
+    "\n      <td>col122</td>"
+    "\n    </tr>"
+    "\n    <tr>"
+    "\n      <td>col21</td>"
+    "\n      <td>col122</td>"
+    "\n    </tr>"
+    "\n  </table>"
+    "\n</html>".
+
+xml_namespace() ->
+    "<?xml version=\"1.0\"?>"
+    "<!-- initially, the default namespace is \"books\" -->"
+    "<book xmlns='urn:loc.gov:books' xmlns:isbn='urn:ISBN:0-395-36341-6'>"
+    "<title>Cheaper by the Dozen</title>"
+    "<isbn:number>1568491379</isbn:number>"
+    "<notes>"
+    "<!-- make HTML the default namespace for some comments -->"
+    "<p xmlns='urn:w3-org-ns:HTML'>"
+    "This is a <i>funny</i> book!"
+    "</p>"
+    "</notes>"
+    "</book>".
+
+xml_namespace_indented() ->
+  "<?xml version=\"1.0\"?>"
+  "\n<book xmlns=\"urn:loc.gov:books\" xmlns:isbn=\"urn:ISBN:0-395-36341-6\">"
+  "\n  <title>Cheaper by the Dozen</title>"
+  "\n  <isbn:number>1568491379</isbn:number>"
+  "\n  <notes>"
+  "\n    <p xmlns=\"urn:w3-org-ns:HTML\">This is a <i>funny</i> book!</p>"
+  "\n  </notes>"
+  "\n</book>".
+
+output_element_to_str(E) ->
+    Output = xmerl:export([E], xmerl_xml_indent),
+    [Str] = io_lib:format("~s", [lists:flatten(Output)]),
+    Str.
+
+%%======================================================================
+%% New formatter tests
+%%======================================================================
+formatter_pass(_Config) ->
+
+    FetchFun = fun(_DTDSpec, S) -> {ok, not_fetched, S} end,
+    %% Generate based on namespace-example
+    {Ns, _} = xmerl_scan:string(xml_namespace(), [{fetch_fun, FetchFun}]),
+    GNs = output_element_to_str(Ns),
+    INs = xml_namespace_indented(),
+    INs = GNs,
+
+    %% Generate based on html-example
+    {Html, _} = xmerl_scan:string(html(), [{fetch_fun, FetchFun}]),
+    GHtml = output_element_to_str(Html),
+    IHtml = html_indented(),
+    GHtml = IHtml,
+    ok.
+
+formatter_fail(_Config) ->
+    ok.