Distributed Arithmetic strategy for Dense, Conv1/2D, and EinsumDense #1191

Status: Open. This pull request wants to merge 45 commits into base: main.

Commits (45):
47f3e3c  add keras v3 object parser (calad0i, Mar 7, 2025)
9ed6df3  add keras v3 layer handlers (calad0i, Mar 7, 2025)
6248fbd  expose kv3 parser to config interface (calad0i, Mar 7, 2025)
6a28b6c  add kv3 converter test (calad0i, Mar 8, 2025)
0bb7d80  einsumdense and einsum (calad0i, Mar 7, 2025)
66c349f  add einsum templates (calad0i, Mar 8, 2025)
bf6f5a0  einsumdense test (calad0i, Mar 8, 2025)
6f284ec  support kv3 parsed batchnorm (calad0i, Mar 7, 2025)
f34ba0b  bit-exact-possible multidim softmax (calad0i, Mar 8, 2025)
be5c6fa  softmax fix (calad0i, Mar 8, 2025)
49e1a19  symbolic bitwidth infer util (calad0i, Mar 8, 2025)
0097223  add qinterval test (calad0i, Mar 8, 2025)
7149b2e  keras v2-v3 reshape fn compability patch (calad0i, Mar 8, 2025)
210dbeb  hgq2 layer handlers (calad0i, Mar 8, 2025)
31e40a0  add bit-exact enforcement pass (calad0i, Mar 8, 2025)
6d033da  fix softmax accum fractional bits derivation (calad0i, Mar 9, 2025)
57ccd88  add qeinsum test (calad0i, Mar 8, 2025)
74b2d92  env update (calad0i, Mar 8, 2025)
0eafe2a  remove hgq v1 rewire behavier (superseded) (calad0i, Mar 8, 2025)
fd61a38  fix del method in config class (calad0i, Mar 8, 2025)
ae37a81  distributed arithmetic impl (calad0i, Mar 8, 2025)
0bffa36  distributed arithmetic impl w/ conv (calad0i, Mar 8, 2025)
3ca84b6  distributed arithmetic templates (calad0i, Mar 8, 2025)
32afea5  prevent pointwise override for DA strategy (calad0i, Mar 8, 2025)
d34ea39  add test for DA (calad0i, Mar 8, 2025)
bc144d0  update proj conf (calad0i, Mar 8, 2025)
2ffbf81  disable delay_constraint in da4ml (calad0i, Mar 8, 2025)
7065d38  fix einsum/einsum dense regression issue (calad0i, Mar 8, 2025)
73ec8f7  add hgq2 mha test (calad0i, Mar 8, 2025)
55c4861  update ci template (calad0i, Mar 8, 2025)
308730b  require da4ml version (calad0i, Mar 9, 2025)
4c5d25f  pre-commit fix (calad0i, Mar 9, 2025)
99093ca  proper skip qeinsum test when condition not met (calad0i, Mar 9, 2025)
7b70998  softmax and activation fix (calad0i, Mar 15, 2025)
596b5b5  hgq2 api change, prevent zero bw activation crash syn (calad0i, Mar 15, 2025)
0176873  qinterval and bn bit-exactness fix (calad0i, Mar 18, 2025)
b62ff1d  add keras v3 reshape and flatten handler (calad0i, Mar 18, 2025)
e24fdae  fix einsum ax expansion and 0d output handling (calad0i, Mar 18, 2025)
9ca6448  fix merge templates (calad0i, Mar 19, 2025)
f914cbf  converter and bit-exact pass for ops layers (calad0i, Mar 19, 2025)
4b0cdd0  use pointwise 2d for conv2d due to unknown flow changing (calad0i, Mar 19, 2025)
10957b3  fix einsum dense da impl corner case (calad0i, Mar 20, 2025)
009e10f  qinterval type fix (calad0i, Mar 20, 2025)
72538ad  fix corner case in qkeras converted proxy (calad0i, Apr 9, 2025)
a57c986  support mha def in (q,v) format (calad0i, Apr 22, 2025)

Files changed:
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
@@ -47,7 +47,10 @@ repos:
exclude: docs/conf.py
additional_dependencies: [flake8-bugbear, flake8-print]
args: ['--max-line-length=125', # github viewer width
'--extend-ignore=E203,T201'] # E203 is not PEP8 compliant
'--extend-ignore=E203,T201', # E203 is not PEP8 compliant
'--per-file-ignores=hls4ml/model/optimizer/passes/bit_exact.py:E741',
# i for #int w/o sign, I for #int w/ sign when massively processing bw conversions
]

- repo: https://github.com/mgedmin/check-manifest
rev: "0.50"
2 changes: 1 addition & 1 deletion Jenkinsfile
@@ -16,7 +16,7 @@ pipeline {
sh '''#!/bin/bash --login
conda activate hls4ml-py310
conda install -y jupyterhub pydot graphviz pytest pytest-cov
pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/jmitrevs/qkeras.git@qrecurrent_unstack pyparsing
pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/jmitrevs/qkeras.git@qrecurrent_unstack pyparsing quantizers da4ml
pip install -U ../ --user
./convert-keras-models.sh -x -f keras-models.txt
pip uninstall hls4ml -y'''
47 changes: 12 additions & 35 deletions hls4ml/backends/fpga/fpga_backend.py
@@ -7,7 +7,7 @@
import numpy as np

from hls4ml.backends.backend import Backend
from hls4ml.model.attributes import ChoiceAttribute, ConfigurableAttribute, TypeAttribute
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.layers import (
GRU,
LSTM,
@@ -32,16 +32,13 @@
SeparableConv1D,
SeparableConv2D,
SimpleRNN,
Softmax,
)
from hls4ml.model.optimizer import model_optimizer
from hls4ml.model.types import (
ExponentPrecisionType,
FixedPrecisionType,
IntegerPrecisionType,
PrecisionType,
RoundingMode,
SaturationMode,
UnspecifiedPrecisionType,
XnorPrecisionType,
)
@@ -109,34 +106,6 @@ def __init__(self, name):
act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
self.attribute_map[Activation] = act_attrs

softmax_attrs = self.attribute_map.get(Softmax, [])
softmax_attrs.append(
ChoiceAttribute(
'implementation',
['latency', 'stable', 'argmax', 'legacy'],
default='stable',
description=descriptions.softmax_implementation,
)
)
softmax_attrs.append(
ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip)
)
softmax_attrs.append(
TypeAttribute(
'exp_table',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
description=descriptions.table_type,
)
)
softmax_attrs.append(
TypeAttribute(
'inv_table',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
description=descriptions.table_type,
)
)
self.attribute_map[Softmax] = softmax_attrs

def create_layer_class(self, layer_class):
new_attrubutes = []
for cls, attributes in self.attribute_map.items():
@@ -914,7 +883,7 @@ def generate_conv2d_line_buffer_fn(
return generated_code

@staticmethod
def permute_config_gen(name: str, shape: tuple[int, ...], perm: tuple[int, ...]):
def transpose_config_gen(name: str, shape: tuple[int, ...], perm: tuple[int, ...]):
"""
Generate new shape and perm_strides for a permute operation. Operates by mapping the output index
to input index by:
@@ -933,12 +902,20 @@ def permute_config_gen(name: str, shape: tuple[int, ...], perm: tuple[int, ...])
perm (tuple[int, ...]): The permutation of the dimensions.

Returns:
(new_shape, perm_strides) (tuple, tuple): the output shape and permutation strides.
dict: Dictionary containing the configuration.
"""
new_shape = tuple(shape[i] for i in perm)
strides = np.cumprod((shape[1:] + (1,))[::-1])[::-1]
perm_strides = tuple(int(strides[i]) for i in perm)
return (new_shape, perm_strides)
return dict(
dims=len(shape),
N=math.prod(shape),
from_shape=', '.join(str(x) for x in shape),
perm=', '.join(str(x) for x in perm),
perm_strides=', '.join(str(x) for x in perm_strides),
to_shape=', '.join(str(x) for x in new_shape),
config_name=name,
)

@model_optimizer()
def write_hls(self, model):
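
For reference, the stride computation in transpose_config_gen above can be reproduced standalone. A minimal sketch, with a hypothetical helper name and example shapes (a 2x3x4 tensor permuted with perm=(2, 0, 1)): the row-major strides of the input are reordered by perm so that walking the output index in order reads the correct input elements.

import math

import numpy as np


def transpose_config_sketch(name: str, shape: tuple[int, ...], perm: tuple[int, ...]) -> dict:
    # Same logic as transpose_config_gen above, kept standalone for illustration.
    new_shape = tuple(shape[i] for i in perm)
    # Row-major strides: the stride of dim i is the product of all trailing dims.
    strides = np.cumprod((shape[1:] + (1,))[::-1])[::-1]
    # Reorder the input strides into output-index order.
    perm_strides = tuple(int(strides[i]) for i in perm)
    return dict(
        dims=len(shape),
        N=math.prod(shape),
        from_shape=', '.join(str(x) for x in shape),
        perm=', '.join(str(x) for x in perm),
        perm_strides=', '.join(str(x) for x in perm_strides),
        to_shape=', '.join(str(x) for x in new_shape),
        config_name=name,
    )


# shape (2, 3, 4) has strides (12, 4, 1); perm (2, 0, 1) gives to_shape (4, 2, 3) and
# perm_strides (1, 12, 4), so output index (i, j, k) reads input offset i*1 + j*12 + k*4.
print(transpose_config_sketch('config1', (2, 3, 4), (2, 0, 1)))
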
6 changes: 5 additions & 1 deletion hls4ml/backends/fpga/passes/fix_softmax_table_size.py
@@ -6,7 +6,11 @@

class FixSoftmaxTableSize(OptimizerPass):
def match(self, node):
return isinstance(node, Softmax)
if not isinstance(node, Softmax):
return False
if 'inv_table_size' in node.attributes:
return False # handler generating inv_table_size sets it properly
return True

def transform(self, model, node: Layer):
inp_layer = node.get_input_node() # type: ignore
5 changes: 0 additions & 5 deletions hls4ml/backends/fpga/passes/hgq_proxy_model.py
@@ -52,10 +52,6 @@ def match(self, node: Layer):
return isinstance(node, FixedPointQuantizer)

def transform(self, model, node: FixedPointQuantizer):
if node.fusible:
model.remove_node(node, rewire=True)
return True

if model.config.config['IOType'] != 'io_parallel':
raise NotImplementedError('Heterogenous quantization for activations is only supported with IOType=io_parallel')

@@ -96,7 +92,6 @@ def __init__(self):

def format(self, node):
params = self._default_function_params(node)
node.attributes['result_t'].precision = node.attributes['table_t'].precision
params['config'] = f'unary_lut_config{node.index}'
params['table'] = node.get_weights('table').name

12 changes: 2 additions & 10 deletions hls4ml/backends/oneapi/passes/reshaping_templates.py
@@ -185,16 +185,8 @@ def format(self, node):
perm = tuple(node.get_attr('perm'))
name = f'config{node.index}'

new_shape, perm_strides = node.model.config.backend.permute_config_gen(name, shape, perm)
return transpose_config_template.format(
dims=len(shape),
N=int(np.prod(shape)),
from_shape=', '.join(str(x) for x in shape),
perm=', '.join(str(x) for x in perm),
perm_strides=', '.join(str(x) for x in perm_strides),
to_shape=', '.join(str(x) for x in new_shape),
config_name=name,
)
conf = node.model.config.backend.transpose_config_gen(name, shape, perm)
return transpose_config_template.format(**conf)


class TransposeFunctionTemplate(FunctionCallTemplate):
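
Since the helper now returns a dict keyed directly by the template fields, callers such as the oneAPI transpose template above only need str.format(**conf). A minimal sketch of that pattern, using a hypothetical template string that mirrors the fields produced by the helper (the backend's real transpose_config_template may differ):

# Hypothetical config template; field names match the dict keys filled in by transpose_config_gen.
transpose_config_template = """struct {config_name} {{
    static const unsigned dims = {dims};
    static const unsigned N = {N};
    static constexpr unsigned from_shape[{dims}] = {{{from_shape}}};
    static constexpr unsigned to_shape[{dims}] = {{{to_shape}}};
    static constexpr unsigned perm[{dims}] = {{{perm}}};
    static constexpr unsigned perm_strides[{dims}] = {{{perm_strides}}};
}};
"""

# Example values for an (8, 16) -> (16, 8) transpose: input strides are (16, 1), reordered by perm (1, 0).
conf = dict(
    config_name='config42',
    dims=2,
    N=128,
    from_shape='8, 16',
    to_shape='16, 8',
    perm='1, 0',
    perm_strides='1, 16',
)
print(transpose_config_template.format(**conf))
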
36 changes: 36 additions & 0 deletions hls4ml/backends/vivado/passes/convolution_templates.py
@@ -154,11 +154,21 @@ def format(self, node):
mult_params['dense_function'] = 'nnet::DenseResource_rf_gt_nin'
elif node.get_attr('strategy').lower() == 'resource_unrolled':
mult_params['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}'
elif node.get_attr('strategy').lower() == 'distributed_arithmetic':
mult_params['dense_function'] = f'{namespace}::dense_da_wrapper_{node.index}'

mult_config = self.mult_template.format(**mult_params)

return mult_config + '\n' + conv_config

def match(self, node):
if node.get_attr('strategy') == 'distributed_arithmetic':
io_type = node.model.config.get_config_value("IOType")
if io_type == 'io_parallel':
# DA impl use alternate entry point for io_parallel conv
return False
return super().match(node)


class Conv1DFunctionTemplate(FunctionCallTemplate):
def __init__(self):
@@ -173,6 +183,14 @@ def format(self, node):

return self.template.format(**params)

def match(self, node):
if node.get_attr('strategy') == 'distributed_arithmetic':
io_type = node.model.config.get_config_value("IOType")
if io_type == 'io_parallel':
# DA impl use alternate entry point for io_parallel conv
return False
return super().match(node)


class DepthwiseConv1DFunctionTemplate(Conv1DFunctionTemplate):
def __init__(self):
@@ -299,11 +317,21 @@ def format(self, node):
mult_params['dense_function'] = 'nnet::DenseResource_rf_gt_nin'
elif node.get_attr('strategy').lower() == 'resource_unrolled':
mult_params['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}'
elif node.get_attr('strategy').lower() == 'distributed_arithmetic':
mult_params['dense_function'] = f'{namespace}::dense_da_wrapper_{node.index}'

mult_config = self.mult_template.format(**mult_params)

return mult_config + '\n' + conv_config

def match(self, node):
if node.get_attr('strategy') == 'distributed_arithmetic':
io_type = node.model.config.get_config_value("IOType")
if io_type == 'io_parallel':
# DA impl use alternate entry point for io_parallel conv
return False
return super().match(node)


class Conv2DFunctionTemplate(FunctionCallTemplate):
def __init__(self):
@@ -318,6 +346,14 @@ def format(self, node):

return self.template.format(**params)

def match(self, node):
if node.get_attr('strategy') == 'distributed_arithmetic':
io_type = node.model.config.get_config_value("IOType")
if io_type == 'io_parallel':
# DA impl use alternate entry point for io_parallel conv
return False
return super().match(node)


class DepthwiseConv2DFunctionTemplate(Conv2DFunctionTemplate):
def __init__(self):
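
The match() guards above make the generic conv templates step aside whenever the layer strategy is 'distributed_arithmetic' and IOType is io_parallel, so the DA-specific templates introduced elsewhere in this PR claim those nodes instead. A hedged usage sketch of selecting the strategy from user code; the exact strategy spelling accepted in the config is an assumption based on the attribute value checked above:

from tensorflow import keras

import hls4ml

# Small toy model; any Dense/Conv1D/Conv2D/EinsumDense layer covered by this PR would do.
model = keras.Sequential([keras.Input(shape=(16,)), keras.layers.Dense(8, activation='relu')])

config = hls4ml.utils.config_from_keras_model(model, granularity='name')
config['Model']['Strategy'] = 'distributed_arithmetic'  # assumed spelling, mirroring the attribute above

# With io_parallel, the generic conv/dense templates above return False from match(),
# leaving code generation to the distributed-arithmetic templates added by this PR.
hls_model = hls4ml.converters.convert_from_keras_model(
    model, hls_config=config, io_type='io_parallel', backend='Vivado'
)
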
84 changes: 82 additions & 2 deletions hls4ml/backends/vivado/passes/core_templates.py
@@ -1,3 +1,5 @@
from math import ceil, log2

from hls4ml.backends.backend import get_backend
from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax
@@ -55,9 +57,17 @@ def format(self, node):
# The 3rd case is never used
elif node.get_attr('strategy').lower() == 'resource_unrolled':
params['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}'
elif node.get_attr('strategy').lower() == 'distributed_arithmetic':
# Only triggered in io_streaming mode
params['dense_function'] = f'{namespace}::dense_da_wrapper_{node.index}'

return self.template.format(**params)

def match(self, node):
if node.get_attr('strategy') == 'distributed_arithmetic':
return False # DA does not use common dense template
return super().match(node)


class DenseFunctionTemplate(FunctionCallTemplate):
def __init__(self):
@@ -71,6 +81,11 @@ def format(self, node):

return self.template.format(**params)

def match(self, node):
if node.get_attr('strategy') == 'distributed_arithmetic':
return False # DA does not use common dense template
return super().match(node)


# BatchNormalization templates

@@ -152,13 +167,22 @@ def format(self, node):

softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{
static const unsigned n_in = {n_in};
static const unsigned table_size = {table_size};
static const unsigned n_slice = {n_slice};
static const unsigned n_outer = {n_outer};
static const unsigned n_inner = {n_inner};
static const unsigned parallelization_factor = {parallelization_factor};
static const unsigned exp_table_size = {exp_table_size};
static const unsigned inv_table_size = {inv_table_size};
static const unsigned io_type = nnet::{iotype};
static const unsigned reuse_factor = {reuse};
static const unsigned axis = {axis};
static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation};
static constexpr float exp_scale = {exp_scale};
typedef {exp_table_t.name} exp_table_t;
typedef {inv_table_t.name} inv_table_t;
typedef {accum_t.name} accum_t;
typedef {inv_inp_t.name} inv_inp_t;
typedef {inp_norm_t_str} inp_norm_t;
}};\n"""

activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
@@ -210,10 +234,66 @@ def __init__(self):
super(ActivationConfigTemplate, self).__init__(Softmax) # Skip ActivationConfigTemplate's __init__
self.template = softmax_config_template

def format(self, node):
params = self._default_config_params(node)
params['type'] = node.get_attr('activation')
params.setdefault('exp_table_size', params['table_size'])
params.setdefault('inv_table_size', params['table_size'])
params.setdefault('n_inner', 1)
params.setdefault('n_outer', 1)
params.setdefault('exp_scale', 1.0)
params.setdefault('parallelization_factor', -1)

n_slice = params['n_in'] // params['n_inner'] // params['n_outer'] # type: ignore
params['n_slice'] = n_slice

if params['accum_t'].name == 'model_default_t': # type: ignore
scale = ceil(log2(n_slice))
exp_table_t = node.attributes['exp_table_t'].precision
signed, width, integers = exp_table_t.signed, exp_table_t.width, exp_table_t.integer
params['accum_t_str'] = f'ap_{"" if signed else "u"}fixed<{width + scale}, {integers + scale}>'
else:
params['accum_t_str'] = params['accum_t'].name # type: ignore
if params['inv_inp_t'].name == 'model_default_t': # type: ignore
params['inv_inp_t'] = params['exp_table_t']

if params['implementation'] == 'stable':
if 'inp_norm_t' not in params:
# Only used in stable (max-normalized) implementation
input_t = node.get_input_variable().type.precision
width, iwidth, signed = input_t.width, input_t.integer, input_t.signed # noqa: F841
width, iwidth = width - signed, iwidth - signed
if signed:
# Fix table size if too large
exp_table_size = params['inv_table_size']
params['exp_table_size'] = str(min(int(exp_table_size), 2**width))
params['inp_norm_t_str'] = f'ap_ufixed<{width}, {iwidth}>'
else:
params['inp_norm_t_str'] = params['inp_norm_t'].name # type: ignore
else:
params['inp_norm_t_str'] = 'ap_fixed<1,0>'

return self.template.format(**params)


class SoftmaxFunctionTemplate(FunctionCallTemplate):
def __init__(self):
super().__init__(Softmax, include_header=activ_include_list)
self.template = activ_function_template

def format(self, node):
params = self._default_function_params(node)
use_multidim = node.get_attr('n_inner', 1) > 1 or node.get_attr('n_outer', 1) > 1
use_multidim = use_multidim and node.model.config.get_config_value('IOType') == 'io_parallel'
params['activation'] = 'softmax' if not use_multidim else 'softmax_multidim'
params['config'] = f'softmax_config{node.index}'

return self.template.format(**params)


class ActivationFunctionTemplate(FunctionCallTemplate):
def __init__(self):
super().__init__((Activation, HardActivation, Softmax), include_header=activ_include_list)
super().__init__((Activation, HardActivation), include_header=activ_include_list)
self.template = activ_function_template

def format(self, node):
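
To make the softmax sizing in SoftmaxConfigTemplate.format concrete: n_slice is the number of elements reduced per softmax slice (n_in split across n_outer and n_inner), and when accum_t is left at model_default_t the accumulator reuses the exp table type widened by ceil(log2(n_slice)) bits so a sum of n_slice table entries cannot overflow. A short worked sketch with assumed numbers; the reading of n_outer/n_inner as the leading/trailing dimensions around the softmax axis is an assumption.

from math import ceil, log2

# Assumed example: softmax over the middle axis of a (2, 10, 2) tensor,
# so n_in = 40, n_outer = 2 (leading dims), n_inner = 2 (trailing dims).
n_in, n_outer, n_inner = 40, 2, 2
n_slice = n_in // n_inner // n_outer          # 10 elements summed per softmax slice
scale = ceil(log2(n_slice))                   # 4 extra bits cover a sum of up to 16 terms

# Hypothetical exp table type ap_ufixed<18, 8> (width 18, 8 integer bits, unsigned).
signed, width, integer = False, 18, 8
accum_t = f'ap_{"" if signed else "u"}fixed<{width + scale}, {integer + scale}>'
print(n_slice, accum_t)                       # 10 ap_ufixed<22, 12>
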