#!/usr/bin/env python
"""
@purpose: Estimate the quantization scales (and the related bias / zero-point compensation values)
          of a model's layer parameters.
"""

import copy
import json
from collections import OrderedDict
from fractions import Fraction
from operator import attrgetter

import numpy as np
import pandas as pd
import tensorflow as tf
from past.utils import old_div

from hailo_model_optimization.acceleras.utils.acceleras_definitions import (
    DEFAULT_PADDING_NEG_INF_VALUE,
    MAX_NUM_REPEATS_ELTWISE,
    ZP_FEED_REPEAT,
    BiasMode,
    PrecisionMode,
)
from hailo_sdk_client.numeric_translator import quantization_tools
from hailo_sdk_client.numeric_translator.inter_layer_precision_mode import (
    MAX_ALLOWED_OVERFLOW_OFFSETS,
    InterLayerPrecisionMode,
)
from hailo_sdk_client.numeric_translator.layer_scale_matcher import (
    find_layer_connection_set,
    match_scales_in_connection_set,
    unify_connection_sets,
)
from hailo_sdk_client.numeric_translator.piecewise_calculator import (
    BackendNegativeSlopesException,
    BackendOffsetsException,
    PiecewiseActivator,
)
from hailo_sdk_client.numeric_translator.quantization_tools import DEFAULT_BIT_SIZE
from hailo_sdk_client.numeric_translator.shifts_calculator import ShiftsCalculator
from hailo_sdk_client.sdk_backend.sdk_backend_exceptions import (
    BackendNotImplementedError,
    BackendQuantizationException,
    SDKBackendException,
)
from hailo_sdk_common.hailo_nn.hn_definitions import ActivationType, LayerType
from hailo_sdk_common.logger.logger import default_logger
from hailo_sdk_common.model_params.model_params import get_param_key
from hailo_sdk_common.numeric_utils.numeric_utils import QBITS, get_deconv_stack_order
from hailo_sdk_common.numeric_utils.quantize_data import QuantParams, get_quantized_int
from hailo_sdk_common.paths_manager.config import get_parsed_config_from_path

# NOTE(review): neither constant is referenced in the visible part of this file; the
# descriptions below are inferred from the names only - confirm against the code that uses them.
# Presumably the largest negative-slope magnitude tolerated for a piecewise activation.
MAX_ALLOWED_NEGATIVE_SLOPE = 4
# Presumably the number of retries performed after a negative-slopes failure.
MAX_RETRIES_NEGATIVE_SLOPES = 1


class MatmulShiftDeltaException(SDKBackendException):
    """Backend exception that carries a shift delta alongside its message.

    Args:
        message: Error description, forwarded to ``SDKBackendException``.
        shift_delta: Value stored on the exception and exposed through :attr:`shift_delta`.
    """

    def __init__(self, message, shift_delta):
        super().__init__(message)
        self._shift_delta = shift_delta

    @property
    def shift_delta(self):
        """The shift delta that was supplied when the exception was created."""
        return self._shift_delta


class ShiftDeltaException(SDKBackendException):
    """Backend exception that carries a shift delta alongside its message.

    Args:
        message: Error description, forwarded to ``SDKBackendException``.
        shift_delta: Value stored on the exception and exposed through :attr:`shift_delta`.
    """

    def __init__(self, message, shift_delta):
        super().__init__(message)
        self._shift_delta = shift_delta

    @property
    def shift_delta(self):
        """The shift delta that was supplied when the exception was created."""
        return self._shift_delta


def _calc_output_factor(qp_kernel, qp_in, qp_out, out_stage_mantissa_size):
    """Quantize the output-stage rescale factor as ``mantissa / 2**exponent``.

    The exact factor is ``qp_in.scale * qp_kernel.scale / qp_out.scale``. Its
    exponent/mantissa pair comes from ``quantization_tools.quantize_multiplier``;
    if the resulting value is below the exact factor, the mantissa is incremented once.

    Args:
        qp_kernel: Kernel quantization params (``.scale`` is read).
        qp_in: Input quantization params (``.scale`` is read).
        qp_out: Output quantization params (``.scale`` is read).
        out_stage_mantissa_size: Passed through to ``quantize_multiplier``.

    Returns:
        tuple: ``(output_factor, fixfactor)`` where ``fixfactor`` is the quantized
        factor divided by the exact one.
    """
    exact_factor = old_div(qp_in.scale * qp_kernel.scale, qp_out.scale)
    exponent, mantissa = quantization_tools.quantize_multiplier(exact_factor, out_stage_mantissa_size)

    denominator = 2.0**exponent
    if mantissa * 1.0 / denominator < exact_factor:
        # Take the next representable value above the first approximation.
        mantissa += 1
    quantized_factor = mantissa * 1.0 / denominator

    return quantized_factor, old_div(quantized_factor, exact_factor)


def _calc_elwa_decomposition(qp_elwa, qp_in, qp_kernel, bits, max_elementwise_feed_repeat, name, is_signed=True):
    """Decompose the elementwise-add scale ratio into an input factor and a feed repeat.

    The ratio ``qp_elwa.scale / (qp_in.scale * qp_kernel.scale)`` is rounded and handed to
    ``quantization_tools.int_smallnum_factorize`` (when ``is_signed``) or
    ``quantization_tools.uint_smallnum_factorize`` (otherwise).

    Args:
        qp_elwa: Quantization params of the elementwise input (``.scale`` is read).
        qp_in: Quantization params of the main input (``.scale`` is read).
        qp_kernel: Quantization params of the kernel (``.scale`` is read).
        bits: Forwarded as ``bits`` to the factorize helper.
        max_elementwise_feed_repeat: Forwarded as ``maxsmallnum`` to the factorize helper.
        name: Layer name used to build the returned param keys.
        is_signed: Selects the signed (default) or unsigned factorize helper.

    Returns:
        tuple: ``(input_factor, feed_repeat, layer_params)``; ``layer_params`` maps the layer's
        ``elementwise_addition/input_factor`` and ``elementwise_addition/feed_repeat`` keys
        to the two values.
    """
    total_factor = np.round(old_div(qp_elwa.scale, (qp_in.scale * qp_kernel.scale)))

    if is_signed:
        factorize = quantization_tools.int_smallnum_factorize
    else:
        factorize = quantization_tools.uint_smallnum_factorize
    factor, repeats = factorize(total_factor, bits=bits, maxsmallnum=max_elementwise_feed_repeat)

    decomposition_params = {}
    decomposition_params[get_param_key(name, "elementwise_addition/input_factor")] = factor
    decomposition_params[get_param_key(name, "elementwise_addition/feed_repeat")] = repeats
    return factor, repeats, decomposition_params


def _divide_elwa_zp_compensation_between_apu_and_acc(
    zp_elwa_acc,
    zp_elwa_apu,
    feed_repeat_elwa,
    input_factor_elwa,
    qp_elwa,
    shifts_calculator,
    name,
    accumulator_scale,
    limvals_pre_act,
):
    """Turn the accumulator/APU split of the elementwise zero point into compensation terms.

    Args:
        zp_elwa_acc: Part of the elementwise zero point compensated in the accumulator.
        zp_elwa_apu: Part of the elementwise zero point compensated in the APU.
        feed_repeat_elwa: Elementwise feed-repeat count.
        input_factor_elwa: Elementwise input factor.
        qp_elwa: Elementwise quantization params (``.scale`` is read).
        shifts_calculator: Object exposing ``calculate_shift(scale, limvals, name)``.
        name: Layer name, forwarded to ``calculate_shift``.
        accumulator_scale: Accumulator scale, forwarded to ``calculate_shift``.
        limvals_pre_act: ``(min, max)`` pre-activation limits.

    Returns:
        tuple: ``(output_offset_elwa, zp_apu_compensation, shift, shift_delta)``.
    """
    acc_offset = zp_elwa_acc * feed_repeat_elwa * input_factor_elwa
    apu_compensation = feed_repeat_elwa * input_factor_elwa * zp_elwa_apu

    # Moving part of the ELWA zero-point compensation from the accumulator to the APU
    # offsets the pre-activation range, so the accumulator shift is recomputed on the
    # range translated by the APU part expressed in un-quantized units.
    apu_part_native = zp_elwa_apu * qp_elwa.scale
    translated_limvals = (limvals_pre_act[0] + apu_part_native, limvals_pre_act[1] + apu_part_native)
    shift, shift_delta = shifts_calculator.calculate_shift(accumulator_scale, translated_limvals, name)

    return acc_offset, apu_compensation, shift, shift_delta


def _calc_elementwise_quantization_params(
    limvals_elwa,
    name,
    inp,
    qp_out,
    qp_elwa,
    bias_mode,
    inter_layer_precision_mode,
):
    """Split the elementwise zero point between the APU and the accumulator.

    The whole zero point stays in the accumulator unless
    ``inter_layer_precision_mode.is_mode(16, 8)`` holds and ``bias_mode`` is not one of the
    double-scale modes. In that case the zero point goes to the APU, capped so that its
    value expressed in output-scale units does not exceed ``2**input_activation_bits - 1``;
    any remainder is left for the accumulator.

    Args:
        limvals_elwa: Elementwise limit values (stored in the returned params).
        name: Layer name used for param keys and the assertion message.
        inp: Layer inference dict; ``stats_min_elementwise_value``,
            ``stats_max_elementwise_value`` and ``elementwise_group`` are read.
        qp_out: Output quantization params (``.scale`` is read).
        qp_elwa: Elementwise quantization params (``.scale``, ``.zero_point`` are read).
        bias_mode: ``BiasMode`` of the layer.
        inter_layer_precision_mode: Provides ``input_activation_bits`` and ``is_mode``.

    Returns:
        tuple: ``(zp_elwa_apu, zp_elwa_acc, layer_params)``.

    Raises:
        AssertionError: If ``qp_elwa`` or ``limvals_elwa`` is ``None``.
    """
    assert qp_elwa is not None and limvals_elwa is not None, f"Missing quantization and limvals params in layer {name}"

    # SDK-7487: the elementwise zero point is expressed in output-scale units so it can be
    # checked against the unsigned activation range (0-255 for 8 bits).
    activation_bits = inter_layer_precision_mode.input_activation_bits
    zp_in_output_units = np.round(old_div(qp_elwa.zero_point * qp_elwa.scale, qp_out.scale))
    uint_ceiling = 2**activation_bits - 1

    double_scale_modes = (BiasMode.double_scale_decomposition, BiasMode.double_scale_initialization)
    if not inter_layer_precision_mode.is_mode(16, 8) or bias_mode in double_scale_modes:
        apu_part, acc_part = 0, qp_elwa.zero_point
    elif zp_in_output_units <= uint_ceiling:
        apu_part, acc_part = qp_elwa.zero_point, 0.0
    else:
        # Give the APU as much as fits in the unsigned range; the accumulator takes the rest.
        apu_part = np.round(old_div(uint_ceiling * qp_out.scale, qp_elwa.scale))
        acc_part = qp_elwa.zero_point - apu_part

    pre_matching_limvals = (
        inp["stats_min_elementwise_value"],
        inp["stats_max_elementwise_value"],
        inp["elementwise_group"],
    )
    elwa_params = {
        get_param_key(name, "elementwise_addition/limvals_elwa_pre_scale_matching"): pre_matching_limvals,
        get_param_key(name, "elementwise_addition/qp_elwa"): qp_elwa,
        get_param_key(name, "elementwise_addition/limvals_elwa"): limvals_elwa,
    }

    return apu_part, acc_part, elwa_params


def _calc_standalone_elementwise_quantization_params(inp, qp_in, qp_elwa, bits, type):
    """Build the two-element integer kernel for a standalone elementwise add/sub layer.

    The ratio between the smaller and the larger of the two input scales is approximated
    by a fraction whose denominator is at most ``2**(bits - 1) - 1``. Numerator and
    denominator are then multiplied by the largest integer that keeps the denominator
    within that bound, and the second kernel entry is negated for ``LayerType.ew_sub``.

    Args:
        inp: Layer inference dict; ``input_tensor.shape`` is read (index 3 for rank-4
            tensors, index 1 otherwise) to get the feature count.
        qp_in: Quantization params of the first input.
        qp_elwa: Quantization params of the second (elementwise) input.
        bits: Bit width bounding the kernel integers.
        type: Layer type; only ``LayerType.ew_sub`` changes the result.

    Returns:
        tuple: ``(kernel_q, qp_kernel, bias, output_offset_elwa, accumulator_scale)``.
    """
    scale_ratio = np.min([qp_elwa.scale, qp_in.scale]) / np.max([qp_elwa.scale, qp_in.scale])
    int_ceiling = (2 ** (int(bits) - 1)) - 1
    ratio = Fraction(str(scale_ratio)).limit_denominator(int_ceiling)
    stretch = int_ceiling // ratio.denominator
    elwa_sign = -1 if type == LayerType.ew_sub else 1

    in_shape = inp["input_tensor"].shape
    features = in_shape[3] if len(in_shape) == 4 else inp["input_tensor"].shape[1]

    # The input with the larger scale gets the larger weight (the denominator).
    if qp_in.scale > qp_elwa.scale:
        in_weight, elwa_weight = ratio.denominator, ratio.numerator
    else:
        in_weight, elwa_weight = ratio.numerator, ratio.denominator
    kernel_q = np.array([stretch * in_weight, elwa_sign * stretch * elwa_weight])
    qp_kernel = QuantParams(0.0, (1.0 / np.max(np.abs(kernel_q))))

    bias = np.zeros([features])
    output_offset_elwa = (qp_elwa.zero_point * kernel_q[1]) * np.ones([features])
    accumulator_scale = qp_in.scale / kernel_q[0]

    return kernel_q, qp_kernel, bias, output_offset_elwa, accumulator_scale


def _calc_elementwise_multiply_quantization_params(qp_in, qp_elwa, hw_arch):
    """Return the fixed kernel, bias and accumulator scale of an elementwise multiply.

    Both kernel entries are the constant 2 (kernel scale ``1/2``) and the bias holds the
    negated zero points of the two inputs. The accumulator scale is the product of the
    three scales, multiplied by 2 when ``hw_arch.is_mercury_arch`` is truthy and by 4
    otherwise.

    Returns:
        tuple: ``(kernel_q, qp_kernel, bias, accumulator_scale)``.
    """
    kernel_level = 2
    kernel_q = np.array([kernel_level] * 2)
    qp_kernel = QuantParams(0.0, (1.0 / kernel_level))
    bias = np.array([-qp_in.zero_point, -qp_elwa.zero_point])

    arch_gain = 2 if hw_arch.is_mercury_arch else 4
    accumulator_scale = qp_in.scale * qp_elwa.scale * qp_kernel.scale * arch_gain

    return kernel_q, qp_kernel, bias, accumulator_scale


def _calc_feature_multiply_quantization_params(qp_in, hw_arch):
    """Quantization params of a feature multiply: an elementwise multiply of the input with itself.

    Delegates to ``_calc_elementwise_multiply_quantization_params`` with ``qp_in`` used for
    both operands and returns its ``(kernel_q, qp_kernel, bias, accumulator_scale)`` tuple.
    """
    return _calc_elementwise_multiply_quantization_params(qp_in, qp_in, hw_arch)


def _calc_avgpool_quantization_params(inp, qp_in):
    """Return the constant kernel, zero bias and accumulator scale of an average pool.

    Every kernel entry is the constant 4, and the kernel scale is
    ``1 / (window_height * window_width * 4)``.

    Args:
        inp: Layer inference dict; the first three entries of ``avgpool_kernel_shape``
            are read and used as the kernel's height, width and last dimension.
        qp_in: Input quantization params (``.scale`` is read).

    Returns:
        tuple: ``(kernel_q, qp_kernel, bias, accumulator_scale)``.
    """
    kernel_level = 4
    pool_shape = inp["avgpool_kernel_shape"]
    pool_h, pool_w, features = pool_shape[0], pool_shape[1], pool_shape[2]
    window_size = pool_h * pool_w

    kernel_q = np.ones([pool_h, pool_w, 1, features]) * kernel_level
    qp_kernel = QuantParams(0.0, (1.0 / (window_size * kernel_level)))
    bias = np.zeros(features)
    accumulator_scale = qp_in.scale * qp_kernel.scale

    return kernel_q, qp_kernel, bias, accumulator_scale


def _calc_reduce_mean_quantization_params(inp, qp_in):
    """Return the constant kernel, zero bias and accumulator scale of a reduce-mean layer.

    Every kernel entry is the constant 4 and the kernel scale is ``1/4``.

    Args:
        inp: Layer inference dict; the first three entries of ``reducemean_kernel_shape``
            become kernel dimensions 1-3 (dimension 0 is 1).
        qp_in: Input quantization params (``.scale`` is read).

    Returns:
        tuple: ``(kernel_q, qp_kernel, bias, accumulator_scale)``.
    """
    kernel_level = 4
    reduce_shape = inp["reducemean_kernel_shape"]
    kernel_dims = [1, reduce_shape[0], reduce_shape[1], reduce_shape[2]]

    kernel_q = np.ones(kernel_dims) * kernel_level
    bias = np.zeros(1)
    qp_kernel = QuantParams(0.0, (1.0 / kernel_level))
    accumulator_scale = qp_in.scale * qp_kernel.scale

    return kernel_q, qp_kernel, bias, accumulator_scale


def _calc_reduce_sum_quantization_params(inp, qp_in):
    """Return the constant kernel, zero bias and accumulator scale of a reduce-sum layer.

    Every kernel entry is the constant 4 and the kernel scale is ``1/4``.

    Args:
        inp: Layer inference dict; the first three entries of ``reducesum_kernel_shape``
            become kernel dimensions 1-3 (dimension 0 is 1).
        qp_in: Input quantization params (``.scale`` is read).

    Returns:
        tuple: ``(kernel_q, qp_kernel, bias, accumulator_scale)``.
    """
    kernel_level = 4
    reduce_shape = inp["reducesum_kernel_shape"]
    kernel_dims = [1, reduce_shape[0], reduce_shape[1], reduce_shape[2]]

    kernel_q = np.ones(kernel_dims) * kernel_level
    bias = np.zeros(1)
    qp_kernel = QuantParams(0.0, (1.0 / kernel_level))
    accumulator_scale = qp_in.scale * qp_kernel.scale

    return kernel_q, qp_kernel, bias, accumulator_scale


def _calc_standalone_activation_quantization_params(inp, qp_in, accumulator_width):
    """Return the constant kernel, zero bias and accumulator scale of a standalone activation.

    The kernel constant is 128 for ``ActivationType.inv_pos`` and 64 for every other
    activation; the kernel scale is its reciprocal.

    Args:
        inp: Layer inference dict; ``activation_type`` and the last dimension of
            ``output_tensor.shape`` are read.
        qp_in: Input quantization params (``.scale`` is read).
        accumulator_width: Not used by this function.

    Returns:
        tuple: ``(kernel_q, qp_kernel, bias, accumulator_scale)``.
    """
    is_inv_pos = ActivationType.inv_pos == inp["activation_type"]
    kernel_level = 128 if is_inv_pos else 64
    features = inp["output_tensor"].shape[-1]

    kernel_q = np.ones([1, 1, features, 1]) * kernel_level
    qp_kernel = QuantParams(0.0, 1.0 / kernel_level)
    bias = np.zeros(inp["output_tensor"].shape[-1])
    accumulator_scale = qp_in.scale * qp_kernel.scale

    return kernel_q, qp_kernel, bias, accumulator_scale


def update_translated_params_layer_bias(params_translated, layer_name, bias_diff, conv_layer_inference_item):
    """Re-quantize a layer's bias after adding ``bias_diff`` to its native bias.

    Args:
        params_translated: Mapping from layer name to that layer's translated params object.
        layer_name: Name of the layer to update.
        bias_diff: Correction added to ``layer_params.native_bias``.
        conv_layer_inference_item: Layer inference dict; ``layer_type`` is required and
            ``bias_mode`` is optional (defaults to ``BiasMode.single_scale_decomposition``).

    Returns:
        dict: New ``<layer>/bias:0``, ``<layer>/bias_q:0``, ``<layer>/bias_feed_repeat:0`` and
        ``<layer>/bias_factor:0`` entries, or an empty dict when the layer has no ``bias``.

    Raises:
        BackendQuantizationException: If the layer params have no ``native_bias``.
    """
    layer_params = params_translated[layer_name]
    if not hasattr(layer_params, "bias"):
        return {}

    layer_type = conv_layer_inference_item["layer_type"]
    # NOTE(review): the ``is not None`` test below is ineffective - a ``None`` item already
    # fails on the subscript above, and the deconv rate helpers subscript it as well.
    if conv_layer_inference_item is not None and "bias_mode" in conv_layer_inference_item:
        bias_mode = conv_layer_inference_item["bias_mode"]
    else:
        bias_mode = BiasMode.single_scale_decomposition

    if not hasattr(layer_params, "native_bias"):
        # partial_numeric estimation not supported anymore. See SDK-12717
        raise BackendQuantizationException(
            f"In layer: {layer_name} - native_bias could not be found in npz, might be caused by an old npz file",
        )
    corrected_bias = layer_params.native_bias + bias_diff

    kernel_q = layer_params.kernel
    qp_in = QuantParams(layer_params.qp_in[0], layer_params.qp_in[1])
    shift = layer_params.output_stage.mult_shift
    output_offset_elwa = getattr(layer_params, "output_offset_elwa", 0)

    quantization_groups, _ = LayerQuantization.split_to_quantize_groups(
        kernel_q,
        corrected_bias,
        layer_params,
        layer_type,
    )
    requantized = calc_bias_quantization(
        shift,
        corrected_bias,
        kernel_q,
        qp_in,
        quantization_groups,
        bias_mode=bias_mode,
        output_offset_elwa=output_offset_elwa,
        layer_type=layer_type,
        rate_h=_calculate_deconv_rate_h(conv_layer_inference_item),
        rate_w=_calculate_deconv_rate_w(conv_layer_inference_item),
    )

    # KNOWN ISSUE (flagged by the original author): this call was not brought in line with
    # the other caller of calc_bias_quantization - the accumulator/weight bit-widths are not
    # passed, so the defaults are used. In addition, for BiasMode.double_scale_decomposition
    # the result carries no "bias_q" / "bias_factor" entries, so the lookups below would fail.
    return {
        layer_name + "/bias:0": requantized["bias_q"],
        layer_name + "/bias_q:0": requantized["bias_q"],
        layer_name + "/bias_feed_repeat:0": requantized["bias_feed_repeat"],
        layer_name + "/bias_factor:0": requantized["bias_factor"],
    }


def wrap(x, shift=0, bits=16):
    """
    Wrap values around the signed range of a shifted accumulator.

    Values are folded into ``[-2**(bits - 1) * 2**shift, 2**(bits - 1) * 2**shift)``
    using modular arithmetic.

    Args:
        x - data to wrap around (scalar or numpy.array)
        shift - the accumulator shift for the layer (default=0)
        bits - number of bits for the int representation (default=16)
    output - the wrapped-around values of x
    """
    half_span = 2 ** (bits - 1) * 2**shift
    return np.mod(x + half_span, 2 * half_span) - half_span


def _get_bbox_decoder_bias_residue(bias, kernel_q, qp_in):
    """Compute the zero-point residue of the y/x center biases of a bbox decoder.

    Kernel layout (first axis of ``kernel_q``):
        0 = anchors_heights, 1 = anchors_widths, 2 = anchors_heights_div_2,
        3 = anchors_widths_div_2, 4 = anchors_heights_minus_div_2, 5 = anchors_widths_minus_div_2

    Decoder equations:
        ymin = y_center + anchors_heights * In1 + anchors_heights_minus_div_2 * In2
        xmin = x_center + anchors_widths * In1 + anchors_widths_minus_div_2 * In2
        ymax = y_center + anchors_heights * In1 + anchors_heights_div_2 * In2
        xmax = x_center + anchors_widths * In1 + anchors_widths_div_2 * In2

    Each y_center/x_center is a bias that depends on different weights. Therefore, its
    residue compensation is calculated using the relevant weights.

    Args:
        bias: Pair ``(y_centers, x_centers)``; only their lengths are used here.
        kernel_q: Quantized kernel array laid out as described above.
        qp_in: Input quantization params (``.zero_point`` is read).

    Returns:
        tuple: ``(residue_y, residue_x)``. Each row holds the negated min/max compensations
        interleaved per entry (``[-min_0, -max_0, -min_1, -max_1, ...]``); the row is
        repeated ``len(bias[0])`` times for y and ``len(bias[1])`` times for x.
    """

    def weighted_zero_point(first_row, second_row):
        # Sum the two kernel rows over every axis but the last, scaled by the input zero point.
        reduce_axes = tuple(range(len(kernel_q.shape) - 1))
        return qp_in.zero_point * np.sum([kernel_q[first_row], kernel_q[second_row]], axis=reduce_axes)

    def interleave_negated(mins, maxs):
        return [value for pair in zip(-mins, -maxs) for value in pair]

    th_ymin = weighted_zero_point(0, 4)
    th_xmin = weighted_zero_point(1, 5)
    th_ymax = weighted_zero_point(0, 2)
    th_xmax = weighted_zero_point(1, 3)

    row_y = interleave_negated(th_ymin, th_ymax)
    row_x = interleave_negated(th_xmin, th_xmax)

    residue_y = np.tile(row_y, (len(bias[0]), 1))
    residue_x = np.tile(row_x, (len(bias[1]), 1))
    return residue_y, residue_x


def _calculate_deconv_rate_h(inp):
    """Return the vertical upsampling rate of a deconv layer, or 1 for any other layer type.

    The rate is the truncated ratio between dimension 1 of the output tensor shape and
    dimension 1 of the input tensor shape.
    """
    # This is a WA for rate calculation
    if inp["layer_type"] != LayerType.deconv:
        return 1
    out_rows = int(inp["output_tensor"].shape[1])
    in_rows = int(inp["input_tensor"].shape[1])
    return int(out_rows / in_rows)


def _calculate_deconv_rate_w(inp):
    """Return the horizontal upsampling rate of a deconv layer, or 1 for any other layer type.

    The rate is the truncated ratio between dimension 2 of the output tensor shape and
    dimension 2 of the input tensor shape.
    """
    # This is a WA for rate calculation
    if inp["layer_type"] != LayerType.deconv:
        return 1
    out_cols = int(inp["output_tensor"].shape[2])
    in_cols = int(inp["input_tensor"].shape[2])
    return int(out_cols / in_cols)


def _calculate_deconv_bias(kernel_q, zero_point, output_offset_elwa, rate_h, rate_w, bias=None):
    """Compute the zero-point compensation, residue and expanded bias of a deconv layer.

    The kernel is sub-sampled with strides ``rate_h`` / ``rate_w`` at the offsets returned
    by ``get_deconv_stack_order``; each sub-kernel is summed over all axes but the last and
    multiplied by ``zero_point``. The per-slice sums are then interleaved so that all slices
    of one output feature are adjacent.

    Args:
        kernel_q: Quantized kernel; the code indexes it with four axes.
        zero_point: Input zero point multiplying the kernel sums.
        output_offset_elwa: Elementwise offset subtracted when forming the residue.
        rate_h: Vertical deconv rate.
        rate_w: Horizontal deconv rate.
        bias: Optional bias; when given, each entry is repeated so its length matches the residue.

    Returns:
        tuple: ``(accumulator_th, residue, bias)`` with
        ``residue = -accumulator_th - output_offset_elwa``.
    """
    ax = tuple(range(len(kernel_q.shape) - 1))
    # in deconv we duplicate the biases (which size is output_features) * (rate_h * rate_w)

    kernel_h = kernel_q.shape[0]
    kernel_w = kernel_q.shape[1]
    # use_super_deconv = is_super_deconv(kernel_h, kernel_w, rate_h, rate_w)
    use_super_deconv = False  # Legacy
    if not use_super_deconv and (kernel_q.shape[0] == 2 or (kernel_q.shape[0] == 4 and rate_h == 4)):
        kernel_q_reflected = kernel_q  # ND: for consistency with tf_model.py TFnode._build_conv,
        # which "reflects" the kernel only for non 2x2/2 deconv kernels
        # (effectively transposes and then anti-transposes the HxW spatial dim of the kernel
        # across its channels and output feature dims )
        # This fixes the misalignment b/w biases and output features for 2x2/2 case
    else:
        kernel_q_reflected = kernel_q[::-1, ::-1]

    deconv_compensations = []

    # NOTE: use_super_deconv is hard-coded to False above, so this branch is currently never taken.
    if use_super_deconv:
        # For calculation of deconv_compensations need to split the kernel into sub-kernels,
        # as done in _build_super_deconv_split_kernels(self, l_desc, k) function in tf_model.py
        # The only difference is that it does not matter whether the zero padding goes before or after.
        new_kernel_h = int(np.ceil(kernel_h / rate_h))
        new_kernel_w = int(np.ceil(kernel_w / rate_w))

        f_col_tmp = []
        f_row_tmp = []
        for row in range(rate_h):
            for col in range(rate_w):
                start_h = 0 if row == 0 else rate_h - row
                start_w = 0 if col == 0 else rate_w - col

                sliced_kernel = kernel_q_reflected[start_h::rate_h, start_w::rate_w, :, :]
                pad_kernel_after_h = new_kernel_h - np.size(sliced_kernel, 0)
                pad_kernel_after_w = new_kernel_w - np.size(sliced_kernel, 1)
                sliced_kernel_padded = tf.pad(
                    sliced_kernel,
                    ([0, pad_kernel_after_h], [0, pad_kernel_after_w], [0, 0], [0, 0]),
                )
                f_col_tmp.append(sliced_kernel_padded)

            f_row_tmp.append(f_col_tmp.copy())
            f_col_tmp.clear()

        roll_h = kernel_h - 1 if (kernel_h < rate_h) else np.ceil((kernel_h + rate_h - 2) / 2)
        roll_w = kernel_w - 1 if (kernel_w < rate_w) else np.ceil((kernel_w + rate_w - 2) / 2)
        f_row_tmp = tf.roll(f_row_tmp, shift=[int(roll_h), int(roll_w)], axis=[0, 1])

        for row in range(rate_h):
            for col in range(rate_w):
                deconv_compensations.append(zero_point * np.sum(f_row_tmp[row, col, :, :], axis=ax))

    else:
        stack_order = get_deconv_stack_order(kernel_h, rate_h)
        for slice_h, slice_w in stack_order:
            deconv_compensations.append(
                zero_point * np.sum(kernel_q_reflected[slice_h::rate_h, slice_w::rate_w, :, :], axis=ax),
            )

    # TODO: SDK-10099
    # Transpose the list of per-slice vectors and flatten it, so that the values of all
    # slices belonging to the same feature index end up next to each other.
    accumulator_th = np.reshape(list(map(list, zip(*deconv_compensations))), (-1,))
    residue = -accumulator_th - output_offset_elwa
    if bias is not None:
        # Repeat every bias entry so the bias has the same length as the residue.
        bias = np.repeat(bias, int(len(residue) / len(bias)))
    return accumulator_th, residue, bias


def calc_bias_quantization(
    shift,
    bias,
    kernel_q,
    qp_in,
    quantization_group_params,
    bias_mode=BiasMode.single_scale_decomposition,
    is_4bit_weights=False,
    output_offset_elwa=0,
    accumulator_bits=DEFAULT_BIT_SIZE * 2,
    weight_bits=DEFAULT_BIT_SIZE,
    layer_type=None,
    max_bias_feed_repeat=None,
    rate_h=1,
    rate_w=1,
    dynamic_weights=False,
):
    """Quantize a layer's bias, including the input zero-point compensation ("residue").

    Behaviour depends on ``layer_type`` and ``bias_mode``:

    * ``LayerType.ew_mult`` / ``LayerType.feature_multiplier``: the bias entries are returned
      as-is (no residue, no wrapping) and the function returns early.
    * ``LayerType.bbox_decoder``: y/x center biases are scaled and rounded together with the
      residue from ``_get_bbox_decoder_bias_residue``; the quantization-group loop is skipped.
    * Every other layer type: for each group in ``quantization_group_params`` the residue and
      bias scale are computed, the quantized bias is wrapped to ``accumulator_bits`` and the
      results are stored back on the group object (``bias_q_pre_wrap``, ``bias_q``,
      ``residue``, ``accumulator_offset``, ``bias_scale``). The concatenated result is then
      represented according to ``bias_mode``.

    Note that inside the group loop the ``bias`` and ``kernel_q`` arguments are replaced by
    the group's own ``bias`` / ``kernel_q``.

    Args:
        shift: Accumulator shift of the layer.
        bias: Native bias (a pair of values/arrays for ew_mult and bbox_decoder).
        kernel_q: Quantized kernel.
        qp_in: Input quantization params (``.scale``, ``.zero_point`` are read).
        quantization_group_params: List of per-group params objects (mutated, see above).
        bias_mode: ``BiasMode`` selecting the bias representation.
        is_4bit_weights: When true (default bias mode only), ``bias_q`` is divided by
            ``2 ** (QBITS / 2)``.
        output_offset_elwa: Elementwise offset subtracted from the residue.
        accumulator_bits: Bit width used when wrapping the quantized bias.
        weight_bits: Bit width passed to ``quantization_tools.rep_as_uint_x_int_repeats``.
        layer_type: ``LayerType`` of the layer, or ``None``.
        max_bias_feed_repeat: Passed as ``max_feed_repeat`` to ``rep_as_uint_x_int_repeats``.
        rate_h: Vertical deconv rate (deconv layers only).
        rate_w: Horizontal deconv rate (deconv layers only).
        dynamic_weights: When true, residue and accumulator offset are zero.

    Returns:
        dict: Bias quantization results; the set of keys depends on the branch taken.
    """
    bias_quantization = {}
    bias_quantization["native_bias"] = bias
    # ew mult layer doesn't have a standard bias, it has a bias value we need to subtract from each
    # input before we multiply, so we have a special case here for it.

    if layer_type == LayerType.ew_mult:
        bias_quantization["bias_in_a"] = bias[0]
        bias_quantization["bias_in_b"] = bias[1]
        bias_quantization["bias_q"] = bias
        bias_quantization["native_bias"] = np.zeros_like(bias)
        return bias_quantization

    elif layer_type == LayerType.feature_multiplier:
        bias_quantization["bias_in"] = bias[0]
        bias_quantization["bias_q"] = bias
        return bias_quantization

    ax = tuple(range(len(kernel_q.shape) - 1))
    # In case of dw and normalization, we need to average on different axes.
    if layer_type in [LayerType.dw, LayerType.normalization, LayerType.activation]:
        ax = (0, 1, 3)

    if layer_type != LayerType.bbox_decoder:
        # create quantization groups only if the layer is not bbox_decoder
        for group in quantization_group_params:
            # From here on the group's own kernel/bias replace the function arguments.
            kernel_q = group.kernel_q
            qp_kernel = group.qp_kernel
            bias = group.bias
            bias_scale = qp_in.scale * group.qp_kernel.scale
            if layer_type == LayerType.deconv:
                accumulator_offset, residue, bias = _calculate_deconv_bias(
                    kernel_q,
                    qp_in.zero_point,
                    output_offset_elwa,
                    rate_h,
                    rate_w,
                    bias,
                )
            elif layer_type in [LayerType.ew_add, LayerType.ew_sub]:
                accumulator_offset = (qp_in.zero_point * kernel_q[0]) * np.ones_like(output_offset_elwa)
                residue = -accumulator_offset - output_offset_elwa
                bias_scale = qp_in.scale / kernel_q[0]
            elif dynamic_weights:
                residue = np.array([0])
                accumulator_offset = np.array([0])
            elif layer_type == LayerType.resize:
                accumulator_offset = np.array([qp_in.zero_point / qp_kernel.scale])
                residue = -accumulator_offset
            else:
                # A NaN kernel zero point means no kernel zero point is subtracted.
                if np.isnan(qp_kernel.zero_point):
                    accumulator_offset = qp_in.zero_point * np.sum(kernel_q, axis=ax)
                else:
                    accumulator_offset = qp_in.zero_point * np.sum(kernel_q - qp_kernel.zero_point, axis=ax)
                residue = -accumulator_offset - output_offset_elwa

            index = quantization_group_params.index(group)
            bias_quantization[f"residue_{index}"] = residue
            bias_quantization[f"bias_scale_{index}"] = bias_scale

            if bias_mode == BiasMode.double_scale_initialization:
                # The shift is folded into the quantized value, so wrapping uses shift=0.
                bias_q_pre_wrap = np.round(old_div(old_div(bias, bias_scale) + residue, 2**shift))
                wrapped_bias_q = wrap(bias_q_pre_wrap, bits=accumulator_bits, shift=0)
            else:
                bias_q_pre_wrap = np.round(old_div(bias, bias_scale)) + residue
                wrapped_bias_q = wrap(bias_q_pre_wrap, bits=accumulator_bits, shift=shift)

            # Store the per-group results on the group object (visible to the caller).
            group.bias_q_pre_wrap = bias_q_pre_wrap
            group.bias_q = wrapped_bias_q
            group.residue = residue
            group.accumulator_offset = accumulator_offset
            group.bias_scale = bias_scale
        wrapped_bias_q = np.concatenate([group.bias_q for group in quantization_group_params])
        residue = np.concatenate([group.residue for group in quantization_group_params])
        accumulator_offset = np.concatenate([group.accumulator_offset for group in quantization_group_params])
        bias_scales = [group.bias_scale for group in quantization_group_params]

    if layer_type == LayerType.bbox_decoder:
        # bias[0] = y_centers, bias[1] = x_centers
        # These values are 16 bit values.
        bias_scale = qp_in.scale * quantization_group_params[0].qp_kernel.scale
        residue_y, residue_x = _get_bbox_decoder_bias_residue(bias, kernel_q, qp_in)
        y_centers_pre_wrap = np.rint(old_div(bias[0], (bias_scale * 2**shift)) + old_div(residue_y, (2**shift)))
        x_centers_pre_wrap = np.rint(old_div(bias[1], (bias_scale * 2**shift)) + old_div(residue_x, (2**shift)))
        bias_quantization["y_centers"] = y_centers_pre_wrap
        bias_quantization["x_centers"] = x_centers_pre_wrap
        bias_quantization["residue_y"] = residue_y
        bias_quantization["residue_x"] = residue_x
        # AF NOTE:  what about wrapping in this special case? @NIR?
    elif bias_mode == BiasMode.double_scale_initialization:
        bias_quantization["bias_q"] = np.rint(wrapped_bias_q)
        bias_quantization["bias_factor"] = 2**shift
        bias_quantization["bias_feed_repeat"] = 1
        bias_quantization["bias_scale"] = bias_scales if len(bias_scales) > 1 else bias_scales[0]
        bias_quantization["residue"] = residue
        bias_quantization["partial_numeric_residue"] = residue
        bias_quantization["accumulator_offset"] = accumulator_offset
    elif bias_mode == BiasMode.double_scale_decomposition:
        bias_feed_repeat, bias_total_value, bias_q, bias_factor = quantization_tools.rep_as_uint_x_int_repeats(
            wrapped_bias_q,
            bits=weight_bits,
            bias_mode=bias_mode,
            max_feed_repeat=max_bias_feed_repeat,
        )

        # NOTE: this branch does not produce "bias_q" / "bias_factor" entries; the bias is
        # returned as two vectors (a, b) with their own factors.
        bias_quantization["bias_q_total_value"] = bias_total_value
        bias_quantization["bias_q_int8_vec_a"] = bias_q[0]
        bias_quantization["bias_q_int8_vec_b"] = bias_q[1]
        bias_quantization["bias_factor_a"] = bias_factor[0]
        bias_quantization["bias_factor_b"] = bias_factor[1]
        bias_quantization["bias_feed_repeat"] = bias_feed_repeat
        bias_quantization["bias_scale"] = bias_scales if len(bias_scales) > 1 else bias_scales[0]
        bias_quantization["residue"] = residue
        bias_quantization["partial_numeric_residue"] = residue
        bias_quantization["accumulator_offset"] = accumulator_offset
    else:
        bias_feed_repeat, bias_total_value, bias_q, bias_factor = quantization_tools.rep_as_uint_x_int_repeats(
            wrapped_bias_q,
            bits=weight_bits,
            max_feed_repeat=max_bias_feed_repeat,
        )

        bias_quantization["bias_q"] = bias_q
        bias_quantization["bias_factor"] = bias_factor
        bias_quantization["bias_feed_repeat"] = bias_feed_repeat
        bias_quantization["bias_scale"] = bias_scales if len(bias_scales) > 1 else bias_scales[0]
        bias_quantization["residue"] = residue
        bias_quantization["partial_numeric_residue"] = residue
        bias_quantization["accumulator_offset"] = accumulator_offset
        if is_4bit_weights:
            bias_quantization["bias_q"] /= float(2 ** (old_div(QBITS, 2)))

    return bias_quantization


def _handle_piecewise_negative_slopes(conv_layers_inference, layer_name, max_abs_negative_slope):
    """Scale a layer's output stats by ``2 ** max_abs_negative_slope`` and record the factor.

    The layer entry in ``conv_layers_inference`` is modified in place: both
    ``stats_min_output_value`` and ``stats_max_output_value`` are multiplied by the factor,
    which is also stored under ``negative_slopes_correction_factor``.
    """
    # TODO: should be refactored to scaling limvals and NOT stats, change will be required in scale matching function
    layer_inp = conv_layers_inference[layer_name]
    correction = 2**max_abs_negative_slope
    for stat_key in ("stats_min_output_value", "stats_max_output_value"):
        layer_inp[stat_key] *= correction
    layer_inp["negative_slopes_correction_factor"] = correction


def _handle_piecewise_overflow_offsets(conv_layers_inference, layer_name, output_scale_factor):
    """Scale a layer's output stats by ``output_scale_factor``.

    The layer entry in ``conv_layers_inference`` is modified in place: both
    ``stats_min_output_value`` and ``stats_max_output_value`` are multiplied by the factor.
    """
    # TODO: should be refactored to scaling limvals and NOT stats, change will be required in scale matching function
    layer_inp = conv_layers_inference[layer_name]
    for stat_key in ("stats_min_output_value", "stats_max_output_value"):
        layer_inp[stat_key] *= output_scale_factor


class QuantizationGroupParams:
    """Plain container for the quantization data of one quantization group.

    Every constructor argument is stored under the attribute of the same name.
    ``residue``, ``accumulator_offset`` and ``bias_scale`` are not constructor arguments:
    they start as ``None`` and are filled in later (``calc_bias_quantization`` assigns them).
    """

    def __init__(
        self,
        kernel=None,
        bias=None,
        limvals_pre_act=None,
        kernel_q=None,
        qp_kernel=None,
        bias_q=None,
        accumulator_scale=None,
        output_offset_elwa=0,
        zp_apu_compensation=0,
        shift=None,
        pa_x_points=None,
        pa_slopes=None,
        pa_offsets=None,
        pa_slopes_m=None,
        pa_slopes_e=None,
        bias_q_pre_wrap=None,
        limvals_kernel=None,
        layer_type=None,
    ):
        # Native and quantized kernel/bias data.
        self.kernel = kernel
        self.bias = bias
        self.limvals_pre_act = limvals_pre_act
        self.kernel_q = kernel_q
        self.qp_kernel = qp_kernel
        self.bias_q = bias_q
        self.accumulator_scale = accumulator_scale
        # Elementwise / APU compensation terms.
        self.output_offset_elwa = output_offset_elwa
        self.zp_apu_compensation = zp_apu_compensation
        # Populated after construction.
        self.residue = None
        self.accumulator_offset = None
        self.bias_scale = None
        self.shift = shift
        # Piecewise-activation ("pa_") data.
        self.pa_x_points = pa_x_points
        self.pa_slopes = pa_slopes
        self.pa_offsets = pa_offsets
        self.pa_slopes_m = pa_slopes_m
        self.pa_slopes_e = pa_slopes_e
        self.bias_q_pre_wrap = bias_q_pre_wrap
        self.limvals_kernel = limvals_kernel
        self.layer_type = layer_type

    def feature_size(self, rate_h, rate_w):
        """Return the number of features in this group, derived from ``bias_q``.

        For deconv layers the length of ``bias_q`` is divided by ``rate_h * rate_w``
        (true division, so the result is a float). Returns ``None`` when ``bias_q`` is
        unset or is a list.
        """
        quantized_bias = self.bias_q
        if quantized_bias is None or isinstance(quantized_bias, list):
            return None
        if self.layer_type == LayerType.deconv:
            duplication = rate_h * rate_w
            return quantized_bias.shape[0] / duplication
        return quantized_bias.shape[0]


class LayerQuantization:
    def __repr__(self):
        activation = self._inp.get("activation_type", None)
        return f"{self._name}, {activation}, limvals_weights: {self._limvals_kernel}, limvals_in: {self._limvals_in}, limvals_out: {self._limvals_out}"

    def __init__(self, name, inp, inter_layer_precision_mode, config_quantization, hw_arch):
        """
        Store the layer's description and the settings needed to quantize it.

        Args:
            name: Name of the layer.
            inp: Mapping with the layer's inference data; must contain ``"pre_act_minmax"``
                and may contain ``"bias_mode"``.
            inter_layer_precision_mode: Precision settings object; ``use_4bit_weights`` and
                ``exponential_mode_4bit_weights`` are read from it here.
            config_quantization: Quantization configuration, stored as is.
            hw_arch: Hardware architecture descriptor, stored as is.

        Raises:
            KeyError: If ``inp`` has no ``"pre_act_minmax"`` entry.
        """
        # Identity and raw inputs.
        self._name = name
        self._hw_arch = hw_arch
        self._inp = inp
        self._config_quantization = config_quantization
        self._inter_layer_precision_mode = inter_layer_precision_mode

        # Limvals that are not known yet at construction time.
        self._limvals_kernel = self._limvals_in = self._limvals_out = self._limvals_elwa = None

        # Values derived from the inputs.
        self._limvals_pre_act = inp["pre_act_minmax"]
        precision_mode = inter_layer_precision_mode
        self._use_4bit_weights = precision_mode.use_4bit_weights
        self._exponential_mode_4bit_weights = precision_mode.exponential_mode_4bit_weights
        self._bias_mode = inp.get("bias_mode", BiasMode.single_scale_decomposition)

        # Cleared by quantize() the first time it raises MatmulShiftDeltaException.
        self._matmul_first_attempt = True

    @property
    def inp(self):
        """Return the layer's inference-data mapping exactly as it was passed to the constructor."""
        return self._inp

    # TODO: take HW consts properly (SDK-8991)
    def quantize(
        self,
        params,
        out_stage_mantissa_size=10,
        out_stage_exp_size=4,
        is_apu_2s_complement=False,
        max_elementwise_feed_repeat=MAX_NUM_REPEATS_ELTWISE,
        quantization_groups_num=1,
        signed_output=False,
    ):
        beta = self._inter_layer_precision_mode.beta
        accumulator_width = self._inter_layer_precision_mode.accumulator_size
        weight_bits = self._inter_layer_precision_mode.weight_bits
        input_activation_bits = self._inter_layer_precision_mode.input_activation_bits
        output_activation_bits = self._inter_layer_precision_mode.output_activation_bits
        zero_point_weights = self._inter_layer_precision_mode.zero_point_weights
        supported_shifts = self._hw_arch.supported_shifts
        if accumulator_width == 32:
            supported_shifts = [0]
        inp = self._inp
        name = self._name
        bias_mode = self._bias_mode
        rate_h = _calculate_deconv_rate_h(inp)
        rate_w = _calculate_deconv_rate_w(inp)

        pre_act_max_values = inp["stats_max_pre_act_features_value"]
        pre_act_min_values = inp["stats_min_pre_act_features_value"]

        layer_params = {}
        max_bias_feed_repeat = inp["max_bias_feed_repeat"]
        qp_in, self._limvals_in = self.extract_quantization_params(
            inp["stats_min_input_value"],
            inp["stats_max_input_value"],
            bits=input_activation_bits,
            limvals=inp["input_minmax"],
            data_type="data_input",
        )
        qp_out, self._limvals_out = self.extract_quantization_params(
            inp["stats_min_output_value"],
            inp["stats_max_output_value"],
            sym_flag=signed_output,
            bits=output_activation_bits,
            limvals=inp["output_minmax"],
            data_type="data_output",
        )
        qp_elwa = None
        if "elementwise_name" in inp:
            qp_elwa, self._limvals_elwa = self.extract_quantization_params(
                inp["stats_min_elementwise_value"],
                inp["stats_max_elementwise_value"],
                bits=output_activation_bits,
                limvals=inp["elementwise_minmax"],
                data_type="data_elwa",
            )

        zp_apu_compensation = 0
        output_offset_elwa = 0
        output_factor = 0
        quantization_groups = []
        # unroll shifts calculation loop in 4 stages:
        layer_type = inp["layer_type"]
        dynamic_weights = "dynamic_weights" in inp and inp["dynamic_weights"]
        shifts_calculator = ShiftsCalculator(supported_shifts, accumulator_width)
        if (
            layer_type
            not in [
                LayerType.avgpool,
                LayerType.ew_add,
                LayerType.ew_sub,
                LayerType.activation,
                LayerType.ew_mult,
                LayerType.reduce_sum,
                LayerType.feature_multiplier,
                LayerType.reduce_mean,
            ]
            and not dynamic_weights
        ):
            if layer_type == LayerType.resize:
                bias, kernel = quantization_tools.get_bilinear_weights(name, inp, qp_in)
            else:
                bias, kernel = quantization_tools.get_weights(name, params, layer_type)
            quantization_groups, split_dim = self.split_to_native_groups(
                kernel,
                bias,
                layer_type,
                pre_act_min_values,
                pre_act_max_values,
                quantization_groups_num,
            )

            limvals_all_kernel = None
            for i, group in enumerate(quantization_groups):
                # stage 1 - calculate output factor and fix factor:
                name_of_group = f"{name}_group_{i}" if quantization_groups_num > 1 else name
                qp_kernel, _kernel_q, self._limvals_kernel = self.get_quant_params_data(
                    data=group.kernel,
                    bits=weight_bits,
                    sym_flag=(not zero_point_weights),
                    exponential_mode_4bit_weights=self._exponential_mode_4bit_weights,
                )

                # override max_elementwise_feed_repeat for current layer only (set in quantization_script)
                if "max_elementwise_feed_repeat" in inp and inp["max_elementwise_feed_repeat"] is not None:
                    max_elementwise_feed_repeat = inp["max_elementwise_feed_repeat"]

                # stages 2-4:
                (
                    qp_kernel,
                    self._limvals_kernel,
                    kernel_q,
                    accumulator_scale,
                    output_factor,
                    shift,
                    shift_delta,
                ) = self._quantize_layer_stages_2_4(
                    inp,
                    name_of_group,
                    input_activation_bits,
                    weight_bits,
                    qp_in,
                    qp_out,
                    group.limvals_pre_act,
                    group.kernel,
                    qp_kernel,
                    self._limvals_kernel,
                    out_stage_mantissa_size,
                    shifts_calculator,
                    qp_elwa,
                    max_elementwise_feed_repeat=max_elementwise_feed_repeat,
                    exponential_mode_4bit_weights=self._exponential_mode_4bit_weights,
                    sym_flag=(not zero_point_weights),
                )

                if "elementwise_name" in self._inp:
                    zp_elwa_apu, zp_elwa_acc, layer_params_elwa = _calc_elementwise_quantization_params(
                        self._limvals_elwa,
                        name,
                        inp,
                        qp_out,
                        qp_elwa,
                        bias_mode=bias_mode,
                        inter_layer_precision_mode=self._inter_layer_precision_mode,
                    )
                    input_factor_elwa, feed_repeat_elwa, layer_params_elwa_decomp = _calc_elwa_decomposition(
                        qp_elwa,
                        qp_in,
                        qp_kernel,
                        input_activation_bits,
                        max_elementwise_feed_repeat,
                        name,
                        is_signed=weight_bits != 15,
                    )
                    layer_params_elwa.update(layer_params_elwa_decomp)

                    (
                        output_offset_elwa,
                        zp_apu_compensation,
                        shift,
                        shift_delta,
                    ) = _divide_elwa_zp_compensation_between_apu_and_acc(
                        zp_elwa_acc,
                        zp_elwa_apu,
                        feed_repeat_elwa,
                        input_factor_elwa,
                        qp_elwa,
                        shifts_calculator,
                        name,
                        accumulator_scale,
                        self._limvals_pre_act,
                    )

                    if shift_delta:
                        # Moving the ELWA ZP compensation to the APU created severe risk of wrap around in the accumulator.
                        # In this case, we keep the ELWA ZP compensation in the accumulator.
                        zp_elwa_acc += zp_elwa_apu
                        zp_elwa_apu = 0
                        (
                            output_offset_elwa,
                            zp_apu_compensation,
                            shift,
                            shift_delta,
                        ) = _divide_elwa_zp_compensation_between_apu_and_acc(
                            zp_elwa_acc,
                            zp_elwa_apu,
                            feed_repeat_elwa,
                            input_factor_elwa,
                            qp_elwa,
                            shifts_calculator,
                            name,
                            accumulator_scale,
                            self._limvals_pre_act,
                        )

                    layer_params_elwa[get_param_key(name, "elementwise_addition/zp_elwa_apu")] = zp_elwa_apu
                    layer_params_elwa[get_param_key(name, "elementwise_addition/zp_elwa_acc")] = zp_elwa_acc
                else:
                    zp_apu_compensation, output_offset_elwa, layer_params_elwa = 0, 0, {}

                group.kernel_q = kernel_q
                group.qp_kernel = qp_kernel
                group.accumulator_scale = accumulator_scale
                group.zp_apu_compensation = zp_apu_compensation
                group.output_offset_elwa = output_offset_elwa
                group.shift = shift
                group.limvals_kernel = self._limvals_kernel
                group.shift_delta = shift_delta
                layer_params.update(layer_params_elwa)
                if limvals_all_kernel is None:
                    limvals_all_kernel = copy.copy(self._limvals_kernel)
                else:
                    limvals_all_kernel[0] = np.min([self._limvals_kernel[0], limvals_all_kernel[0]])
                    limvals_all_kernel[1] = np.max([self._limvals_kernel[1], limvals_all_kernel[1]])

            if any(group.shift_delta > 0 for group in quantization_groups):
                max_shift_param = max(quantization_groups, key=attrgetter("shift_delta"))
            else:
                max_shift_param = max(quantization_groups, key=attrgetter("shift"))
            zp_apu_compensation = max_shift_param.zp_apu_compensation
            output_offset_elwa = max_shift_param.output_offset_elwa
            shift = max_shift_param.shift
            shift_delta = max_shift_param.shift_delta
            self._limvals_kernel = limvals_all_kernel
            kernel_q = np.concatenate([group.kernel_q for group in quantization_groups], axis=split_dim)

        elif layer_type in [LayerType.ew_add, LayerType.ew_sub]:
            (
                kernel_q,
                qp_kernel,
                bias,
                output_offset_elwa,
                accumulator_scale,
            ) = _calc_standalone_elementwise_quantization_params(inp, qp_in, qp_elwa, input_activation_bits, layer_type)
            shift, shift_delta = shifts_calculator.calculate_shift(accumulator_scale, self._limvals_pre_act, name)
            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )

        elif layer_type == LayerType.ew_mult:
            kernel_q, qp_kernel, bias, accumulator_scale = _calc_elementwise_multiply_quantization_params(
                qp_in,
                qp_elwa,
                self._hw_arch,
            )
            shift, shift_delta = shifts_calculator.calculate_shift(
                accumulator_scale,
                self._limvals_pre_act,
                name,
                force_shift=1,
            )
            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )

        elif layer_type == LayerType.feature_multiplier:
            kernel_q, qp_kernel, bias, accumulator_scale = _calc_feature_multiply_quantization_params(
                qp_in,
                self._hw_arch,
            )
            shift, shift_delta = shifts_calculator.calculate_shift(
                accumulator_scale,
                self._limvals_pre_act,
                name,
                force_shift=1,
            )
            if shift_delta != 0:
                raise ShiftDeltaException(f"feature_multiplier layer {name} had shift delta {shift_delta}", shift_delta)
            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )

        elif layer_type == LayerType.activation:
            kernel_q, qp_kernel, bias, accumulator_scale = _calc_standalone_activation_quantization_params(
                inp,
                qp_in,
                accumulator_width,
            )
            force_shift = 0 if accumulator_width == 32 else 1
            shift, shift_delta = shifts_calculator.calculate_shift(
                accumulator_scale,
                self._limvals_pre_act,
                name,
                force_shift=force_shift,
            )
            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )
        elif layer_type in (LayerType.reduce_sum, LayerType.reduce_mean):
            if layer_type == LayerType.reduce_sum:
                kernel_q, qp_kernel, bias, accumulator_scale = _calc_reduce_sum_quantization_params(inp, qp_in)
            else:
                kernel_q, qp_kernel, bias, accumulator_scale = _calc_reduce_mean_quantization_params(inp, qp_in)

            shift, shift_delta = shifts_calculator.calculate_shift(accumulator_scale, self._limvals_pre_act, name)
            if shift_delta != 0:
                default_logger().debug(f"ReducesUm layer {name} had shift delta {shift_delta}")
            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )

        elif dynamic_weights:
            qp_weights, limvals_weights = self.extract_quantization_params(
                inp["stats_min_weights_input_value"],
                inp["stats_max_weights_input_value"],
                sym_flag=True,
                bits=input_activation_bits,
                limvals=inp["weights_input_minmax"],
                data_type="dynamic_weights",
            )

            accumulator_scale = qp_in.scale * qp_weights.scale
            shift, shift_delta = shifts_calculator.calculate_shift(accumulator_scale, self._limvals_pre_act, name)
            if shift_delta != 0 and self._matmul_first_attempt:
                self._matmul_first_attempt = False
                raise MatmulShiftDeltaException(f"Matmul layer {name} had shift delta {shift_delta}", shift_delta)

            output_factor, _ = _calc_output_factor(qp_weights, qp_in, qp_out, out_stage_mantissa_size)
            qp_kernel = qp_weights
            self._limvals_kernel = limvals_weights
            kernel_q = np.array([])
            bias = np.zeros(inp["output_tensor"].shape[-1])
            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )
        else:
            kernel_q, qp_kernel, bias, accumulator_scale = _calc_avgpool_quantization_params(inp, qp_in)
            shift, shift_delta = shifts_calculator.calculate_shift(accumulator_scale, self._limvals_pre_act, name)
            if shift_delta != 0:
                default_logger().debug(f"Avgpool layer {name} had shift delta {shift_delta}")

            quantization_groups.append(
                QuantizationGroupParams(
                    bias=bias,
                    kernel_q=kernel_q,
                    qp_kernel=qp_kernel,
                    limvals_pre_act=self._limvals_pre_act,
                    accumulator_scale=accumulator_scale,
                    layer_type=layer_type,
                ),
            )

        # override max_bias_feed_repeat for current layer only (set in quantization_script)
        if "max_bias_feed_repeat" in inp and inp["max_bias_feed_repeat"] is not None:
            max_bias_feed_repeat = inp["max_bias_feed_repeat"]

        # calc bias factors, deconv is a special case
        # in order to quantize 1/x, which has negative derivatives,
        # we substitute it with -1/x and initialize the kernel with
        # -1's instead of 1's.
        if inp["activation_type"] == ActivationType.inv_pos:
            if any(bias):
                raise BackendNotImplementedError(
                    f"Bias {bias} received for layer {name}. \
                                inv_pos activation doesn't support non-zero bias",
                )
            if not qp_in.zero_point == 0.0:
                raise BackendNotImplementedError(
                    f"Zero-point {qp_in.zero_point} received for layer {name}. \
                    inv_pos activation doesn't support non-zero zero-point",
                )
            kernel_q = -1.0 * kernel_q

        bias_quantization = calc_bias_quantization(
            shift,
            bias,
            kernel_q,
            qp_in,
            quantization_groups,
            bias_mode,
            self._use_4bit_weights,
            output_offset_elwa,
            accumulator_bits=accumulator_width,
            weight_bits=weight_bits,
            layer_type=layer_type,
            max_bias_feed_repeat=max_bias_feed_repeat,
            rate_h=rate_h,
            rate_w=rate_w,
            dynamic_weights=dynamic_weights,
        )
        # Take leaky_alpha & activation_threshold only if it is present
        leaky_alpha = None
        activation_threshold = None
        activation_delta_bias = None
        activation_less_value = None
        hardsigmoid_alpha = None
        hardsigmoid_beta = None
        clip_min = None
        clip_max = None
        activation_greater_value = None

        if params:
            leaky_alpha = params[name].get("leaky_alpha", None) if name in params else None
            activation_threshold = params[name].get("activation_threshold", None) if name in params else None
            activation_delta_bias = params[name].get("activation_delta_bias", None) if name in params else None
            activation_less_value = params[name].get("activation_less_values", None) if name in params else None
            hardsigmoid_alpha = params[name].get("hardsigmoid_alpha", None) if name in params else None
            hardsigmoid_beta = params[name].get("hardsigmoid_beta", None) if name in params else None
            clip_min = params[name].get("clip_min", None) if name in params else None
            clip_max = params[name].get("clip_max", None) if name in params else None
            activation_greater_value = params[name].get("activation_greater_values", None) if name in params else None
            if activation_threshold:
                activation_threshold = float(activation_threshold)

        # eventually, get scaled params from piecewise activator (with final result from shift calculation)
        for group in quantization_groups:
            (
                pa_x_points,
                pa_slopes,
                pa_offsets,
                pa_slopes_m,
                pa_slopes_e,
                x_points,
                y_points,
                slopes,
                offsets,
            ) = PiecewiseActivator.get_piecewise_activator_scaled_params(
                inp["activation_type"],
                self._hw_arch,
                group.accumulator_scale,
                qp_out,
                beta,
                out_stage_mantissa_size,
                out_stage_exp_size,
                leaky_alpha,
                zp_apu_compensation,
                shift,
                is_apu_2s_complement,
                self._inter_layer_precision_mode,
                activation_threshold,
                activation_delta_bias,
                group.limvals_pre_act,
                activation_fit=inp["activation_fit"],
                quantization_groups=quantization_groups_num,
                limvals_out=self._limvals_out,
                signed_output=signed_output,
                activation_less_value=activation_less_value,
                hardsigmoid_alpha=hardsigmoid_alpha,
                hardsigmoid_beta=hardsigmoid_beta,
                clip_min=clip_min,
                clip_max=clip_max,
                activation_greater_value=activation_greater_value,
            )
            group.pa_x_points = pa_x_points
            group.pa_slopes = pa_slopes
            group.pa_offsets = pa_offsets
            group.pa_slopes_m = pa_slopes_m
            group.pa_slopes_e = pa_slopes_e
            group.x_points = x_points
            group.y_points = y_points
            group.slopes = slopes
            group.offsets = offsets

        # We support channel-wise with more than one table, only when the biases are equal for all groups
        if len(quantization_groups) > 1 and self._hw_arch.is_mercury_arch:
            if len(quantization_groups) * (len(pa_x_points) - 1) > 8:  # If we need more than one apu table
                first_group_biases = quantization_groups[0].pa_offsets
                for group in quantization_groups:
                    if (first_group_biases - group.pa_offsets).any():  # if there is any differeence
                        raise BackendQuantizationException(
                            "Number of quantization groups exceeds maximum for different biases",
                        )

        if bias_mode == BiasMode.double_scale_initialization:
            if layer_type == LayerType.bbox_decoder:
                bias_min_x = np.min(bias_quantization["x_centers"]).astype(int)
                bias_min_y = np.min(bias_quantization["y_centers"]).astype(int)
                bias_min = np.min([bias_min_y, bias_min_x])
                bias_max_x = np.max(bias_quantization["x_centers"]).astype(int)
                bias_max_y = np.max(bias_quantization["y_centers"]).astype(int)
                bias_max = np.max([bias_max_x, bias_max_y])
            else:
                bias_min = np.min(bias_quantization["bias_q"]).astype(int)
                bias_max = np.max(bias_quantization["bias_q"]).astype(int)

            val_double_scale_bias = pow(2, accumulator_width - 1)
            if bias_max > val_double_scale_bias - 1 or bias_min < -val_double_scale_bias:
                # AF NOTE: we don't test the partial numeric, since it's un-wrapped on purpose.
                # In fact, because of wrapping, this exception should really never happen no matter what the statistics
                # are, so consider replacing by ASSERT
                raise BackendQuantizationException("bias_q is bigger than 16 bit")

        # collect results dictionary and return
        default_logger().debug("current node is {}, input_node is {}".format(name, inp["input_name"]))

        layer_params[get_param_key(name, "zero_point_in")] = qp_in.zero_point
        layer_params[get_param_key(name, "qp_in")] = qp_in
        layer_params[get_param_key(name, "zp_feed_repeat")] = ZP_FEED_REPEAT
        if qp_elwa is not None:
            layer_params[get_param_key(name, "qp_ew_in")] = qp_elwa
        layer_params[get_param_key(name, "qp_out")] = qp_out

        if "input_group" in inp:
            in_group = inp["input_group"]
        else:
            # Put NaN explicitly to avoid serialization problems
            # TODO: put None when input_group is moved to an independent field, instead of an item in a tuple (SDK-9852)
            in_group = np.nan

        if "output_group" in inp:
            out_group = inp["output_group"]
        else:
            # Put NaN explicitly to avoid serialization problems
            # TODO: put None when output_group is moved to an independent field, instead of an item in a tuple (SDK-9852)
            out_group = np.nan

        layer_params[get_param_key(name, "negative_slopes_correction_factor")] = 0
        if "negative_slopes_correction_factor" in inp:
            layer_params[get_param_key(name, "negative_slopes_correction_factor")] = inp[
                "negative_slopes_correction_factor"
            ]

        layer_params[get_param_key(name, "limvals_in_pre_scale_matching")] = (
            inp["stats_min_input_value"],
            inp["stats_max_input_value"],
            in_group,
        )
        layer_params[get_param_key(name, "limvals_output_pre_scale_matching")] = (
            inp["stats_min_output_value"],
            inp["stats_max_output_value"],
            out_group,
        )
        layer_params[get_param_key(name, "limvals_in")] = self._limvals_in
        layer_params[get_param_key(name, "limvals_out")] = self._limvals_out
        layer_params[get_param_key(name, "limvals_kernel")] = self._limvals_kernel
        layer_params[get_param_key(name, "limvals_pre_act")] = self._limvals_pre_act

        if layer_type in [LayerType.ew_add, LayerType.ew_sub]:
            if len(inp["input_tensor"].shape) == 4:
                features = inp["input_tensor"].shape[3]
            else:
                features = inp["input_tensor"].shape[1]

            layer_params[get_param_key(name, "kernel")] = np.repeat(kernel_q, features)
        else:
            layer_params[get_param_key(name, "kernel")] = kernel_q

        layer_params[get_param_key(name, "output_offset_elwa")] = output_offset_elwa

        if not np.isnan(qp_kernel.zero_point):
            layer_params[get_param_key(name, "zp_kernel")] = qp_kernel.zero_point

        if layer_type == LayerType.bbox_decoder:
            layer_params[get_param_key(name, "y_centers")] = bias_quantization["y_centers"]
            layer_params[get_param_key(name, "x_centers")] = bias_quantization["x_centers"]
            layer_params[get_param_key(name, "residue_y")] = bias_quantization["residue_y"]
            layer_params[get_param_key(name, "residue_x")] = bias_quantization["residue_x"]
            layer_params[get_param_key(name, "anchors_heights")] = kernel_q[0]
            layer_params[get_param_key(name, "anchors_widths")] = kernel_q[1]
            layer_params[get_param_key(name, "anchors_heights_div_2")] = kernel_q[2]
            layer_params[get_param_key(name, "anchors_widths_div_2")] = kernel_q[3]
            layer_params[get_param_key(name, "anchors_heights_minus_div_2")] = kernel_q[4]
            layer_params[get_param_key(name, "anchors_widths_minus_div_2")] = kernel_q[5]
        elif layer_type == LayerType.ew_mult:
            layer_params[get_param_key(name, "bias_in_a")] = bias_quantization["bias_in_a"]
            layer_params[get_param_key(name, "bias_in_b")] = bias_quantization["bias_in_b"]

        elif layer_type == LayerType.feature_multiplier:
            quantized_one = round(1 / qp_in.scale)
            if quantized_one == 0:
                quantized_one = 1

            layer_params[get_param_key(name, "quantized_one")] = quantized_one
            layer_params[get_param_key(name, "bias_in")] = bias_quantization["bias_in"]
            layer_params[get_param_key(name, "power_table")] = params[name]["power_table"]
        else:
            if bias_mode == BiasMode.double_scale_decomposition:
                layer_params[get_param_key(name, "bias_q_total_value")] = bias_quantization["bias_q_total_value"]
                layer_params[get_param_key(name, "bias_q_int8_vec_a")] = bias_quantization["bias_q_int8_vec_a"]
                layer_params[get_param_key(name, "bias_q_int8_vec_b")] = bias_quantization["bias_q_int8_vec_b"]
                layer_params[get_param_key(name, "bias_factor_a")] = bias_quantization["bias_factor_a"]
                layer_params[get_param_key(name, "bias_factor_b")] = bias_quantization["bias_factor_b"]
            else:
                layer_params[get_param_key(name, "bias_q")] = bias_quantization["bias_q"]
                layer_params[get_param_key(name, "bias_factor")] = bias_quantization["bias_factor"]

            layer_params[get_param_key(name, "native_bias")] = bias_quantization["native_bias"]
            layer_params[get_param_key(name, "residue")] = bias_quantization["residue"]
            layer_params[get_param_key(name, "accumulator_offset")] = bias_quantization["accumulator_offset"]
            layer_params[get_param_key(name, "scale_bias")] = bias_quantization["bias_scale"]
            layer_params[get_param_key(name, "bias_feed_repeat")] = bias_quantization["bias_feed_repeat"]

        layer_params[get_param_key(name, "weight_bits")] = self._inter_layer_precision_mode.weight_bits
        layer_params[get_param_key(name, "scale_kernel")] = qp_kernel.scale
        for i, group in enumerate(quantization_groups):
            layer_params[get_param_key(name, f"accumulator_scale_{i}")] = group.accumulator_scale
            if layer_type not in [LayerType.bbox_decoder, LayerType.ew_mult, LayerType.feature_multiplier]:
                layer_params[get_param_key(name, f"residue_{i}")] = bias_quantization[f"residue_{i}"]
            layer_params[get_param_key(name, f"qp_kernel_{i}")] = group.qp_kernel
            layer_params[get_param_key(name, f"limvals_kernel_{i}")] = group.limvals_kernel
            # save non scaled points for debugging
            layer_params[get_param_key(name, f"x_points_non_scaled_{i}")] = group.x_points
            layer_params[get_param_key(name, f"y_points_non_scaled_{i}")] = group.y_points
            layer_params[get_param_key(name, f"slopes_non_scaled_{i}")] = group.slopes
            layer_params[get_param_key(name, f"offsets_non_scaled_{i}")] = group.offsets

        layer_params[get_param_key(name, "accumulator_scale")] = accumulator_scale
        layer_params[get_param_key(name, "input_activation_bits")] = (
            self._inter_layer_precision_mode.input_activation_bits
        )
        layer_params[get_param_key(name, "output_activation_bits")] = (
            self._inter_layer_precision_mode.output_activation_bits
        )

        layer_params[get_param_key(name, "output_stage/shift_delta")] = shift_delta
        layer_params[get_param_key(name, "output_stage/mult_shift")] = shift
        layer_params[get_param_key(name, "output_stage/output_factor")] = output_factor
        layer_params[get_param_key(name, "output_stage/zp_apu_compensation")] = zp_apu_compensation
        layer_params[get_param_key(name, "output_stage/beta")] = beta

        pa_x_points = np.concatenate(
            [np.expand_dims(group.pa_x_points, axis=0) for group in quantization_groups],
            axis=0,
        )
        pa_slopes = np.concatenate([np.expand_dims(group.pa_slopes, axis=0) for group in quantization_groups], axis=0)
        pa_offsets = np.concatenate([np.expand_dims(group.pa_offsets, axis=0) for group in quantization_groups], axis=0)
        pa_slopes_m = np.concatenate(
            [np.expand_dims(group.pa_slopes_m, axis=0) for group in quantization_groups],
            axis=0,
        )
        pa_slopes_e = np.concatenate(
            [np.expand_dims(group.pa_slopes_e, axis=0) for group in quantization_groups],
            axis=0,
        )

        layer_params[get_param_key(name, "output_stage/piecewise/x_points")] = pa_x_points
        layer_params[get_param_key(name, "output_stage/piecewise/slopes")] = pa_slopes
        layer_params[get_param_key(name, "output_stage/piecewise/offsets")] = pa_offsets
        layer_params[get_param_key(name, "output_stage/piecewise/slopes_m")] = pa_slopes_m
        layer_params[get_param_key(name, "output_stage/piecewise/slopes_e")] = pa_slopes_e
        if layer_type not in [
            LayerType.bbox_decoder,
            LayerType.ew_mult,
            LayerType.feature_multiplier,
            LayerType.resize,
        ]:
            layer_params[get_param_key(name, "output_stage/piecewise/size_splits")] = [
                group.feature_size(rate_h, rate_w) for group in quantization_groups
            ]

        layer_params[get_param_key(name, "output_stage/output_activation_bits")] = (
            self._inter_layer_precision_mode.output_activation_bits
        )
        layer_params[get_param_key(name, "output_stage/piecewise/x_points_mask_max_value")] = (
            self._inter_layer_precision_mode.x_points_mask_max_value
        )
        layer_params[get_param_key(name, "output_stage/piecewise/apu_mode")] = self._inter_layer_precision_mode.apu_mode
        layer_params[get_param_key(name, "output_stage/piecewise/accumulator_size")] = (
            self._inter_layer_precision_mode.accumulator_size
        )
        layer_params[get_param_key(name, "output_stage/piecewise/ebias")] = self._inter_layer_precision_mode.ebias
        layer_params[get_param_key(name, "output_stage/piecewise/shifter_bias_max_value")] = (
            self._inter_layer_precision_mode.shifter_bias_max_value
        )

        return layer_params

    def handle_elementwise_high_feedrepeats(
        self,
        abits,
        wbits,
        qp_kernel,
        qp_in,
        qp_elwa,
        layer_name,
        limvals_kernel,
        kernel,
        kernel_q,
        max_elementwise_feed_repeat,
        exponential_mode_4bit_weights=False,
        sym_flag=False,
    ):
        """
        Re-quantize the kernel when the elementwise input factor is too large for the allowed feed repeats.

        The total elementwise input factor is ``round(qp_elwa.scale / (qp_in.scale * qp_kernel.scale))``.
        When it exceeds ``(2 ** (abits - 1) - 1) * max_elementwise_feed_repeat`` the upper kernel limit is
        multiplied by the excess ratio and the kernel is quantized again over the symmetric range
        ``(-kernmax, kernmax)``. Otherwise the given kernel quantization is returned untouched.

        Args:
            abits: activation bit width, used to derive the largest signed integer value.
            wbits: weight bit width forwarded to ``get_quant_params_data``.
            qp_kernel: current kernel quantization params (``.scale`` is read).
            qp_in: input quantization params (``.scale`` is read).
            qp_elwa: elementwise-input quantization params (``.scale`` is read).
            layer_name: layer name, used only in the debug log message.
            limvals_kernel: current kernel limits; only the upper limit (index 1) is used.
            kernel: kernel data to re-quantize.
            kernel_q: current quantized kernel, returned as-is when no re-quantization is needed.
            max_elementwise_feed_repeat: maximal feed repeat value for the elementwise input.
            exponential_mode_4bit_weights: forwarded to ``get_quant_params_data``.
            sym_flag: forwarded to ``get_quant_params_data``.

        Returns:
            Tuple ``(qp_kernel, kernel_q, limvals_kernel)``, either the inputs or the re-quantized values.

        """
        largest_signed_value = 2 ** (abits - 1) - 1.0
        total_input_factor = np.round(old_div(qp_elwa.scale, (qp_in.scale * qp_kernel.scale)))
        excess_ratio = old_div(total_input_factor, (largest_signed_value * max_elementwise_feed_repeat))

        # the factor fits within the allowed feed repeats - keep the kernel quantization as it is
        if not excess_ratio > 1:
            return qp_kernel, kernel_q, limvals_kernel

        default_logger().debug(f"Refactor kernel scale to match feedrepeat=1 for elementwise layer {layer_name} ")
        # TODO: FUTURE RESEARCH - split the factor between the kernel scale and the input scale - currently all
        #       the noise is inserted into the kernel scale
        kernmax = limvals_kernel[1] * excess_ratio
        return self.get_quant_params_data(
            data=kernel,
            bits=wbits,
            sym_flag=sym_flag,
            limvals=(-kernmax, kernmax),
            exponential_mode_4bit_weights=exponential_mode_4bit_weights,
        )

    def get_quant_params_data(
        self,
        data,
        bits,
        sym_flag=0,
        limvals=None,
        prev_limvals=None,
        exponential_mode_4bit_weights=False,
    ):
        """
        Compute quantization params for a data sample and return the quantized sample.

        Args:
            data: numpy array with data sample, on calibration set / batch / single image / whatever.
                  Its min and max are used for the range if limvals is not given.
            bits: quantize range to this number of bits.
            sym_flag: use a symmetrized range of data if True.
            limvals: externally supplied boundaries of the desired range.
            prev_limvals: limits from previous samples, forwarded to ``extract_quantization_params``.
            exponential_mode_4bit_weights: when True (and ``bits != 4``), snap every quantized value to
                the nearest (in log2) signed power of two in 1..64, keeping zeros as zero.

        Returns:
            Tuple ``(qp, quant_data, limvals)``: the quantization params (zero point, scale), the quantized
            data cast to int, and the limits that were actually used.

        """
        qp, limvals = self.extract_quantization_params(
            stats_min=np.min(data),
            stats_max=np.max(data),
            sym_flag=sym_flag,
            bits=bits,
            limvals=limvals,
            prev_limvals=prev_limvals,
            data_type="data_kernel",
        )

        quant_data = get_quantized_int(data, qp, limvals, sym_flag, bits)

        # Special treatment to 4-bit kernels quantization in B0:

        if bits == 4:
            # 4-bit INT mode - MODIFIED in B0
            #     put the 4-bit weight into bits 1-4 of weight-input-to-multiplier
            #     (vs. bits 0-3 a.k.a low-nibble as in A0)
            # Doubling the values while halving the scale keeps value * scale unchanged.
            qp = QuantParams(qp.zero_point, qp.scale / 2)
            quant_data *= 2
        elif exponential_mode_4bit_weights:
            # 4-bit EXP mode - NEW in B0
            #    the scale is as with 8-bit, because indeed the range is same, -127..127
            #    but only 15 of the values are permitted, hence 4-bit encodable: +-1,+-2,+-4,+-8,+-16,+-32,+-64
            # So, we quantize in 8-bit
            #  then find the (logarithmically) closest permitted value
            data_sign = np.sign(quant_data)
            # zeros are replaced by 1 only to avoid a log2(0) warning; they are zeroed again by *data_sign
            quant_data[np.where(quant_data == 0)] = 1
            quant_data = np.power(2, np.clip(np.round(np.log2(np.abs(quant_data))), 0, 6)) * data_sign

        # NOTE: a second `elif exponential_mode_4bit_weights:` branch that raised
        # BackendQuantizationException used to follow here; it repeated the condition above and was
        # therefore unreachable, so it was removed.

        return qp, quant_data.astype(int), limvals

    def _quantize_layer_stages_2_4(
        self,
        inp,
        name,
        abits,
        wbits,
        qp_in,
        qp_out,
        limvals_pre_act,
        kernel,
        qp_kernel,
        limvals_kernel,
        out_stage_mantissa_size,
        shifts_calculator,
        qp_elwa=None,
        max_elementwise_feed_repeat=MAX_NUM_REPEATS_ELTWISE,
        exponential_mode_4bit_weights=False,
        sym_flag=False,
    ):
        """
        Run stages 2 to 4 of the layer quantization: refine the kernel quantization and derive the shift.

        The kernel is re-quantized up to three times: once with limits scaled by the fix factor returned
        from ``_calc_output_factor``, once with limits scaled by ``2 ** shift_delta`` as returned by
        ``shifts_calculator.calculate_shift``, and, only when ``"elementwise_name"`` is in ``inp``, once
        more through ``handle_elementwise_high_feedrepeats``.

        Returns:
            Tuple ``(qp_kernel, limvals_kernel, kernel_q, accumulator_scale, output_factor, shift,
            shift_delta)`` computed from the final kernel quantization.

        """

        def _requantize_kernel(kernel_limits):
            # every stage quantizes the same kernel data; only the range limits differ
            return self.get_quant_params_data(
                data=kernel,
                bits=wbits,
                sym_flag=sym_flag,
                limvals=kernel_limits,
                exponential_mode_4bit_weights=exponential_mode_4bit_weights,
            )

        # stage 2 - first modification of qp_kernel before calculating the shift:
        # a new kernel scale is chosen so that the output factor mantissa lands as close as possible to its
        # quantized value, giving the smallest quantization loss.
        # new_output_factor_mantissa = o_fact * 2 ** (out_stage_mantissa_size + (o_fact_exp - 2 * quantization_tools.QBITS))
        _, fixfactor = _calc_output_factor(qp_kernel, qp_in, qp_out, out_stage_mantissa_size)
        scaled_kernel_max = limvals_kernel[1] * fixfactor
        if sym_flag:
            stage_2_limits = (-scaled_kernel_max, scaled_kernel_max)
        else:
            stage_2_limits = [limit * fixfactor for limit in limvals_kernel]
        qp_kernel, kernel_q, limvals_kernel = _requantize_kernel(stage_2_limits)

        # stage 3 - calculate the shift, then apply the second modification which updates qp_kernel
        shift, shift_delta = shifts_calculator.calculate_shift(qp_in.scale * qp_kernel.scale, limvals_pre_act, name)
        qp_kernel, kernel_q, limvals_kernel = _requantize_kernel(
            [limit * (2**shift_delta) for limit in limvals_kernel],
        )

        if "elementwise_name" in inp:
            qp_kernel, kernel_q, limvals_kernel = self.handle_elementwise_high_feedrepeats(
                abits,
                wbits,
                qp_kernel,
                qp_in,
                qp_elwa,
                name,
                limvals_kernel,
                kernel,
                kernel_q,
                max_elementwise_feed_repeat,
                exponential_mode_4bit_weights=exponential_mode_4bit_weights,
                sym_flag=sym_flag,
            )

        # stage 4 - final accumulator scale and output factor, based on the final kernel scale
        final_accumulator_scale = qp_in.scale * qp_kernel.scale
        final_output_factor, _ = _calc_output_factor(qp_kernel, qp_in, qp_out, out_stage_mantissa_size)
        return (
            qp_kernel,
            limvals_kernel,
            kernel_q,
            final_accumulator_scale,
            final_output_factor,
            shift,
            shift_delta,
        )

    def extract_quantization_params(
        self,
        stats_min,
        stats_max,
        bits,
        sym_flag=0,
        limvals=None,
        prev_limvals=None,
        data_type="data_input",
    ):
        """
        Derive the quantization params (zero point, scale) and the final range limits.

        Args:
            stats_min: minimum statistic, used to extract the limits when limvals is not given.
            stats_max: maximum statistic, used to extract the limits when limvals is not given.
            bits: number of bits of the quantized representation.
            sym_flag: use a symmetrized range if True; given limvals are symmetrized around zero as well.
            limvals: externally supplied boundaries of the desired range.
            prev_limvals: previous limvals calculated on previous images. They are forwarded to the limits
                extraction (it allows us to extract statistics from images in batches).
            data_type: what kind of data is being quantized; used only in the debug log message.

        Returns:
            Tuple ``(qp, limvals)``: named tuple of zero point and scale, and the calculated boundaries
            as a new list.

        Raises:
            BackendQuantizationException: both limits are equal and the quantization config does not
                allow continuing with constant data.

        """
        qmax = (2**bits) - 1
        if limvals is None:
            limvals = quantization_tools.extract_limvals(stats_min, stats_max, sym_flag, prev_limvals)
        elif sym_flag:
            abs_bound = np.abs(limvals).max()
            limvals = [-abs_bound, abs_bound]

        # work on a private list so that the caller's sequence is never modified
        limvals = list(limvals)
        if limvals[0] == limvals[1]:
            if not self._is_same_data_warning(self._config_quantization):
                raise BackendQuantizationException("quantized data is all the same. check network weights or inputs.")
            # constant data: widen the range so that it is not degenerate
            widened_max = max(limvals[1], np.float32(1))
            widened_min = -widened_max if sym_flag else min(limvals[0], np.float32(0))
            limvals[0], limvals[1] = widened_min, widened_max
            default_logger().debug(
                f"for layer {self._name} {data_type} is all the same - changing to limvals {limvals}",
            )

        # we want to verify that Zero is inside representable range
        limvals[0] = min(limvals[0], 0)
        limvals[1] = max(limvals[1], 0)
        value_span = limvals[1] - limvals[0]
        scale = old_div(value_span, qmax)

        if sym_flag:
            # zero point have no meaning for the KERNEL (INT) case, nothing more to do
            # NOTE -127,..,+127 (OR -7,..,7 in 4-bit)
            return QuantParams(0, old_div(value_span, (2**bits - 2))), limvals

        # DATA (UINT) case
        # nudge the limits such that zero is exactly representable
        #  (may be important because special stuff happens there for many activations)
        zp_int = np.ceil(old_div(-limvals[0], scale))
        if zp_int > qmax:
            zp_int = qmax

        if qmax > zp_int:
            scale = old_div(limvals[1], qmax - zp_int)
        else:
            scale = old_div(limvals[0], 0 - zp_int)
        limvals[0] = scale * (0 - zp_int)

        return QuantParams(zp_int, scale), limvals

    @staticmethod
    def _is_same_data_warning(config_quantization):
        # return from config if we want to continue quantization if data is all the same or not.
        # The only way we will not continue but raise an error is if there is a section in the config file of quantization
        # and the same_data_continue is False. In all the other cases, we will continue quantization and warn the user.
        return bool(config_quantization.get("same_data_continue", "True"))

    @staticmethod
    def split_to_quantize_groups(kernel, bias, layer_params, layer_type):
        """
        Split a quantized kernel and its bias into quantization groups.

        The number of groups is ``layer_params.scale_bias.size``. With more than one group, every group but
        the last takes ``base_group_size`` entries along the split dimension and the last group takes the
        remainder; the kernel scale of group ``i`` is read from ``layer_params["qp_kernel_<i>"][1]``. With a
        single group the scale is ``layer_params.scale_kernel``. The inputs are deep-copied first.

        Returns:
            Tuple ``(quantization_groups, split_dim)``.

        """
        groups_count = layer_params.scale_bias.size
        base_group_size, split_dim = LayerQuantization._get_base_group_size(kernel, layer_type, groups_count)

        def _make_group(kernel_slice, bias_slice, kernel_scale):
            return QuantizationGroupParams(
                kernel_q=kernel_slice,
                qp_kernel=QuantParams(0, kernel_scale),
                bias=bias_slice,
                layer_type=layer_type,
            )

        remaining_bias = copy.deepcopy(bias)
        remaining_kernel = copy.deepcopy(kernel)
        if not groups_count > 1:
            return [_make_group(remaining_kernel, remaining_bias, layer_params.scale_kernel)], split_dim

        groups = []
        last_index = groups_count - 1
        for group_index in range(last_index):
            # peel the next group off the front; what is left feeds the following iteration
            kernel_head, remaining_kernel = np.array_split(remaining_kernel, [base_group_size], axis=split_dim)
            bias_head, remaining_bias = np.array_split(remaining_bias, [base_group_size])
            groups.append(_make_group(kernel_head, bias_head, layer_params[f"qp_kernel_{group_index}"][1]))

        # the last group takes whatever was not consumed by the previous ones
        groups.append(_make_group(remaining_kernel, remaining_bias, layer_params[f"qp_kernel_{last_index}"][1]))
        return groups, split_dim

    @staticmethod
    def split_to_native_groups(kernel, bias, layer_type, pre_act_min, pre_act_max, quantization_groups_num):
        """
        Split a native kernel, its bias and the pre-activation statistics into quantization groups.

        Every group but the last takes ``base_group_size`` entries (along the split dimension for the
        kernel); the last group takes the remainder. The pre-activation limits of a group are the minimum
        of its ``pre_act_min`` part and the maximum of its ``pre_act_max`` part.

        Returns:
            Tuple ``(quantization_groups, split_dim)``.

        """
        base_group_size, split_dim = LayerQuantization._get_base_group_size(kernel, layer_type, quantization_groups_num)

        def _make_group(kernel_slice, bias_slice, min_slice, max_slice):
            return QuantizationGroupParams(
                kernel=kernel_slice,
                bias=bias_slice,
                limvals_pre_act=[np.min(min_slice), np.max(max_slice)],
                layer_type=layer_type,
            )

        groups = []
        rest_kernel, rest_bias, rest_min, rest_max = kernel, bias, pre_act_min, pre_act_max
        for _ in range(quantization_groups_num - 1):
            head_kernel, rest_kernel = np.array_split(rest_kernel, [base_group_size], axis=split_dim)
            head_bias, rest_bias = np.array_split(rest_bias, [base_group_size])
            head_min, rest_min = np.array_split(rest_min, [base_group_size])
            head_max, rest_max = np.array_split(rest_max, [base_group_size])
            groups.append(_make_group(head_kernel, head_bias, head_min, head_max))

        # the last group is built from everything that is left
        groups.append(_make_group(rest_kernel, rest_bias, rest_min, rest_max))
        return groups, split_dim

    @staticmethod
    def _get_base_group_size(kernel, layer_type, quantization_groups_num):
        """
        Compute the size of a single quantization group and the kernel dimension to split along.

        The split dimension is -2 for dw and normalization layers and -1 otherwise. The group size is the
        kernel size along that dimension divided by the number of groups, rounded up; for dense layers
        with more than one group it is rounded up to a multiple of 8.

        Returns:
            Tuple ``(base_group_size, split_dim)``.

        Raises:
            BackendQuantizationException: the kernel has fewer features than quantization groups.

        """
        if layer_type in [LayerType.dw, LayerType.normalization]:
            split_dim = -2
        else:
            split_dim = -1

        if kernel.shape[split_dim] < quantization_groups_num:
            raise BackendQuantizationException(
                f"Cannot quantize layer {layer_type.name} (features: {kernel.shape[split_dim]}) with {quantization_groups_num} "
                "quantization groups",
            )

        features_per_group = kernel.shape[split_dim] / quantization_groups_num
        align_to_8 = layer_type == LayerType.dense and quantization_groups_num > 1 and features_per_group % 8 != 0
        if align_to_8:
            group_size = np.ceil(features_per_group / 8) * 8
        else:
            group_size = np.ceil(features_per_group)
        return int(group_size), split_dim


def _get_softmax_lut(scale, cut_off=10):
    """
    Build the softmax lookup table as a flat array of 32-bit words.

    Two 256-entry tables are generated: a 1/x table and an e^-x table. Entry ``i`` of both tables is packed
    together with the index ``i`` into two consecutive words:
    ``first = (low 16 bits of inv) << 16 | exp`` and ``second = i << 8 | (inv >> 16)``.

    Args:
        scale: scale of the quantized softmax input; must be non-zero.
        cut_off: e^-x entries whose quantized index is not below ``ceil(cut_off / scale)`` are zero.

    Returns:
        ``np.uint32`` array with 512 entries (two words per table entry).

    """
    # generate 1/x lut
    max_16b = 2**16 - 1
    inv_lut = np.round(1.0 / np.linspace(1.0 / (255.0 * max_16b), 1.0 / max_16b, 256))
    inv_lut = np.rint(np.append(np.array(inv_lut[0]), np.array(0.5 * (np.add(inv_lut[0:-1], inv_lut[1:])))))
    # generate e^x lut. maps [0:255] --> (2**16 - 1)*[e^-255 --> 1]
    # cut_off assign zeros in lut in all entries > cut_off (quantized)
    cut_off_q = int(np.clip(np.ceil(cut_off / scale), 0, 255.0))
    lut_max = 2**16 - 1
    exp_lut = np.zeros(256)
    exp_lut[:cut_off_q] = np.arange(cut_off_q)
    inds = exp_lut.nonzero()
    exp_lut[inds] = np.round(lut_max * np.exp(-scale * exp_lut[inds]))
    exp_lut[0] = lut_max

    # inv needs up to 24 bits, so (inv << 16) does not fit in 32 bits. Only the low 32 bits belong to the
    # first word (the upper bits of inv are carried by the second word), hence the explicit mask instead
    # of relying on np.uint32 wrapping out-of-range Python ints, which NumPy 2 rejects with OverflowError.
    word_mask = 0xFFFFFFFF
    output = []
    for i, (exp, inv) in enumerate(zip(exp_lut, inv_lut)):
        # Aligned because each line mem is 48B, but each write is 32B.
        exp = int(exp.item())
        inv = int(inv.item())
        first = ((inv << 16) | exp) & word_mask
        second = ((int(i) << 8) | (inv >> 16)) & word_mask
        output.append(first)
        output.append(second)
    return np.array(output, dtype=np.uint32)


def _update_softmax_layer_params(hailo_nn, layer_params, hn_item, debug_precise_mode):
    """
    Store the softmax lookup table of a layer and, outside debug precise mode, fix its output range.

    In debug precise mode the table is generated for an input scale of 1. Otherwise the input scale is
    taken from the layer's ``qp_in`` entry, ``qp_out`` is set to ``[0, 1 / 255]`` and ``limvals_out`` to
    ``[0, 1]``.

    Args:
        hailo_nn: the model graph (not used by this function).
        layer_params: params mapping, updated in place.
        hn_item: the softmax layer.
        debug_precise_mode: whether the model is calibrated in debug precise mode.

    """
    layer_name = hn_item.name
    if debug_precise_mode:
        input_qp = QuantParams(0, 1)
    else:
        input_qp = layer_params[get_param_key(layer_name, "qp_in")]
        # softmax is a probability distribution function, therefore scale is for normalization only
        layer_params[get_param_key(layer_name, "qp_out")] = np.array([0, 1.0 / 255], dtype=np.float32)
        layer_params[get_param_key(layer_name, "limvals_out")] = np.array([0.0, 1.0], dtype=np.float32)

    softmax_lut = _get_softmax_lut(input_qp[1])
    layer_params[get_param_key(layer_name, "softmax_lut")] = softmax_lut


def _precise_quantize_layer(name, params, inp):
    """
    Build the params of a layer without real quantization: all scales are 1 and all zero points are 0.

    The kernel and bias are taken from ``params[name]`` or synthesized according to the layer type
    (avgpool, activation, deconv, bbox decoder, elementwise add/sub/mult, feature multiplier, dynamic
    weights).

    Args:
        name: the layer name.
        params: native model params, indexed by layer name.
        inp: dict describing the layer (its type, tensors, bias mode and so on).

    Returns:
        Dict of the layer params, keyed by ``get_param_key(name, ...)``.

    """
    bbox_kernel_fields = (
        "anchors_heights",
        "anchors_widths",
        "anchors_heights_div_2",
        "anchors_widths_div_2",
        "anchors_heights_minus_div_2",
        "anchors_widths_minus_div_2",
    )
    layer_type = inp["layer_type"]

    if layer_type == LayerType.avgpool:
        pool_shape = inp["avgpool_kernel_shape"]
        avg_pool_factor = pool_shape[0] * pool_shape[1]
        kernel = np.ones([pool_shape[0], pool_shape[1], 1, pool_shape[2]]) * (1.0 / avg_pool_factor)
        bias = np.zeros(pool_shape[2])
    elif layer_type == LayerType.activation:
        output_features = inp["output_tensor"].shape[3]
        kernel = np.ones([1, 1, output_features, 1])
        bias = np.zeros(output_features)
    elif layer_type == LayerType.deconv:
        # in deconv we duplicate the biases (which size is output_features) * (rate_h * rate_w)
        rate_h = _calculate_deconv_rate_h(inp)
        rate_w = _calculate_deconv_rate_w(inp)
        kernel = params[name]["kernel"]
        bias = np.repeat(params[name]["bias"], rate_h * rate_w)
    elif layer_type == LayerType.bbox_decoder:
        kernel = np.array([params[name][field] for field in bbox_kernel_fields])
        bias = [params[name]["y_centers"], params[name]["x_centers"]]
    elif layer_type in [LayerType.ew_add, LayerType.ew_sub]:
        features = inp["input_tensor"].shape[3]
        second_input_factor = 1 if layer_type == LayerType.ew_add else -1
        kernel = np.array([1] * features + [second_input_factor] * features)
        bias = np.zeros([features])
    elif layer_type in [LayerType.ew_mult, LayerType.feature_multiplier]:
        kernel = np.array([1, 1])
        bias = np.zeros([inp["input_tensor"].shape[3]])
    elif inp["dynamic_weights"]:
        kernel = np.array([])
        bias = np.zeros(inp["output_tensor"].shape[-1])
    else:
        kernel = params[name]["kernel"]
        bias = params[name]["bias"]

    default_logger().debug("current node is {}, input_node is {}".format(name, inp["input_name"]))

    def _key(suffix):
        return get_param_key(name, suffix)

    layer_params = {
        _key("zero_point_in"): 0,
        _key("kernel"): kernel,
        _key("bias_q"): bias,
        _key("scale_bias"): 1,
        _key("bias_factor"): 1,
        _key("bias_feed_repeat"): 1,
        _key("scale_kernel"): 1,
        _key("output_stage/output_factor"): 1,
        _key("output_stage/mult_shift"): 0,
    }

    if "elementwise_name" in inp:
        layer_params[_key("elementwise_addition/input_factor")] = 1
        layer_params[_key("elementwise_addition/feed_repeat")] = 1

    layer_params[_key("qp_in")] = QuantParams(0, 1)
    layer_params[_key("limvals_in")] = None
    layer_params[_key("qp_out")] = QuantParams(0, 1)
    layer_params[_key("limvals_out")] = None

    if layer_type == LayerType.bbox_decoder:
        # the bbox decoder exposes its centers and anchors as separate params instead of a bias
        layer_params.pop(_key("bias_q"))
        layer_params[_key("y_centers")] = bias[0]
        layer_params[_key("x_centers")] = bias[1]
        for row, field in enumerate(bbox_kernel_fields):
            layer_params[_key(field)] = kernel[row]

    if inp["bias_mode"] == BiasMode.double_scale_decomposition:
        layer_params[_key("bias_q_int8_vec_a")] = bias
        layer_params[_key("bias_q_int8_vec_b")] = np.zeros_like(bias)
        layer_params[_key("bias_factor_a")] = 1
        layer_params[_key("bias_factor_b")] = 1

    return layer_params


def _calculate_layer_original_min_max_values(name, layer, previous_layer_params):
    """
    Return a copy of the layer inference dict, extended with the min-max limits of its tensors.

    The limits of the input, output and pre-activation statistics are always added. The limits of the
    elementwise input are added when ``"elementwise_name"`` is in the layer, and those of the weights
    input when ``"weights_input_name"`` is in it. Limits found in ``previous_layer_params`` (when given)
    are passed on as previous limits, except for the weights input. Layers with a ``"dummy_conv"`` key
    are copied as they are.
    """
    if "dummy_conv" in layer:
        return layer.copy()

    def _previous_limvals(suffix):
        if previous_layer_params is None:
            return None
        return previous_layer_params[get_param_key(name, suffix)]

    previous_in = _previous_limvals("limvals_in")
    previous_out = _previous_limvals("limvals_out")
    previous_pre_act = _previous_limvals("limvals_pre_act")

    new_layer = layer.copy()
    new_layer["input_minmax"] = quantization_tools.extract_limvals(
        new_layer["stats_min_input_value"],
        new_layer["stats_max_input_value"],
        prev_limvals=previous_in,
    )
    new_layer["output_minmax"] = quantization_tools.extract_limvals(
        layer["stats_min_output_value"],
        layer["stats_max_output_value"],
        prev_limvals=previous_out,
    )
    new_layer["pre_act_minmax"] = quantization_tools.extract_limvals(
        layer["stats_min_pre_act_value"],
        layer["stats_max_pre_act_value"],
        prev_limvals=previous_pre_act,
    )

    if "elementwise_name" in new_layer:
        new_layer["elementwise_minmax"] = quantization_tools.extract_limvals(
            new_layer["stats_min_elementwise_value"],
            new_layer["stats_max_elementwise_value"],
            prev_limvals=_previous_limvals("elementwise_addition/limvals_elwa"),
        )

    if "weights_input_name" in new_layer:
        new_layer["weights_input_minmax"] = quantization_tools.extract_limvals(
            new_layer["stats_min_weights_input_value"],
            new_layer["stats_max_weights_input_value"],
        )

    return new_layer


def _prepare_layers_matched_minmax_values(conv_layers_inference, previous_layer_params):
    """
    Compute the min-max values of every layer and match them across connected layers.

    This prevents cases where the min-max values of a layer output do not match the min-max values of
    the relevant layer input due to non-convolutional layers in between.

    Returns:
        A copy of ``conv_layers_inference`` in which every layer holds its (matched) min-max values.
    """
    matched_layers = conv_layers_inference.copy()
    connection_sets = {}
    for layer_name, layer_inference in conv_layers_inference.items():
        layer_with_minmax = _calculate_layer_original_min_max_values(
            layer_name,
            layer_inference,
            previous_layer_params,
        )
        matched_layers[layer_name] = layer_with_minmax
        layer_connection_set = find_layer_connection_set(layer_name, layer_with_minmax, matched_layers)
        if layer_connection_set is None:
            continue
        connection_sets.update(layer_connection_set)

    # at this point the dict holds all the connection sets.
    # some of them probably have a non-zero intersection, so they are unified before matching the scales:
    match_scales_in_connection_set(matched_layers, unify_connection_sets(connection_sets))
    return matched_layers


def _remove_dummy_layers(conv_layers_inference):
    """Delete, in place, every layer whose inference dict contains the ``"dummy_conv"`` key."""
    # collect the names first: the mapping must not change size while it is being iterated
    dummy_layer_names = tuple(
        layer_name for layer_name, layer_inference in conv_layers_inference.items() if "dummy_conv" in layer_inference
    )
    for layer_name in dummy_layer_names:
        del conv_layers_inference[layer_name]


def save_statistics(layer_params):
    """
    Write the layer params as an indented, nested JSON document to ``statistics.json``.

    Every ``/`` in a param name opens a nesting level, e.g. ``"layer/stage/shift"`` is stored under
    ``{"layer": {"stage": {"shift": ...}}}``. The file is written to the current working directory.
    """
    nested_stats = {}
    for param_name, param_value in layer_params.items():
        *parent_parts, leaf_part = param_name.split("/")
        node = nested_stats
        for parent_part in parent_parts:
            if parent_part not in node:
                node[parent_part] = {}
            node = node[parent_part]
        node[leaf_part] = param_value

    # pandas does the serialization of the values; the json round trip only adds the indentation
    compact_json = pd.Series(nested_stats).to_json()
    indented_json = json.dumps(json.loads(compact_json), indent=2)
    with open("statistics.json", "w") as statistics_file:
        statistics_file.write(indented_json)


def update_layer_qp(layer_params, layer_name, qp, limvals):
    """
    Fill in the missing input/output quantization params and limits of a layer.

    Each of the ``qp_in``, ``limvals_in``, ``qp_out`` and ``limvals_out`` entries of ``layer_name`` that is
    not yet in ``layer_params`` is set to a deep copy of ``qp`` / ``limvals``; existing entries are kept.
    """
    defaults_by_suffix = (
        ("qp_in", qp),
        ("limvals_in", limvals),
        ("qp_out", qp),
        ("limvals_out", limvals),
    )
    keyed_defaults = [(get_param_key(layer_name, suffix), value) for suffix, value in defaults_by_suffix]
    for param_key, default_value in keyed_defaults:
        if param_key not in layer_params:
            layer_params[param_key] = copy.deepcopy(default_value)


def _validate_limvals_and_qp(hailo_nn, layer_params):
    """
    Verify that the output quantization of every layer agrees with the input quantization of its successors.

    A (layer, successor) pair is skipped when the successor is an output mux, when the first predecessor
    of a non-input layer is an output mux, when the layer feeds the successor's elementwise addition, or
    when the successor is an ew_add / ew_sub / ew_mult layer. For layers with a signed output the
    comparison is done against the successor's kernel params (``qp_kernel_0`` scale, ``limvals_kernel``)
    instead of its input params.

    Raises:
        BackendQuantizationException: the limits or the quantization params do not match.
    """
    input_ops = (LayerType.input_layer, LayerType.external_input_layer, LayerType.const_input)
    elementwise_ops = [LayerType.ew_add, LayerType.ew_sub, LayerType.ew_mult]

    for hn_item in hailo_nn.stable_toposort():
        src_qp_out = layer_params[get_param_key(hn_item.name, "qp_out")]
        src_limvals_out = layer_params[get_param_key(hn_item.name, "limvals_out")]
        for successor in hailo_nn.successors(hn_item):
            if successor.op == LayerType.output_mux:
                continue
            if hn_item.op not in input_ops and next(hailo_nn.predecessors(hn_item)).op == LayerType.output_mux:
                continue
            if successor.ew_add_enabled and hn_item in successor.ew_add_connections:
                continue
            if successor.op in elementwise_ops:
                continue

            signed_output = hn_item.precision_config.signed_output
            if signed_output:
                # the output is consumed as weights, so it is compared with the successor's kernel params
                qp_suffix, limvals_suffix = "qp_kernel_0", "limvals_kernel"
            else:
                qp_suffix, limvals_suffix = "qp_in", "limvals_in"
            suc_qp_in = layer_params[get_param_key(successor.name, qp_suffix)]
            suc_limvals_in = layer_params[get_param_key(successor.name, limvals_suffix)]

            if not np.array_equal(np.array(src_limvals_out), np.array(suc_limvals_in)):
                raise BackendQuantizationException(
                    f"{hn_item.name}:limvals_out is {src_limvals_out} while {successor.name}:limvals_in is {suc_limvals_in}",
                )

            if signed_output:
                if src_qp_out.scale != suc_qp_in.scale:
                    raise BackendQuantizationException(
                        f"{hn_item.name}:qp_out is {src_qp_out} while {successor.name}:qp_kernel is {suc_qp_in} with activation as weights",
                    )
            elif not np.array_equal(np.array(src_qp_out), np.array(suc_qp_in)):
                raise BackendQuantizationException(
                    f"{hn_item.name}:qp_out is {src_qp_out} while {successor.name}:qp_in is {suc_qp_in}",
                )


def _update_input_layer_qp(hailo_nn, layer_params, input_layer):
    """
    Set the quantization params of an input layer from the first descendant that has a ``qp_in`` entry.

    The graph is walked breadth first starting at ``input_layer``. When no descendant has a ``qp_in``
    entry, the input layer gets ``QuantParams(0, 1)`` with the limits ``[0, 2**15 - 1]`` for the 16-bit
    activation precision modes and ``[0, 2**8 - 1]`` otherwise.
    """
    pending_layers = [input_layer]
    while pending_layers:
        current_layer = pending_layers.pop(0)
        for successor in hailo_nn.successors(current_layer):
            successor_qp_key = get_param_key(successor.name, "qp_in")
            successor_limvals_key = get_param_key(successor.name, "limvals_in")
            if successor_qp_key in layer_params:
                successor_qp = layer_params[successor_qp_key]
                successor_limvals = layer_params[successor_limvals_key]
                update_layer_qp(layer_params, input_layer.name, successor_qp, successor_limvals)
                return
            # nothing known about this successor yet - keep looking further down the graph
            pending_layers.append(successor)

    # If we got here there is no conv-like layers and the quantization is irrelevant
    wide_activation_modes = [
        PrecisionMode.a16_w16,
        PrecisionMode.a16_w16_non_zero,
        PrecisionMode.a16_w16_a16,
        PrecisionMode.a16_w16_a8,
    ]
    if input_layer.precision_config.precision_mode in wide_activation_modes:
        max_val = 2**15 - 1.0
    else:
        max_val = 2**8 - 1.0
    update_layer_qp(layer_params, input_layer.name, QuantParams(0, 1), [0.0, max_val])


def fill_layers_qp(hailo_nn, layer_params, debug_precise_mode=False):
    """
    Propagate quantization params (qp) and limvals through the whole graph, then validate them.

    Steps, in order:
        1. Every input layer gets its qp from the first quantized layer downstream of it.
        2. Any ``qp_out`` entry belonging to an output layer is dropped from ``layer_params``.
        3. In topological order, each layer that has a ``qp_out`` entry pushes its ``qp_out`` /
           ``limvals_out`` to all of its successors via ``update_layer_qp``. Softmax layers get their
           params refreshed first.
        4. The resulting limvals/qp are validated.

    Args:
        hailo_nn: the model graph in hn representation.
        layer_params: mapping of param keys to values; updated in place. May be a plain ``dict`` or a
            container exposing ``remove(key)``.
        debug_precise_mode: forwarded to the softmax params update.

    """
    for input_layer in hailo_nn.get_all_input_layers():
        _update_input_layer_qp(hailo_nn, layer_params, input_layer)

    output_ops = (LayerType.output_layer, LayerType.external_output_layer, LayerType.pp_output_layer)
    for hn_item in hailo_nn.stable_toposort():
        if hn_item.op not in output_ops:
            continue
        qp_out_name = get_param_key(hn_item.name, "qp_out")
        if qp_out_name not in layer_params:
            continue
        if hasattr(layer_params, "remove"):
            layer_params.remove(qp_out_name)
        else:
            # A plain dict (which is what quantize_model passes) has no remove() method.
            del layer_params[qp_out_name]

    for hn_item in hailo_nn.stable_toposort():
        src_qp_out_name = get_param_key(hn_item.name, "qp_out")
        src_limvals_out_name = get_param_key(hn_item.name, "limvals_out")
        if hailo_nn.get_layer_by_name(hn_item.name).op == LayerType.softmax:
            _update_softmax_layer_params(hailo_nn, layer_params, hn_item, debug_precise_mode)
        if src_qp_out_name not in layer_params:
            # Nothing to propagate from this layer.
            continue
        src_qp_out = layer_params[src_qp_out_name]
        src_limvals_out = layer_params[src_limvals_out_name]
        for successor in hailo_nn.successors(hn_item):
            update_layer_qp(layer_params, successor.name, src_qp_out, src_limvals_out)

    _validate_limvals_and_qp(hailo_nn, layer_params)


def quantize_model(
    hw_arch,
    params,
    conv_layers_inference,
    hailo_nn,
    previous_layer_params,
    debug_precise_mode,
    is_apu_2s_complement=False,
    max_elementwise_feed_repeat=MAX_NUM_REPEATS_ELTWISE,
    retry_attempt_index=None,
):
    """
    This function estimates scales for quantized model

    Args:
        hw_arch: hw arch.
        params: model params after batch normalization, ready to be calibrated.
        conv_layers_inference: dictionary of inference results per each conv layer.
        hailo_nn: the model graph in hn representation.
        previous_layer_params: previous layer statistics used for scale matching.
        debug_precise_mode: run model calibration in a higher precision for emulator code debug.
        is_apu_2s_complement: Flag that toggles B0 architecture specific quantization behaviour;
            forwarded to every layer quantization.
        max_elementwise_feed_repeat: Max value of elementwise feed repeat, used for calculating the
            quantized representation of biases and elementwise-add.
        retry_attempt_index: Lookup dictionary, that maintains retry attempts per layer, in case of negative
            slopes in piecewise activation calculator fails. When omitted it is created with a zero
            counter per layer of ``conv_layers_inference``; the same dictionary is passed on to the
            recursive retries.

    Returns:
        rescaled params + scales, ready to be loaded by the model simulator.

    Raises:
        BackendQuantizationException: when a conv&add layer requests more than one quantization group,
            when an offsets overflow or a negative slope is beyond the allowed limit, when a layer ran
            out of retry attempts, or when the layer quantization itself fails.

    # TODO: support first layers + net without conv-like
    # TODO: SDK-7509 - get supported shifts, beta, and mantissa_bits from numerical package

    """
    layer_params = {}
    config_quantization = dict(get_parsed_config_from_path()).get("quantization", {})
    if retry_attempt_index is None and conv_layers_inference is not None:
        retry_attempt_index = {name: 0 for name in conv_layers_inference}

    def _quantize(layer_quantization, quantization_groups_num, signed_output):
        # Run a single layer quantization with the model-wide settings.
        return layer_quantization.quantize(
            params,
            is_apu_2s_complement=is_apu_2s_complement,
            max_elementwise_feed_repeat=max_elementwise_feed_repeat,
            quantization_groups_num=quantization_groups_num,
            signed_output=signed_output,
        )

    def _retry():
        # Restart the whole model quantization, keeping the per-layer retry counters.
        return quantize_model(
            hw_arch,
            params,
            conv_layers_inference,
            hailo_nn,
            previous_layer_params,
            debug_precise_mode,
            is_apu_2s_complement=is_apu_2s_complement,
            max_elementwise_feed_repeat=max_elementwise_feed_repeat,
            retry_attempt_index=retry_attempt_index,
        )

    if not debug_precise_mode:
        scale_matched_conv_layers_inference = _prepare_layers_matched_minmax_values(
            conv_layers_inference,
            previous_layer_params,
        )
        _remove_dummy_layers(scale_matched_conv_layers_inference)
        layer_quantization_dict = {}
        for name, inp in scale_matched_conv_layers_inference.items():
            layer = hailo_nn.get_layer_by_name(name)
            inter_layer_precision_mode = InterLayerPrecisionMode.from_hailo_nn(hailo_nn, layer.name)
            layer_quantization = LayerQuantization(name, inp, inter_layer_precision_mode, config_quantization, hw_arch)
            quantization_groups_num = layer.precision_config.quantization_groups
            if quantization_groups_num is None:
                quantization_groups_num = 1
            layer_quantization_dict[layer.name] = layer_quantization

            if ("elementwise_name" in inp) and quantization_groups_num > 1:
                raise BackendQuantizationException(f"Conv&add layer {name} does not support quantization groups")

            if retry_attempt_index[name] > MAX_RETRIES_NEGATIVE_SLOPES:
                raise BackendQuantizationException(
                    "Quantization failed after multiple attempts to recover from piecewise calculation errors, "
                    f"at layer {layer_quantization!r}",
                )

            try:
                layer_params.update(
                    _quantize(layer_quantization, quantization_groups_num, layer.precision_config.signed_output),
                )
            except BackendOffsetsException as offsets_overflow:
                if offsets_overflow.bit_loss <= MAX_ALLOWED_OVERFLOW_OFFSETS:
                    # Recursive retry step in order to handle offsets overflow. Should be very rare.
                    # TODO we should fix the retry attempts and check degradation
                    # NOTE(review): unlike the negative-slopes path, this retry does not increment
                    # retry_attempt_index, so it is not bounded by MAX_RETRIES_NEGATIVE_SLOPES.
                    default_logger().debug(offsets_overflow.client_message)
                    _handle_piecewise_overflow_offsets(
                        conv_layers_inference,
                        name,
                        offsets_overflow.output_scale_factor,
                    )
                    default_logger().debug(f"Retry quantization after params update to layer {name}.")
                    return _retry()
                raise BackendQuantizationException(
                    f"Quantization failed in layer due to unsupported offset "
                    f"which cause to more than {MAX_ALLOWED_OVERFLOW_OFFSETS} bits loss",
                )
            except BackendNegativeSlopesException as neg_slopes_error:
                if neg_slopes_error.abs_min_slope < inter_layer_precision_mode.max_allowed_negative_slope:
                    # Recursive retry step in order to handle negative slopes. Should be very rare.
                    default_logger().debug(neg_slopes_error.client_message)
                    _handle_piecewise_negative_slopes(conv_layers_inference, name, neg_slopes_error.abs_min_slope)
                    default_logger().debug(f"Retry quantization after params update to layer {name}.")
                    retry_attempt_index[name] += 1
                    return _retry()
                raise BackendQuantizationException(
                    f"Quantization failed in layer due to unsupported required slope -{neg_slopes_error.abs_min_slope} "
                    f"where the minimum allowed is -{inter_layer_precision_mode.max_allowed_negative_slope} "
                    f"at layer {layer_quantization!r}. "
                    f"This error raises when the data or weight range is not balanced. Mostly happens when using random data, "
                    f"the input is not normalized properly or there is no batchnorm in the network.",
                )
            except BackendQuantizationException as e:
                # Re-raise with the failing layer appended, keeping the original error as the cause.
                raise BackendQuantizationException(f"{e.client_message} {layer_quantization!r}") from e
            except MatmulShiftDeltaException as e:
                default_logger().info(f"Retry quantization on layer {name} after params update.")
                shift_delta_factor = 2**e.shift_delta
                weights_input_layer_name = layer.inputs[1]
                weights_input_layer = hailo_nn.get_layer_by_name(weights_input_layer_name)
                weights_input_quantization = layer_quantization_dict[weights_input_layer_name]

                new_min = layer_quantization.inp["stats_min_weights_input_value"] * shift_delta_factor
                new_max = layer_quantization.inp["stats_max_weights_input_value"] * shift_delta_factor
                new_limvals = tuple(
                    [v * shift_delta_factor for v in layer_quantization.inp["weights_input_minmax"]],
                )

                # Apply the rescaled range both to the output stats of the layer feeding the weights
                # input and to the weights-input stats of the current layer.
                weights_input_quantization.inp["stats_min_output_value"] = new_min
                weights_input_quantization.inp["stats_max_output_value"] = new_max
                weights_input_quantization.inp["output_minmax"] = new_limvals
                layer_quantization.inp["stats_min_weights_input_value"] = new_min
                layer_quantization.inp["stats_max_weights_input_value"] = new_max
                layer_quantization.inp["weights_input_minmax"] = new_limvals

                # Re-quantize the feeding layer first, then the current layer, with the updated stats.
                # NOTE(review): the feeding layer is re-quantized with the current layer's
                # quantization_groups_num - confirm this is intended.
                layer_params.update(
                    _quantize(
                        weights_input_quantization,
                        quantization_groups_num,
                        weights_input_layer.precision_config.signed_output,
                    ),
                )
                layer_params.update(
                    _quantize(layer_quantization, quantization_groups_num, layer.precision_config.signed_output),
                )

    else:
        _remove_dummy_layers(conv_layers_inference)
        for name, inp in conv_layers_inference.items():
            layer_params.update(_precise_quantize_layer(name, params, inp))

    # NOTE(review): debug_precise_mode is not forwarded here, so fill_layers_qp always runs with its
    # default (False) - confirm this is intended.
    fill_layers_qp(hailo_nn, layer_params)
    _update_maxpool_padding_const_value(hailo_nn, layer_params, params)
    return layer_params


def _update_maxpool_padding_const_value(hailo_nn, layer_params, native_params):
    """
    Quantize the constant padding value of every maxpool layer and store it in ``layer_params``.

    A maxpool layer has no scales of its own and depends on the scale matching, so its padding
    constant cannot be quantized as part of the layer quantization itself. Instead, once the layer's
    ``qp_in`` is known, the native value is divided by the scale, shifted by the zero point, clipped
    from below at 0, rounded and stored as float32 under ``<layer name>/padding_const_value:0``.

    Args:
        hailo_nn: the model graph in hn representation.
        layer_params: quantized params; read for ``qp_in`` and updated in place with the result.
        native_params: native params; ``DEFAULT_PADDING_NEG_INF_VALUE`` is used when a layer has no
            padding constant there.

    """
    pad_const_key = "padding_const_value"
    maxpool_layers = (layer for layer in hailo_nn.stable_toposort() if layer.op == LayerType.maxpool)
    for maxpool in maxpool_layers:
        padding_key = f"{maxpool.name}/{pad_const_key}:0"
        native_value = native_params.get(padding_key, DEFAULT_PADDING_NEG_INF_VALUE)
        zero_point, scale = layer_params[f"{maxpool.name}/qp_in:0"]
        shifted = native_value / scale + zero_point
        # TODO:clip if the value is greater than max bit-width, it should be handled in the compiler - SDK-29099
        non_negative = np.maximum(shifted, 0)
        layer_params[padding_key] = np.round(non_negative).astype(np.float32)


def print_stats(params):
    """
    Print the quantization statistics found in ``params`` as an aligned, name-sorted table.

    A param is reported when its key contains one of the known statistic names. The part of the key
    after the first ``":"`` is dropped; if several keys collapse to the same name, the last one in
    iteration order wins. Values are printed rounded to 5 decimals, with a leading blank for
    non-negative values so that they line up with negative ones.

    Args:
        params: mapping of param keys to numeric values. Each reported value is compared with ``0``,
            so it has to be a scalar (or a single-element array).

    """
    stats = (
        "output_stage/output_factor",
        "bias_factor",
        "scale_kernel",
        "scale_bias",
        "bias_feed_repeat",
        "zero_point_in",
        "elementwise_addition/input_factor",
        "elementwise_addition/zero_point",
        "elementwise_addition/feed_repeat",
        "output_stage/zp_apu_compensation",
        "negative_slopes_correction_factor",
    )

    result = {}
    for key, value in params.items():
        if any(stat in key for stat in stats):
            result[key.split(":")[0]] = value
    result = OrderedDict(sorted(result.items()))

    # The column width is the same for every row, so compute it once; default=0 covers the case
    # where nothing matched and there is nothing to print.
    name_width = max((len(name) for name in result), default=0)
    for name, value in result.items():
        extra_space = "" if value < 0 else " "
        print(name, " " * (name_width - len(name)), extra_space, np.around(value, decimals=5))
