-- Filename: multiply_sd_struct.vhd
-- Created by HDL-SCHEM-Editor at Fri Jun  6 15:58:03 2025
library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;
use work.multiply_sd_package.all;
architecture struct of multiply_sd is
    -- Extends the multiplicand width to the next even number that is strictly
    -- larger than g_multiplicand_width:
    --   even g_multiplicand_width -> g_multiplicand_width + 2
    --   odd  g_multiplicand_width -> g_multiplicand_width + 1
    function calculate_multiplicand_width_even return natural is
        variable width_even : natural;
    begin
        if (g_multiplicand_width rem 2) /= 0 then
            width_even := g_multiplicand_width + 1;
        else
            width_even := g_multiplicand_width + 2;
        end if;
        return width_even;
    end function;
    constant c_multiplicand_width_even: natural := calculate_multiplicand_width_even;

    -- Rounds the multiplier width up to the next odd number; an already odd
    -- g_multiplier_width is returned unchanged.
    function calculate_multiplier_width_odd return natural is
        variable width_odd : natural;
    begin
        width_odd := g_multiplier_width;
        if (width_odd rem 2) = 0 then
            width_odd := width_odd + 1;
        end if;
        return width_odd;
    end function;
    constant c_multiplier_width_odd: natural := calculate_multiplier_width_odd;

    -- Number of 2-bit multiplier digits (= number of multiply_sd_step instances)
    -- that are processed in one period.
    --   g_latency_mul = 0 : all remaining digits are handled at once.
    --   g_latency_mul > 0 : the remaining digits are spread over g_latency_mul
    --                       periods, rounded up.
    function calculate_multiplier_digits_per_period return natural is
        -- (c_multiplier_width_odd+1)/2 digits exist in total; "-1" because the lowest
        -- digit (multiplier bits 1 downto 0) is consumed by partial_product_0 and
        -- does not need a step of its own:
        constant c_remaining_digits : natural := (c_multiplier_width_odd+1)/2-1;
        variable digits_per_period  : natural;
    begin
        if g_latency_mul /= 0 then
            digits_per_period := integer(ceil(real(c_remaining_digits)/real(g_latency_mul)));
        else
            digits_per_period := c_remaining_digits;
        end if;
        return digits_per_period;
    end function;
    constant c_multiplier_digits_per_period : natural := calculate_multiplier_digits_per_period;

    -- Bit width of the internally used (even sized) multiplier.
    --   g_latency_mul = 0 : one bit more than the odd width.
    --   g_latency_mul > 0 : two bits for every digit of every period, plus two bits
    --                       ("+1" digit) for the lowest digit, which is handled
    --                       outside the periodic step chain.
    function calculate_multiplier_width_even return natural is
        variable width_even : natural;
    begin
        if g_latency_mul /= 0 then
            width_even := 2*(c_multiplier_digits_per_period*g_latency_mul+1);
        else
            width_even := c_multiplier_width_odd+1;
        end if;
        return width_even;
    end function;
    constant c_multiplier_width_even : natural := calculate_multiplier_width_even;

    -- Number of periods used for the multiplication: g_latency_mul, but at least 1
    -- (the purely combinatorial case g_latency_mul=0 counts as a single period).
    function calculate_number_of_periods return natural is
        variable periods : natural;
    begin
        periods := 1;
        if g_latency_mul /= 0 then
            periods := g_latency_mul;
        end if;
        return periods;
    end function;
    constant c_number_of_periods : natural := calculate_number_of_periods;

    -- Number of SD digits which are forwarded to multiply_sd_convert: half of the product
    -- bit width (g_multiplicand_width + g_multiplier_width), rounded up. The converter
    -- returns 2 bits per digit (see product_wide).
    constant c_number_of_needed_sd_digits : natural := integer(ceil((real(g_multiplicand_width + g_multiplier_width))/2.0));

    -- One 2-bit multiplier digit per multiply_sd_step instance:
    type t_multiplier_bits_used is array (natural range <>) of unsigned(1 downto 0);
    -- One SD number per stage of the step chain (input of step 0 up to output of the last step):
    type t_partial_product      is array (natural range <>) of t_sd_number(c_multiplicand_width_even/2+1 downto 0);
    -- One shift register (c_number_of_periods digits deep) per multiply_sd_step instance:
    type t_shift_registers      is array (natural range <>) of t_sd_number(c_number_of_periods-1 downto 0);
    -- Only the highest element carries last_step, all other elements are '0':
    signal last_multiplier_digit              : std_logic_vector(c_multiplier_digits_per_period-1 downto 0);
    signal last_step                          : std_logic;
    -- Outputs of multiply_sd_prepare; selected by the multiplier bits "01", "10", "11":
    signal multiplicand_sd_1                  : t_sd_number(c_multiplicand_width_even/2+1 downto 0);
    signal multiplicand_sd_2                  : t_sd_number(c_multiplicand_width_even/2+1 downto 0);
    signal multiplicand_sd_3                  : t_sd_number(c_multiplicand_width_even/2+1 downto 0);
    -- Multiplier digits read back from element 0 of each shift register:
    signal multiplier_bits_from_shiftregister : t_multiplier_bits_used(c_multiplier_digits_per_period-1 downto 0);
    -- Multiplier digits which are actually connected to the multiply_sd_step instances:
    signal multiplier_bits_used               : t_multiplier_bits_used(c_multiplier_digits_per_period-1 downto 0);
    signal multiplier_int                     : unsigned(c_multiplier_width_even-1 downto 0);
    signal multiplier_odd                     : signed(c_multiplier_width_odd-1 downto 0);
    signal partial_product                    : t_partial_product(c_multiplier_digits_per_period downto 0);
    -- First partial product, selected by multiplier_i(1 downto 0):
    signal partial_product_0                  : t_sd_number(c_multiplicand_width_even/2+1 downto 0);
    -- Digit 0 of partial_product_0; becomes digit 0 of the product:
    signal partial_product_0_0_stored         : signed(2 downto 0);
    -- Output of the last step without its least significant digit:
    signal partial_product_stored             : t_sd_number(c_multiplicand_width_even/2 downto 0);
    signal product_low_part                   : t_sd_number(c_multiplier_width_even/2-1 downto 0);
    signal product_sd                         : t_sd_number(c_number_of_needed_sd_digits-1 downto 0);
    signal product_sd_all                     : t_sd_number(c_multiplicand_width_even/2 + c_multiplier_width_even/2 downto 0);
    signal product_wide                       : signed(2*c_number_of_needed_sd_digits-1 downto 0);
    -- Outputs of multiply_sd_control:
    signal ready                              : std_logic;
    -- Start signal for multiply_sd_convert:
    signal ready_mul                          : std_logic;
    signal reg_enable                         : std_logic;
    -- Hold the multiplier digits which are still to be processed and collect the
    -- least significant digits delivered by the multiply_sd_step instances:
    signal shift_registers                    : t_shift_registers(c_multiplier_digits_per_period-1 downto 0);
    -- Control module (implementation not part of this file).
    -- It is instantiated below with g_counter_max = c_number_of_periods-1 and delivers
    -- last_step_o, ready_o and reg_enable_o from clk_i, res_i and start_i.
    -- NOTE(review): presumably a period counter running up to g_counter_max - confirm
    -- against the entity multiply_sd_control.
    component multiply_sd_control is
        generic (
            g_counter_max : natural := 8
        );
        port (
            clk_i        : in  std_logic ;
            res_i        : in  std_logic ;
            start_i      : in  std_logic ;
            last_step_o  : out std_logic ;
            ready_o      : out std_logic ;
            reg_enable_o : out std_logic 
        );
    end component;
    -- One step of the multiplication (implementation not part of this file).
    -- Inputs are the three prepared multiplicand SD numbers, one 2-bit multiplier digit,
    -- the incoming partial product and a flag which marks the last multiplier digit.
    -- The outgoing partial product is one digit longer than the incoming one.
    component multiply_sd_step is
        generic (
            g_multiplicand_width_ext : natural := 8
        );
        port (
            last_multiplier_bit_i  : in  std_logic ;
            multiplicand_sd_1_i    : in  t_sd_number (g_multiplicand_width_ext/2+1 downto 0);
            multiplicand_sd_2_i    : in  t_sd_number (g_multiplicand_width_ext/2+1 downto 0);
            multiplicand_sd_3_i    : in  t_sd_number (g_multiplicand_width_ext/2+1 downto 0);
            multiplier_bits_used_i : in  unsigned (1 downto 0);
            partial_product_i      : in  t_sd_number (g_multiplicand_width_ext/2 downto 0);
            partial_product_o      : out t_sd_number (g_multiplicand_width_ext/2+1 downto 0)
        );
    end component;
    -- Converter from the SD product to a signed product (implementation not part of
    -- this file). product_o has 2 bits for each of the g_product_sd_width SD digits.
    -- NOTE(review): g_latency_convert presumably selects the number of clock cycles
    -- used by the conversion - confirm against the entity multiply_sd_convert.
    component multiply_sd_convert is
        generic (
            constant g_latency_convert        : natural :=  0;
            constant g_product_sd_width : natural := 4 
        );
        port (
            clk_i        : in  std_logic ;
            product_sd_i : in  t_sd_number (g_product_sd_width-1 downto 0);
            res_i        : in  std_logic ;
            start_i      : in  std_logic ;
            product_o    : out signed (2*g_product_sd_width-1 downto 0);
            ready_o      : out std_logic 
        );
    end component;
    -- Preparation module (implementation not part of this file): derives three SD
    -- numbers from the signed multiplicand. They are selected by the multiplier digits
    -- "01", "10" and "11" when the first partial product is calculated.
    -- NOTE(review): presumably the 1-, 2- and 3-fold multiplicand - confirm against
    -- the entity multiply_sd_prepare.
    component multiply_sd_prepare is
        generic (
            g_multiplicand_width     : natural := 8;
            g_multiplicand_width_ext : natural := 8 
        );
        port (
            multiplicand_i      : in  signed (g_multiplicand_width-1 downto 0);
            multiplicand_sd_1_o : out t_sd_number (g_multiplicand_width_ext/2+1 downto 0);
            multiplicand_sd_2_o : out t_sd_number (g_multiplicand_width_ext/2+1 downto 0);
            multiplicand_sd_3_o : out t_sd_number (g_multiplicand_width_ext/2+1 downto 0)
        );
    end component;
begin
    -- Control module: creates last_step, ready and reg_enable from start_i.
    -- g_counter_max is c_number_of_periods-1, i.e. 0 for g_latency_mul=0 and g_latency_mul=1.
    multiply_sd_control_inst : multiply_sd_control
        generic map (
            g_counter_max => c_number_of_periods-1
        )
        port map (
            clk_i        => clk_i,
            res_i        => res_i,
            start_i      => start_i,
            last_step_o  => last_step,
            ready_o      => ready,
            reg_enable_o => reg_enable
        );
    -- During the multiplication the multiplier is handled as if it were always a positive number.
    -- If the multiplier is indeed a positive number, everything is correct, because at all additions
    -- the sign of the multiplicand is handled in a correct way. The sign bit of a positive multiplier
    -- has the value 0, so nothing is added in the step in which this sign bit is "multiplied" with
    -- the multiplicand.
    -- If the multiplier is a negative number, then handling it as a positive number does not give
    -- the correct result. Because a negative number N is represented in 2's complement as 2**n+N
    -- (n = number of bits the 2's complement is using), the product would be:
    -- product = multiplicand * (2**n+N) = multiplicand*2**n + multiplicand*N
    -- This means the calculated product is too big by multiplicand*2**n.
    -- But this can be fixed easily during the multiplication:
    -- When the multiplication reaches the sign bit of the multiplier and it has the value 1, then
    -- multiplicand*2**n must be subtracted from the result.
    -- So the module multiply_sd_step must know whether the multiplier digit which is handled now
    -- contains the sign bit of the multiplier.
    -- This information is created here:
    -- Only the highest multiply_sd_step instance can see the last (sign) digit of the
    -- multiplier, so only the highest flag follows last_step; all others stay '0'.
    process (last_step)
        variable digit_flags : std_logic_vector(last_multiplier_digit'range);
    begin
        digit_flags                   := (others => '0');
        digit_flags(digit_flags'high) := last_step;
        last_multiplier_digit         <= digit_flags;
    end process;
    -- Create the three SD numbers multiplicand_sd_1/2/3 from the multiplicand. They are used
    -- for the first partial product and by all multiply_sd_step instances.
    multiply_sd_prepare_inst : multiply_sd_prepare
        generic map (
            g_multiplicand_width     => g_multiplicand_width,
            g_multiplicand_width_ext => c_multiplicand_width_even 
        )
        port map (
            multiplicand_i      => multiplicand_i,
            multiplicand_sd_1_o => multiplicand_sd_1,
            multiplicand_sd_2_o => multiplicand_sd_2,
            multiplicand_sd_3_o => multiplicand_sd_3
        );
    -- First partial product: the two least significant multiplier bits select one of
    -- the prepared multiplicand SD numbers. Every other bit pattern ("00" as well as
    -- patterns containing non-'0'/'1' values) gives an all-zero SD number.
    with multiplier_i(1 downto 0) select
        partial_product_0 <= multiplicand_sd_1 when "01",
                             multiplicand_sd_2 when "10",
                             multiplicand_sd_3 when "11",
                             (others => "000") when others;
    -- Make the number of bits odd (preparation for the most significant
    -- SD-digit, which shall contain the sign at its low bit):
    multiplier_odd <= resize(multiplier_i, c_multiplier_width_odd);
    -- Add additional bits to adapt to g_latency_mul (resize to c_multiplier_width_even-1 bits) and
    -- add a leading '0', so that the last 2-bit digit has the value '0' & sign-bit:
    multiplier_int <= unsigned('0' & resize(multiplier_odd, c_multiplier_width_even-1));
    -- Source of the multiplier digits for the multiply_sd_step instances:
    -- by default the digits come from the outputs of the shift registers. In the first
    -- step (start_i='1'), or when there is only a single period (g_latency_mul is 0 or 1),
    -- the digits are taken directly from the extended input multiplier, where step
    -- number n uses the bits 2*n+3 downto 2*n+2.
    process (start_i, multiplier_int, multiplier_bits_from_shiftregister)
        constant c_single_period : boolean := g_latency_mul=0 or g_latency_mul=1;
    begin
        multiplier_bits_used <= multiplier_bits_from_shiftregister;
        if c_single_period or start_i='1' then
            for step in multiplier_bits_used'range loop
                multiplier_bits_used(step) <= multiplier_int(2*step+3 downto 2*step+2);
            end loop;
        end if;
    end process;
    -- Input multiplexer of the step chain:
    -- In the first step (start_i='1'), or when there is only a single period
    -- (g_latency_mul is 0 or 1), the chain starts with the first partial product
    -- (without its least significant digit), otherwise with the stored result of the
    -- previous period.
    process (start_i, partial_product_0, partial_product_stored)
    begin
        if start_i='1' or g_latency_mul=0 or g_latency_mul=1 then
            partial_product(0)(c_multiplicand_width_even/2+1 downto 1) <=
                partial_product_0(c_multiplicand_width_even/2+1 downto 1);
        else
            partial_product(0)(c_multiplicand_width_even/2+1 downto 1) <= partial_product_stored;
        end if;
    end process;
    -- partial_product_stored no longer contains the least significant digit of the output of
    -- module multiply_sd_step. In order to define all digits of partial_product(0), digit 0 gets
    -- a dummy value. It is not used, because only the digits above digit 0 are connected to
    -- module multiply_sd_step.
    partial_product(0)(0) <= "000";
    -- Chain of c_multiplier_digits_per_period multiply_sd_step instances:
    -- Instance i gets the partial product of stage i without its least significant digit
    -- (digits c_multiplicand_width_even/2+1 downto 1) and delivers the partial product of
    -- stage i+1. Digit 0 of each stage output is picked up by the storage logic below.
    multiply_sd_step_g: for i in 0 to c_multiplier_digits_per_period-1 generate
        multiply_sd_step_inst : multiply_sd_step
            generic map (
                g_multiplicand_width_ext => c_multiplicand_width_even
            )
            port map (
                last_multiplier_bit_i  => last_multiplier_digit(i),
                multiplicand_sd_1_i    => multiplicand_sd_1,
                multiplicand_sd_2_i    => multiplicand_sd_2,
                multiplicand_sd_3_i    => multiplicand_sd_3,
                multiplier_bits_used_i => multiplier_bits_used(i),
                partial_product_i      => partial_product(i)(c_multiplicand_width_even/2+1 downto 1),
                partial_product_o      => partial_product(i+1)
            );
    end generate multiply_sd_step_g;
    -- Registered implementation (g_latency_mul /= 0):
    -- After each period the output of the last step is stored (without its least
    -- significant digit) and the least significant digit of each step output is shifted
    -- into the shift register of that step. At start the shift registers are additionally
    -- preloaded with the multiplier digits which are needed in the following periods.
    register_g: if g_latency_mul/=0 generate
        process(res_i, clk_i)
            variable lane : natural; -- shift register (= step instance) which gets the digit
            variable slot : natural; -- position of the digit inside this shift register
        begin
            if res_i='1' then
                -- Asynchronous reset of all storage elements:
                shift_registers            <= (others => (others => "000"));
                partial_product_stored     <= (others => "000");
                partial_product_0_0_stored <= "000";
            elsif rising_edge(clk_i) then
                if reg_enable='1' then
                    partial_product_stored <= partial_product(c_multiplier_digits_per_period)(c_multiplicand_width_even/2+1 downto 1);
                    for step in 0 to c_multiplier_digits_per_period-1 loop
                        -- Shift towards index 0 and insert the new digit at the top:
                        shift_registers(step)(c_number_of_periods-2 downto 0) <= shift_registers(step)(c_number_of_periods-1 downto 1);
                        shift_registers(step)(c_number_of_periods-1)          <= partial_product(step+1)(0);
                    end loop;
                end if;
                -- This branch must stay behind the reg_enable branch, because it overwrites
                -- the lower 2 bits of digits which were assigned by the shift above.
                -- (Design note of the original author: when start_i=1 then also reg_enable=1.)
                if start_i='1' then
                    partial_product_0_0_stored <= partial_product_0(0);
                    -- In the first step fill in all the multiplier bits at the lower bits of shift_registers.
                    -- The digits 0 to c_multiplier_digits_per_period-1 of the step chain are used
                    -- immediately and are therefore not stored:
                    for digit in c_multiplier_digits_per_period to c_multiplier_width_even/2-2 loop
                        lane := digit mod c_multiplier_digits_per_period;
                        slot := digit/c_multiplier_digits_per_period - 1;
                        shift_registers(lane)(slot)(0) <= multiplier_int(2*digit+2);
                        shift_registers(lane)(slot)(1) <= multiplier_int(2*digit+3);
                    end loop;
                end if;
            end if;
        end process;
        ready_mul <= ready;
    end generate register_g;
    -- Purely combinatorial implementation (g_latency_mul = 0):
    -- The "stored" signals are plain wires and the conversion is started directly by start_i.
    combinatoric_g: if g_latency_mul=0 generate
        partial_product_0_0_stored <= partial_product_0(0);
        partial_product_stored     <= partial_product(c_multiplier_digits_per_period)(c_multiplicand_width_even/2+1 downto 1);
        -- There is only one period, so each shift register consists of the single digit 0,
        -- which gets the least significant digit of the corresponding step output:
        low_digit_g: for step in 0 to c_multiplier_digits_per_period-1 generate
            shift_registers(step)(0) <= partial_product(step+1)(0);
        end generate low_digit_g;
        ready_mul <= start_i;
    end generate combinatoric_g;
    -- The multiplier digit for each step is found in the 2 lower bits of digit 0 of the
    -- shift register which belongs to this step:
    process (shift_registers)
    begin
        for step in multiplier_bits_from_shiftregister'range loop
            for bit_number in 0 to 1 loop
                multiplier_bits_from_shiftregister(step)(bit_number) <= shift_registers(step)(0)(bit_number);
            end loop;
        end loop;
    end process;
    -- Collect the low part of the product:
    -- Digit 0 is the least significant digit of the first partial product. The digits
    -- above it are read from the shift registers, where the digit stored at position
    -- "period" of shift register "step" becomes product digit
    -- c_multiplier_digits_per_period*period + step + 1.
    process (partial_product_0_0_stored, shift_registers)
        variable digit_position : natural;
    begin
        for step in 0 to c_multiplier_digits_per_period-1 loop
            for period in 0 to c_number_of_periods-1 loop
                digit_position := c_multiplier_digits_per_period*period + step + 1;
                product_low_part(digit_position) <= shift_registers(step)(period);
            end loop;
        end loop;
        product_low_part(0) <= partial_product_0_0_stored;
    end process;
    -- Assemble the complete SD product: the low part consists of the collected least
    -- significant digits (c_multiplier_width_even/2 digits), the high part is the output
    -- of the last step without its least significant digit.
    product_sd_all(c_multiplier_width_even/2-1 downto 0) <= product_low_part;
    product_sd_all(c_multiplicand_width_even/2 + c_multiplier_width_even/2 downto c_multiplier_width_even/2) <= partial_product_stored;
    -- Take only as many digits as needed:
    product_sd <= product_sd_all(c_number_of_needed_sd_digits-1 downto 0);
    -- Convert the SD product into a signed number. The conversion is started by ready_mul
    -- (ready of the control module for g_latency_mul/=0, start_i for g_latency_mul=0) and
    -- creates the ready signal of the complete multiplier.
    multiply_sd_convert_inst : multiply_sd_convert
        generic map (
            g_latency_convert  => g_latency_convert,
            g_product_sd_width => c_number_of_needed_sd_digits
        )
        port map (
            clk_i        => clk_i,
            product_sd_i => product_sd,
            res_i        => res_i,
            start_i      => ready_mul,
            product_o    => product_wide,
            ready_o      => ready_o
        );
    -- product_wide has 2*c_number_of_needed_sd_digits bits, which is one bit more than needed
    -- when g_multiplicand_width+g_multiplier_width is odd. Only the lower bits are used:
    product_o <= product_wide(g_multiplicand_width+g_multiplier_width-1 downto 0);
end architecture;
