-- Filename: multiply_struct.vhd
-- Created by HDL-SCHEM-Editor at Fri May 16 09:23:34 2025
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
architecture struct of multiply is
    -- Returns the number of multiplier bits which are processed in one period.
    -- With g_latency=0 all multiplier bits except bit 0 are processed at once,
    -- otherwise these g_multiplier_width-1 bits are distributed over g_latency
    -- periods (rounded up).
    function calculate_multiplier_bits_per_period return natural is
        variable v_bits_per_period : natural;
    begin
        if g_latency/=0 then
            v_bits_per_period := integer(ceil(real(g_multiplier_width-1)/real(g_latency)));
        else
            v_bits_per_period := g_multiplier_width-1;
        end if;
        return v_bits_per_period;
    end function;
    constant c_multiplier_bits_per_period : natural := calculate_multiplier_bits_per_period;
    -- Returns the width of the internally used multiplier.
    -- With g_latency=0 this is g_multiplier_width, otherwise it is one bit
    -- (bit 0) plus c_multiplier_bits_per_period bits for each of the g_latency periods.
    function calculate_multiplier_width_internal return natural is
        variable v_width : natural;
    begin
        if g_latency/=0 then
            v_width := c_multiplier_bits_per_period*g_latency + 1;
        else
            v_width := g_multiplier_width;
        end if;
        return v_width;
    end function;
    constant c_multiplier_width_internal : natural := calculate_multiplier_width_internal;
    -- Returns the number of periods used for one multiplication:
    -- g_latency, but at least 1 (the combinatorial case g_latency=0 counts as one period).
    function calculate_number_of_periods return natural is
        variable v_periods : natural;
    begin
        v_periods := g_latency;
        if v_periods=0 then
            v_periods := 1;
        end if;
        return v_periods;
    end function;
    constant c_number_of_periods : natural := calculate_number_of_periods;

    -- Each partial product is one bit wider than the multiplicand.
    type t_partial_product is array (natural range <>) of signed(g_multiplicand_width downto 0);
    -- Each shift register has one bit per period.
    type t_shift_registers is array (natural range <>) of std_logic_vector(c_number_of_periods-1 downto 0);
    -- One flag per multiply_step instance; only the highest flag follows last_step, all others are '0'.
    signal last_multiplier_bit                : std_logic_vector(c_multiplier_bits_per_period-1 downto 0);
    -- Outputs of multiply_control_inst: last_step, ready, reg_enable.
    signal last_step                          : std_logic;
    -- Bit 0 of each shift register.
    signal multiplier_bits_from_shiftregister : signed(c_multiplier_bits_per_period-1 downto 0);
    -- Multiplier bits which are fed into the multiply_step instances.
    signal multiplier_bits_used               : signed(c_multiplier_bits_per_period-1 downto 0);
    -- multiplier_i resized to c_multiplier_width_internal bits.
    signal multiplier_int                     : signed(c_multiplier_width_internal-1 downto 0);
    -- partial_product(0) is the input of the first multiply_step instance,
    -- partial_product(i+1) is the output of multiply_step instance i.
    signal partial_product                    : t_partial_product(c_multiplier_bits_per_period downto 0);
    -- Sign extended multiplicand if multiplier_i(0)='1', otherwise 0.
    signal partial_product_0                  : signed(g_multiplicand_width downto 0);
    -- Bit 0 of partial_product(0); it is used as bit 0 of the product.
    signal partial_product_0_0_stored         : std_logic;
    -- Upper bits (all except bit 0) of the last partial product; they are the upper part of product_all.
    signal partial_product_stored             : signed(g_multiplicand_width-1 downto 0);
    -- partial_product_stored concatenated with product_low_part.
    signal product_all                        : signed(g_multiplicand_width+c_multiplier_width_internal-1 downto 0);
    -- Lower product bits, assembled from partial_product_0_0_stored and the shift register contents.
    signal product_low_part                   : signed(c_multiplier_width_internal-1 downto 0);
    signal ready                              : std_logic;
    signal reg_enable                         : std_logic;
    -- One shift register per multiply_step instance.
    signal shift_registers                    : t_shift_registers(c_multiplier_bits_per_period-1 downto 0);
    -- Component declaration of the control module which is instantiated as multiply_control_inst.
    -- NOTE(review): The behaviour of the ports is defined by the entity multiply_control, which
    -- is not visible in this file. The port names suggest a counter-based sequencer (start pulse
    -- in, register enable/last step/ready flags out) - confirm against the entity.
    component multiply_control is
        generic (
            g_counter_max : natural := 8
        );
        port (
            clk_i        : in  std_logic;
            res_i        : in  std_logic;
            start_i      : in  std_logic;
            last_step_o  : out std_logic;
            ready_o      : out std_logic;
            reg_enable_o : out std_logic
        );
    end component;
    -- Component declaration of the module which handles a single multiplier bit.
    -- It gets one multiplier bit, a flag which marks the sign bit of the multiplier,
    -- the multiplicand and a partial product of g_multiplicand_width bits and
    -- delivers a new partial product which is one bit wider.
    -- NOTE(review): The arithmetic itself is defined by the entity multiply_step, which
    -- is not visible in this file.
    component multiply_step is
        generic (
            g_multiplicand_width : natural := 8
        );
        port (
            factor_bit_i          : in  std_logic;
            last_multiplier_bit_i : in  std_logic;
            multiplicand_i        : in  signed(g_multiplicand_width-1 downto 0);
            partial_product_i     : in  signed(g_multiplicand_width-1 downto 0);
            partial_product_o     : out signed(g_multiplicand_width downto 0)
        );
    end component;
begin
    -- Control module which creates last_step, ready and reg_enable from clk_i, res_i and start_i.
    -- Its generic g_counter_max is set to the number of periods minus 1
    -- (this is 0 for g_latency=0 and g_latency=1).
    multiply_control_inst : multiply_control
        generic map (
            g_counter_max => c_number_of_periods-1
        )
        port map (
            clk_i        => clk_i,
            res_i        => res_i,
            start_i      => start_i,
            last_step_o  => last_step,
            ready_o      => ready,
            reg_enable_o => reg_enable
        );
    -- During multiplication the multiplier is treated as if it were always a positive number.
    -- If the multiplier is indeed positive, everything is correct, because the sign of the
    -- multiplicand is handled correctly in every addition. The sign bit of a positive multiplier
    -- has the value 0, so nothing is added in the step in which this sign bit is "multiplied"
    -- with the multiplicand.
    -- If the multiplier is negative, treating it as a positive number does not give the correct
    -- result. Because a negative number N is represented in 2's complement as 2**n+N (n = number
    -- of bits the 2's complement representation uses), the product would be:
    -- product = multiplicand * (2**n+N) = multiplicand*2**n + multiplicand*N
    -- This means the calculated product is too big by multiplicand*2**n.
    -- But this can be fixed easily during multiplication:
    -- When the multiplication reaches the sign bit of the multiplier and it has the value 1, then
    -- multiplicand*2**n must be subtracted from the result.
    -- So the module multiply_step must know whether the multiplier bit which is handled now is
    -- the sign bit of the multiplier.
    -- This information is created here:
    -- Only the highest flag of last_multiplier_bit follows last_step,
    -- all lower flags are constantly '0'.
    process (last_step)
        variable v_flags : std_logic_vector(c_multiplier_bits_per_period-1 downto 0);
    begin
        v_flags := (others => '0');
        v_flags(c_multiplier_bits_per_period-1) := last_step;
        last_multiplier_bit <= v_flags;
    end process;
    -- The internal multiplier has c_multiplier_width_internal bits. This can be more than
    -- g_multiplier_width bits (when g_multiplier_width-1 is not an integer multiple of
    -- g_latency); in this case resize fills the additional upper bits by sign extension:
    multiplier_int <= resize(multiplier_i, c_multiplier_width_internal);
    -- Selection of the multiplier bits which are handled by the multiply_step instances:
    -- While start_i is active (and always for g_latency=0 or g_latency=1) the bits above
    -- bit 0 of the input multiplier_i are used, in all other cases the bits which are
    -- delivered by the shift registers are used.
    multiplier_bits_used <= multiplier_i(c_multiplier_bits_per_period downto 1)
                                when start_i='1' or g_latency=0 or g_latency=1 else
                            multiplier_bits_from_shiftregister;
    -- The first partial product is created by "multiplying" the multiplicand with bit 0
    -- of the multiplier: It is the multiplicand extended by a copy of its highest bit
    -- or it is 0.
    process (multiplicand_i, multiplier_i)
    begin
        if multiplier_i(0)='1' then
            partial_product_0 <= multiplicand_i(multiplicand_i'high) & multiplicand_i;
        else
            partial_product_0 <= (others => '0');
        end if;
    end process;
    -- The chain of multiply_step instances starts with the first partial product while
    -- start_i is active (and always for g_latency=0 or g_latency=1), otherwise it
    -- continues with the stored partial product.
    process (start_i, partial_product_0, partial_product_stored)
    begin
        if start_i='1' or g_latency=0 or g_latency=1 then
            partial_product(0) <= partial_product_0;
        else
            partial_product(0) <= partial_product_stored & '0'; -- The LSB is not used.
        end if;
    end process;
    -- Chain of c_multiplier_bits_per_period multiply_step instances.
    -- Instance i handles bit i of multiplier_bits_used. It gets the upper
    -- g_multiplicand_width bits of partial_product(i) (bit 0 is not passed on)
    -- and delivers partial_product(i+1).
    -- Only the last instance of the chain can get an active last_multiplier_bit flag.
    multiply_step_g: for i in 0 to c_multiplier_bits_per_period-1 generate
        multiply_step_inst : multiply_step
            generic map (
                g_multiplicand_width => g_multiplicand_width
            )
            port map (
                factor_bit_i          => multiplier_bits_used(i),
                last_multiplier_bit_i => last_multiplier_bit(i),
                multiplicand_i        => multiplicand_i,
                partial_product_i     => partial_product(i)(g_multiplicand_width downto 1),
                partial_product_o     => partial_product(i+1)
            );
    end generate multiply_step_g;
    -- Registers for all configurations with g_latency/=0.
    -- The registers are reset asynchronously by res_i='1' and are clocked by the rising edge of clk_i.
    register_g: if g_latency/=0 generate
        process(res_i, clk_i)
        begin
            if res_i='1' then
                partial_product_0_0_stored <= '0';
                partial_product_stored   <= (others => '0');
                shift_registers          <= (others => (others => '0'));
            elsif rising_edge(clk_i) then
                if reg_enable='1' then
                    -- Store the upper bits of the last partial product of the chain:
                    partial_product_stored <= partial_product(c_multiplier_bits_per_period)(g_multiplicand_width downto 1);
                    -- Shift each shift register by one bit to the right and fill in bit 0 of the
                    -- partial product of the corresponding multiply_step instance at the highest bit:
                    for i in 0 to c_multiplier_bits_per_period-1 loop
                        shift_registers(i) <= partial_product(i+1)(0) & shift_registers(i)(c_number_of_periods-1 downto 1);
                    end loop;
                end if;
                -- The assignments in the start_i branch are placed after the shift operation,
                -- so for the bits assigned here they replace the shifted values.
                -- NOTE(review): Presumably reg_enable is also active when start_i is active, so that
                -- the highest bit of each shift register is loaded by the shift operation at the
                -- same clock edge - confirm against multiply_control.
                if start_i='1' then
                    partial_product_0_0_stored <= partial_product(0)(0);
                    -- In the first step fill in all the multiplier bits at the lower bits of shift_registers:
                    -- Bit m+1 of multiplier_int is stored in shift register (m mod c_multiplier_bits_per_period)
                    -- at bit position (m/c_multiplier_bits_per_period-1).
                    -- For g_latency=1 the range of this loop is empty.
                    for m in c_multiplier_bits_per_period to c_multiplier_width_internal-2 loop
                        shift_registers(m mod c_multiplier_bits_per_period)(m/c_multiplier_bits_per_period-1) <= multiplier_int(m+1);
                    end loop;
                end if;
            end if;
        end process;
        -- The ready information is provided by multiply_control_inst:
        ready_o <= ready;
    end generate register_g;
    -- Configuration without any register (g_latency=0):
    -- The signals which are registers in the other configurations are driven
    -- combinatorially here, and ready_o is a copy of start_i.
    combinatoric_g: if g_latency=0 generate
        ready_o                    <= start_i;
        partial_product_0_0_stored <= partial_product(0)(0);
        partial_product_stored     <= partial_product(c_multiplier_bits_per_period)(g_multiplicand_width downto 1);
        -- Each "shift register" has only 1 bit here, which is bit 0 of the partial
        -- product of the corresponding multiply_step instance:
        process(partial_product)
        begin
            for step_number in shift_registers'range loop
                shift_registers(step_number)(0) <= partial_product(step_number+1)(0);
            end loop;
        end process;
    end generate combinatoric_g;
    -- Bit 0 of each shift register is collected into multiplier_bits_from_shiftregister:
    process (shift_registers)
        variable v_lowest_bits : signed(c_multiplier_bits_per_period-1 downto 0);
    begin
        for register_number in v_lowest_bits'range loop
            v_lowest_bits(register_number) := shift_registers(register_number)(0);
        end loop;
        multiplier_bits_from_shiftregister <= v_lowest_bits;
    end process;
    -- The lower part of the product is assembled here:
    -- Bit 0 is the stored bit 0 of the first partial product. All other bits are read
    -- from the shift registers: Bit p of shift register r is the product bit with the
    -- index p*c_multiplier_bits_per_period + r + 1.
    process (shift_registers, partial_product_0_0_stored)
    begin
        product_low_part(0) <= partial_product_0_0_stored;
        for r in 0 to c_multiplier_bits_per_period-1 loop
            for p in 0 to c_number_of_periods-1 loop
                product_low_part(p*c_multiplier_bits_per_period + r + 1) <= shift_registers(r)(p);
            end loop;
        end loop;
    end process;
    -- The complete internal product consists of the stored upper bits of the last
    -- partial product and of the lower product bits:
    product_all <= partial_product_stored & product_low_part;
    -- Only the lower g_multiplicand_width+g_multiplier_width bits are put out. When the
    -- internal multiplier is wider than g_multiplier_width, the additional upper bits
    -- of product_all are dropped here.
    product_o <= product_all(g_multiplicand_width+g_multiplier_width-1 downto 0);
end architecture;
