-- Filename: multiply_wt_booth_struct.vhd
-- Created by HDL-SCHEM-Editor at Wed Jul 16 10:55:04 2025
library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;
use work.multiply_wt_booth_package.all;
architecture struct of multiply_wt_booth is
    -- Number of clock periods used for the reduction steps.
    -- g_latency_steps=0 is mapped to a single period, every other value is used unchanged.
    function calculate_number_of_periods return positive is
        variable periods : positive := 1;
    begin
        if g_latency_steps/=0 then
            periods := g_latency_steps;
        end if;
        return periods;
    end function;
    constant c_number_of_periods : positive := calculate_number_of_periods;

    -- Number of elements of the radix-4 representation of the multiplier:
    -- half of g_multiplier_width, rounded up when g_multiplier_width is odd.
    function calculate_multiplier_radix_4_width return positive is
    begin
        -- Integer division truncates: adding 1 first rounds odd widths up
        -- and leaves even widths at exactly g_multiplier_width/2.
        return (g_multiplier_width+1)/2;
    end function;
    constant c_multiplier_radix_4_width : positive := calculate_multiplier_radix_4_width;

    -- Counts the reduction steps needed to get from c_multiplier_radix_4_width+1 rows down to 2 rows.
    -- In each step every complete group of 3 rows is replaced by 2 rows, while the
    -- remaining 0, 1 or 2 rows are carried over unchanged.
    -- Returns 0 when there are only 2 rows right from the start (c_multiplier_radix_4_width=1).
    function calculate_number_of_steps return natural is
        variable number_of_rows  : positive range 2 to c_multiplier_radix_4_width+1 := c_multiplier_radix_4_width+1;
        variable number_of_steps : natural := 0;
    begin
        -- Deliberately no helper variable constrained to "1 to (c_multiplier_radix_4_width+1)/3":
        -- that subtype is the null range 1 to 0 in the 2-row case, so its implicit initial
        -- value would lie outside the subtype although the loop body is never executed.
        while number_of_rows/=2 loop
            number_of_rows  := 2*(number_of_rows/3) + number_of_rows mod 3;
            number_of_steps := number_of_steps + 1;
        end loop;
        return number_of_steps;
    end function;
    constant c_number_of_steps : natural := calculate_number_of_steps;

    -- Number of reduction steps which are executed within one clock period.
    -- With g_latency_steps=0 all c_number_of_steps steps are in one (combinatorial) pass,
    -- otherwise the steps are distributed over g_latency_steps periods, rounded up.
    function calculate_number_of_steps_per_clock_period return natural is
        variable steps_per_period : natural := c_number_of_steps;
    begin
        if g_latency_steps/=0 then
            steps_per_period := integer(ceil(real(c_number_of_steps)/real(g_latency_steps)));
        end if;
        return steps_per_period;
    end function;
    constant c_number_of_steps_per_clock_period : natural := calculate_number_of_steps_per_clock_period;

    -- Array of row counts; each element is constrained to 2 .. c_multiplier_radix_4_width+1.
    type t_positive_array is array (natural range <>) of positive range 2 to c_multiplier_radix_4_width+1;

    -- Returns the number of rows at each step of one clock period:
    -- index 0 holds the initial number of rows (c_multiplier_radix_4_width+1),
    -- index i holds the number of rows left after i reduction steps.
    function calculate_number_of_rows_for_each_step return t_positive_array is
        variable rows_at_step : t_positive_array(0 to c_number_of_steps_per_clock_period);
        variable rows_before  : positive range 2 to c_multiplier_radix_4_width+1;
    begin
        rows_at_step(0) := c_multiplier_radix_4_width+1;
        for step in 1 to rows_at_step'high loop
            rows_before        := rows_at_step(step-1);
            -- Each complete group of 3 rows becomes 2 rows, the rest is kept as it is:
            rows_at_step(step) := 2*(rows_before/3) + (rows_before mod 3);
        end loop;
        return rows_at_step;
    end function;
    constant c_number_of_rows : t_positive_array := calculate_number_of_rows_for_each_step;
    -- Number of rows left after the last step of a clock period:
    constant c_rows_to_store  : positive range 2 to c_multiplier_radix_4_width+1 := c_number_of_rows(c_number_of_steps_per_clock_period);

    -- Width of a single matrix row (identical to the width of product_o of the addition component):
    constant c_product_width : natural := g_multiplicand_width+g_multiplier_width;

    -- The element size of this type (number of rows and columns in type t_matrix) is determined by the partial_products:
    type t_matrix_array is array (natural range <>) of t_matrix(c_multiplier_radix_4_width downto 0)(c_product_width-1 downto 0);

    -- Control and handshake signals:
    signal ready              : std_logic;
    signal ready_mul          : std_logic;
    signal reg_enable         : std_logic;
    -- Data path signals:
    signal multiplier_radix_4 : t_signed3_array(c_multiplier_radix_4_width-1 downto 0);
    signal partial_products   : t_matrix(c_multiplier_radix_4_width downto 0)(c_product_width-1 downto 0);
    -- Element 0 is the input of the first step, element i is the output of step i-1:
    signal sum_matrix         : t_matrix_array(0 to c_number_of_steps_per_clock_period);
    -- Only the c_rows_to_store rows which are left after one clock period are kept:
    signal sum_matrix_stored  : t_matrix(c_rows_to_store-1 downto 0)(c_product_width-1 downto 0);
    -- Control module: its outputs drive the signals ready and reg_enable of this architecture.
    -- NOTE(review): the generic name suggests an internal counter running up to g_counter_max - confirm in the entity.
    component multiply_wt_booth_control is
        generic (
            g_counter_max : natural := 8 -- the instance below passes c_number_of_periods-1
        );
        port (
            clk_i        : in  std_logic ;
            res_i        : in  std_logic ;
            start_i      : in  std_logic ;
            ready_o      : out std_logic ; -- forwarded as start of the final addition when g_latency_steps/=0
            reg_enable_o : out std_logic   -- enables the register for sum_matrix_stored
        );
    end component;
    -- Final stage: takes the rows 0 and 1 of sum_matrix_stored and delivers product_o and ready_o.
    -- NOTE(review): presumably the two rows are added and g_latency_addition pipelines this addition - confirm in the entity.
    component multiply_wt_booth_addition is
        generic (
            constant g_multiplicand_width : natural := 16;
            constant g_multiplier_width   : natural := 16;
            constant g_latency_addition   : natural :=  0 
        );
        port (
            clk_i                 : in  std_logic ;
            res_i                 : in  std_logic ;
            start_i               : in  std_logic ; -- driven by ready_mul in this architecture
            sum_matrix_stored_0_i : in  signed (g_multiplicand_width+g_multiplier_width-1 downto 0);
            sum_matrix_stored_1_i : in  signed (g_multiplicand_width+g_multiplier_width-1 downto 0);
            product_o             : out signed (g_multiplicand_width+g_multiplier_width-1 downto 0);
            ready_o               : out std_logic 
        );
    end component;
    -- Creates the matrix of partial products from the multiplicand and the radix-4 multiplier.
    -- The output matrix has g_multiplier_radix_4_width+1 rows, each as wide as the product.
    component multiply_wt_booth_partial_products is
        generic (
            constant g_multiplicand_width       : natural := 16;
            constant g_multiplier_width         : natural := 6;
            constant g_multiplier_radix_4_width : natural := 3 
        );
        port (
            multiplicand_i       : in  signed (g_multiplicand_width-1 downto 0);
            multiplier_radix_4_i : in  t_signed3_array (g_multiplier_radix_4_width-1 downto 0);
            sign_of_multiplier_i : in  std_logic ; -- connected to the most significant bit of multiplier_i
            partial_products_o   : out t_matrix (g_multiplier_radix_4_width downto 0)(g_multiplicand_width+g_multiplier_width-1 downto 0)
        );
    end component;
    -- One reduction step: input and output matrix have the same (full) size,
    -- g_number_of_rows tells the step how many rows of its input are in use.
    -- NOTE(review): the row arithmetic in this architecture (3 rows become 2) suggests carry-save adders - confirm in the entity.
    component multiply_wt_booth_step is
        generic (
            constant g_multiplicand_width         : natural := 16;
            constant g_multiplier_width           : natural := 6;
            constant g_multiplier_radix_4_width   : natural := 3;
            constant g_number_of_rows             : natural := 4 -- must be g_multiplier_radix_4_width+1 at the first step
        );
        port (
            sum_matrix_i : in  t_matrix (g_multiplier_radix_4_width downto 0)(g_multiplicand_width+g_multiplier_width-1 downto 0);
            sum_matrix_o : out t_matrix (g_multiplier_radix_4_width downto 0)(g_multiplicand_width+g_multiplier_width-1 downto 0)
        );
    end component;
    -- Converts the signed multiplier into an array of g_multiplier_radix_4_width elements of type t_signed3_array.
    -- NOTE(review): presumably each element is a 3 bit signed radix-4 Booth digit - confirm in the package/entity.
    component multiply_wt_booth_convert is
        generic (
            constant g_multiplier_width         : positive := 32;
            constant g_multiplier_radix_4_width : positive := 16  -- (g_multiplier_width+1)/2 if g_multiplier_width is an odd number else g_multiplier_width/2
        );
        port (
            multiplier_i         : in  signed (g_multiplier_width-1 downto 0);
            multiplier_radix_4_o : out t_signed3_array (g_multiplier_radix_4_width-1 downto 0)
        );
    end component;
begin
    -- Conversion of multiplier_i into the radix-4 array multiplier_radix_4
    -- (c_multiplier_radix_4_width elements):
    multiply_wt_booth_convert_inst : multiply_wt_booth_convert
        generic map (
            g_multiplier_width         => g_multiplier_width,
            g_multiplier_radix_4_width => c_multiplier_radix_4_width
        )
        port map (
            multiplier_i         => multiplier_i,
            multiplier_radix_4_o => multiplier_radix_4
        );
    -- Generation of the partial_products matrix (c_multiplier_radix_4_width+1 rows)
    -- from multiplicand_i and the converted multiplier:
    multiply_wt_booth_partial_products_inst : multiply_wt_booth_partial_products
        generic map (
            g_multiplicand_width       => g_multiplicand_width,
            g_multiplier_width         => g_multiplier_width,
            g_multiplier_radix_4_width => c_multiplier_radix_4_width 
        )
        port map (
            multiplicand_i       => multiplicand_i,
            multiplier_radix_4_i => multiplier_radix_4,
            -- The most significant bit of the multiplier is its sign:
            sign_of_multiplier_i => multiplier_i(g_multiplier_width-1),
            partial_products_o   => partial_products
        );
    -- Control of the reduction steps: provides reg_enable for the register of
    -- sum_matrix_stored and ready, which is only used when g_latency_steps/=0.
    multiply_wt_booth_control_inst : multiply_wt_booth_control
        generic map (
            -- c_number_of_periods is at least 1, so this value is never negative:
            g_counter_max => c_number_of_periods-1
        )
        port map (
            clk_i        => clk_i,
            res_i        => res_i,
            start_i      => start_i,
            ready_o      => ready,
            reg_enable_o => reg_enable
        );
    -- Selects the input of the first reduction step:
    -- The partial products are used when a multiplication starts (start_i='1') or when
    -- g_latency_steps is 0 or 1, in all other cases the stored rows are fed back.
    process(start_i, partial_products, sum_matrix_stored)
    begin
        if start_i/='1' and g_latency_steps/=0 and g_latency_steps/=1 then
            -- Feed back only the filled rows:
            -- First all rows are cleared, then the lowest c_rows_to_store rows are
            -- overwritten (for these rows the later assignment wins).
            sum_matrix(0) <= (others => (others => '0'));
            sum_matrix(0)(c_rows_to_store-1 downto 0) <= sum_matrix_stored;
        else
            sum_matrix(0) <= partial_products;
        end if;
    end process;
    -- Chain of the reduction steps of one clock period:
    -- Step i reads sum_matrix(i), writes sum_matrix(i+1) and is configured with
    -- c_number_of_rows(i), the number of rows in use at its input.
    -- No step is generated when c_number_of_steps_per_clock_period=0.
    multiply_wt_booth_step_g: for i in 0 to c_number_of_steps_per_clock_period-1 generate
        multiply_wt_booth_step_inst : multiply_wt_booth_step
            generic map (
                g_multiplicand_width       => g_multiplicand_width,
                g_multiplier_width         => g_multiplier_width,
                g_multiplier_radix_4_width => c_multiplier_radix_4_width,
                g_number_of_rows           => c_number_of_rows(i)
            )
            port map (
                sum_matrix_i => sum_matrix(i),
                sum_matrix_o => sum_matrix(i+1)
            );
    end generate multiply_wt_booth_step_g;
    -- Registered variant (g_latency_steps/=0): the rows left after the last step of a
    -- clock period are stored, so that they can be fed back or handed over to the final addition.
    register_g: if g_latency_steps/=0 generate
        process(res_i, clk_i)
        begin
            -- Asynchronous reset clears all stored rows:
            if res_i='1' then
                sum_matrix_stored <= (others => (others => '0'));
            elsif rising_edge(clk_i) then
                -- When start_i=1 then also reg_enable=1
                if reg_enable='1' then
                    -- Only the lowest c_rows_to_store rows of the last step output are stored:
                    sum_matrix_stored <= sum_matrix(c_number_of_steps_per_clock_period)(c_rows_to_store-1 downto 0);
                end if;
            end if;
        end process;
        -- The final addition is started by the ready signal of the control module:
        ready_mul <= ready;
    end generate register_g;
    -- Combinatorial variant (g_latency_steps=0): no register is used, the rows left after
    -- the last step are handed over directly and start_i directly starts the final addition.
    combinatoric_g: if g_latency_steps=0 generate
        sum_matrix_stored <= sum_matrix(c_number_of_steps_per_clock_period)(c_rows_to_store-1 downto 0);
        ready_mul         <= start_i;
    end generate combinatoric_g;
    -- Final stage: gets the rows 0 and 1 of sum_matrix_stored and drives the
    -- outputs product_o and ready_o of this architecture.
    multiply_wt_booth_addition_inst : multiply_wt_booth_addition
        generic map (
            g_multiplicand_width => g_multiplicand_width,
            g_multiplier_width   => g_multiplier_width,
            g_latency_addition   => g_latency_addition 
        )
        port map (
            clk_i                 => clk_i,
            res_i                 => res_i,
            -- ready of the control module (g_latency_steps/=0) or start_i (g_latency_steps=0):
            start_i               => ready_mul,
            sum_matrix_stored_0_i => sum_matrix_stored(0),
            sum_matrix_stored_1_i => sum_matrix_stored(1),
            product_o             => product_o,
            ready_o               => ready_o
        );
end architecture;
