-- Filename: square_root_struct.vhd.vhd
-- Created by HDL-SCHEM-Editor at Sun Jan 12 11:54:05 2025
library ieee;
use ieee.math_real.all;
architecture struct of square_root is
    -- Returns g_latency, except that g_latency=0 is mapped to 1.
    -- The result sizes the period counter and the number of rows of c_step,
    -- so it must never be 0.
    function calculate_number_of_periods return natural is
        variable v_periods : natural := 1;
    begin
        if g_latency/=0 then
            v_periods := g_latency;
        end if;
        return v_periods;
    end function;
    constant c_nr_of_periods : natural := calculate_number_of_periods;

    -- Returns how many square_root_step stages are chained within one period.
    -- The total iteration count is ceil(g_radicand_width/2) + g_additional_root_bits.
    -- For g_latency 0 or 1 the total is returned unchanged; otherwise it is divided
    -- by c_nr_of_periods and rounded up.
    function calculate_number_of_iterations_per_period return natural is
        constant c_total_iterations : natural :=
            (g_radicand_width + g_radicand_width mod 2)/2 + g_additional_root_bits;
    begin
        if g_latency/=0 and g_latency/=1 then
            return integer(ceil(real(c_total_iterations)/real(c_nr_of_periods)));
        end if;
        return c_total_iterations;
    end function;
    constant c_number_of_iterations_per_period : natural := calculate_number_of_iterations_per_period;
    -- Because of the rounding up, this can exceed the total computed in the function above.
    constant c_number_of_iterations            : natural := c_number_of_iterations_per_period*c_nr_of_periods;

    -- A list of step numbers, each in the range 1 to c_number_of_iterations.
    type t_step_number_array is array (natural range <>) of natural range 1 to c_number_of_iterations;
    -- One list of c_number_of_iterations_per_period step numbers per period.
    type t_c_step is array (0 to c_nr_of_periods-1) of t_step_number_array(0 to c_number_of_iterations_per_period-1);

    -- Builds the step-number table c_step. The rows are rotated by one position:
    --   row c_nr_of_periods-1 holds the first group of steps (1 .. iterations per period),
    --   row p-1 holds group p (p = 1 .. c_nr_of_periods-1).
    -- Every row is written exactly once, so the table is fully defined.
    function calculate_step_numbers return t_c_step is
        variable v_table : t_c_step;
    begin
        -- First group of steps is stored in the last row.
        for slot in 0 to c_number_of_iterations_per_period-1 loop
            v_table(c_nr_of_periods-1)(slot) := slot + 1;
        end loop;
        -- Remaining groups are stored one row below their group number
        -- (null range when there is only one period).
        for grp in 1 to c_nr_of_periods-1 loop
            for slot in 0 to c_number_of_iterations_per_period-1 loop
                v_table(grp-1)(slot) := c_number_of_iterations_per_period*grp + slot + 1;
            end loop;
        end loop;
        return v_table;
    end function;
    constant c_step : t_c_step := calculate_step_numbers; -- c_step has values from 1 to c_number_of_iterations

    -- Operand vector used for radicand, remainder and root: 2 bits per iteration.
    subtype t_operand is unsigned(2*c_number_of_iterations-1 downto 0);
    type t_unsigned_op_width is array (natural range <>) of t_operand;

    -- Outputs of square_root_control.
    signal counter       : natural range 0 to c_nr_of_periods-1;
    signal enable_reg    : std_logic;
    signal first_step    : std_logic;
    signal ready_steps   : std_logic;

    -- Step numbers applied to the chain of square_root_step instances.
    signal steps         : t_step_number_array(0 to c_number_of_iterations_per_period-1);

    -- Datapath: index 0 is the chain input, index c_number_of_iterations_per_period the chain output.
    signal radicand_ext  : t_operand;
    signal remainder     : t_unsigned_op_width(c_number_of_iterations_per_period downto 0);
    signal root          : t_unsigned_op_width(c_number_of_iterations_per_period downto 0);
    signal remainder_reg : t_operand;
    signal root_reg      : t_operand;
    -- Control block; its implementation is not part of this file.
    -- counter_o is constrained to 0 .. g_counter_max.
    component square_root_control is
        generic (
            g_counter_max : natural := 16
        );
        port (
            clk_i         : in  std_logic;
            res_i         : in  std_logic;
            start_i       : in  std_logic;
            counter_o     : out natural range 0 to g_counter_max;
            enable_reg_o  : out std_logic;
            first_step_o  : out std_logic;
            ready_steps_o : out std_logic
        );
    end component;
    -- Single iteration stage; its implementation is not part of this file.
    -- All vector ports are 2*g_number_of_iterations bits wide and step_i is
    -- constrained to 1 .. g_number_of_iterations.
    component square_root_step is
        generic (
            g_number_of_iterations : natural := 16;
            g_additional_root_bits : natural := 0
        );
        port (
            remainder_i : in  unsigned(2*g_number_of_iterations-1 downto 0);
            root_i      : in  unsigned(2*g_number_of_iterations-1 downto 0);
            step_i      : in  natural range 1 to g_number_of_iterations;
            remainder_o : out unsigned(2*g_number_of_iterations-1 downto 0);
            root_o      : out unsigned(2*g_number_of_iterations-1 downto 0)
        );
    end component;
begin
    -- Control instance. The counter range is 0 .. c_nr_of_periods-1.
    -- For g_latency=0 the values of enable_reg, first_step and ready_steps have
    -- no effect on the datapath, and counter is constrained to the single value 0
    -- because g_counter_max is 0 in that case.
    square_root_control_inst : square_root_control
        generic map (
            g_counter_max => c_nr_of_periods-1
        )
        port map (
            -- inputs
            clk_i         => clk_i,
            res_i         => res_i,
            start_i       => start_i,
            -- outputs
            first_step_o  => first_step,
            enable_reg_o  => enable_reg,
            ready_steps_o => ready_steps,
            counter_o     => counter
        );
    -- Step numbers presented to the chain of square_root_step instances.
    --
    -- g_latency = 0 or 1: there is only one period, so the step numbers never
    -- change. They are driven as a constant here. Previously they were only
    -- written in the reset branch of a register whose enable was statically
    -- false, so the (combinational) g_latency=0 configuration produced wrong
    -- results until res_i had been asserted once.
    steps_const_g: if g_latency=0 or g_latency=1 generate
        steps <= c_step(c_nr_of_periods-1);
    end generate steps_const_g;
    -- g_latency >= 2: the step numbers are registered. Reset selects the last row
    -- of c_step (which holds steps 1 .. iterations per period); afterwards, each
    -- time enable_reg is '1', the row selected by the current counter value is loaded.
    steps_reg_g: if g_latency/=0 and g_latency/=1 generate
        p_step: process (res_i, clk_i)
        begin
            if res_i='1' then
                steps <= c_step(c_nr_of_periods-1);
            elsif rising_edge(clk_i) then
                if enable_reg='1' then
                    steps <= c_step(counter);
                end if;
            end if;
        end process;
    end generate steps_reg_g;
    -- Places radicand_i at the top of the operand vector and zero-fills the rest.
    -- If g_radicand_width is odd, the radicand starts one bit below the MSB,
    -- so an additional MSB with value 0 is added.
    p_ext: process(radicand_i)
        constant c_msb : natural := 2*c_number_of_iterations-1-(g_radicand_width mod 2);
        constant c_lsb : natural := c_msb-g_radicand_width+1;
        variable v_ext : unsigned(2*c_number_of_iterations-1 downto 0);
    begin
        v_ext                      := (others => '0');
        v_ext(c_msb downto c_lsb)  := radicand_i;
        radicand_ext               <= v_ext;
    end process;
    -- Chain input selection: the registered intermediate results are fed back only
    -- when g_latency is neither 0 nor 1 and first_step is not '1'. In every other
    -- case the chain starts from scratch (root = 0, remainder = extended radicand).
    root(0)      <= root_reg      when first_step/='1' and g_latency/=0 and g_latency/=1 else (others => '0');
    remainder(0) <= remainder_reg when first_step/='1' and g_latency/=0 and g_latency/=1 else radicand_ext;
    -- Chain of c_number_of_iterations_per_period stages: stage k reads
    -- root(k)/remainder(k), uses step number steps(k), and drives
    -- root(k+1)/remainder(k+1). All stages share the same generics.
    square_root_step_g: for k in 0 to c_number_of_iterations_per_period-1 generate
        square_root_step_inst : square_root_step
            generic map (
                g_number_of_iterations => c_number_of_iterations,
                g_additional_root_bits => g_additional_root_bits
            )
            port map (
                step_i      => steps(k),
                root_i      => root(k),
                remainder_i => remainder(k),
                root_o      => root(k+1),
                remainder_o => remainder(k+1)
            );
    end generate square_root_step_g;
    -- g_latency=0: no result registers. root_reg is wired straight to the output
    -- of the last stage, remainder_reg is tied to zero, and ready_o follows start_i.
    comb_g: if g_latency=0 generate
        ready_o       <= start_i;
        remainder_reg <= (others => '0');
        root_reg      <= root(c_number_of_iterations_per_period);
    end generate comb_g;
    -- g_latency/=0: the outputs of the last stage are stored in registers.
    clocked_g: if g_latency/=0 generate
        -- Asynchronous reset (res_i='1') clears both registers; on a rising clock
        -- edge they capture the chain outputs only while enable_reg is '1'.
        process (res_i, clk_i)
        begin
            if res_i='1' then
                remainder_reg <= (others => '0');
                root_reg      <= (others => '0');
            elsif rising_edge(clk_i) then
                if enable_reg='1' then
                    remainder_reg <= remainder(c_number_of_iterations_per_period);
                    root_reg      <= root     (c_number_of_iterations_per_period);
                end if;
            end if;
        end process;
        -- The ready flag comes from the control block.
        ready_o <= ready_steps;
    end generate clocked_g;
    -- The result is the slice of root_reg that starts at bit c_number_of_iterations-1
    -- and is g_radicand_width/2 + (g_radicand_width mod 2) + g_additional_root_bits bits wide.
    root_o <= root_reg(c_number_of_iterations-1 downto
                       c_number_of_iterations-(g_radicand_width/2+(g_radicand_width mod 2)+g_additional_root_bits));
end architecture;
